Mercurial > hg-stable
annotate mercurial/worker.py @ 18638:047110c0e2a8
worker: allow a function to be run in multiple worker processes
If we estimate that it will be worth the cost, we run the function in
multiple processes. Otherwise, we run it in-process.
Children report progress to the parent through a pipe.
Not yet implemented on Windows.
author | Bryan O'Sullivan <bryano@fb.com> |
---|---|
date | Sat, 09 Feb 2013 15:51:32 -0800 |
parents | ac4dbceeb14a |
children | d1a2b086d058 |
rev | line source |
---|---|
18635
fed06dd07665
worker: count the number of CPUs
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
1 # worker.py - master-slave parallelism support |
fed06dd07665
worker: count the number of CPUs
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
2 # |
fed06dd07665
worker: count the number of CPUs
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
3 # Copyright 2013 Facebook, Inc. |
fed06dd07665
worker: count the number of CPUs
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
4 # |
fed06dd07665
worker: count the number of CPUs
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
5 # This software may be used and distributed according to the terms of the |
fed06dd07665
worker: count the number of CPUs
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
6 # GNU General Public License version 2 or any later version. |
fed06dd07665
worker: count the number of CPUs
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
7 |
18636
dcb27c153a40
worker: estimate whether it's worth running a task in parallel
Bryan O'Sullivan <bryano@fb.com>
parents:
18635
diff
changeset
|
8 from i18n import _ |
18638
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
9 import os, signal, sys, util |
18635
fed06dd07665
worker: count the number of CPUs
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
10 |
fed06dd07665
worker: count the number of CPUs
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
11 def countcpus(): |
fed06dd07665
worker: count the number of CPUs
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
12 '''try to count the number of CPUs on the system''' |
fed06dd07665
worker: count the number of CPUs
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
13 |
fed06dd07665
worker: count the number of CPUs
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
14 # posix |
fed06dd07665
worker: count the number of CPUs
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
15 try: |
fed06dd07665
worker: count the number of CPUs
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
16 n = int(os.sysconf('SC_NPROCESSORS_ONLN')) |
fed06dd07665
worker: count the number of CPUs
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
17 if n > 0: |
fed06dd07665
worker: count the number of CPUs
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
18 return n |
fed06dd07665
worker: count the number of CPUs
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
19 except (AttributeError, ValueError): |
fed06dd07665
worker: count the number of CPUs
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
20 pass |
fed06dd07665
worker: count the number of CPUs
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
21 |
fed06dd07665
worker: count the number of CPUs
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
22 # windows |
fed06dd07665
worker: count the number of CPUs
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
23 try: |
fed06dd07665
worker: count the number of CPUs
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
24 n = int(os.environ['NUMBER_OF_PROCESSORS']) |
fed06dd07665
worker: count the number of CPUs
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
25 if n > 0: |
fed06dd07665
worker: count the number of CPUs
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
26 return n |
fed06dd07665
worker: count the number of CPUs
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
27 except (KeyError, ValueError): |
fed06dd07665
worker: count the number of CPUs
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
28 pass |
fed06dd07665
worker: count the number of CPUs
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
29 |
fed06dd07665
worker: count the number of CPUs
Bryan O'Sullivan <bryano@fb.com>
parents:
diff
changeset
|
30 return 1 |
18636
dcb27c153a40
worker: estimate whether it's worth running a task in parallel
Bryan O'Sullivan <bryano@fb.com>
parents:
18635
diff
changeset
|
31 |
dcb27c153a40
worker: estimate whether it's worth running a task in parallel
Bryan O'Sullivan <bryano@fb.com>
parents:
18635
diff
changeset
|
32 def _numworkers(ui): |
dcb27c153a40
worker: estimate whether it's worth running a task in parallel
Bryan O'Sullivan <bryano@fb.com>
parents:
18635
diff
changeset
|
33 s = ui.config('worker', 'numcpus') |
dcb27c153a40
worker: estimate whether it's worth running a task in parallel
Bryan O'Sullivan <bryano@fb.com>
parents:
18635
diff
changeset
|
34 if s: |
dcb27c153a40
worker: estimate whether it's worth running a task in parallel
Bryan O'Sullivan <bryano@fb.com>
parents:
18635
diff
changeset
|
35 try: |
dcb27c153a40
worker: estimate whether it's worth running a task in parallel
Bryan O'Sullivan <bryano@fb.com>
parents:
18635
diff
changeset
|
36 n = int(s) |
dcb27c153a40
worker: estimate whether it's worth running a task in parallel
Bryan O'Sullivan <bryano@fb.com>
parents:
18635
diff
changeset
|
37 if n >= 1: |
dcb27c153a40
worker: estimate whether it's worth running a task in parallel
Bryan O'Sullivan <bryano@fb.com>
parents:
18635
diff
changeset
|
38 return n |
dcb27c153a40
worker: estimate whether it's worth running a task in parallel
Bryan O'Sullivan <bryano@fb.com>
parents:
18635
diff
changeset
|
39 except ValueError: |
dcb27c153a40
worker: estimate whether it's worth running a task in parallel
Bryan O'Sullivan <bryano@fb.com>
parents:
18635
diff
changeset
|
40 raise util.Abort(_('number of cpus must be an integer')) |
dcb27c153a40
worker: estimate whether it's worth running a task in parallel
Bryan O'Sullivan <bryano@fb.com>
parents:
18635
diff
changeset
|
41 return min(max(countcpus(), 4), 32) |
dcb27c153a40
worker: estimate whether it's worth running a task in parallel
Bryan O'Sullivan <bryano@fb.com>
parents:
18635
diff
changeset
|
42 |
dcb27c153a40
worker: estimate whether it's worth running a task in parallel
Bryan O'Sullivan <bryano@fb.com>
parents:
18635
diff
changeset
|
43 if os.name == 'posix': |
dcb27c153a40
worker: estimate whether it's worth running a task in parallel
Bryan O'Sullivan <bryano@fb.com>
parents:
18635
diff
changeset
|
44 _startupcost = 0.01 |
dcb27c153a40
worker: estimate whether it's worth running a task in parallel
Bryan O'Sullivan <bryano@fb.com>
parents:
18635
diff
changeset
|
45 else: |
dcb27c153a40
worker: estimate whether it's worth running a task in parallel
Bryan O'Sullivan <bryano@fb.com>
parents:
18635
diff
changeset
|
46 _startupcost = 1e30 |
dcb27c153a40
worker: estimate whether it's worth running a task in parallel
Bryan O'Sullivan <bryano@fb.com>
parents:
18635
diff
changeset
|
47 |
dcb27c153a40
worker: estimate whether it's worth running a task in parallel
Bryan O'Sullivan <bryano@fb.com>
parents:
18635
diff
changeset
|
48 def worthwhile(ui, costperop, nops): |
dcb27c153a40
worker: estimate whether it's worth running a task in parallel
Bryan O'Sullivan <bryano@fb.com>
parents:
18635
diff
changeset
|
49 '''try to determine whether the benefit of multiple processes can |
dcb27c153a40
worker: estimate whether it's worth running a task in parallel
Bryan O'Sullivan <bryano@fb.com>
parents:
18635
diff
changeset
|
50 outweigh the cost of starting them''' |
dcb27c153a40
worker: estimate whether it's worth running a task in parallel
Bryan O'Sullivan <bryano@fb.com>
parents:
18635
diff
changeset
|
51 linear = costperop * nops |
dcb27c153a40
worker: estimate whether it's worth running a task in parallel
Bryan O'Sullivan <bryano@fb.com>
parents:
18635
diff
changeset
|
52 workers = _numworkers(ui) |
dcb27c153a40
worker: estimate whether it's worth running a task in parallel
Bryan O'Sullivan <bryano@fb.com>
parents:
18635
diff
changeset
|
53 benefit = linear - (_startupcost * workers + linear / workers) |
dcb27c153a40
worker: estimate whether it's worth running a task in parallel
Bryan O'Sullivan <bryano@fb.com>
parents:
18635
diff
changeset
|
54 return benefit >= 0.15 |
18637
ac4dbceeb14a
worker: partition a list (of tasks) into equal-sized chunks
Bryan O'Sullivan <bryano@fb.com>
parents:
18636
diff
changeset
|
55 |
18638
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
56 def worker(ui, costperarg, func, staticargs, args): |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
57 '''run a function, possibly in parallel in multiple worker |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
58 processes. |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
59 |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
60 returns a progress iterator |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
61 |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
62 costperarg - cost of a single task |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
63 |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
64 func - function to run |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
65 |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
66 staticargs - arguments to pass to every invocation of the function |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
67 |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
68 args - arguments to split into chunks, to pass to individual |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
69 workers |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
70 ''' |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
71 if worthwhile(ui, costperarg, len(args)): |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
72 return _platformworker(ui, func, staticargs, args) |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
73 return func(*staticargs + (args,)) |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
74 |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
75 def _posixworker(ui, func, staticargs, args): |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
76 rfd, wfd = os.pipe() |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
77 workers = _numworkers(ui) |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
78 for pargs in partition(args, workers): |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
79 pid = os.fork() |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
80 if pid == 0: |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
81 try: |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
82 os.close(rfd) |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
83 for i, item in func(*(staticargs + (pargs,))): |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
84 os.write(wfd, '%d %s\n' % (i, item)) |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
85 os._exit(0) |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
86 except KeyboardInterrupt: |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
87 os._exit(255) |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
88 os.close(wfd) |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
89 fp = os.fdopen(rfd, 'rb', 0) |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
90 oldhandler = signal.getsignal(signal.SIGINT) |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
91 signal.signal(signal.SIGINT, signal.SIG_IGN) |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
92 def cleanup(): |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
93 # python 2.4 is too dumb for try/yield/finally |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
94 signal.signal(signal.SIGINT, oldhandler) |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
95 problems = 0 |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
96 for i in xrange(workers): |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
97 problems |= os.wait()[1] |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
98 if problems: |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
99 sys.exit(1) |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
100 try: |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
101 for line in fp: |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
102 l = line.split(' ', 1) |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
103 yield int(l[0]), l[1][:-1] |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
104 except: # re-raises |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
105 cleanup() |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
106 raise |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
107 cleanup() |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
108 |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
109 if os.name != 'nt': |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
110 _platformworker = _posixworker |
047110c0e2a8
worker: allow a function to be run in multiple worker processes
Bryan O'Sullivan <bryano@fb.com>
parents:
18637
diff
changeset
|
111 |
18637
ac4dbceeb14a
worker: partition a list (of tasks) into equal-sized chunks
Bryan O'Sullivan <bryano@fb.com>
parents:
18636
diff
changeset
|
112 def partition(lst, nslices): |
ac4dbceeb14a
worker: partition a list (of tasks) into equal-sized chunks
Bryan O'Sullivan <bryano@fb.com>
parents:
18636
diff
changeset
|
113 '''partition a list into N slices of equal size''' |
ac4dbceeb14a
worker: partition a list (of tasks) into equal-sized chunks
Bryan O'Sullivan <bryano@fb.com>
parents:
18636
diff
changeset
|
114 n = len(lst) |
ac4dbceeb14a
worker: partition a list (of tasks) into equal-sized chunks
Bryan O'Sullivan <bryano@fb.com>
parents:
18636
diff
changeset
|
115 chunk, slop = n / nslices, n % nslices |
ac4dbceeb14a
worker: partition a list (of tasks) into equal-sized chunks
Bryan O'Sullivan <bryano@fb.com>
parents:
18636
diff
changeset
|
116 end = 0 |
ac4dbceeb14a
worker: partition a list (of tasks) into equal-sized chunks
Bryan O'Sullivan <bryano@fb.com>
parents:
18636
diff
changeset
|
117 for i in xrange(nslices): |
ac4dbceeb14a
worker: partition a list (of tasks) into equal-sized chunks
Bryan O'Sullivan <bryano@fb.com>
parents:
18636
diff
changeset
|
118 start = end |
ac4dbceeb14a
worker: partition a list (of tasks) into equal-sized chunks
Bryan O'Sullivan <bryano@fb.com>
parents:
18636
diff
changeset
|
119 end = start + chunk |
ac4dbceeb14a
worker: partition a list (of tasks) into equal-sized chunks
Bryan O'Sullivan <bryano@fb.com>
parents:
18636
diff
changeset
|
120 if slop: |
ac4dbceeb14a
worker: partition a list (of tasks) into equal-sized chunks
Bryan O'Sullivan <bryano@fb.com>
parents:
18636
diff
changeset
|
121 end += 1 |
ac4dbceeb14a
worker: partition a list (of tasks) into equal-sized chunks
Bryan O'Sullivan <bryano@fb.com>
parents:
18636
diff
changeset
|
122 slop -= 1 |
ac4dbceeb14a
worker: partition a list (of tasks) into equal-sized chunks
Bryan O'Sullivan <bryano@fb.com>
parents:
18636
diff
changeset
|
123 yield lst[start:end] |