Mercurial > hg
view mercurial/worker.py @ 25527:262e6ad93885
phases: really fix native phase computation
For some reason (probably rebase issue, leprechaun or badly resolved .rej)
1635579f9baf contains only half of the emailed patches and do not fix the bug.
This patch adds the other half and enable the sweet native computation for real.
As expected this provide massive speedup along the board.
revset #0: not public()
plain first
0) 0.011960 0.010523
1) 0.000465 3% 0.000492 4%
revset #1: (tip~1000::) - public()
plain first
0) 0.025700 0.025169
1) 0.002864 11% 0.001899 7%
revset #2: not public() and branch("default")
plain first
0) 0.022842 0.020863
1) 0.011418 49% 0.010948 52%
However, it has a less impact (even bad) on first result time in simple
situation. This comes from the overhead of building the set and filtering it.
This is especially true on my Mercurial repository (used here) where about 1/3
of the changesets are non public and hidden. This could be mitigated by a
caching of the set and a better usage of smartset in '_notpublic'. (But this
won't happen in this patch because the win is massive everywhere else).
revset #0: not public()
last
0) 0.000081
1) 0.000493 x6.1 <-- bad impact
revset #1: (tip~1000::) - public()
last
0) 0.013966
1) 0.002737 19%
revset #2: not public() and branch("default")
last
0) 0.011021
1) 0.011038
The effect mostly disappear when the number of non-public changesets is small
and/or the repo get bigger. Result for Mozilla central:
Mozilla
revset #0: not public()
plain first last
0) 0.092787 0.084094 0.000080
1) 0.000054 0% 0.000083 0% 0.000083
revset #1: (tip~1000::) - public()
plain first last
0) 0.215607 0.183996 0.124962
1) 0.031620 14% 0.006616 3% 0.031168 24%
revset #2: not public() and branch("default")
plain first last
0) 0.092626 0.082687 0.000162
1) 0.000139 0% 0.000165 0% 0.000167
author | Pierre-Yves David <pierre-yves.david@fb.com> |
---|---|
date | Wed, 10 Jun 2015 19:26:16 -0700 |
parents | b3e51675f98e |
children | 328739ea70c3 |
line wrap: on
line source
# worker.py - master-slave parallelism support # # Copyright 2013 Facebook, Inc. # # This software may be used and distributed according to the terms of the # GNU General Public License version 2 or any later version. from i18n import _ import errno, os, signal, sys, threading import util def countcpus(): '''try to count the number of CPUs on the system''' # posix try: n = int(os.sysconf('SC_NPROCESSORS_ONLN')) if n > 0: return n except (AttributeError, ValueError): pass # windows try: n = int(os.environ['NUMBER_OF_PROCESSORS']) if n > 0: return n except (KeyError, ValueError): pass return 1 def _numworkers(ui): s = ui.config('worker', 'numcpus') if s: try: n = int(s) if n >= 1: return n except ValueError: raise util.Abort(_('number of cpus must be an integer')) return min(max(countcpus(), 4), 32) if os.name == 'posix': _startupcost = 0.01 else: _startupcost = 1e30 def worthwhile(ui, costperop, nops): '''try to determine whether the benefit of multiple processes can outweigh the cost of starting them''' linear = costperop * nops workers = _numworkers(ui) benefit = linear - (_startupcost * workers + linear / workers) return benefit >= 0.15 def worker(ui, costperarg, func, staticargs, args): '''run a function, possibly in parallel in multiple worker processes. returns a progress iterator costperarg - cost of a single task func - function to run staticargs - arguments to pass to every invocation of the function args - arguments to split into chunks, to pass to individual workers ''' if worthwhile(ui, costperarg, len(args)): return _platformworker(ui, func, staticargs, args) return func(*staticargs + (args,)) def _posixworker(ui, func, staticargs, args): rfd, wfd = os.pipe() workers = _numworkers(ui) oldhandler = signal.getsignal(signal.SIGINT) signal.signal(signal.SIGINT, signal.SIG_IGN) pids, problem = [], [0] for pargs in partition(args, workers): pid = os.fork() if pid == 0: signal.signal(signal.SIGINT, oldhandler) try: os.close(rfd) for i, item in func(*(staticargs + (pargs,))): os.write(wfd, '%d %s\n' % (i, item)) os._exit(0) except KeyboardInterrupt: os._exit(255) # other exceptions are allowed to propagate, we rely # on lock.py's pid checks to avoid release callbacks pids.append(pid) pids.reverse() os.close(wfd) fp = os.fdopen(rfd, 'rb', 0) def killworkers(): # if one worker bails, there's no good reason to wait for the rest for p in pids: try: os.kill(p, signal.SIGTERM) except OSError, err: if err.errno != errno.ESRCH: raise def waitforworkers(): for _pid in pids: st = _exitstatus(os.wait()[1]) if st and not problem[0]: problem[0] = st killworkers() t = threading.Thread(target=waitforworkers) t.start() def cleanup(): signal.signal(signal.SIGINT, oldhandler) t.join() status = problem[0] if status: if status < 0: os.kill(os.getpid(), -status) sys.exit(status) try: for line in fp: l = line.split(' ', 1) yield int(l[0]), l[1][:-1] except: # re-raises killworkers() cleanup() raise cleanup() def _posixexitstatus(code): '''convert a posix exit status into the same form returned by os.spawnv returns None if the process was stopped instead of exiting''' if os.WIFEXITED(code): return os.WEXITSTATUS(code) elif os.WIFSIGNALED(code): return -os.WTERMSIG(code) if os.name != 'nt': _platformworker = _posixworker _exitstatus = _posixexitstatus def partition(lst, nslices): '''partition a list into N slices of equal size''' n = len(lst) chunk, slop = n / nslices, n % nslices end = 0 for i in xrange(nslices): start = end end = start + chunk if slop: end += 1 slop -= 1 yield lst[start:end]