mercurial/worker.py
author Matt Harbison <matt_harbison@yahoo.com>
Sun, 12 Apr 2015 01:39:21 -0400
changeset 24790 baa11dde8c0e
parent 22199 b3e51675f98e
child 25660 328739ea70c3
permissions -rw-r--r--
match: add a subclass for dirstate normalizing of the matched patterns This class is only needed on case insensitive filesystems, and only for wdir context matches. It allows the user to not match the case of the items in the filesystem- especially for naming directories, which dirstate doesn't handle[1]. Making dirstate handle mismatched directory cases is too expensive[2]. Since dirstate doesn't apply to committed csets, this is only created by overriding basectx.match() in workingctx, and only on icasefs. The default arguments have been dropped, because the ctx must be passed to the matcher in order to function. For operations that can apply to both wdir and some other context, this ends up normalizing the filename to the case as it exists in the filesystem, and using that case for the lookup in the other context. See the diff example in the test. Previously, given a directory with an inexact case: - add worked as expected - diff, forget and status would silently ignore the request - files would exit with 1 - commit, revert and remove would fail (even when the commands leading up to them worked): $ hg ci -m "AbCDef" capsdir1/capsdir abort: CapsDir1/CapsDir: no match under directory! $ hg revert -r '.^' capsdir1/capsdir capsdir1\capsdir: no such file in rev 64dae27060b7 $ hg remove capsdir1/capsdir not removing capsdir1\capsdir: no tracked files [1] Globs are normalized, so that the -I and -X don't need to be specified with a case match. Without that, the second last remove (with -X) removes the files, leaving nothing for the last remove. However, specifying the files as 'glob:**.Txt' does not work. Perhaps this requires 're.IGNORECASE'? There are only a handful of places that create matchers directly, instead of being routed through the context.match() method. Some may benefit from changing over to using ctx.match() as a factory function: revset.checkstatus() revset.contains() revset.filelog() revset._matchfiles() localrepository._loadfilter() ignore.ignore() fileset.subrepo() filemerge._picktool() overrides.addlargefiles() lfcommands.lfconvert() kwtemplate.__init__() eolfile.__init__() eolfile.checkrev() acl.buildmatch() Currently, a toplevel subrepo can be named with an inexact case. However, the path auditor gets in the way of naming _anything_ in the subrepo if the top level case doesn't match. That is trickier to handle, because there's the user provided case, the case in the filesystem, and the case stored in .hgsub. This can be fixed next cycle. --- a/tests/test-subrepo-deep-nested-change.t +++ b/tests/test-subrepo-deep-nested-change.t @@ -170,8 +170,15 @@ R sub1/sub2/test.txt $ hg update -Cq $ touch sub1/sub2/folder/bar +#if icasefs + $ hg addremove Sub1/sub2 + abort: path 'Sub1\sub2' is inside nested repo 'Sub1' + [255] + $ hg -q addremove sub1/sub2 +#else $ hg addremove sub1/sub2 adding sub1/sub2/folder/bar (glob) +#endif $ hg status -S A sub1/sub2/folder/bar ? foo/bar/abc The narrowmatcher class may need to be tweaked when that is fixed. [1] http://www.selenic.com/pipermail/mercurial-devel/2015-April/068183.html [2] http://www.selenic.com/pipermail/mercurial-devel/2015-April/068191.html

# worker.py - master-slave parallelism support
#
# Copyright 2013 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from i18n import _
import errno, os, signal, sys, threading
import util

def countcpus():
    '''try to count the number of CPUs on the system'''

    # posix
    try:
        n = int(os.sysconf('SC_NPROCESSORS_ONLN'))
        if n > 0:
            return n
    except (AttributeError, ValueError):
        pass

    # windows
    try:
        n = int(os.environ['NUMBER_OF_PROCESSORS'])
        if n > 0:
            return n
    except (KeyError, ValueError):
        pass

    return 1

def _numworkers(ui):
    s = ui.config('worker', 'numcpus')
    if s:
        try:
            n = int(s)
            if n >= 1:
                return n
        except ValueError:
            raise util.Abort(_('number of cpus must be an integer'))
    return min(max(countcpus(), 4), 32)

if os.name == 'posix':
    _startupcost = 0.01
else:
    _startupcost = 1e30

def worthwhile(ui, costperop, nops):
    '''try to determine whether the benefit of multiple processes can
    outweigh the cost of starting them'''
    linear = costperop * nops
    workers = _numworkers(ui)
    benefit = linear - (_startupcost * workers + linear / workers)
    return benefit >= 0.15

def worker(ui, costperarg, func, staticargs, args):
    '''run a function, possibly in parallel in multiple worker
    processes.

    returns a progress iterator

    costperarg - cost of a single task

    func - function to run

    staticargs - arguments to pass to every invocation of the function

    args - arguments to split into chunks, to pass to individual
    workers
    '''
    if worthwhile(ui, costperarg, len(args)):
        return _platformworker(ui, func, staticargs, args)
    return func(*staticargs + (args,))

def _posixworker(ui, func, staticargs, args):
    rfd, wfd = os.pipe()
    workers = _numworkers(ui)
    oldhandler = signal.getsignal(signal.SIGINT)
    signal.signal(signal.SIGINT, signal.SIG_IGN)
    pids, problem = [], [0]
    for pargs in partition(args, workers):
        pid = os.fork()
        if pid == 0:
            signal.signal(signal.SIGINT, oldhandler)
            try:
                os.close(rfd)
                for i, item in func(*(staticargs + (pargs,))):
                    os.write(wfd, '%d %s\n' % (i, item))
                os._exit(0)
            except KeyboardInterrupt:
                os._exit(255)
                # other exceptions are allowed to propagate, we rely
                # on lock.py's pid checks to avoid release callbacks
        pids.append(pid)
    pids.reverse()
    os.close(wfd)
    fp = os.fdopen(rfd, 'rb', 0)
    def killworkers():
        # if one worker bails, there's no good reason to wait for the rest
        for p in pids:
            try:
                os.kill(p, signal.SIGTERM)
            except OSError, err:
                if err.errno != errno.ESRCH:
                    raise
    def waitforworkers():
        for _pid in pids:
            st = _exitstatus(os.wait()[1])
            if st and not problem[0]:
                problem[0] = st
                killworkers()
    t = threading.Thread(target=waitforworkers)
    t.start()
    def cleanup():
        signal.signal(signal.SIGINT, oldhandler)
        t.join()
        status = problem[0]
        if status:
            if status < 0:
                os.kill(os.getpid(), -status)
            sys.exit(status)
    try:
        for line in fp:
            l = line.split(' ', 1)
            yield int(l[0]), l[1][:-1]
    except: # re-raises
        killworkers()
        cleanup()
        raise
    cleanup()

def _posixexitstatus(code):
    '''convert a posix exit status into the same form returned by
    os.spawnv

    returns None if the process was stopped instead of exiting'''
    if os.WIFEXITED(code):
        return os.WEXITSTATUS(code)
    elif os.WIFSIGNALED(code):
        return -os.WTERMSIG(code)

if os.name != 'nt':
    _platformworker = _posixworker
    _exitstatus = _posixexitstatus

def partition(lst, nslices):
    '''partition a list into N slices of equal size'''
    n = len(lst)
    chunk, slop = n / nslices, n % nslices
    end = 0
    for i in xrange(nslices):
        start = end
        end = start + chunk
        if slop:
            end += 1
            slop -= 1
        yield lst[start:end]