view mercurial/match.py @ 16719:e7bf09acd410

localrepo: add branchtip() method for faster single-branch lookups For the PyPy repo with 744 branches and 843 branch heads, this brings hg log -r default over NFS from: CallCount Recursive Total(ms) Inline(ms) module:lineno(function) 3249 0 1.3222 1.3222 <open> 3244 0 0.6211 0.6211 <method 'close' of 'file' objects> 3243 0 0.0800 0.0800 <method 'read' of 'file' objects> 3241 0 0.0660 0.0660 <method 'seek' of 'file' objects> 3905 0 0.0476 0.0476 <zlib.decompress> 3281 0 2.6756 0.0472 mercurial.changelog:182(read) +3281 0 2.5256 0.0453 +mercurial.revlog:881(revision) +3276 0 0.0389 0.0196 +mercurial.changelog:28(decodeextra) +6562 0 0.0123 0.0123 +<method 'split' of 'str' objects> +6562 0 0.0408 0.0073 +mercurial.encoding:61(tolocal) +3281 0 0.0054 0.0054 +<method 'index' of 'str' objects> 3241 0 2.2464 0.0456 mercurial.revlog:818(_loadchunk) +3241 0 0.6205 0.6205 +<method 'close' of 'file' objects> +3241 0 0.0765 0.0765 +<method 'read' of 'file' objects> +3241 0 0.0660 0.0660 +<method 'seek' of 'file' objects> +3241 0 1.4209 0.0135 +mercurial.store:374(__call__) +3241 0 0.0122 0.0107 +mercurial.revlog:810(_addchunk) 3281 0 2.5256 0.0453 mercurial.revlog:881(revision) +3280 0 0.0175 0.0175 +mercurial.revlog:305(rev) +3281 0 2.2819 0.0119 +mercurial.revlog:847(_chunkraw) +3281 0 0.0603 0.0083 +mercurial.revlog:945(_checkhash) +3281 0 0.0051 0.0051 +mercurial.revlog:349(flags) +3281 0 0.0040 0.0040 +<mercurial.mpatch.patches> 13682 0 0.0479 0.0248 <method 'decode' of 'str' objects> +7418 0 0.0228 0.0076 +encodings.utf_8:15(decode) +1 0 0.0003 0.0000 +encodings:71(search_function) 3248 0 1.3995 0.0246 mercurial.scmutil:218(__call__) +3248 0 1.3222 1.3222 +<open> +3248 0 0.0235 0.0184 +os.path:80(split) +3248 0 0.0084 0.0068 +mercurial.scmutil:92(__call__) Time: real 2.750 secs (user 0.680+0.000 sys 0.360+0.000) down to: CallCount Recursive Total(ms) Inline(ms) module:lineno(function) 55 31 0.0197 0.0163 <__import__> +1 0 0.0006 0.0002 +mercurial.context:8(<module>) +1 0 0.0042 0.0001 +mercurial.revlog:12(<module>) +1 0 0.0002 0.0001 +mercurial.match:8(<module>) +1 0 0.0003 0.0001 +mercurial.dirstate:7(<module>) +1 0 0.0057 0.0001 +mercurial.changelog:8(<module>) 1 0 0.0117 0.0032 mercurial.localrepo:525(_readbranchcache) +844 0 0.0015 0.0015 +<binascii.unhexlify> +845 0 0.0010 0.0010 +<method 'split' of 'str' objects> +843 0 0.0045 0.0009 +mercurial.encoding:61(tolocal) +843 0 0.0004 0.0004 +<method 'setdefault' of 'dict' objects> +1 0 0.0003 0.0003 +<method 'close' of 'file' objects> 3 0 0.0029 0.0029 <method 'read' of 'file' objects> 9 0 0.0018 0.0018 <open> 990 0 0.0017 0.0017 <binascii.unhexlify> 53 0 0.0016 0.0016 mercurial.demandimport:43(__init__) 862 0 0.0015 0.0015 <_codecs.utf_8_decode> 862 0 0.0037 0.0014 <method 'decode' of 'str' objects> +862 0 0.0023 0.0008 +encodings.utf_8:15(decode) 981 0 0.0011 0.0011 <method 'split' of 'str' objects> 861 0 0.0046 0.0009 mercurial.encoding:61(tolocal) +861 0 0.0037 0.0014 +<method 'decode' of 'str' objects> 862 0 0.0023 0.0008 encodings.utf_8:15(decode) +862 0 0.0015 0.0015 +<_codecs.utf_8_decode> 4 0 0.0008 0.0008 <method 'close' of 'file' objects> 179 154 0.0202 0.0004 mercurial.demandimport:83(__getattribute__) +36 11 0.0199 0.0003 +mercurial.demandimport:55(_load) +72 0 0.0001 0.0001 +mercurial.demandimport:83(__getattribute__) +36 0 0.0000 0.0000 +<getattr> 1 0 0.0015 0.0004 mercurial.tags:148(_readtagcache) Time: real 0.060 secs (user 0.030+0.000 sys 0.010+0.000)
author Brodie Rao <brodie@sf.io>
date Sun, 13 May 2012 14:04:04 +0200
parents e34106fa0dc3
children 977c80123835
line wrap: on
line source

# match.py - filename matching
#
#  Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

import re
import scmutil, util, fileset
from i18n import _

def _expandsets(pats, ctx):
    '''convert set: patterns into a list of files in the given context'''
    fset = set()
    other = []

    for kind, expr in pats:
        if kind == 'set':
            if not ctx:
                raise util.Abort("fileset expression with no context")
            s = fileset.getfileset(ctx, expr)
            fset.update(s)
            continue
        other.append((kind, expr))
    return fset, other

class match(object):
    def __init__(self, root, cwd, patterns, include=[], exclude=[],
                 default='glob', exact=False, auditor=None, ctx=None):
        """build an object to match a set of file patterns

        arguments:
        root - the canonical root of the tree you're matching against
        cwd - the current working directory, if relevant
        patterns - patterns to find
        include - patterns to include
        exclude - patterns to exclude
        default - if a pattern in names has no explicit type, assume this one
        exact - patterns are actually literals

        a pattern is one of:
        'glob:<glob>' - a glob relative to cwd
        're:<regexp>' - a regular expression
        'path:<path>' - a path relative to canonroot
        'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
        'relpath:<path>' - a path relative to cwd
        'relre:<regexp>' - a regexp that needn't match the start of a name
        'set:<fileset>' - a fileset expression
        '<something>' - a pattern of the specified default type
        """

        self._root = root
        self._cwd = cwd
        self._files = []
        self._anypats = bool(include or exclude)
        self._ctx = ctx

        if include:
            pats = _normalize(include, 'glob', root, cwd, auditor)
            self.includepat, im = _buildmatch(ctx, pats, '(?:/|$)')
        if exclude:
            pats = _normalize(exclude, 'glob', root, cwd, auditor)
            self.excludepat, em = _buildmatch(ctx, pats, '(?:/|$)')
        if exact:
            self._files = patterns
            pm = self.exact
        elif patterns:
            pats = _normalize(patterns, default, root, cwd, auditor)
            self._files = _roots(pats)
            self._anypats = self._anypats or _anypats(pats)
            self.patternspat, pm = _buildmatch(ctx, pats, '$')

        if patterns or exact:
            if include:
                if exclude:
                    m = lambda f: im(f) and not em(f) and pm(f)
                else:
                    m = lambda f: im(f) and pm(f)
            else:
                if exclude:
                    m = lambda f: not em(f) and pm(f)
                else:
                    m = pm
        else:
            if include:
                if exclude:
                    m = lambda f: im(f) and not em(f)
                else:
                    m = im
            else:
                if exclude:
                    m = lambda f: not em(f)
                else:
                    m = lambda f: True

        self.matchfn = m
        self._fmap = set(self._files)

    def __call__(self, fn):
        return self.matchfn(fn)
    def __iter__(self):
        for f in self._files:
            yield f
    def bad(self, f, msg):
        '''callback for each explicit file that can't be
        found/accessed, with an error message
        '''
        pass
    def dir(self, f):
        pass
    def missing(self, f):
        pass
    def exact(self, f):
        return f in self._fmap
    def rel(self, f):
        return util.pathto(self._root, self._cwd, f)
    def files(self):
        return self._files
    def anypats(self):
        return self._anypats
    def always(self):
        return False

class exact(match):
    def __init__(self, root, cwd, files):
        match.__init__(self, root, cwd, files, exact = True)

class always(match):
    def __init__(self, root, cwd):
        match.__init__(self, root, cwd, [])
    def always(self):
        return True

class narrowmatcher(match):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = narrowmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> m2.rel('b.txt')
    'b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        self._root = matcher._root
        self._cwd = matcher._cwd
        self._path = path
        self._matcher = matcher

        self._files = [f[len(path) + 1:] for f in matcher._files
                       if f.startswith(path + "/")]
        self._anypats = matcher._anypats
        self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn)
        self._fmap = set(self._files)

    def bad(self, f, msg):
        self._matcher.bad(self._path + "/" + f, msg)

def patkind(pat):
    return _patsplit(pat, None)[0]

def _patsplit(pat, default):
    """Split a string into an optional pattern kind prefix and the
    actual pattern."""
    if ':' in pat:
        kind, val = pat.split(':', 1)
        if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
                    'listfile', 'listfile0', 'set'):
            return kind, val
    return default, pat

def _globre(pat):
    "convert a glob pattern into a regexp"
    i, n = 0, len(pat)
    res = ''
    group = 0
    escape = re.escape
    def peek():
        return i < n and pat[i]
    while i < n:
        c = pat[i]
        i += 1
        if c not in '*?[{},\\':
            res += escape(c)
        elif c == '*':
            if peek() == '*':
                i += 1
                res += '.*'
            else:
                res += '[^/]*'
        elif c == '?':
            res += '.'
        elif c == '[':
            j = i
            if j < n and pat[j] in '!]':
                j += 1
            while j < n and pat[j] != ']':
                j += 1
            if j >= n:
                res += '\\['
            else:
                stuff = pat[i:j].replace('\\','\\\\')
                i = j + 1
                if stuff[0] == '!':
                    stuff = '^' + stuff[1:]
                elif stuff[0] == '^':
                    stuff = '\\' + stuff
                res = '%s[%s]' % (res, stuff)
        elif c == '{':
            group += 1
            res += '(?:'
        elif c == '}' and group:
            res += ')'
            group -= 1
        elif c == ',' and group:
            res += '|'
        elif c == '\\':
            p = peek()
            if p:
                i += 1
                res += escape(p)
            else:
                res += escape(c)
        else:
            res += escape(c)
    return res

def _regex(kind, name, tail):
    '''convert a pattern into a regular expression'''
    if not name:
        return ''
    if kind == 're':
        return name
    elif kind == 'path':
        return '^' + re.escape(name) + '(?:/|$)'
    elif kind == 'relglob':
        return '(?:|.*/)' + _globre(name) + tail
    elif kind == 'relpath':
        return re.escape(name) + '(?:/|$)'
    elif kind == 'relre':
        if name.startswith('^'):
            return name
        return '.*' + name
    return _globre(name) + tail

def _buildmatch(ctx, pats, tail):
    fset, pats = _expandsets(pats, ctx)
    if not pats:
        return "", fset.__contains__

    pat, mf = _buildregexmatch(pats, tail)
    if fset:
        return pat, lambda f: f in fset or mf(f)
    return pat, mf

def _buildregexmatch(pats, tail):
    """build a matching function from a set of patterns"""
    try:
        pat = '(?:%s)' % '|'.join([_regex(k, p, tail) for (k, p) in pats])
        if len(pat) > 20000:
            raise OverflowError
        return pat, re.compile(pat).match
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        l = len(pats)
        if l < 2:
            raise
        pata, a = _buildregexmatch(pats[:l//2], tail)
        patb, b = _buildregexmatch(pats[l//2:], tail)
        return pat, lambda s: a(s) or b(s)
    except re.error:
        for k, p in pats:
            try:
                re.compile('(?:%s)' % _regex(k, p, tail))
            except re.error:
                raise util.Abort(_("invalid pattern (%s): %s") % (k, p))
        raise util.Abort(_("invalid pattern"))

def _normalize(names, default, root, cwd, auditor):
    pats = []
    for kind, name in [_patsplit(p, default) for p in names]:
        if kind in ('glob', 'relpath'):
            name = scmutil.canonpath(root, cwd, name, auditor)
        elif kind in ('relglob', 'path'):
            name = util.normpath(name)
        elif kind in ('listfile', 'listfile0'):
            try:
                files = util.readfile(name)
                if kind == 'listfile0':
                    files = files.split('\0')
                else:
                    files = files.splitlines()
                files = [f for f in files if f]
            except EnvironmentError:
                raise util.Abort(_("unable to read file list (%s)") % name)
            pats += _normalize(files, default, root, cwd, auditor)
            continue

        pats.append((kind, name))
    return pats

def _roots(patterns):
    r = []
    for kind, name in patterns:
        if kind == 'glob': # find the non-glob prefix
            root = []
            for p in name.split('/'):
                if '[' in p or '{' in p or '*' in p or '?' in p:
                    break
                root.append(p)
            r.append('/'.join(root) or '.')
        elif kind in ('relpath', 'path'):
            r.append(name or '.')
        elif kind == 'relglob':
            r.append('.')
    return r

def _anypats(patterns):
    for kind, name in patterns:
        if kind in ('glob', 're', 'relglob', 'relre', 'set'):
            return True