match: unnest functions in _matcher
authorMatt Mackall <mpm@selenic.com>
Sun, 24 May 2009 02:56:14 -0500
changeset 8574 63a7ed2128d5
parent 8573 62c996d543e6
child 8575 aa4fcb5c46f1
match: unnest functions in _matcher
mercurial/match.py
--- a/mercurial/match.py	Sun May 24 02:56:14 2009 -0500
+++ b/mercurial/match.py	Sun May 24 02:56:14 2009 -0500
@@ -117,11 +117,90 @@
             res += re.escape(c)
     return head + res + tail
 
-def _matcher(canonroot, cwd='', names=[], inc=[], exc=[], dflt_pat='glob'):
+def _regex(kind, name, tail):
+    '''convert a pattern into a regular expression'''
+    if not name:
+        return ''
+    if kind == 're':
+        return name
+    elif kind == 'path':
+        return '^' + re.escape(name) + '(?:/|$)'
+    elif kind == 'relglob':
+        return _globre(name, '(?:|.*/)', tail)
+    elif kind == 'relpath':
+        return re.escape(name) + '(?:/|$)'
+    elif kind == 'relre':
+        if name.startswith('^'):
+            return name
+        return '.*' + name
+    return _globre(name, '', tail)
+
+def _matchfn(pats, tail):
+    """build a matching function from a set of patterns"""
+    try:
+        pat = '(?:%s)' % '|'.join([_regex(k, p, tail) for (k, p) in pats])
+        if len(pat) > 20000:
+            raise OverflowError()
+        return re.compile(pat).match
+    except OverflowError:
+        # We're using a Python with a tiny regex engine and we
+        # made it explode, so we'll divide the pattern list in two
+        # until it works
+        l = len(pats)
+        if l < 2:
+            raise
+        a, b = _matchfn(pats[:l//2], tail), matchfn(pats[l//2:], tail)
+        return lambda s: a(s) or b(s)
+    except re.error:
+        for k, p in pats:
+            try:
+                re.compile('(?:%s)' % _regex(k, p, tail))
+            except re.error:
+                raise util.Abort("invalid pattern (%s): %s" % (k, p))
+        raise util.Abort("invalid pattern")
+
+def _containsglob(name):
+    for c in name:
+        if c in _globchars: return True
+    return False
+
+def _globprefix(pat):
+    '''return the non-glob prefix of a path, e.g. foo/* -> foo'''
+    root = []
+    for p in pat.split('/'):
+        if _containsglob(p): break
+        root.append(p)
+    return '/'.join(root) or '.'
+
+def _normalizepats(names, default, canonroot, cwd):
+    pats = []
+    roots = []
+    anypats = False
+    for kind, name in [_patsplit(p, default) for p in names]:
+        if kind in ('glob', 'relpath'):
+            name = util.canonpath(canonroot, cwd, name)
+        elif kind in ('relglob', 'path'):
+            name = util.normpath(name)
+
+        pats.append((kind, name))
+
+        if kind in ('glob', 're', 'relglob', 'relre'):
+            anypats = True
+
+        if kind == 'glob':
+            root = _globprefix(name)
+            roots.append(root)
+        elif kind in ('relpath', 'path'):
+            roots.append(name or '.')
+        elif kind == 'relglob':
+            roots.append('.')
+    return roots, pats, anypats
+
+def _matcher(root, cwd='', names=[], inc=[], exc=[], dflt_pat='glob'):
     """build a function to match a set of file patterns
 
     arguments:
-    canonroot - the canonical root of the tree you're matching against
+    root - the canonical root of the tree you're matching against
     cwd - the current working directory, if relevant
     names - patterns to find
     inc - patterns to include
@@ -150,95 +229,16 @@
     if not names and not inc and not exc:
         return [], lambda f: True, False
 
-    def contains_glob(name):
-        for c in name:
-            if c in _globchars: return True
-        return False
-
-    def regex(kind, name, tail):
-        '''convert a pattern into a regular expression'''
-        if not name:
-            return ''
-        if kind == 're':
-            return name
-        elif kind == 'path':
-            return '^' + re.escape(name) + '(?:/|$)'
-        elif kind == 'relglob':
-            return _globre(name, '(?:|.*/)', tail)
-        elif kind == 'relpath':
-            return re.escape(name) + '(?:/|$)'
-        elif kind == 'relre':
-            if name.startswith('^'):
-                return name
-            return '.*' + name
-        return _globre(name, '', tail)
-
-    def matchfn(pats, tail):
-        """build a matching function from a set of patterns"""
-        try:
-            pat = '(?:%s)' % '|'.join([regex(k, p, tail) for (k, p) in pats])
-            if len(pat) > 20000:
-                raise OverflowError()
-            return re.compile(pat).match
-        except OverflowError:
-            # We're using a Python with a tiny regex engine and we
-            # made it explode, so we'll divide the pattern list in two
-            # until it works
-            l = len(pats)
-            if l < 2:
-                raise
-            a, b = matchfn(pats[:l//2], tail), matchfn(pats[l//2:], tail)
-            return lambda s: a(s) or b(s)
-        except re.error:
-            for k, p in pats:
-                try:
-                    re.compile('(?:%s)' % regex(k, p, tail))
-                except re.error:
-                    raise util.Abort("invalid pattern (%s): %s" % (k, p))
-            raise util.Abort("invalid pattern")
-
-    def globprefix(pat):
-        '''return the non-glob prefix of a path, e.g. foo/* -> foo'''
-        root = []
-        for p in pat.split('/'):
-            if contains_glob(p): break
-            root.append(p)
-        return '/'.join(root) or '.'
-
-    def normalizepats(names, default):
-        pats = []
-        roots = []
-        anypats = False
-        for kind, name in [_patsplit(p, default) for p in names]:
-            if kind in ('glob', 'relpath'):
-                name = util.canonpath(canonroot, cwd, name)
-            elif kind in ('relglob', 'path'):
-                name = util.normpath(name)
-
-            pats.append((kind, name))
-
-            if kind in ('glob', 're', 'relglob', 'relre'):
-                anypats = True
-
-            if kind == 'glob':
-                root = globprefix(name)
-                roots.append(root)
-            elif kind in ('relpath', 'path'):
-                roots.append(name or '.')
-            elif kind == 'relglob':
-                roots.append('.')
-        return roots, pats, anypats
-
-    roots, pats, anypats = normalizepats(names, dflt_pat)
+    roots, pats, anypats = _normalizepats(names, dflt_pat, root, cwd)
 
     if names:
-        patmatch = matchfn(pats, '$')
+        patmatch = _matchfn(pats, '$')
     if inc:
-        dummy, inckinds, dummy = normalizepats(inc, 'glob')
-        incmatch = matchfn(inckinds, '(?:/|$)')
+        dummy, inckinds, dummy = _normalizepats(inc, 'glob', root, cwd)
+        incmatch = _matchfn(inckinds, '(?:/|$)')
     if exc:
-        dummy, exckinds, dummy = normalizepats(exc, 'glob')
-        excmatch = matchfn(exckinds, '(?:/|$)')
+        dummy, exckinds, dummy = _normalizepats(exc, 'glob', root, cwd)
+        excmatch = _matchfn(exckinds, '(?:/|$)')
 
     if names:
         if inc: