--- a/mercurial/match.py Sun May 24 02:56:14 2009 -0500
+++ b/mercurial/match.py Sun May 24 02:56:14 2009 -0500
@@ -5,7 +5,7 @@
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2, incorporated herein by reference.
-import util
+import util, re
class _match(object):
def __init__(self, root, cwd, files, mf, ap):
@@ -50,10 +50,203 @@
class match(_match):
def __init__(self, root, cwd, patterns, include=[], exclude=[],
default='glob'):
- f, mf, ap = util.matcher(root, cwd, patterns, include, exclude,
- default)
+ f, mf, ap = _matcher(root, cwd, patterns, include, exclude, default)
_match.__init__(self, root, cwd, f, mf, ap)
def patkind(pat):
- return util._patsplit(pat, None)[0]
+ return _patsplit(pat, None)[0]
+
+def _patsplit(pat, default):
+ """Split a string into an optional pattern kind prefix and the
+ actual pattern."""
+ for prefix in 're', 'glob', 'path', 'relglob', 'relpath', 'relre':
+ if pat.startswith(prefix + ':'): return pat.split(':', 1)
+ return default, pat
+
+_globchars = set('[{*?')
+
+def _globre(pat, head='^', tail='$'):
+ "convert a glob pattern into a regexp"
+ i, n = 0, len(pat)
+ res = ''
+ group = 0
+ def peek(): return i < n and pat[i]
+ while i < n:
+ c = pat[i]
+ i = i+1
+ if c == '*':
+ if peek() == '*':
+ i += 1
+ res += '.*'
+ else:
+ res += '[^/]*'
+ elif c == '?':
+ res += '.'
+ elif c == '[':
+ j = i
+ if j < n and pat[j] in '!]':
+ j += 1
+ while j < n and pat[j] != ']':
+ j += 1
+ if j >= n:
+ res += '\\['
+ else:
+ stuff = pat[i:j].replace('\\','\\\\')
+ i = j + 1
+ if stuff[0] == '!':
+ stuff = '^' + stuff[1:]
+ elif stuff[0] == '^':
+ stuff = '\\' + stuff
+ res = '%s[%s]' % (res, stuff)
+ elif c == '{':
+ group += 1
+ res += '(?:'
+ elif c == '}' and group:
+ res += ')'
+ group -= 1
+ elif c == ',' and group:
+ res += '|'
+ elif c == '\\':
+ p = peek()
+ if p:
+ i += 1
+ res += re.escape(p)
+ else:
+ res += re.escape(c)
+ else:
+ res += re.escape(c)
+ return head + res + tail
+
+def _matcher(canonroot, cwd='', names=[], inc=[], exc=[], dflt_pat='glob'):
+ """build a function to match a set of file patterns
+
+ arguments:
+ canonroot - the canonical root of the tree you're matching against
+ cwd - the current working directory, if relevant
+ names - patterns to find
+ inc - patterns to include
+ exc - patterns to exclude
+ dflt_pat - if a pattern in names has no explicit type, assume this one
+
+ a pattern is one of:
+ 'glob:<glob>' - a glob relative to cwd
+ 're:<regexp>' - a regular expression
+ 'path:<path>' - a path relative to canonroot
+ 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
+ 'relpath:<path>' - a path relative to cwd
+ 'relre:<regexp>' - a regexp that doesn't have to match the start of a name
+ '<something>' - one of the cases above, selected by the dflt_pat argument
+
+ returns:
+ a 3-tuple containing
+ - list of roots (places where one should start a recursive walk of the fs);
+ this often matches the explicit non-pattern names passed in, but also
+ includes the initial part of glob: patterns that has no glob characters
+ - a bool match(filename) function
+ - a bool indicating if any patterns were passed in
+ """
+
+ # a common case: no patterns at all
+ if not names and not inc and not exc:
+ return [], util.always, False
+ def contains_glob(name):
+ for c in name:
+ if c in _globchars: return True
+ return False
+
+ def regex(kind, name, tail):
+ '''convert a pattern into a regular expression'''
+ if not name:
+ return ''
+ if kind == 're':
+ return name
+ elif kind == 'path':
+ return '^' + re.escape(name) + '(?:/|$)'
+ elif kind == 'relglob':
+ return _globre(name, '(?:|.*/)', tail)
+ elif kind == 'relpath':
+ return re.escape(name) + '(?:/|$)'
+ elif kind == 'relre':
+ if name.startswith('^'):
+ return name
+ return '.*' + name
+ return _globre(name, '', tail)
+
+ def matchfn(pats, tail):
+ """build a matching function from a set of patterns"""
+ if not pats:
+ return
+ try:
+ pat = '(?:%s)' % '|'.join([regex(k, p, tail) for (k, p) in pats])
+ if len(pat) > 20000:
+ raise OverflowError()
+ return re.compile(pat).match
+ except OverflowError:
+ # We're using a Python with a tiny regex engine and we
+ # made it explode, so we'll divide the pattern list in two
+ # until it works
+ l = len(pats)
+ if l < 2:
+ raise
+ a, b = matchfn(pats[:l//2], tail), matchfn(pats[l//2:], tail)
+ return lambda s: a(s) or b(s)
+ except re.error:
+ for k, p in pats:
+ try:
+ re.compile('(?:%s)' % regex(k, p, tail))
+ except re.error:
+ raise util.Abort("invalid pattern (%s): %s" % (k, p))
+ raise util.Abort("invalid pattern")
+
+ def globprefix(pat):
+ '''return the non-glob prefix of a path, e.g. foo/* -> foo'''
+ root = []
+ for p in pat.split('/'):
+ if contains_glob(p): break
+ root.append(p)
+ return '/'.join(root) or '.'
+
+ def normalizepats(names, default):
+ pats = []
+ roots = []
+ anypats = False
+ for kind, name in [_patsplit(p, default) for p in names]:
+ if kind in ('glob', 'relpath'):
+ name = util.canonpath(canonroot, cwd, name)
+ elif kind in ('relglob', 'path'):
+ name = util.normpath(name)
+
+ pats.append((kind, name))
+
+ if kind in ('glob', 're', 'relglob', 'relre'):
+ anypats = True
+
+ if kind == 'glob':
+ root = globprefix(name)
+ roots.append(root)
+ elif kind in ('relpath', 'path'):
+ roots.append(name or '.')
+ elif kind == 'relglob':
+ roots.append('.')
+ return roots, pats, anypats
+
+ roots, pats, anypats = normalizepats(names, dflt_pat)
+
+ patmatch = matchfn(pats, '$') or util.always
+ incmatch = util.always
+ if inc:
+ dummy, inckinds, dummy = normalizepats(inc, 'glob')
+ incmatch = matchfn(inckinds, '(?:/|$)')
+ excmatch = util.never
+ if exc:
+ dummy, exckinds, dummy = normalizepats(exc, 'glob')
+ excmatch = matchfn(exckinds, '(?:/|$)')
+
+ if not names and inc and not exc:
+ # common case: hgignore patterns
+ matcher = incmatch
+ else:
+ matcher = lambda fn: incmatch(fn) and not excmatch(fn) and patmatch(fn)
+
+ return (roots, matcher, (inc or exc or anypats) and True)
--- a/mercurial/util.py Sun May 24 02:56:14 2009 -0500
+++ b/mercurial/util.py Sun May 24 02:56:14 2009 -0500
@@ -207,67 +207,6 @@
def always(fn): return True
def never(fn): return False
-def _patsplit(pat, default):
- """Split a string into an optional pattern kind prefix and the
- actual pattern."""
- for prefix in 're', 'glob', 'path', 'relglob', 'relpath', 'relre':
- if pat.startswith(prefix + ':'): return pat.split(':', 1)
- return default, pat
-
-def _globre(pat, head='^', tail='$'):
- "convert a glob pattern into a regexp"
- i, n = 0, len(pat)
- res = ''
- group = 0
- def peek(): return i < n and pat[i]
- while i < n:
- c = pat[i]
- i = i+1
- if c == '*':
- if peek() == '*':
- i += 1
- res += '.*'
- else:
- res += '[^/]*'
- elif c == '?':
- res += '.'
- elif c == '[':
- j = i
- if j < n and pat[j] in '!]':
- j += 1
- while j < n and pat[j] != ']':
- j += 1
- if j >= n:
- res += '\\['
- else:
- stuff = pat[i:j].replace('\\','\\\\')
- i = j + 1
- if stuff[0] == '!':
- stuff = '^' + stuff[1:]
- elif stuff[0] == '^':
- stuff = '\\' + stuff
- res = '%s[%s]' % (res, stuff)
- elif c == '{':
- group += 1
- res += '(?:'
- elif c == '}' and group:
- res += ')'
- group -= 1
- elif c == ',' and group:
- res += '|'
- elif c == '\\':
- p = peek()
- if p:
- i += 1
- res += re.escape(p)
- else:
- res += re.escape(c)
- else:
- res += re.escape(c)
- return head + res + tail
-
-_globchars = set('[{*?')
-
def pathto(root, n1, n2):
'''return the relative path from one place to another.
root should use os.sep to separate directories
@@ -342,140 +281,6 @@
raise Abort('%s not under root' % myname)
-def matcher(canonroot, cwd='', names=[], inc=[], exc=[], dflt_pat='glob'):
- """build a function to match a set of file patterns
-
- arguments:
- canonroot - the canonical root of the tree you're matching against
- cwd - the current working directory, if relevant
- names - patterns to find
- inc - patterns to include
- exc - patterns to exclude
- dflt_pat - if a pattern in names has no explicit type, assume this one
-
- a pattern is one of:
- 'glob:<glob>' - a glob relative to cwd
- 're:<regexp>' - a regular expression
- 'path:<path>' - a path relative to canonroot
- 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
- 'relpath:<path>' - a path relative to cwd
- 'relre:<regexp>' - a regexp that doesn't have to match the start of a name
- '<something>' - one of the cases above, selected by the dflt_pat argument
-
- returns:
- a 3-tuple containing
- - list of roots (places where one should start a recursive walk of the fs);
- this often matches the explicit non-pattern names passed in, but also
- includes the initial part of glob: patterns that has no glob characters
- - a bool match(filename) function
- - a bool indicating if any patterns were passed in
- """
-
- # a common case: no patterns at all
- if not names and not inc and not exc:
- return [], always, False
-
- def contains_glob(name):
- for c in name:
- if c in _globchars: return True
- return False
-
- def regex(kind, name, tail):
- '''convert a pattern into a regular expression'''
- if not name:
- return ''
- if kind == 're':
- return name
- elif kind == 'path':
- return '^' + re.escape(name) + '(?:/|$)'
- elif kind == 'relglob':
- return _globre(name, '(?:|.*/)', tail)
- elif kind == 'relpath':
- return re.escape(name) + '(?:/|$)'
- elif kind == 'relre':
- if name.startswith('^'):
- return name
- return '.*' + name
- return _globre(name, '', tail)
-
- def matchfn(pats, tail):
- """build a matching function from a set of patterns"""
- if not pats:
- return
- try:
- pat = '(?:%s)' % '|'.join([regex(k, p, tail) for (k, p) in pats])
- if len(pat) > 20000:
- raise OverflowError()
- return re.compile(pat).match
- except OverflowError:
- # We're using a Python with a tiny regex engine and we
- # made it explode, so we'll divide the pattern list in two
- # until it works
- l = len(pats)
- if l < 2:
- raise
- a, b = matchfn(pats[:l//2], tail), matchfn(pats[l//2:], tail)
- return lambda s: a(s) or b(s)
- except re.error:
- for k, p in pats:
- try:
- re.compile('(?:%s)' % regex(k, p, tail))
- except re.error:
- raise Abort("invalid pattern (%s): %s" % (k, p))
- raise Abort("invalid pattern")
-
- def globprefix(pat):
- '''return the non-glob prefix of a path, e.g. foo/* -> foo'''
- root = []
- for p in pat.split('/'):
- if contains_glob(p): break
- root.append(p)
- return '/'.join(root) or '.'
-
- def normalizepats(names, default):
- pats = []
- roots = []
- anypats = False
- for kind, name in [_patsplit(p, default) for p in names]:
- if kind in ('glob', 'relpath'):
- name = canonpath(canonroot, cwd, name)
- elif kind in ('relglob', 'path'):
- name = normpath(name)
-
- pats.append((kind, name))
-
- if kind in ('glob', 're', 'relglob', 'relre'):
- anypats = True
-
- if kind == 'glob':
- root = globprefix(name)
- roots.append(root)
- elif kind in ('relpath', 'path'):
- roots.append(name or '.')
- elif kind == 'relglob':
- roots.append('.')
- return roots, pats, anypats
-
- roots, pats, anypats = normalizepats(names, dflt_pat)
-
- patmatch = matchfn(pats, '$') or always
- incmatch = always
- if inc:
- dummy, inckinds, dummy = normalizepats(inc, 'glob')
- incmatch = matchfn(inckinds, '(?:/|$)')
- excmatch = never
- if exc:
- dummy, exckinds, dummy = normalizepats(exc, 'glob')
- excmatch = matchfn(exckinds, '(?:/|$)')
-
- if not names and inc and not exc:
- # common case: hgignore patterns
- match = incmatch
- else:
- match = lambda fn: incmatch(fn) and not excmatch(fn) and patmatch(fn)
-
- return (roots, match, (inc or exc or anypats) and True)
-
_hgexecutable = None
def main_is_frozen():