Clean up walk and changes code to use normalised names properly.
author Bryan O'Sullivan <bos@serpentine.com>
Sun, 31 Jul 2005 17:42:46 -0800
changeset 820 89985a1b3427
parent 819 77e121a0d870
child 821 72d9bd4841f3
Clean up walk and changes code to use normalised names properly. New function: commands.pathto returns the relative path from one path to another. For example, given foo/bar and baz/quux, it will return ../../baz/quux. This new function is used by the walk and status code to print relative paths correctly. New command: debugwalk exercises the walk code without doing anything more. hg.dirstate.walk now yields normalised names. For example, if you're in the baz directory and you ask it to walk ../foo/bar/.., it will yield names starting with foo/. As a result of this change, all of the other walk and changes methods in this module also return normalised names. The util.matcher function now normalises globs and path names, so that it will match normalised names properly. Finally, util.matcher uses the non-glob prefix of a glob to tell walk which directories to scan. Previously, a glob like foo/* would scan everything, but only return matches for foo/*. Now, foo/* only scans under foo (using the globprefix function), which is much faster.
mercurial/commands.py
mercurial/hg.py
mercurial/util.py
--- a/mercurial/commands.py	Sun Jul 31 17:31:15 2005 -0800
+++ b/mercurial/commands.py	Sun Jul 31 17:42:46 2005 -0800
@@ -40,16 +40,24 @@
     return args
 
 def matchpats(cwd, pats = [], opts = {}, head = ''):
-    return util.matcher(cwd, pats, opts.get('include'),
+    return util.matcher(cwd, pats or ['.'], opts.get('include'),
                         opts.get('exclude'), head)
 
+def pathto(n1, n2):
+    '''return the relative path from one place to another'''
+    if not n1: return n2
+    a, b = n1.split(os.sep), n2.split(os.sep)
+    a.reverse(), b.reverse()
+    while a and b and a[-1] == b[-1]:
+        a.pop(), b.pop()
+    b.reverse()
+    return os.sep.join((['..'] * len(a)) + b)
+
 def walk(repo, pats, opts, head = ''):
     cwd = repo.getcwd()
-    c = 0
-    if cwd: c = len(cwd) + 1
     files, matchfn = matchpats(cwd, pats, opts, head)
     for src, fn in repo.walk(files = files, match = matchfn):
-        yield src, fn, fn[c:]
+        yield src, fn, pathto(cwd, fn)
 
 revrangesep = ':'
 
@@ -565,6 +573,11 @@
             ui.write("\t%d -> %d\n" % (r.rev(e[5]), i))
     ui.write("}\n")
 
+def debugwalk(ui, repo, *pats, **opts):
+    items = list(walk(repo, pats, opts))
+    fmt = '%%s  %%-%ds  %%s' % max([len(abs) for (src, abs, rel) in items])
+    for i in items: print fmt % i
+
 def diff(ui, repo, *pats, **opts):
     """diff working directory (or selected files)"""
     revs = []
@@ -1015,9 +1028,10 @@
     R = removed
     ? = not tracked'''
 
-    files, matchfn = matchpats(repo.getcwd(), pats, opts)
+    cwd = repo.getcwd()
+    files, matchfn = matchpats(cwd, pats, opts)
     (c, a, d, u) = repo.changes(files = files, match = matchfn)
-    (c, a, d, u) = map(lambda x: relfilter(repo, x), (c, a, d, u))
+    (c, a, d, u) = [map(lambda x: pathto(cwd, x), n) for n in c, a, d, u]
 
     for f in c:
         ui.write("M ", f, "\n")
@@ -1160,6 +1174,10 @@
     "debugstate": (debugstate, [], 'debugstate'),
     "debugindex": (debugindex, [], 'debugindex FILE'),
     "debugindexdot": (debugindexdot, [], 'debugindexdot FILE'),
+    "debugwalk": (debugwalk,
+                  [('I', 'include', [], 'include path in search'),
+                   ('X', 'exclude', [], 'exclude path from search')],
+                  'debugwalk [OPTIONS]... [FILE]...'),
     "^diff":
         (diff,
          [('r', 'rev', [], 'revision'),
--- a/mercurial/hg.py	Sun Jul 31 17:31:15 2005 -0800
+++ b/mercurial/hg.py	Sun Jul 31 17:42:46 2005 -0800
@@ -446,11 +446,12 @@
                 if os.path.isdir(f):
                     for dir, subdirs, fl in os.walk(f):
                         d = dir[len(self.root) + 1:]
-                        if d == '.hg':
+                        nd = os.path.normpath(d)
+                        if nd == '.hg':
                             subdirs[:] = []
                             continue
                         for sd in subdirs:
-                            ds = os.path.join(d, sd +'/')
+                            ds = os.path.join(nd, sd +'/')
                             if self.ignore(ds) or not match(ds):
                                 subdirs.remove(sd)
                         for fn in fl:
@@ -466,6 +467,7 @@
         # not in .hgignore
 
         for src, fn in util.unique(traverse()):
+            fn = os.path.normpath(fn)
             if fn in dc:
                 del dc[fn]
             elif self.ignore(fn):
@@ -868,7 +870,7 @@
     def walk(self, node = None, files = [], match = util.always):
         if node:
             for fn in self.manifest.read(self.changelog.read(node)[0]):
-                yield 'm', fn
+                if match(fn): yield 'm', fn
         else:
             for src, fn in self.dirstate.walk(files, match):
                 yield src, fn
--- a/mercurial/util.py	Sun Jul 31 17:31:15 2005 -0800
+++ b/mercurial/util.py	Sun Jul 31 17:42:46 2005 -0800
@@ -69,24 +69,27 @@
 _globchars = {'[': 1, '{': 1, '*': 1, '?': 1}
 
 def matcher(cwd, names, inc, exc, head = ''):
-    def patlike(name):
+    def patkind(name):
         for prefix in 're:', 'glob:', 'path:':
-            if name.startswith(prefix): return True
+            if name.startswith(prefix): return name.split(':', 1)
         for c in name:
-            if c in _globchars: return True
+            if c in _globchars: return 'glob', name
+        return 'relpath', name
+
+    cwdsep = cwd + os.sep
 
     def regex(name, tail):
         '''convert a pattern into a regular expression'''
-        if name.startswith('re:'):
-            return name[3:]
-        elif name.startswith('path:'):
-            return '^' + re.escape(name[5:]) + '$'
-        elif name.startswith('glob:'):
-            return head + globre(name[5:], '', tail)
+        kind, name = patkind(name)
+        if kind == 're':
+            return name
+        elif kind == 'path':
+            return '^' + re.escape(name) + '$'
+        if cwd: name = os.path.join(cwdsep, name)
+        name = os.path.normpath(name)
+        if name == '.': name = '**'
         return head + globre(name, '', tail)
 
-    cwdsep = cwd + os.sep
-
     def under(fn):
         """check if fn is under our cwd"""
         return not cwd or fn.startswith(cwdsep)
@@ -95,22 +98,28 @@
         """build a matching function from a set of patterns"""
         if pats:
             pat = '(?:%s)' % '|'.join([regex(p, tail) for p in pats])
-            if cwd:
-                pat = re.escape(cwdsep) + pat
             return re.compile(pat).match
 
-    pats = filter(patlike, names)
-    files = [n for n in names if not patlike(n)]
-    if pats: plain = []
-    elif cwd: plain = [cwdsep + f for f in files]
-    else: plain = files
+    def globprefix(pat):
+        '''return the non-glob prefix of a path, e.g. foo/* -> foo'''
+        root = []
+        for p in pat.split(os.sep):
+            if patkind(p)[0] == 'glob': break
+            root.append(p)
+        return os.sep.join(root)
+
+    patkinds = map(patkind, names)
+    pats = [name for (kind, name) in patkinds if kind != 'relpath']
+    files = [name for (kind, name) in patkinds if kind == 'relpath']
+    roots = filter(None, map(globprefix, pats)) + files
+    if cwd: roots = [cwdsep + r for r in roots]
         
-    patmatch = matchfn(pats, '$')
-    filematch = matchfn(files, '(?:/|$)')
-    incmatch = matchfn(inc, '(?:/|$)') or under
+    patmatch = matchfn(pats, '$') or always
+    filematch = matchfn(files, '(?:/|$)') or always
+    incmatch = matchfn(inc, '(?:/|$)') or always
     excmatch = matchfn(exc, '(?:/|$)') or (lambda fn: False)
 
-    return plain, lambda fn: (incmatch(fn) and not excmatch(fn) and
+    return roots, lambda fn: (incmatch(fn) and not excmatch(fn) and
                               (fn.endswith('/') or
                                (not pats and not files) or
                                (pats and patmatch(fn)) or