dirstate walking optimizations
authormason@suse.com
Fri, 12 Aug 2005 09:57:56 -0800
changeset 872 9a0af739cf55
parent 859 6390c377a9e6
child 873 4480e035d838
dirstate walking optimizations The repo walking code introduces a number of calls to dirstate.map.copy(), significantly slowing down the walk on large trees. When a list of files is passed to the walking code, we should only look at map entries relevant to the file list passed in. dirstate.filterfiles() is added to return a subset of the dirstate map. The subset includes in files passed in, and if one of the files requested is actually a directory, it includes any files inside that directory tree. This brings the time for hg diff Makefile down from 1.7s to .3s on a linux kernel repo. Also, the diff command was unconditionally calling makewalk, leading to an extra pass through repo.changes. This patch avoids the call to makewalk when commands.diff isn't given a list of patterns, cutting the time for hg diff (with no args) in half. Index: mine/mercurial/hg.py ===================================================================
mercurial/commands.py
mercurial/hg.py
--- a/mercurial/commands.py	Tue Aug 09 09:36:34 2005 -0800
+++ b/mercurial/commands.py	Fri Aug 12 09:57:56 2005 -0800
@@ -634,9 +634,11 @@
         raise Abort("too many revisions to diff")
 
     files = []
-    roots, match, results = makewalk(repo, pats, opts)
-    for src, abs, rel in results:
-        files.append(abs)
+    match = util.always
+    if pats:
+        roots, match, results = makewalk(repo, pats, opts)
+        for src, abs, rel in results:
+            files.append(abs)
     dodiff(sys.stdout, ui, repo, files, *revs, **{'match': match})
 
 def doexport(ui, repo, changeset, seqno, total, revwidth, opts):
--- a/mercurial/hg.py	Tue Aug 09 09:36:34 2005 -0800
+++ b/mercurial/hg.py	Fri Aug 12 09:57:56 2005 -0800
@@ -435,11 +435,50 @@
             st.write(e + f)
         self.dirty = 0
 
-    def walk(self, files = None, match = util.always):
+    def filterfiles(self, files):
+        ret = {}
+        unknown = []
+
+        for x in files:
+            if x is '.':
+                return self.map.copy()
+            if x not in self.map:
+                unknown.append(x)
+            else:
+                ret[x] = self.map[x]
+                
+        if not unknown:
+            return ret
+
+        b = self.map.keys()
+        b.sort()
+        blen = len(b)
+
+        for x in unknown:
+            bs = bisect.bisect(b, x)
+            if bs != 0 and  b[bs-1] == x: 
+                ret[x] = self.map[x]
+                continue
+            while bs < blen:
+                s = b[bs]
+                if len(s) > len(x) and s.startswith(x) and s[len(x)] == '/':
+                    ret[s] = self.map[s]
+                else:
+                    break
+                bs += 1
+        return ret
+
+    def walk(self, files = None, match = util.always, dc=None):
         self.read()
-        dc = self.map.copy()
+
         # walk all files by default
-        if not files: files = [self.root]
+        if not files:
+            files = [self.root]
+            if not dc:
+                dc = self.map.copy()
+        elif not dc:
+            dc = self.filterfiles(files)
+                    
         known = {'.hg': 1}
         def seen(fn):
             if fn in known: return True
@@ -477,19 +516,20 @@
         for src, fn in util.unique(traverse()):
             fn = os.path.normpath(fn)
             if seen(fn): continue
-            if fn in dc:
-                del dc[fn]
-            elif self.ignore(fn):
+            if fn not in dc and self.ignore(fn):
                 continue
             if match(fn):
                 yield src, fn
 
     def changes(self, files = None, match = util.always):
         self.read()
-        dc = self.map.copy()
+        if not files:
+            dc = self.map.copy()
+        else:
+            dc = self.filterfiles(files)
         lookup, changed, added, unknown = [], [], [], []
 
-        for src, fn in self.walk(files, match):
+        for src, fn in self.walk(files, match, dc=dc):
             try: s = os.stat(os.path.join(self.root, fn))
             except: continue