dirstate walking optimizations
The repo walking code introduces a number of calls to dirstate.map.copy(),
significantly slowing down the walk on large trees. When a list of
files is passed to the walking code, we should only look at map entries
relevant to the file list passed in.
dirstate.filterfiles() is added to return a subset of the dirstate map.
The subset includes the files passed in, and if one of the files requested
is actually a directory, it includes any files inside that directory tree.
This brings the time for hg diff Makefile down from 1.7s to 0.3s on
a Linux kernel repo.
Also, the diff command was unconditionally calling makewalk, leading
to an extra pass through repo.changes. This patch avoids the call
to makewalk when commands.diff isn't given a list of patterns, cutting
the time for hg diff (with no args) in half.
Index: mine/mercurial/hg.py
===================================================================
--- a/mercurial/commands.py Tue Aug 09 09:36:34 2005 -0800
+++ b/mercurial/commands.py Fri Aug 12 09:57:56 2005 -0800
@@ -634,9 +634,11 @@
raise Abort("too many revisions to diff")
files = []
- roots, match, results = makewalk(repo, pats, opts)
- for src, abs, rel in results:
- files.append(abs)
+ match = util.always
+ if pats:
+ roots, match, results = makewalk(repo, pats, opts)
+ for src, abs, rel in results:
+ files.append(abs)
dodiff(sys.stdout, ui, repo, files, *revs, **{'match': match})
def doexport(ui, repo, changeset, seqno, total, revwidth, opts):
--- a/mercurial/hg.py Tue Aug 09 09:36:34 2005 -0800
+++ b/mercurial/hg.py Fri Aug 12 09:57:56 2005 -0800
@@ -435,11 +435,50 @@
st.write(e + f)
self.dirty = 0
- def walk(self, files = None, match = util.always):
+ def filterfiles(self, files):
+ ret = {}
+ unknown = []
+
+ for x in files:
+ if x is '.':
+ return self.map.copy()
+ if x not in self.map:
+ unknown.append(x)
+ else:
+ ret[x] = self.map[x]
+
+ if not unknown:
+ return ret
+
+ b = self.map.keys()
+ b.sort()
+ blen = len(b)
+
+ for x in unknown:
+ bs = bisect.bisect(b, x)
+ if bs != 0 and b[bs-1] == x:
+ ret[x] = self.map[x]
+ continue
+ while bs < blen:
+ s = b[bs]
+ if len(s) > len(x) and s.startswith(x) and s[len(x)] == '/':
+ ret[s] = self.map[s]
+ else:
+ break
+ bs += 1
+ return ret
+
+ def walk(self, files = None, match = util.always, dc=None):
self.read()
- dc = self.map.copy()
+
# walk all files by default
- if not files: files = [self.root]
+ if not files:
+ files = [self.root]
+ if not dc:
+ dc = self.map.copy()
+ elif not dc:
+ dc = self.filterfiles(files)
+
known = {'.hg': 1}
def seen(fn):
if fn in known: return True
@@ -477,19 +516,20 @@
for src, fn in util.unique(traverse()):
fn = os.path.normpath(fn)
if seen(fn): continue
- if fn in dc:
- del dc[fn]
- elif self.ignore(fn):
+ if fn not in dc and self.ignore(fn):
continue
if match(fn):
yield src, fn
def changes(self, files = None, match = util.always):
self.read()
- dc = self.map.copy()
+ if not files:
+ dc = self.map.copy()
+ else:
+ dc = self.filterfiles(files)
lookup, changed, added, unknown = [], [], [], []
- for src, fn in self.walk(files, match):
+ for src, fn in self.walk(files, match, dc=dc):
try: s = os.stat(os.path.join(self.root, fn))
except: continue