Mercurial > hg-stable
changeset 879:953ccddd57bd
dirstate walking optimizations
The repo walking code introduces a number of calls to dirstate.map.copy(),
significantly slowing down the walk on large trees. When a list of
files is passed to the walking code, we should only look at map entries
relevant to the file list passed in.
dirstate.filterfiles() is added to return a subset of the dirstate map.
The subset includes the files passed in, and if one of the requested files
is actually a directory, it also includes any files inside that directory tree.
This brings the time for hg diff Makefile down from 1.7s to 0.3s on
a Linux kernel repo.
Also, the diff command was unconditionally calling makewalk, leading
to an extra pass through repo.changes. This patch avoids the call
to makewalk when commands.diff isn't given a list of patterns, cutting
the time for hg diff (with no args) in half.
Index: mine/mercurial/hg.py
===================================================================
author | mason@suse.com |
---|---|
date | Fri, 12 Aug 2005 07:10:21 -0800 |
parents | c2e77581bc84 |
children | 409a9a7b0da2 |
files | mercurial/commands.py mercurial/hg.py |
diffstat | 2 files changed, 53 insertions(+), 11 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/commands.py Tue Aug 09 17:24:38 2005 -0800 +++ b/mercurial/commands.py Fri Aug 12 07:10:21 2005 -0800 @@ -632,9 +632,11 @@ raise util.Abort("too many revisions to diff") files = [] - roots, match, results = makewalk(repo, pats, opts) - for src, abs, rel in results: - files.append(abs) + match = util.always + if pats: + roots, match, results = makewalk(repo, pats, opts) + for src, abs, rel in results: + files.append(abs) dodiff(sys.stdout, ui, repo, files, *revs, **{'match': match}) def doexport(ui, repo, changeset, seqno, total, revwidth, opts):
--- a/mercurial/hg.py Tue Aug 09 17:24:38 2005 -0800 +++ b/mercurial/hg.py Fri Aug 12 07:10:21 2005 -0800 @@ -440,11 +440,50 @@ st.write(e + f) self.dirty = 0 - def walk(self, files = None, match = util.always): + def filterfiles(self, files): + ret = {} + unknown = [] + + for x in files: + if x is '.': + return self.map.copy() + if x not in self.map: + unknown.append(x) + else: + ret[x] = self.map[x] + + if not unknown: + return ret + + b = self.map.keys() + b.sort() + blen = len(b) + + for x in unknown: + bs = bisect.bisect(b, x) + if bs != 0 and b[bs-1] == x: + ret[x] = self.map[x] + continue + while bs < blen: + s = b[bs] + if len(s) > len(x) and s.startswith(x) and s[len(x)] == '/': + ret[s] = self.map[s] + else: + break + bs += 1 + return ret + + def walk(self, files = None, match = util.always, dc=None): self.read() - dc = self.map.copy() + # walk all files by default - if not files: files = [self.root] + if not files: + files = [self.root] + if not dc: + dc = self.map.copy() + elif not dc: + dc = self.filterfiles(files) + known = {'.hg': 1} def seen(fn): if fn in known: return True @@ -482,19 +521,20 @@ for src, fn in util.unique(traverse()): fn = os.path.normpath(fn) if seen(fn): continue - if fn in dc: - del dc[fn] - elif self.ignore(fn): + if fn not in dc and self.ignore(fn): continue if match(fn): yield src, fn def changes(self, files = None, match = util.always): self.read() - dc = self.map.copy() + if not files: + dc = self.map.copy() + else: + dc = self.filterfiles(files) lookup, changed, added, unknown = [], [], [], [] - for src, fn in self.walk(files, match): + for src, fn in self.walk(files, match, dc=dc): try: s = os.stat(os.path.join(self.root, fn)) except: continue