log: make file log slow path usable on large repos
Running "hg log <pattern or directory>" on large repos took a very long time
because the slow path read ctx.files() for every changeset up front, before it
started producing any output.
This change makes the ctx.files() check lazy: it now runs per revision, the
first time that revision is tested for membership in the wanted set, so the
command starts producing results immediately.
--- a/mercurial/cmdutil.py Fri Sep 13 15:40:04 2013 -0500
+++ b/mercurial/cmdutil.py Tue Sep 10 19:49:34 2013 -0700
@@ -1172,12 +1172,34 @@
'filenames'))
# The slow path checks files modified in every changeset.
- for i in sorted(revs):
- ctx = change(i)
- matches = filter(match, ctx.files())
- if matches:
- fncache[i] = matches
- wanted.add(i)
+ # This is really slow on large repos, so compute the set lazily.
+ class lazywantedset(object):
+ def __init__(self):
+ self.set = set()
+ self.revs = set(revs)
+
+ # A rev's files are read only the first time it is tested. Locality is
+ # not a concern: lookups follow the order of the increasing window below.
+ def __contains__(self, value):
+ if value in self.set:
+ return True
+ elif not value in self.revs:
+ return False
+ else:
+ self.revs.discard(value)
+ ctx = change(value)
+ matches = filter(match, ctx.files())
+ if matches:
+ fncache[value] = matches
+ self.set.add(value)
+ return True
+ return False
+
+ def discard(self, value):
+ self.revs.discard(value)
+ self.set.discard(value)
+
+ wanted = lazywantedset()
class followfilter(object):
def __init__(self, onlyfirst=False):