ancestor: add lazy membership testing to lazyancestors
This also makes the perfancestorset command use lazy membership testing. In a
linear repository with over 400,000 commits, without this patch, hg
perfancestorset takes 0.80 seconds no matter how far behind we're looking.
With this patch, hg perfancestorset -- X takes:
Rev X Time
-1 0.00s
-4000 0.01s
-20000 0.04s
-80000 0.17s
-200000 0.43s
-300000 0.69s
0 0.88s
Thus, for revisions close to tip, we're up to several orders of magnitude
faster. At 0 we're around 10% slower.
--- a/contrib/perf.py Tue Dec 18 10:14:01 2012 -0800
+++ b/contrib/perf.py Tue Dec 18 12:47:20 2012 -0800
@@ -82,7 +82,7 @@
revs = repo.revs(revset)
heads = repo.changelog.headrevs()
def d():
- s = set(repo.changelog.ancestors(heads))
+ s = repo.changelog.ancestors(heads)
for rev in revs:
rev in s
timer(d)
--- a/mercurial/ancestor.py Tue Dec 18 10:14:01 2012 -0800
+++ b/mercurial/ancestor.py Tue Dec 18 12:47:20 2012 -0800
@@ -177,8 +177,8 @@
"""Create a new object generating ancestors for the given revs. Does
not generate revs lower than stoprev.
- This is computed lazily starting from revs. The object only supports
- iteration.
+ This is computed lazily starting from revs. The object supports
+ iteration and membership.
cl should be a changelog and revs should be an iterable. inclusive is
a boolean that indicates whether revs should be included. Revs lower
@@ -190,6 +190,19 @@
self._stoprev = stoprev
self._inclusive = inclusive
+ # Initialize data structures for __contains__.
+ # For __contains__, we use a heap rather than a deque because
+ # (a) it minimizes the number of parentrevs calls made
+ # (b) it makes the loop termination condition obvious
+ # Python's heap is a min-heap. Multiply all values by -1 to convert it
+ # into a max-heap.
+ self._containsvisit = [-rev for rev in revs]
+ heapq.heapify(self._containsvisit)
+ if inclusive:
+ self._containsseen = set(revs)
+ else:
+ self._containsseen = set()
+
def __iter__(self):
"""Generate the ancestors of _initrevs in reverse topological order.
@@ -219,3 +232,33 @@
visit.append(parent)
seen.add(parent)
yield parent
+
+ def __contains__(self, target):
+ """Test whether target is an ancestor of self._initrevs."""
+ # Trying to do both __iter__ and __contains__ using the same visit
+ # heap and seen set is complex enough that it slows down both. Keep
+ # them separate.
+ seen = self._containsseen
+ if target in seen:
+ return True
+
+ parentrevs = self._parentrevs
+ visit = self._containsvisit
+ stoprev = self._stoprev
+ heappop = heapq.heappop
+ heappush = heapq.heappush
+
+ targetseen = False
+
+ while visit and -visit[0] > target and not targetseen:
+ for parent in parentrevs(-heappop(visit)):
+ if parent < stoprev or parent in seen:
+ continue
+ # We need to make sure we push all parents into the heap so
+ # that we leave it in a consistent state for future calls.
+ heappush(visit, -parent)
+ seen.add(parent)
+ if parent == target:
+ targetseen = True
+
+ return targetseen
--- a/tests/test-ancestor.py Tue Dec 18 10:14:01 2012 -0800
+++ b/tests/test-ancestor.py Tue Dec 18 12:47:20 2012 -0800
@@ -34,6 +34,9 @@
13: [8]}
pfunc = graph.get
+class mockchangelog(object):
+ parentrevs = graph.get
+
def runmissingancestors(revs, bases):
print "%% ancestors of %s and not of %s" % (revs, bases)
print ancestor.missingancestors(revs, bases, pfunc)
@@ -70,5 +73,34 @@
runmissingancestors([10, 11, 12], [13])
runmissingancestors([13], [10, 11, 12])
+def genlazyancestors(revs, stoprev=0, inclusive=False):
+ print ("%% lazy ancestor set for %s, stoprev = %s, inclusive = %s" %
+ (revs, stoprev, inclusive))
+ return ancestor.lazyancestors(mockchangelog, revs, stoprev=stoprev,
+ inclusive=inclusive)
+
+def printlazyancestors(s, l):
+ print [n for n in l if n in s]
+
+def test_lazyancestors():
+ # Empty revs
+ s = genlazyancestors([])
+ printlazyancestors(s, [3, 0, -1])
+
+ # Standard example
+ s = genlazyancestors([11, 13])
+ printlazyancestors(s, [11, 13, 7, 9, 8, 3, 6, 4, 1, -1, 0])
+
+ # Including revs
+ s = genlazyancestors([11, 13], inclusive=True)
+ printlazyancestors(s, [11, 13, 7, 9, 8, 3, 6, 4, 1, -1, 0])
+
+ # Test with stoprev
+ s = genlazyancestors([11, 13], stoprev=6)
+ printlazyancestors(s, [11, 13, 7, 9, 8, 3, 6, 4, 1, -1, 0])
+ s = genlazyancestors([11, 13], stoprev=6, inclusive=True)
+ printlazyancestors(s, [11, 13, 7, 9, 8, 3, 6, 4, 1, -1, 0])
+
if __name__ == '__main__':
test_missingancestors()
+ test_lazyancestors()
--- a/tests/test-ancestor.py.out Tue Dec 18 10:14:01 2012 -0800
+++ b/tests/test-ancestor.py.out Tue Dec 18 12:47:20 2012 -0800
@@ -34,3 +34,13 @@
[0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12]
% ancestors of [13] and not of [10, 11, 12]
[8, 13]
+% lazy ancestor set for [], stoprev = 0, inclusive = False
+[]
+% lazy ancestor set for [11, 13], stoprev = 0, inclusive = False
+[7, 8, 3, 4, 1, 0]
+% lazy ancestor set for [11, 13], stoprev = 0, inclusive = True
+[11, 13, 7, 8, 3, 4, 1, 0]
+% lazy ancestor set for [11, 13], stoprev = 6, inclusive = False
+[7, 8]
+% lazy ancestor set for [11, 13], stoprev = 6, inclusive = True
+[11, 13, 7, 8]