lazyancestors: reuse __iter__ implementation in __contains__
authorMartin von Zweigbergk <martinvonz@google.com>
Fri, 07 Sep 2018 23:36:09 -0700
changeset 39499 77a2f6d805f2
parent 39498 b6a0e06b0f7d
child 39500 1fc39367eafd
lazyancestors: reuse __iter__ implementation in __contains__ There was a comment in the code that said "Trying to do both __iter__ and __contains__ using the same visit heap and seen set is complex enough that it slows down both. Keep them separate.". However, it seems easy and efficient to make __contains__ keep an iterator across calls. I couldn't measure any slowdown from `hg bundle --all` (which seem to call lazyancestors.__contains__ frequently). Differential Revision: https://phab.mercurial-scm.org/D4508
mercurial/ancestor.py
--- a/mercurial/ancestor.py	Sun Sep 09 23:16:55 2018 -0700
+++ b/mercurial/ancestor.py	Fri Sep 07 23:36:09 2018 -0700
@@ -308,18 +308,11 @@
         self._stoprev = stoprev
         self._inclusive = inclusive
 
-        # Initialize data structures for __contains__.
-        # For __contains__, we use a heap rather than a deque because
-        # (a) it minimizes the number of parentrevs calls made
-        # (b) it makes the loop termination condition obvious
-        # Python's heap is a min-heap. Multiply all values by -1 to convert it
-        # into a max-heap.
-        self._containsvisit = [-rev for rev in revs]
-        heapq.heapify(self._containsvisit)
-        if inclusive:
-            self._containsseen = set(revs)
-        else:
-            self._containsseen = set()
+        self._containsseen = set()
+        self._containsiter = _lazyancestorsiter(self._parentrevs,
+                                                self._initrevs,
+                                                self._stoprev,
+                                                self._inclusive)
 
     def __nonzero__(self):
         """False if the set is empty, True otherwise."""
@@ -348,35 +341,29 @@
 
     def __contains__(self, target):
         """Test whether target is an ancestor of self._initrevs."""
-        # Trying to do both __iter__ and __contains__ using the same visit
-        # heap and seen set is complex enough that it slows down both. Keep
-        # them separate.
         seen = self._containsseen
         if target in seen:
             return True
+        iter = self._containsiter
+        if iter is None:
+            # Iterator exhausted
+            return False
         # Only integer target is valid, but some callers expect 'None in self'
         # to be False. So we explicitly allow it.
         if target is None:
             return False
 
-        parentrevs = self._parentrevs
-        visit = self._containsvisit
-        stoprev = self._stoprev
-        heappop = heapq.heappop
-        heappush = heapq.heappush
         see = seen.add
-
-        targetseen = False
-
-        while visit and -visit[0] > target and not targetseen:
-            for parent in parentrevs(-heappop(visit)):
-                if parent < stoprev or parent in seen:
-                    continue
-                # We need to make sure we push all parents into the heap so
-                # that we leave it in a consistent state for future calls.
-                heappush(visit, -parent)
-                see(parent)
-                if parent == target:
-                    targetseen = True
-
-        return targetseen
+        try:
+            while True:
+                rev = next(iter)
+                see(rev)
+                if rev == target:
+                    return True
+                if rev < target:
+                    return False
+        except StopIteration:
+            # Set to None to indicate fast-path can be used next time, and to
+            # free up memory.
+            self._containsiter = None
+            return False