changeset 20074:5fc2ae1c631b

strip: add faster revlog strip computation The previous revlog strip computation would walk every rev in the revlog, from the bottom to the top. Since we're usually stripping only the top few revs of the revlog, this was needlessly expensive on large repos. The new algorithm walks the exact number of revs that will be stripped, thus making the operation not dependent on the number of revs in the repo. This makes amend on a large repo go from 8.7 seconds to 6 seconds.
author Durham Goode <durham@fb.com>
date Mon, 11 Nov 2013 16:42:49 -0800
parents eeba4eaf0716
children f8737bce736a
files mercurial/repair.py mercurial/revlog.py
diffstat 2 files changed, 44 insertions(+), 14 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/repair.py	Mon Nov 11 16:40:02 2013 -0800
+++ b/mercurial/repair.py	Mon Nov 11 16:42:49 2013 -0800
@@ -38,16 +38,8 @@
     """return the changesets which will be broken by the truncation"""
     s = set()
     def collectone(revlog):
-        linkgen = (revlog.linkrev(i) for i in revlog)
-        # find the truncation point of the revlog
-        for lrev in linkgen:
-            if lrev >= striprev:
-                break
-        # see if any revision after this point has a linkrev
-        # less than striprev (those will be broken by strip)
-        for lrev in linkgen:
-            if lrev < striprev:
-                s.add(lrev)
+        _, brokenset = revlog.getstrippoint(striprev)
+        s.update([revlog.linkrev(r) for r in brokenset])
 
     collectone(repo.manifest)
     for fname in files:
--- a/mercurial/revlog.py	Mon Nov 11 16:40:02 2013 -0800
+++ b/mercurial/revlog.py	Mon Nov 11 16:42:49 2013 -0800
@@ -1285,6 +1285,46 @@
 
         return content
 
+    def getstrippoint(self, minlink):
+        """find the minimum rev that must be stripped to strip the linkrev
+
+        Returns a tuple containing the minimum rev and a set of all revs that
+        have linkrevs that will be broken by this strip.
+        """
+        brokenrevs = set()
+        strippoint = len(self)
+
+        heads = {}
+        futurelargelinkrevs = set()
+        for head in self.headrevs():
+            headlinkrev = self.linkrev(head)
+            heads[head] = headlinkrev
+            if headlinkrev >= minlink:
+                futurelargelinkrevs.add(headlinkrev)
+
+        # This algorithm involves walking down the rev graph, starting at the
+        # heads. Since the revs are topologically sorted according to linkrev,
+        # once all head linkrevs are below the minlink, we know there are
+        # no more revs that could have a linkrev greater than minlink.
+        # So we can stop walking.
+        while futurelargelinkrevs:
+            strippoint -= 1
+            linkrev = heads.pop(strippoint)
+
+            if linkrev < minlink:
+                brokenrevs.add(strippoint)
+            else:
+                futurelargelinkrevs.remove(linkrev)
+
+            for p in self.parentrevs(strippoint):
+                if p != nullrev:
+                    plinkrev = self.linkrev(p)
+                    heads[p] = plinkrev
+                    if plinkrev >= minlink:
+                        futurelargelinkrevs.add(plinkrev)
+
+        return strippoint, brokenrevs
+
     def strip(self, minlink, transaction):
         """truncate the revlog on the first revision with a linkrev >= minlink
 
@@ -1302,10 +1342,8 @@
         if len(self) == 0:
             return
 
-        for rev in self:
-            if self.index[rev][4] >= minlink:
-                break
-        else:
+        rev, _ = self.getstrippoint(minlink)
+        if rev == len(self):
             return
 
         # first truncate the files on disk