# HG changeset patch # User Durham Goode # Date 1384216969 28800 # Node ID 5fc2ae1c631b94ea83b2fe078c8acfea5aa55fbf # Parent eeba4eaf0716842f48beec32148b60ed17885500 strip: add faster revlog strip computation The previous revlog strip computation would walk every rev in the revlog, from the bottom to the top. Since we're usually stripping only the top few revs of the revlog, this was needlessly expensive on large repos. The new algorithm walks the exact number of revs that will be stripped, thus making the operation not dependent on the number of revs in the repo. This makes amend on a large repo go from 8.7 seconds to 6 seconds. diff -r eeba4eaf0716 -r 5fc2ae1c631b mercurial/repair.py --- a/mercurial/repair.py Mon Nov 11 16:40:02 2013 -0800 +++ b/mercurial/repair.py Mon Nov 11 16:42:49 2013 -0800 @@ -38,16 +38,8 @@ """return the changesets which will be broken by the truncation""" s = set() def collectone(revlog): - linkgen = (revlog.linkrev(i) for i in revlog) - # find the truncation point of the revlog - for lrev in linkgen: - if lrev >= striprev: - break - # see if any revision after this point has a linkrev - # less than striprev (those will be broken by strip) - for lrev in linkgen: - if lrev < striprev: - s.add(lrev) + _, brokenset = revlog.getstrippoint(striprev) + s.update([revlog.linkrev(r) for r in brokenset]) collectone(repo.manifest) for fname in files: diff -r eeba4eaf0716 -r 5fc2ae1c631b mercurial/revlog.py --- a/mercurial/revlog.py Mon Nov 11 16:40:02 2013 -0800 +++ b/mercurial/revlog.py Mon Nov 11 16:42:49 2013 -0800 @@ -1285,6 +1285,46 @@ return content + def getstrippoint(self, minlink): + """find the minimum rev that must be stripped to strip the linkrev + + Returns a tuple containing the minimum rev and a set of all revs that + have linkrevs that will be broken by this strip. + """ + brokenrevs = set() + strippoint = len(self) + + heads = {} + futurelargelinkrevs = set() + for head in self.headrevs(): + headlinkrev = self.linkrev(head) + heads[head] = headlinkrev + if headlinkrev >= minlink: + futurelargelinkrevs.add(headlinkrev) + + # This algorithm involves walking down the rev graph, starting at the + # heads. Since the revs are topologically sorted according to linkrev, + # once all head linkrevs are below the minlink, we know there are + # no more revs that could have a linkrev greater than minlink. + # So we can stop walking. + while futurelargelinkrevs: + strippoint -= 1 + linkrev = heads.pop(strippoint) + + if linkrev < minlink: + brokenrevs.add(strippoint) + else: + futurelargelinkrevs.remove(linkrev) + + for p in self.parentrevs(strippoint): + if p != nullrev: + plinkrev = self.linkrev(p) + heads[p] = plinkrev + if plinkrev >= minlink: + futurelargelinkrevs.add(plinkrev) + + return strippoint, brokenrevs + def strip(self, minlink, transaction): """truncate the revlog on the first revision with a linkrev >= minlink @@ -1302,10 +1342,8 @@ if len(self) == 0: return - for rev in self: - if self.index[rev][4] >= minlink: - break - else: + rev, _ = self.getstrippoint(minlink) + if rev == len(self): return # first truncate the files on disk