unbundle: faster computation of changed heads
authorArseniy Alekseyev <aalekseyev@janestreet.com>
Thu, 21 Dec 2023 17:38:04 +0000
changeset 51395 a0d88b021a98
parent 51394 3a7ef1398385
child 51396 3f37d80d3ab4
unbundle: faster computation of changed heads To compute the set of changed heads it's sufficient to look at the recent commits, instead of looking at all heads currently in existence.
mercurial/changegroup.py
mercurial/dagop.py
mercurial/revlog.py
--- a/mercurial/changegroup.py	Wed Feb 21 11:53:30 2024 +0100
+++ b/mercurial/changegroup.py	Thu Dec 21 17:38:04 2023 +0000
@@ -518,7 +518,7 @@
             # will not see an inconsistent view
             cl = repo.changelog
             cl.delayupdate(tr)
-            oldheads = set(cl.heads())
+            oldrevcount = len(cl)
 
             trp = weakref.proxy(tr)
             # pull off the changeset group
@@ -673,12 +673,12 @@
                 tr.changes[b'changegroup-count-files'] += newfiles
 
             deltaheads = 0
-            if oldheads:
-                heads = cl.heads()
-                deltaheads += len(heads) - len(oldheads)
-                for h in heads:
-                    if h not in oldheads and repo[h].closesbranch():
-                        deltaheads -= 1
+            newrevcount = len(cl)
+            heads_removed, heads_added = cl.diffheads(oldrevcount, newrevcount)
+            deltaheads += len(heads_added) - len(heads_removed)
+            for h in heads_added:
+                if repo[h].closesbranch():
+                    deltaheads -= 1
 
             # see previous comment about checking ui.quiet
             if not repo.ui.quiet:
@@ -746,12 +746,11 @@
                         del args[b'node_last']
                         repo.hook(b"incoming", **pycompat.strkwargs(args))
 
-                    newheads = [h for h in repo.heads() if h not in oldheads]
                     repo.ui.log(
                         b"incoming",
                         b"%d incoming changes - new heads: %s\n",
                         len(added),
-                        b', '.join([hex(c[:6]) for c in newheads]),
+                        b', '.join([hex(c[:6]) for c in heads_added]),
                     )
 
                 tr.addpostclose(
@@ -1735,7 +1734,6 @@
                     x in self._fullclnodes
                     or cl.rev(x) in self._precomputedellipsis
                 ):
-
                     manifestnode = c.manifest
                     # Record the first changeset introducing this manifest
                     # version.
@@ -1994,6 +1992,7 @@
             clrevtolocalrev.clear()
 
             linkrevnodes = linknodes(filerevlog, fname)
+
             # Lookup for filenodes, we collected the linkrev nodes above in the
             # fastpath case and with lookupmf in the slowpath case.
             def lookupfilelog(x):
--- a/mercurial/dagop.py	Wed Feb 21 11:53:30 2024 +0100
+++ b/mercurial/dagop.py	Thu Dec 21 17:38:04 2023 +0000
@@ -1035,6 +1035,37 @@
     return headrevs
 
 
+def headrevsdiff(parentsfn, start, stop):
+    """Compute how the set of heads changed between
+    revisions `start-1` and `stop-1`.
+    """
+    parents = set()
+
+    heads_added = set()
+    heads_removed = set()
+
+    for rev in range(stop - 1, start - 1, -1):
+        if rev in parents:
+            parents.remove(rev)
+        else:
+            heads_added.add(rev)
+        for p in parentsfn(rev):
+            parents.add(p)
+
+    # now `parents` is the collection of candidate removed heads
+    rev = start - 1
+    while parents:
+        if rev in parents:
+            heads_removed.add(rev)
+            parents.remove(rev)
+
+        for p in parentsfn(rev):
+            parents.discard(p)
+        rev = rev - 1
+
+    return (heads_removed, heads_added)
+
+
 def headrevssubset(revsfn, parentrevsfn, startrev=None, stoprevs=None):
     """Returns the set of all revs that have no children with control.
 
--- a/mercurial/revlog.py	Wed Feb 21 11:53:30 2024 +0100
+++ b/mercurial/revlog.py	Thu Dec 21 17:38:04 2023 +0000
@@ -132,6 +132,7 @@
 # max size of inline data embedded into a revlog
 _maxinline = 131072
 
+
 # Flag processors for REVIDX_ELLIPSIS.
 def ellipsisreadprocessor(rl, text):
     return text, False
@@ -1577,7 +1578,6 @@
             ]
 
     def _loadindex(self, docket=None):
-
         new_header, mmapindexthreshold, force_nodemap = self._init_opts()
 
         if self.postfix is not None:
@@ -2345,6 +2345,12 @@
             return rustdagop.headrevs(self.index, revs)
         return dagop.headrevs(revs, self._uncheckedparentrevs)
 
+    def headrevsdiff(self, start, stop):
+        try:
+            return self.index.headrevsdiff(start, stop)
+        except AttributeError:
+            return dagop.headrevsdiff(self._uncheckedparentrevs, start, stop)
+
     def computephases(self, roots):
         return self.index.computephasesmapsets(roots)
 
@@ -2392,6 +2398,12 @@
 
         return [self.node(rev) for rev in revs]
 
+    def diffheads(self, start, stop):
+        """return the nodes that make up the difference between
+        heads of revs before `start` and heads of revs before `stop`"""
+        removed, added = self.headrevsdiff(start, stop)
+        return [self.node(r) for r in removed], [self.node(r) for r in added]
+
     def children(self, node):
         """find the children of a given node"""
         c = []