revlog: extend addgroup() with callback for duplicates
author Joerg Sonnenberger <joerg@bec.de>
Sun, 18 Oct 2020 22:18:02 +0200
changeset 45788 a5206e71c536
parent 45787 225e513c444e
child 45789 09735cde6275
revlog: extend addgroup() with callback for duplicates

The addgroup() interface currently doesn't allow the caller to keep track of duplicated nodes except by looking at the returned node list. Add an optional second callback for this purpose and change the return type to a boolean (true when the group contained at least one revision, whether newly added or a duplicate). This allows follow-up changes to use more efficient storage for the node list in places that are memory-sensitive.

Differential Revision: https://phab.mercurial-scm.org/D9231
hgext/sqlitestore.py
mercurial/changegroup.py
mercurial/exchangev2.py
mercurial/filelog.py
mercurial/interfaces/repository.py
mercurial/manifest.py
mercurial/revlog.py
mercurial/testing/storage.py
mercurial/unionrepo.py
tests/simplestorerepo.py
--- a/hgext/sqlitestore.py	Wed Oct 07 14:26:47 2020 +0530
+++ b/hgext/sqlitestore.py	Sun Oct 18 22:18:02 2020 +0200
@@ -674,9 +674,10 @@
         linkmapper,
         transaction,
         addrevisioncb=None,
+        duplicaterevisioncb=None,
         maybemissingparents=False,
     ):
-        nodes = []
+        empty = True
 
         for node, p1, p2, linknode, deltabase, delta, wireflags in deltas:
             storeflags = 0
@@ -715,8 +716,6 @@
 
             linkrev = linkmapper(linknode)
 
-            nodes.append(node)
-
             if node in self._revisions:
                 # Possibly reset parents to make them proper.
                 entry = self._revisions[node]
@@ -741,6 +740,9 @@
                         (self._nodetorev[p1], entry.flags, entry.rid),
                     )
 
+                if duplicaterevisioncb:
+                    duplicaterevisioncb(self, node)
+                empty = False
                 continue
 
             if deltabase == nullid:
@@ -763,8 +765,9 @@
 
             if addrevisioncb:
                 addrevisioncb(self, node)
+            empty = False
 
-        return nodes
+        return not empty
 
     def censorrevision(self, tr, censornode, tombstone=b''):
         tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
--- a/mercurial/changegroup.py	Wed Oct 07 14:26:47 2020 +0530
+++ b/mercurial/changegroup.py	Sun Oct 18 22:18:02 2020 +0200
@@ -316,20 +316,29 @@
             self.callback = progress.increment
 
             efilesset = set()
+            cgnodes = []
 
             def onchangelog(cl, node):
                 efilesset.update(cl.readfiles(node))
+                cgnodes.append(node)
+
+            def ondupchangelog(cl, node):
+                cgnodes.append(node)
 
             self.changelogheader()
             deltas = self.deltaiter()
-            cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
-            efiles = len(efilesset)
-
-            if not cgnodes:
+            if not cl.addgroup(
+                deltas,
+                csmap,
+                trp,
+                addrevisioncb=onchangelog,
+                duplicaterevisioncb=ondupchangelog,
+            ):
                 repo.ui.develwarn(
                     b'applied empty changelog from changegroup',
                     config=b'warn-empty-changegroup',
                 )
+            efiles = len(efilesset)
             clend = len(cl)
             changesets = clend - clstart
             progress.complete()
--- a/mercurial/exchangev2.py	Wed Oct 07 14:26:47 2020 +0530
+++ b/mercurial/exchangev2.py	Sun Oct 18 22:18:02 2020 +0200
@@ -343,16 +343,21 @@
     )
 
     manifestnodes = {}
+    added = []
 
     def linkrev(node):
         repo.ui.debug(b'add changeset %s\n' % short(node))
         # Linkrev for changelog is always self.
         return len(cl)
 
+    def ondupchangeset(cl, node):
+        added.append(node)
+
     def onchangeset(cl, node):
         progress.increment()
 
         revision = cl.changelogrevision(node)
+        added.append(node)
 
         # We need to preserve the mapping of changelog revision to node
         # so we can set the linkrev accordingly when manifests are added.
@@ -403,8 +408,12 @@
                 0,
             )
 
-    added = cl.addgroup(
-        iterrevisions(), linkrev, weakref.proxy(tr), addrevisioncb=onchangeset
+    cl.addgroup(
+        iterrevisions(),
+        linkrev,
+        weakref.proxy(tr),
+        addrevisioncb=onchangeset,
+        duplicaterevisioncb=ondupchangeset,
     )
 
     progress.complete()
@@ -516,12 +525,15 @@
             # Chomp off header object.
             next(objs)
 
-            added.extend(
-                rootmanifest.addgroup(
-                    iterrevisions(objs, progress),
-                    linkrevs.__getitem__,
-                    weakref.proxy(tr),
-                )
+            def onchangeset(cl, node):
+                added.append(node)
+
+            rootmanifest.addgroup(
+                iterrevisions(objs, progress),
+                linkrevs.__getitem__,
+                weakref.proxy(tr),
+                addrevisioncb=onchangeset,
+                duplicaterevisioncb=onchangeset,
             )
 
     progress.complete()
--- a/mercurial/filelog.py	Wed Oct 07 14:26:47 2020 +0530
+++ b/mercurial/filelog.py	Sun Oct 18 22:18:02 2020 +0200
@@ -139,6 +139,7 @@
         linkmapper,
         transaction,
         addrevisioncb=None,
+        duplicaterevisioncb=None,
         maybemissingparents=False,
     ):
         if maybemissingparents:
@@ -150,7 +151,11 @@
             )
 
         return self._revlog.addgroup(
-            deltas, linkmapper, transaction, addrevisioncb=addrevisioncb
+            deltas,
+            linkmapper,
+            transaction,
+            addrevisioncb=addrevisioncb,
+            duplicaterevisioncb=duplicaterevisioncb,
         )
 
     def getstrippoint(self, minlink):
--- a/mercurial/interfaces/repository.py	Wed Oct 07 14:26:47 2020 +0530
+++ b/mercurial/interfaces/repository.py	Sun Oct 18 22:18:02 2020 +0200
@@ -756,6 +756,7 @@
         linkmapper,
         transaction,
         addrevisioncb=None,
+        duplicaterevisioncb=None,
         maybemissingparents=False,
     ):
         """Process a series of deltas for storage.
@@ -1247,7 +1248,13 @@
         See the documentation for ``ifiledata`` for more.
         """
 
-    def addgroup(deltas, linkmapper, transaction, addrevisioncb=None):
+    def addgroup(
+        deltas,
+        linkmapper,
+        transaction,
+        addrevisioncb=None,
+        duplicaterevisioncb=None,
+    ):
         """Process a series of deltas for storage.
 
         See the documentation in ``ifilemutation`` for more.
--- a/mercurial/manifest.py	Wed Oct 07 14:26:47 2020 +0530
+++ b/mercurial/manifest.py	Sun Oct 18 22:18:02 2020 +0200
@@ -1832,9 +1832,20 @@
             deltamode=deltamode,
         )
 
-    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
+    def addgroup(
+        self,
+        deltas,
+        linkmapper,
+        transaction,
+        addrevisioncb=None,
+        duplicaterevisioncb=None,
+    ):
         return self._revlog.addgroup(
-            deltas, linkmapper, transaction, addrevisioncb=addrevisioncb
+            deltas,
+            linkmapper,
+            transaction,
+            addrevisioncb=addrevisioncb,
+            duplicaterevisioncb=duplicaterevisioncb,
         )
 
     def rawsize(self, rev):
--- a/mercurial/revlog.py	Wed Oct 07 14:26:47 2020 +0530
+++ b/mercurial/revlog.py	Sun Oct 18 22:18:02 2020 +0200
@@ -2368,7 +2368,14 @@
             self._enforceinlinesize(transaction, ifh)
         nodemaputil.setup_persistent_nodemap(transaction, self)
 
-    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
+    def addgroup(
+        self,
+        deltas,
+        linkmapper,
+        transaction,
+        addrevisioncb=None,
+        duplicaterevisioncb=None,
+    ):
         """
         add a delta group
 
@@ -2383,8 +2390,6 @@
         if self._writinghandles:
             raise error.ProgrammingError(b'cannot nest addgroup() calls')
 
-        nodes = []
-
         r = len(self)
         end = 0
         if r:
@@ -2405,6 +2410,7 @@
             ifh.flush()
 
         self._writinghandles = (ifh, dfh)
+        empty = True
 
         try:
             deltacomputer = deltautil.deltacomputer(self)
@@ -2414,11 +2420,12 @@
                 link = linkmapper(linknode)
                 flags = flags or REVIDX_DEFAULT_FLAGS
 
-                nodes.append(node)
-
                 if self.index.has_node(node):
+                    # this can happen if two branches make the same change
                     self._nodeduplicatecallback(transaction, node)
-                    # this can happen if two branches make the same change
+                    if duplicaterevisioncb:
+                        duplicaterevisioncb(self, node)
+                    empty = False
                     continue
 
                 for p in (p1, p2):
@@ -2472,6 +2479,7 @@
 
                 if addrevisioncb:
                     addrevisioncb(self, node)
+                empty = False
 
                 if not dfh and not self._inline:
                     # addrevision switched from inline to conventional
@@ -2486,8 +2494,7 @@
             if dfh:
                 dfh.close()
             ifh.close()
-
-        return nodes
+        return not empty
 
     def iscensored(self, rev):
         """Check if a file revision is censored."""
--- a/mercurial/testing/storage.py	Wed Oct 07 14:26:47 2020 +0530
+++ b/mercurial/testing/storage.py	Sun Oct 18 22:18:02 2020 +0200
@@ -1117,7 +1117,22 @@
             return 0
 
         with self._maketransactionfn() as tr:
-            nodes = f.addgroup([], None, tr, addrevisioncb=cb)
+            nodes = []
+
+            def onchangeset(cl, node):
+                nodes.append(node)
+                cb(cl, node)
+
+            def ondupchangeset(cl, node):
+                nodes.append(node)
+
+            f.addgroup(
+                [],
+                None,
+                tr,
+                addrevisioncb=onchangeset,
+                duplicaterevisioncb=ondupchangeset,
+            )
 
         self.assertEqual(nodes, [])
         self.assertEqual(callbackargs, [])
@@ -1136,7 +1151,22 @@
         ]
 
         with self._maketransactionfn() as tr:
-            nodes = f.addgroup(deltas, linkmapper, tr, addrevisioncb=cb)
+            nodes = []
+
+            def onchangeset(cl, node):
+                nodes.append(node)
+                cb(cl, node)
+
+            def ondupchangeset(cl, node):
+                nodes.append(node)
+
+            f.addgroup(
+                deltas,
+                linkmapper,
+                tr,
+                addrevisioncb=onchangeset,
+                duplicaterevisioncb=ondupchangeset,
+            )
 
         self.assertEqual(
             nodes,
@@ -1175,7 +1205,19 @@
             deltas.append((nodes[i], nullid, nullid, nullid, nullid, delta, 0))
 
         with self._maketransactionfn() as tr:
-            self.assertEqual(f.addgroup(deltas, lambda x: 0, tr), nodes)
+            newnodes = []
+
+            def onchangeset(cl, node):
+                newnodes.append(node)
+
+            f.addgroup(
+                deltas,
+                lambda x: 0,
+                tr,
+                addrevisioncb=onchangeset,
+                duplicaterevisioncb=onchangeset,
+            )
+            self.assertEqual(newnodes, nodes)
 
         self.assertEqual(len(f), len(deltas))
         self.assertEqual(list(f.revs()), [0, 1, 2])
--- a/mercurial/unionrepo.py	Wed Oct 07 14:26:47 2020 +0530
+++ b/mercurial/unionrepo.py	Sun Oct 18 22:18:02 2020 +0200
@@ -129,6 +129,7 @@
         linkmapper,
         transaction,
         addrevisioncb=None,
+        duplicaterevisioncb=None,
         maybemissingparents=False,
     ):
         raise NotImplementedError
--- a/tests/simplestorerepo.py	Wed Oct 07 14:26:47 2020 +0530
+++ b/tests/simplestorerepo.py	Sun Oct 18 22:18:02 2020 +0200
@@ -532,6 +532,7 @@
         linkmapper,
         transaction,
         addrevisioncb=None,
+        duplicaterevisioncb=None,
         maybemissingparents=False,
     ):
         if maybemissingparents:
@@ -539,7 +540,7 @@
                 _('simple store does not support missing parents ' 'write mode')
             )
 
-        nodes = []
+        empty = True
 
         transaction.addbackup(self._indexpath)
 
@@ -547,9 +548,10 @@
             linkrev = linkmapper(linknode)
             flags = flags or revlog.REVIDX_DEFAULT_FLAGS
 
-            nodes.append(node)
-
             if node in self._indexbynode:
+                if duplicaterevisioncb:
+                    duplicaterevisioncb(self, node)
+                empty = False
                 continue
 
             # Need to resolve the fulltext from the delta base.
@@ -564,7 +566,8 @@
 
             if addrevisioncb:
                 addrevisioncb(self, node)
-        return nodes
+            empty = False
+        return not empty
 
     def _headrevs(self):
         # Assume all revisions are heads by default.