revlog: extend addgroup() with callback for duplicates
The addgroup() interface currently doesn't allow the caller to keep
track of duplicated nodes except by looking at the returned node list.
Add an optional second callback for this purpose and change the return
type to a boolean. This allows follow-up changes to use more efficient
storage for the node list in places that are memory-sensitive.
Differential Revision: https://phab.mercurial-scm.org/D9231
--- a/hgext/sqlitestore.py Wed Oct 07 14:26:47 2020 +0530
+++ b/hgext/sqlitestore.py Sun Oct 18 22:18:02 2020 +0200
@@ -674,9 +674,10 @@
linkmapper,
transaction,
addrevisioncb=None,
+ duplicaterevisioncb=None,
maybemissingparents=False,
):
- nodes = []
+ empty = True
for node, p1, p2, linknode, deltabase, delta, wireflags in deltas:
storeflags = 0
@@ -715,8 +716,6 @@
linkrev = linkmapper(linknode)
- nodes.append(node)
-
if node in self._revisions:
# Possibly reset parents to make them proper.
entry = self._revisions[node]
@@ -741,6 +740,9 @@
(self._nodetorev[p1], entry.flags, entry.rid),
)
+ if duplicaterevisioncb:
+ duplicaterevisioncb(self, node)
+ empty = False
continue
if deltabase == nullid:
@@ -763,8 +765,9 @@
if addrevisioncb:
addrevisioncb(self, node)
+ empty = False
- return nodes
+ return not empty
def censorrevision(self, tr, censornode, tombstone=b''):
tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
--- a/mercurial/changegroup.py Wed Oct 07 14:26:47 2020 +0530
+++ b/mercurial/changegroup.py Sun Oct 18 22:18:02 2020 +0200
@@ -316,20 +316,29 @@
self.callback = progress.increment
efilesset = set()
+ cgnodes = []
def onchangelog(cl, node):
efilesset.update(cl.readfiles(node))
+ cgnodes.append(node)
+
+ def ondupchangelog(cl, node):
+ cgnodes.append(node)
self.changelogheader()
deltas = self.deltaiter()
- cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
- efiles = len(efilesset)
-
- if not cgnodes:
+ if not cl.addgroup(
+ deltas,
+ csmap,
+ trp,
+ addrevisioncb=onchangelog,
+ duplicaterevisioncb=ondupchangelog,
+ ):
repo.ui.develwarn(
b'applied empty changelog from changegroup',
config=b'warn-empty-changegroup',
)
+ efiles = len(efilesset)
clend = len(cl)
changesets = clend - clstart
progress.complete()
--- a/mercurial/exchangev2.py Wed Oct 07 14:26:47 2020 +0530
+++ b/mercurial/exchangev2.py Sun Oct 18 22:18:02 2020 +0200
@@ -343,16 +343,21 @@
)
manifestnodes = {}
+ added = []
def linkrev(node):
repo.ui.debug(b'add changeset %s\n' % short(node))
# Linkrev for changelog is always self.
return len(cl)
+ def ondupchangeset(cl, node):
+ added.append(node)
+
def onchangeset(cl, node):
progress.increment()
revision = cl.changelogrevision(node)
+ added.append(node)
# We need to preserve the mapping of changelog revision to node
# so we can set the linkrev accordingly when manifests are added.
@@ -403,8 +408,12 @@
0,
)
- added = cl.addgroup(
- iterrevisions(), linkrev, weakref.proxy(tr), addrevisioncb=onchangeset
+ cl.addgroup(
+ iterrevisions(),
+ linkrev,
+ weakref.proxy(tr),
+ addrevisioncb=onchangeset,
+ duplicaterevisioncb=ondupchangeset,
)
progress.complete()
@@ -516,12 +525,15 @@
# Chomp off header object.
next(objs)
- added.extend(
- rootmanifest.addgroup(
- iterrevisions(objs, progress),
- linkrevs.__getitem__,
- weakref.proxy(tr),
- )
+ def onchangeset(cl, node):
+ added.append(node)
+
+ rootmanifest.addgroup(
+ iterrevisions(objs, progress),
+ linkrevs.__getitem__,
+ weakref.proxy(tr),
+ addrevisioncb=onchangeset,
+ duplicaterevisioncb=onchangeset,
)
progress.complete()
--- a/mercurial/filelog.py Wed Oct 07 14:26:47 2020 +0530
+++ b/mercurial/filelog.py Sun Oct 18 22:18:02 2020 +0200
@@ -139,6 +139,7 @@
linkmapper,
transaction,
addrevisioncb=None,
+ duplicaterevisioncb=None,
maybemissingparents=False,
):
if maybemissingparents:
@@ -150,7 +151,11 @@
)
return self._revlog.addgroup(
- deltas, linkmapper, transaction, addrevisioncb=addrevisioncb
+ deltas,
+ linkmapper,
+ transaction,
+ addrevisioncb=addrevisioncb,
+ duplicaterevisioncb=duplicaterevisioncb,
)
def getstrippoint(self, minlink):
--- a/mercurial/interfaces/repository.py Wed Oct 07 14:26:47 2020 +0530
+++ b/mercurial/interfaces/repository.py Sun Oct 18 22:18:02 2020 +0200
@@ -756,6 +756,7 @@
linkmapper,
transaction,
addrevisioncb=None,
+ duplicaterevisioncb=None,
maybemissingparents=False,
):
"""Process a series of deltas for storage.
@@ -1247,7 +1248,13 @@
See the documentation for ``ifiledata`` for more.
"""
- def addgroup(deltas, linkmapper, transaction, addrevisioncb=None):
+ def addgroup(
+ deltas,
+ linkmapper,
+ transaction,
+ addrevisioncb=None,
+ duplicaterevisioncb=None,
+ ):
"""Process a series of deltas for storage.
See the documentation in ``ifilemutation`` for more.
--- a/mercurial/manifest.py Wed Oct 07 14:26:47 2020 +0530
+++ b/mercurial/manifest.py Sun Oct 18 22:18:02 2020 +0200
@@ -1832,9 +1832,20 @@
deltamode=deltamode,
)
- def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
+ def addgroup(
+ self,
+ deltas,
+ linkmapper,
+ transaction,
+ addrevisioncb=None,
+ duplicaterevisioncb=None,
+ ):
return self._revlog.addgroup(
- deltas, linkmapper, transaction, addrevisioncb=addrevisioncb
+ deltas,
+ linkmapper,
+ transaction,
+ addrevisioncb=addrevisioncb,
+ duplicaterevisioncb=duplicaterevisioncb,
)
def rawsize(self, rev):
--- a/mercurial/revlog.py Wed Oct 07 14:26:47 2020 +0530
+++ b/mercurial/revlog.py Sun Oct 18 22:18:02 2020 +0200
@@ -2368,7 +2368,14 @@
self._enforceinlinesize(transaction, ifh)
nodemaputil.setup_persistent_nodemap(transaction, self)
- def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
+ def addgroup(
+ self,
+ deltas,
+ linkmapper,
+ transaction,
+ addrevisioncb=None,
+ duplicaterevisioncb=None,
+ ):
"""
add a delta group
@@ -2383,8 +2390,6 @@
if self._writinghandles:
raise error.ProgrammingError(b'cannot nest addgroup() calls')
- nodes = []
-
r = len(self)
end = 0
if r:
@@ -2405,6 +2410,7 @@
ifh.flush()
self._writinghandles = (ifh, dfh)
+ empty = True
try:
deltacomputer = deltautil.deltacomputer(self)
@@ -2414,11 +2420,12 @@
link = linkmapper(linknode)
flags = flags or REVIDX_DEFAULT_FLAGS
- nodes.append(node)
-
if self.index.has_node(node):
+ # this can happen if two branches make the same change
self._nodeduplicatecallback(transaction, node)
- # this can happen if two branches make the same change
+ if duplicaterevisioncb:
+ duplicaterevisioncb(self, node)
+ empty = False
continue
for p in (p1, p2):
@@ -2472,6 +2479,7 @@
if addrevisioncb:
addrevisioncb(self, node)
+ empty = False
if not dfh and not self._inline:
# addrevision switched from inline to conventional
@@ -2486,8 +2494,7 @@
if dfh:
dfh.close()
ifh.close()
-
- return nodes
+ return not empty
def iscensored(self, rev):
"""Check if a file revision is censored."""
--- a/mercurial/testing/storage.py Wed Oct 07 14:26:47 2020 +0530
+++ b/mercurial/testing/storage.py Sun Oct 18 22:18:02 2020 +0200
@@ -1117,7 +1117,22 @@
return 0
with self._maketransactionfn() as tr:
- nodes = f.addgroup([], None, tr, addrevisioncb=cb)
+ nodes = []
+
+ def onchangeset(cl, node):
+ nodes.append(node)
+ cb(cl, node)
+
+ def ondupchangeset(cl, node):
+ nodes.append(node)
+
+ f.addgroup(
+ [],
+ None,
+ tr,
+ addrevisioncb=onchangeset,
+ duplicaterevisioncb=ondupchangeset,
+ )
self.assertEqual(nodes, [])
self.assertEqual(callbackargs, [])
@@ -1136,7 +1151,22 @@
]
with self._maketransactionfn() as tr:
- nodes = f.addgroup(deltas, linkmapper, tr, addrevisioncb=cb)
+ nodes = []
+
+ def onchangeset(cl, node):
+ nodes.append(node)
+ cb(cl, node)
+
+ def ondupchangeset(cl, node):
+ nodes.append(node)
+
+ f.addgroup(
+ deltas,
+ linkmapper,
+ tr,
+ addrevisioncb=onchangeset,
+ duplicaterevisioncb=ondupchangeset,
+ )
self.assertEqual(
nodes,
@@ -1175,7 +1205,19 @@
deltas.append((nodes[i], nullid, nullid, nullid, nullid, delta, 0))
with self._maketransactionfn() as tr:
- self.assertEqual(f.addgroup(deltas, lambda x: 0, tr), nodes)
+ newnodes = []
+
+ def onchangeset(cl, node):
+ newnodes.append(node)
+
+ f.addgroup(
+ deltas,
+ lambda x: 0,
+ tr,
+ addrevisioncb=onchangeset,
+ duplicaterevisioncb=onchangeset,
+ )
+ self.assertEqual(newnodes, nodes)
self.assertEqual(len(f), len(deltas))
self.assertEqual(list(f.revs()), [0, 1, 2])
--- a/mercurial/unionrepo.py Wed Oct 07 14:26:47 2020 +0530
+++ b/mercurial/unionrepo.py Sun Oct 18 22:18:02 2020 +0200
@@ -129,6 +129,7 @@
linkmapper,
transaction,
addrevisioncb=None,
+ duplicaterevisioncb=None,
maybemissingparents=False,
):
raise NotImplementedError
--- a/tests/simplestorerepo.py Wed Oct 07 14:26:47 2020 +0530
+++ b/tests/simplestorerepo.py Sun Oct 18 22:18:02 2020 +0200
@@ -532,6 +532,7 @@
linkmapper,
transaction,
addrevisioncb=None,
+ duplicaterevisioncb=None,
maybemissingparents=False,
):
if maybemissingparents:
@@ -539,7 +540,7 @@
_('simple store does not support missing parents ' 'write mode')
)
- nodes = []
+ empty = True
transaction.addbackup(self._indexpath)
@@ -547,9 +548,10 @@
linkrev = linkmapper(linknode)
flags = flags or revlog.REVIDX_DEFAULT_FLAGS
- nodes.append(node)
-
if node in self._indexbynode:
+ if duplicaterevisioncb:
+ duplicaterevisioncb(self, node)
+ empty = False
continue
# Need to resolve the fulltext from the delta base.
@@ -564,7 +566,8 @@
if addrevisioncb:
addrevisioncb(self, node)
- return nodes
+ empty = False
+ return not empty
def _headrevs(self):
# Assume all revisions are heads by default.