sidedata: replace sidedata upgrade mechanism with the new one
authorRaphaël Gomès <rgomes@octobus.net>
Mon, 19 Apr 2021 11:22:24 +0200
changeset 47084 27f1191b1305
parent 47083 81eb7091c494
child 47085 3aab2330b7d3
sidedata: replace sidedata upgrade mechanism with the new one Note: this is split into a separate change (like some other patches in this series) because it's not easy to have all patches work 100% and this seemed easier for reviewers. When cloning or upgrading a repo, we may need to compute (or remove) sidedata. This is the same mechanism that is used in exchange, so we re-use the new system to simplify the code and fix the remaining issues (correctly dropping flags and handling partial removal, etc.). This also highlighted an issue with `test-copies-in-changeset.t` that kept sidedata categories that are not relevant anymore. They should probably be dropped entirely, but that would be for another patch. Differential Revision: https://phab.mercurial-scm.org/D10359
mercurial/metadata.py
mercurial/revlog.py
mercurial/upgrade_utils/engine.py
tests/test-copies-in-changeset.t
tests/testlib/ext-sidedata.py
--- a/mercurial/metadata.py	Mon Apr 19 11:22:21 2021 +0200
+++ b/mercurial/metadata.py	Mon Apr 19 11:22:24 2021 +0200
@@ -14,7 +14,6 @@
 from .node import nullrev
 from . import (
     error,
-    pycompat,
     requirements as requirementsmod,
     util,
 )
@@ -837,14 +836,6 @@
     )
 
 
-def getsidedataadder(srcrepo, destrepo):
-    use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
-    if pycompat.iswindows or not use_w:
-        return _get_simple_sidedata_adder(srcrepo, destrepo)
-    else:
-        return _get_worker_sidedata_adder(srcrepo, destrepo)
-
-
 def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):
     """The function used by worker precomputing sidedata
 
@@ -911,57 +902,21 @@
     # received, when shelve 43 for later use.
     staging = {}
 
-    def sidedata_companion(revlog, rev):
-        data = {}, False
-        if util.safehasattr(revlog, b'filteredrevs'):  # this is a changelog
-            # Is the data previously shelved ?
-            data = staging.pop(rev, None)
-            if data is None:
-                # look at the queued result until we find the one we are lookig
-                # for (shelve the other ones)
+    def sidedata_companion(repo, revlog, rev, old_sidedata):
+        # Is the data previously shelved ?
+        data = staging.pop(rev, None)
+        if data is None:
+            # look at the queued result until we find the one we are lookig
+            # for (shelve the other ones)
+            r, data = sidedataq.get()
+            while r != rev:
+                staging[r] = data
                 r, data = sidedataq.get()
-                while r != rev:
-                    staging[r] = data
-                    r, data = sidedataq.get()
-            tokens.release()
+        tokens.release()
         sidedata, has_copies_info = data
         new_flag = 0
         if has_copies_info:
             new_flag = sidedataflag.REVIDX_HASCOPIESINFO
-        return False, (), sidedata, new_flag, 0
+        return sidedata, (new_flag, 0)
 
     return sidedata_companion
-
-
-def _get_simple_sidedata_adder(srcrepo, destrepo):
-    """The simple version of the sidedata computation
-
-    It just compute it in the same thread on request"""
-
-    def sidedatacompanion(revlog, rev):
-        sidedata, has_copies_info = {}, False
-        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
-            sidedata, has_copies_info = _getsidedata(srcrepo, rev)
-        new_flag = 0
-        if has_copies_info:
-            new_flag = sidedataflag.REVIDX_HASCOPIESINFO
-
-        return False, (), sidedata, new_flag, 0
-
-    return sidedatacompanion
-
-
-def getsidedataremover(srcrepo, destrepo):
-    def sidedatacompanion(revlog, rev):
-        f = ()
-        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
-            if revlog.flags(rev) & sidedataflag.REVIDX_SIDEDATA:
-                f = (
-                    sidedatamod.SD_P1COPIES,
-                    sidedatamod.SD_P2COPIES,
-                    sidedatamod.SD_FILESADDED,
-                    sidedatamod.SD_FILESREMOVED,
-                )
-        return False, f, {}, 0, sidedataflag.REVIDX_HASCOPIESINFO
-
-    return sidedatacompanion
--- a/mercurial/revlog.py	Mon Apr 19 11:22:21 2021 +0200
+++ b/mercurial/revlog.py	Mon Apr 19 11:22:24 2021 +0200
@@ -85,6 +85,7 @@
 
 # blanked usage of all the name to prevent pyflakes constraints
 # We need these name available in the module for extensions.
+
 REVLOGV0
 REVLOGV1
 REVLOGV2
@@ -1997,7 +1998,7 @@
 
         if sidedata is None:
             sidedata = {}
-        elif not self.hassidedata:
+        elif sidedata and not self.hassidedata:
             raise error.ProgrammingError(
                 _(b"trying to add sidedata to a revlog who don't support them")
             )
@@ -2658,7 +2659,7 @@
         addrevisioncb=None,
         deltareuse=DELTAREUSESAMEREVS,
         forcedeltabothparents=None,
-        sidedatacompanion=None,
+        sidedata_helpers=None,
     ):
         """Copy this revlog to another, possibly with format changes.
 
@@ -2701,21 +2702,7 @@
         argument controls whether to force compute deltas against both parents
         for merges. By default, the current default is used.
 
-        If not None, the `sidedatacompanion` is callable that accept two
-        arguments:
-
-            (srcrevlog, rev)
-
-        and return a quintet that control changes to sidedata content from the
-        old revision to the new clone result:
-
-            (dropall, filterout, update, new_flags, dropped_flags)
-
-        * if `dropall` is True, all sidedata should be dropped
-        * `filterout` is a set of sidedata keys that should be dropped
-        * `update` is a mapping of additionnal/new key -> value
-        * new_flags is a bitfields of new flags that the revision should get
-        * dropped_flags is a bitfields of new flags that the revision shoudl not longer have
+        See `storageutil.emitrevisions` for the doc on `sidedata_helpers`.
         """
         if deltareuse not in self.DELTAREUSEALL:
             raise ValueError(
@@ -2755,7 +2742,7 @@
                 addrevisioncb,
                 deltareuse,
                 forcedeltabothparents,
-                sidedatacompanion,
+                sidedata_helpers,
             )
 
         finally:
@@ -2770,7 +2757,7 @@
         addrevisioncb,
         deltareuse,
         forcedeltabothparents,
-        sidedatacompanion,
+        sidedata_helpers,
     ):
         """perform the core duty of `revlog.clone` after parameter processing"""
         deltacomputer = deltautil.deltacomputer(destrevlog)
@@ -2786,31 +2773,18 @@
             p2 = index[entry[6]][7]
             node = entry[7]
 
-            sidedataactions = (False, [], {}, 0, 0)
-            if sidedatacompanion is not None:
-                sidedataactions = sidedatacompanion(self, rev)
-
             # (Possibly) reuse the delta from the revlog if allowed and
             # the revlog chunk is a delta.
             cachedelta = None
             rawtext = None
-            if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
-                dropall = sidedataactions[0]
-                filterout = sidedataactions[1]
-                update = sidedataactions[2]
-                new_flags = sidedataactions[3]
-                dropped_flags = sidedataactions[4]
+            if deltareuse == self.DELTAREUSEFULLADD:
                 text, sidedata = self._revisiondata(rev)
-                if dropall:
-                    sidedata = {}
-                for key in filterout:
-                    sidedata.pop(key, None)
-                sidedata.update(update)
-                if not sidedata:
-                    sidedata = None
-
-                flags |= new_flags
-                flags &= ~dropped_flags
+
+                if sidedata_helpers is not None:
+                    (sidedata, new_flags) = storageutil.run_sidedata_helpers(
+                        self, sidedata_helpers, sidedata, rev
+                    )
+                    flags = flags | new_flags[0] & ~new_flags[1]
 
                 destrevlog.addrevision(
                     text,
@@ -2830,8 +2804,17 @@
                     if dp != nullrev:
                         cachedelta = (dp, bytes(self._chunk(rev)))
 
+                sidedata = None
                 if not cachedelta:
-                    rawtext = self.rawdata(rev)
+                    rawtext, sidedata = self._revisiondata(rev)
+                if sidedata is None:
+                    sidedata = self.sidedata(rev)
+
+                if sidedata_helpers is not None:
+                    (sidedata, new_flags) = storageutil.run_sidedata_helpers(
+                        self, sidedata_helpers, sidedata, rev
+                    )
+                    flags = flags | new_flags[0] & ~new_flags[1]
 
                 ifh = destrevlog.opener(
                     destrevlog.indexfile, b'a+', checkambig=False
@@ -2852,6 +2835,7 @@
                         ifh,
                         dfh,
                         deltacomputer=deltacomputer,
+                        sidedata=sidedata,
                     )
                 finally:
                     if dfh:
--- a/mercurial/upgrade_utils/engine.py	Mon Apr 19 11:22:21 2021 +0200
+++ b/mercurial/upgrade_utils/engine.py	Mon Apr 19 11:22:24 2021 +0200
@@ -12,6 +12,7 @@
 from ..i18n import _
 from ..pycompat import getattr
 from .. import (
+    changegroup,
     changelog,
     error,
     filelog,
@@ -19,13 +20,32 @@
     metadata,
     pycompat,
     requirements,
-    revlog,
     scmutil,
     store,
     util,
     vfs as vfsmod,
 )
-from ..revlogutils import nodemap
+from ..revlogutils import (
+    constants as revlogconst,
+    flagutil,
+    nodemap,
+    sidedata as sidedatamod,
+)
+
+
+def get_sidedata_helpers(srcrepo, dstrepo):
+    use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
+    sequential = pycompat.iswindows or not use_w
+    if not sequential:
+        srcrepo.register_sidedata_computer(
+            revlogconst.KIND_CHANGELOG,
+            sidedatamod.SD_FILES,
+            (sidedatamod.SD_FILES,),
+            metadata._get_worker_sidedata_adder(srcrepo, dstrepo),
+            flagutil.REVIDX_HASCOPIESINFO,
+            replace=True,
+        )
+    return changegroup.get_sidedata_helpers(srcrepo, dstrepo._wanted_sidedata)
 
 
 def _revlogfrompath(repo, rl_type, path):
@@ -89,25 +109,6 @@
 )
 
 
-def getsidedatacompanion(srcrepo, dstrepo):
-    sidedatacompanion = None
-    removedreqs = srcrepo.requirements - dstrepo.requirements
-    addedreqs = dstrepo.requirements - srcrepo.requirements
-    if requirements.SIDEDATA_REQUIREMENT in removedreqs:
-
-        def sidedatacompanion(rl, rev):
-            rl = getattr(rl, '_revlog', rl)
-            if rl.flags(rev) & revlog.REVIDX_SIDEDATA:
-                return True, (), {}, 0, 0
-            return False, (), {}, 0, 0
-
-    elif requirements.COPIESSDC_REQUIREMENT in addedreqs:
-        sidedatacompanion = metadata.getsidedataadder(srcrepo, dstrepo)
-    elif requirements.COPIESSDC_REQUIREMENT in removedreqs:
-        sidedatacompanion = metadata.getsidedataremover(srcrepo, dstrepo)
-    return sidedatacompanion
-
-
 def matchrevlog(revlogfilter, rl_type):
     """check if a revlog is selected for cloning.
 
@@ -131,7 +132,7 @@
     rl_type,
     unencoded,
     upgrade_op,
-    sidedatacompanion,
+    sidedata_helpers,
     oncopiedrevision,
 ):
     """ returns the new revlog object created"""
@@ -147,7 +148,7 @@
             addrevisioncb=oncopiedrevision,
             deltareuse=upgrade_op.delta_reuse_mode,
             forcedeltabothparents=upgrade_op.force_re_delta_both_parents,
-            sidedatacompanion=sidedatacompanion,
+            sidedata_helpers=sidedata_helpers,
         )
     else:
         msg = _(b'blindly copying %s containing %i revisions\n')
@@ -257,7 +258,7 @@
     def oncopiedrevision(rl, rev, node):
         progress.increment()
 
-    sidedatacompanion = getsidedatacompanion(srcrepo, dstrepo)
+    sidedata_helpers = get_sidedata_helpers(srcrepo, dstrepo)
 
     # Migrating filelogs
     ui.status(
@@ -282,7 +283,7 @@
             rl_type,
             unencoded,
             upgrade_op,
-            sidedatacompanion,
+            sidedata_helpers,
             oncopiedrevision,
         )
         info = newrl.storageinfo(storedsize=True)
@@ -322,7 +323,7 @@
             rl_type,
             unencoded,
             upgrade_op,
-            sidedatacompanion,
+            sidedata_helpers,
             oncopiedrevision,
         )
         info = newrl.storageinfo(storedsize=True)
@@ -361,7 +362,7 @@
             rl_type,
             unencoded,
             upgrade_op,
-            sidedatacompanion,
+            sidedata_helpers,
             oncopiedrevision,
         )
         info = newrl.storageinfo(storedsize=True)
--- a/tests/test-copies-in-changeset.t	Mon Apr 19 11:22:21 2021 +0200
+++ b/tests/test-copies-in-changeset.t	Mon Apr 19 11:22:24 2021 +0200
@@ -419,7 +419,7 @@
 Test upgrading/downgrading to sidedata storage
 ==============================================
 
-downgrading (keeping some sidedata)
+downgrading
 
   $ hg debugformat -v
   format-variant     repo config default
@@ -465,11 +465,7 @@
   compression:        zstd   zstd    zstd (zstd !)
   compression-level:  default default default
   $ hg debugsidedata -c -- 0
-  1 sidedata entries
-   entry-0014 size 14
   $ hg debugsidedata -c -- 1
-  1 sidedata entries
-   entry-0014 size 14
   $ hg debugsidedata -m -- 0
 
 upgrading
--- a/tests/testlib/ext-sidedata.py	Mon Apr 19 11:22:21 2021 +0200
+++ b/tests/testlib/ext-sidedata.py	Mon Apr 19 11:22:24 2021 +0200
@@ -12,6 +12,7 @@
 
 from mercurial.node import nullrev
 from mercurial import (
+    changegroup,
     extensions,
     requirements,
     revlog,
@@ -19,6 +20,7 @@
 
 from mercurial.upgrade_utils import engine as upgrade_engine
 
+from mercurial.revlogutils import constants
 from mercurial.revlogutils import sidedata
 
 
@@ -54,13 +56,15 @@
     return text, sd
 
 
-def wrapgetsidedatacompanion(orig, srcrepo, dstrepo):
-    sidedatacompanion = orig(srcrepo, dstrepo)
+def wrapget_sidedata_helpers(orig, srcrepo, dstrepo):
+    repo, computers, removers = orig(srcrepo, dstrepo)
+    assert not computers and not removers  # deal with composition later
     addedreqs = dstrepo.requirements - srcrepo.requirements
+
     if requirements.SIDEDATA_REQUIREMENT in addedreqs:
-        assert sidedatacompanion is None  # deal with composition later
 
-        def sidedatacompanion(revlog, rev):
+        def computer(repo, revlog, rev, old_sidedata):
+            assert not old_sidedata  # not supported yet
             update = {}
             revlog.sidedatanocheck = True
             try:
@@ -73,16 +77,25 @@
             # and sha2 hashes
             sha256 = hashlib.sha256(text).digest()
             update[sidedata.SD_TEST2] = struct.pack('>32s', sha256)
-            return False, (), update, 0, 0
+            return update, (0, 0)
 
-    return sidedatacompanion
+        srcrepo.register_sidedata_computer(
+            constants.KIND_CHANGELOG,
+            b"whatever",
+            (sidedata.SD_TEST1, sidedata.SD_TEST2),
+            computer,
+            0,
+        )
+        dstrepo.register_wanted_sidedata(b"whatever")
+
+    return changegroup.get_sidedata_helpers(srcrepo, dstrepo._wanted_sidedata)
 
 
 def extsetup(ui):
     extensions.wrapfunction(revlog.revlog, 'addrevision', wrapaddrevision)
     extensions.wrapfunction(revlog.revlog, '_revisiondata', wrap_revisiondata)
     extensions.wrapfunction(
-        upgrade_engine, 'getsidedatacompanion', wrapgetsidedatacompanion
+        upgrade_engine, 'get_sidedata_helpers', wrapget_sidedata_helpers
     )