changegroupv4: add sidedata helpers
authorRaphaël Gomès <rgomes@octobus.net>
Fri, 19 Feb 2021 11:15:42 +0100
changeset 46715 45f0d5297698
parent 46714 f63299ee7e4d
child 46716 c8bb7b89179e
changegroupv4: add sidedata helpers These helpers carry the information and computers needed to rewrite sidedata when generating/applying patches. We will be making use of them soon. Differential Revision: https://phab.mercurial-scm.org/D10029
hgext/sqlitestore.py
mercurial/changegroup.py
mercurial/filelog.py
mercurial/manifest.py
mercurial/revlog.py
mercurial/utils/storageutil.py
tests/simplestorerepo.py
--- a/hgext/sqlitestore.py	Wed Mar 10 19:33:18 2021 +0100
+++ b/hgext/sqlitestore.py	Fri Feb 19 11:15:42 2021 +0100
@@ -587,6 +587,7 @@
         revisiondata=False,
         assumehaveparentrevisions=False,
         deltamode=repository.CG_DELTAMODE_STD,
+        sidedata_helpers=None,
     ):
         if nodesorder not in (b'nodes', b'storage', b'linear', None):
             raise error.ProgrammingError(
@@ -625,6 +626,7 @@
             revisiondata=revisiondata,
             assumehaveparentrevisions=assumehaveparentrevisions,
             deltamode=deltamode,
+            sidedata_helpers=sidedata_helpers,
         ):
 
             yield delta
--- a/mercurial/changegroup.py	Wed Mar 10 19:33:18 2021 +0100
+++ b/mercurial/changegroup.py	Fri Feb 19 11:15:42 2021 +0100
@@ -272,6 +272,7 @@
         url,
         targetphase=phases.draft,
         expectedtotal=None,
+        sidedata_categories=None,
     ):
         """Add the changegroup returned by source.read() to this repo.
         srctype is a string like 'push', 'pull', or 'unbundle'.  url is
@@ -282,9 +283,23 @@
         - more heads than before: 1+added heads (2..n)
         - fewer heads than before: -1-removed heads (-2..-n)
         - number of heads stays the same: 1
+
+        `sidedata_categories` is an optional set of the remote's sidedata wanted
+        categories.
         """
         repo = repo.unfiltered()
 
+        # Only useful if we're adding sidedata categories. If both peers have
+        # the same categories, then we simply don't do anything.
+        if self.version == b'04' and srctype == b'pull':
+            sidedata_helpers = get_sidedata_helpers(
+                repo,
+                sidedata_categories or set(),
+                pull=True,
+            )
+        else:
+            sidedata_helpers = None
+
         def csmap(x):
             repo.ui.debug(b"add changeset %s\n" % short(x))
             return len(cl)
@@ -749,6 +764,7 @@
     clrevtolocalrev=None,
     fullclnodes=None,
     precomputedellipsis=None,
+    sidedata_helpers=None,
 ):
     """Calculate deltas for a set of revisions.
 
@@ -756,6 +772,8 @@
 
     If topic is not None, progress detail will be generated using this
     topic name (e.g. changesets, manifests, etc).
+
+    See `storageutil.emitrevisions` for the doc on `sidedata_helpers`.
     """
     if not nodes:
         return
@@ -854,6 +872,7 @@
         revisiondata=True,
         assumehaveparentrevisions=not ellipses,
         deltamode=deltamode,
+        sidedata_helpers=sidedata_helpers,
     )
 
     for i, revision in enumerate(revisions):
@@ -974,8 +993,21 @@
         self._verbosenote(_(b'uncompressed size of bundle content:\n'))
         size = 0
 
+        sidedata_helpers = None
+        if self.version == b'04':
+            remote_sidedata = self._remote_sidedata
+            if source == b'strip':
+                # We're our own remote when stripping, get the no-op helpers
+                # TODO a better approach would be for the strip bundle to
+                # correctly advertise its sidedata categories directly.
+                remote_sidedata = repo._wanted_sidedata
+            sidedata_helpers = get_sidedata_helpers(repo, remote_sidedata)
+
         clstate, deltas = self._generatechangelog(
-            cl, clnodes, generate=changelog
+            cl,
+            clnodes,
+            generate=changelog,
+            sidedata_helpers=sidedata_helpers,
         )
         for delta in deltas:
             for chunk in _revisiondeltatochunks(delta, self._builddeltaheader):
@@ -1023,6 +1055,7 @@
             fnodes,
             source,
             clstate[b'clrevtomanifestrev'],
+            sidedata_helpers=sidedata_helpers,
         )
 
         for tree, deltas in it:
@@ -1063,6 +1096,7 @@
             fastpathlinkrev,
             fnodes,
             clrevs,
+            sidedata_helpers=sidedata_helpers,
         )
 
         for path, deltas in it:
@@ -1087,7 +1121,9 @@
         if clnodes:
             repo.hook(b'outgoing', node=hex(clnodes[0]), source=source)
 
-    def _generatechangelog(self, cl, nodes, generate=True):
+    def _generatechangelog(
+        self, cl, nodes, generate=True, sidedata_helpers=None
+    ):
         """Generate data for changelog chunks.
 
         Returns a 2-tuple of a dict containing state and an iterable of
@@ -1096,6 +1132,8 @@
 
         if generate is False, the state will be fully populated and no chunk
         stream will be yielded
+
+        See `storageutil.emitrevisions` for the doc on `sidedata_helpers`.
         """
         clrevorder = {}
         manifests = {}
@@ -1179,6 +1217,7 @@
             clrevtolocalrev={},
             fullclnodes=self._fullclnodes,
             precomputedellipsis=self._precomputedellipsis,
+            sidedata_helpers=sidedata_helpers,
         )
 
         return state, gen
@@ -1192,11 +1231,14 @@
         fnodes,
         source,
         clrevtolocalrev,
+        sidedata_helpers=None,
     ):
         """Returns an iterator of changegroup chunks containing manifests.
 
         `source` is unused here, but is used by extensions like remotefilelog to
         change what is sent based in pulls vs pushes, etc.
+
+        See `storageutil.emitrevisions` for the doc on `sidedata_helpers`.
         """
         repo = self._repo
         mfl = repo.manifestlog
@@ -1285,6 +1327,7 @@
                 clrevtolocalrev=clrevtolocalrev,
                 fullclnodes=self._fullclnodes,
                 precomputedellipsis=self._precomputedellipsis,
+                sidedata_helpers=sidedata_helpers,
             )
 
             if not self._oldmatcher.visitdir(store.tree[:-1]):
@@ -1323,6 +1366,7 @@
         fastpathlinkrev,
         fnodes,
         clrevs,
+        sidedata_helpers=None,
     ):
         changedfiles = [
             f
@@ -1417,6 +1461,7 @@
                 clrevtolocalrev=clrevtolocalrev,
                 fullclnodes=self._fullclnodes,
                 precomputedellipsis=self._precomputedellipsis,
+                sidedata_helpers=sidedata_helpers,
             )
 
             yield fname, deltas
@@ -1792,3 +1837,25 @@
                 )
 
     return revisions, files
+
+
+def get_sidedata_helpers(repo, remote_sd_categories, pull=False):
+    # Computers for computing sidedata on-the-fly
+    sd_computers = collections.defaultdict(list)
+    # Computers for categories to remove from sidedata
+    sd_removers = collections.defaultdict(list)
+
+    to_generate = remote_sd_categories - repo._wanted_sidedata
+    to_remove = repo._wanted_sidedata - remote_sd_categories
+    if pull:
+        to_generate, to_remove = to_remove, to_generate
+
+    for revlog_kind, computers in repo._sidedata_computers.items():
+        for category, computer in computers.items():
+            if category in to_generate:
+                sd_computers[revlog_kind].append(computer)
+            if category in to_remove:
+                sd_removers[revlog_kind].append(computer)
+
+    sidedata_helpers = (repo, sd_computers, sd_removers)
+    return sidedata_helpers
--- a/mercurial/filelog.py	Wed Mar 10 19:33:18 2021 +0100
+++ b/mercurial/filelog.py	Fri Feb 19 11:15:42 2021 +0100
@@ -103,6 +103,7 @@
         revisiondata=False,
         assumehaveparentrevisions=False,
         deltamode=repository.CG_DELTAMODE_STD,
+        sidedata_helpers=None,
     ):
         return self._revlog.emitrevisions(
             nodes,
@@ -110,6 +111,7 @@
             revisiondata=revisiondata,
             assumehaveparentrevisions=assumehaveparentrevisions,
             deltamode=deltamode,
+            sidedata_helpers=sidedata_helpers,
         )
 
     def addrevision(
--- a/mercurial/manifest.py	Wed Mar 10 19:33:18 2021 +0100
+++ b/mercurial/manifest.py	Fri Feb 19 11:15:42 2021 +0100
@@ -1826,6 +1826,7 @@
         revisiondata=False,
         assumehaveparentrevisions=False,
         deltamode=repository.CG_DELTAMODE_STD,
+        sidedata_helpers=None,
     ):
         return self._revlog.emitrevisions(
             nodes,
@@ -1833,6 +1834,7 @@
             revisiondata=revisiondata,
             assumehaveparentrevisions=assumehaveparentrevisions,
             deltamode=deltamode,
+            sidedata_helpers=sidedata_helpers,
         )
 
     def addgroup(
--- a/mercurial/revlog.py	Wed Mar 10 19:33:18 2021 +0100
+++ b/mercurial/revlog.py	Fri Feb 19 11:15:42 2021 +0100
@@ -2733,6 +2733,7 @@
         revisiondata=False,
         assumehaveparentrevisions=False,
         deltamode=repository.CG_DELTAMODE_STD,
+        sidedata_helpers=None,
     ):
         if nodesorder not in (b'nodes', b'storage', b'linear', None):
             raise error.ProgrammingError(
@@ -2761,6 +2762,7 @@
             deltamode=deltamode,
             revisiondata=revisiondata,
             assumehaveparentrevisions=assumehaveparentrevisions,
+            sidedata_helpers=sidedata_helpers,
         )
 
     DELTAREUSEALWAYS = b'always'
--- a/mercurial/utils/storageutil.py	Wed Mar 10 19:33:18 2021 +0100
+++ b/mercurial/utils/storageutil.py	Fri Feb 19 11:15:42 2021 +0100
@@ -23,6 +23,7 @@
     pycompat,
 )
 from ..interfaces import repository
+from ..revlogutils import sidedata as sidedatamod
 from ..utils import hashutil
 
 _nullhash = hashutil.sha1(nullid)
@@ -294,6 +295,7 @@
     deltamode=repository.CG_DELTAMODE_STD,
     revisiondata=False,
     assumehaveparentrevisions=False,
+    sidedata_helpers=None,
 ):
     """Generic implementation of ifiledata.emitrevisions().
 
@@ -356,6 +358,21 @@
     ``nodesorder``
     ``revisiondata``
     ``assumehaveparentrevisions``
+    ``sidedata_helpers`` (optional)
+        If not None, means that sidedata should be included.
+        A dictionary of revlog type to tuples of `(repo, computers, removers)`:
+            * `repo` is used as an argument for computers
+            * `computers` is a list of `(category, (keys, computer)` that
+               compute the missing sidedata categories that were asked:
+               * `category` is the sidedata category
+               * `keys` are the sidedata keys to be affected
+               * `computer` is the function `(repo, store, rev, sidedata)` that
+                 returns a new sidedata dict.
+            * `removers` will remove the keys corresponding to the categories
+              that are present, but not needed.
+        If both `computers` and `removers` are empty, sidedata are simply not
+        transformed.
+        Revlog types are `changelog`, `manifest` or `filelog`.
     """
 
     fnode = store.node
@@ -469,6 +486,17 @@
 
                 available.add(rev)
 
+        sidedata = None
+        if sidedata_helpers:
+            sidedata = store.sidedata(rev)
+            sidedata = run_sidedata_helpers(
+                store=store,
+                sidedata_helpers=sidedata_helpers,
+                sidedata=sidedata,
+                rev=rev,
+            )
+            sidedata = sidedatamod.serialize_sidedata(sidedata)
+
         yield resultcls(
             node=node,
             p1node=fnode(p1rev),
@@ -484,6 +512,25 @@
         prevrev = rev
 
 
+def run_sidedata_helpers(store, sidedata_helpers, sidedata, rev):
+    """Returns the sidedata for the given revision after running through
+    the given helpers.
+    - `store`: the revlog this applies to (changelog, manifest, or filelog
+      instance)
+    - `sidedata_helpers`: see `storageutil.emitrevisions`
+    - `sidedata`: previous sidedata at the given rev, if any
+    - `rev`: affected rev of `store`
+    """
+    repo, sd_computers, sd_removers = sidedata_helpers
+    kind = store.revlog_kind
+    for _keys, sd_computer in sd_computers.get(kind, []):
+        sidedata = sd_computer(repo, store, rev, sidedata)
+    for keys, _computer in sd_removers.get(kind, []):
+        for key in keys:
+            sidedata.pop(key, None)
+    return sidedata
+
+
 def deltaiscensored(delta, baserev, baselenfn):
     """Determine if a delta represents censored revision data.
 
--- a/tests/simplestorerepo.py	Wed Mar 10 19:33:18 2021 +0100
+++ b/tests/simplestorerepo.py	Fri Feb 19 11:15:42 2021 +0100
@@ -446,6 +446,7 @@
         revisiondata=False,
         assumehaveparentrevisions=False,
         deltamode=repository.CG_DELTAMODE_STD,
+        sidedata_helpers=None,
     ):
         # TODO this will probably break on some ordering options.
         nodes = [n for n in nodes if n != nullid]
@@ -459,6 +460,7 @@
             revisiondata=revisiondata,
             assumehaveparentrevisions=assumehaveparentrevisions,
             deltamode=deltamode,
+            sidedata_helpers=sidedata_helpers,
         ):
             yield delta