sidedata-exchange: rewrite sidedata on-the-fly whenever possible
author Raphaël Gomès <rgomes@octobus.net>
Fri, 19 Feb 2021 11:24:50 +0100
changeset 46718 ba8e508a8e69
parent 46717 502e795b55ac
child 46719 0732a7264226
sidedata-exchange: rewrite sidedata on-the-fly whenever possible When A exchanges with B, the difference between their supported sidedata categories is computed, and the responsibility for generating the missing ones always lies with the client: - If A pushes to B and B requires category `foo` that A does not have, A will need to generate it when sending it to B. - If A pulls from B and A needs category `foo`, it will generate `foo` before the end of the transaction. - Any category that is not required is removed. If peers are not compatible, abort. It is forbidden to rewrite sidedata for a rev that already has sidedata, since that would introduce unreachable (garbage) data in the data file, something we're not prepared for yet. Differential Revision: https://phab.mercurial-scm.org/D10032
mercurial/changegroup.py
mercurial/revlog.py
tests/test-copies-in-changeset.t
tests/test-sidedata-exchange.t
tests/testlib/ext-sidedata-2.py
tests/testlib/ext-sidedata-3.py
tests/testlib/ext-sidedata-4.py
tests/testlib/ext-sidedata-5.py
tests/testlib/ext-sidedata.py
--- a/mercurial/changegroup.py	Mon Feb 15 11:08:28 2021 +0100
+++ b/mercurial/changegroup.py	Fri Feb 19 11:24:50 2021 +0100
@@ -7,6 +7,7 @@
 
 from __future__ import absolute_import
 
+import collections
 import os
 import struct
 import weakref
@@ -252,7 +253,7 @@
                     pos = next
             yield closechunk()
 
-    def _unpackmanifests(self, repo, revmap, trp, prog):
+    def _unpackmanifests(self, repo, revmap, trp, prog, addrevisioncb=None):
         self.callback = prog.increment
         # no need to check for empty manifest group here:
         # if the result of the merge of 1 and 2 is the same in 3 and 4,
@@ -260,7 +261,8 @@
         # be empty during the pull
         self.manifestheader()
         deltas = self.deltaiter()
-        repo.manifestlog.getstorage(b'').addgroup(deltas, revmap, trp)
+        storage = repo.manifestlog.getstorage(b'')
+        storage.addgroup(deltas, revmap, trp, addrevisioncb=addrevisioncb)
         prog.complete()
         self.callback = None
 
@@ -369,6 +371,13 @@
             efilesset = None
             self.callback = None
 
+            # Keep track of the (non-changelog) revlogs we've updated and their
+            # range of new revisions for sidedata rewrite.
+            # TODO do something more efficient than keeping the reference to
+            # the revlogs, especially memory-wise.
+            touched_manifests = {}
+            touched_filelogs = {}
+
             # pull off the manifest group
             repo.ui.status(_(b"adding manifests\n"))
             # We know that we'll never have more manifests than we had
@@ -376,7 +385,24 @@
             progress = repo.ui.makeprogress(
                 _(b'manifests'), unit=_(b'chunks'), total=changesets
             )
-            self._unpackmanifests(repo, revmap, trp, progress)
+            on_manifest_rev = None
+            if sidedata_helpers and b'manifest' in sidedata_helpers[1]:
+
+                def on_manifest_rev(manifest, rev):
+                    range = touched_manifests.get(manifest)
+                    if not range:
+                        touched_manifests[manifest] = (rev, rev)
+                    else:
+                        assert rev == range[1] + 1
+                        touched_manifests[manifest] = (range[0], rev)
+
+            self._unpackmanifests(
+                repo,
+                revmap,
+                trp,
+                progress,
+                addrevisioncb=on_manifest_rev,
+            )
 
             needfiles = {}
             if repo.ui.configbool(b'server', b'validate'):
@@ -390,12 +416,37 @@
                     for f, n in pycompat.iteritems(mfest):
                         needfiles.setdefault(f, set()).add(n)
 
+            on_filelog_rev = None
+            if sidedata_helpers and b'filelog' in sidedata_helpers[1]:
+
+                def on_filelog_rev(filelog, rev):
+                    range = touched_filelogs.get(filelog)
+                    if not range:
+                        touched_filelogs[filelog] = (rev, rev)
+                    else:
+                        assert rev == range[1] + 1
+                        touched_filelogs[filelog] = (range[0], rev)
+
             # process the files
             repo.ui.status(_(b"adding file changes\n"))
             newrevs, newfiles = _addchangegroupfiles(
-                repo, self, revmap, trp, efiles, needfiles
+                repo,
+                self,
+                revmap,
+                trp,
+                efiles,
+                needfiles,
+                addrevisioncb=on_filelog_rev,
             )
 
+            if sidedata_helpers:
+                if b'changelog' in sidedata_helpers[1]:
+                    cl.rewrite_sidedata(sidedata_helpers, clstart, clend - 1)
+                for mf, (startrev, endrev) in touched_manifests.items():
+                    mf.rewrite_sidedata(sidedata_helpers, startrev, endrev)
+                for fl, (startrev, endrev) in touched_filelogs.items():
+                    fl.rewrite_sidedata(sidedata_helpers, startrev, endrev)
+
             # making sure the value exists
             tr.changes.setdefault(b'changegroup-count-changesets', 0)
             tr.changes.setdefault(b'changegroup-count-revisions', 0)
@@ -559,14 +610,18 @@
         node, p1, p2, deltabase, cs, flags = headertuple
         return node, p1, p2, deltabase, cs, flags
 
-    def _unpackmanifests(self, repo, revmap, trp, prog):
-        super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
+    def _unpackmanifests(self, repo, revmap, trp, prog, addrevisioncb=None):
+        super(cg3unpacker, self)._unpackmanifests(
+            repo, revmap, trp, prog, addrevisioncb=addrevisioncb
+        )
         for chunkdata in iter(self.filelogheader, {}):
             # If we get here, there are directory manifests in the changegroup
             d = chunkdata[b"filename"]
             repo.ui.debug(b"adding %s revisions\n" % d)
             deltas = self.deltaiter()
-            if not repo.manifestlog.getstorage(d).addgroup(deltas, revmap, trp):
+            if not repo.manifestlog.getstorage(d).addgroup(
+                deltas, revmap, trp, addrevisioncb=addrevisioncb
+            ):
                 raise error.Abort(_(b"received dir revlog group is empty"))
 
 
@@ -1793,7 +1848,15 @@
     return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
 
 
-def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
+def _addchangegroupfiles(
+    repo,
+    source,
+    revmap,
+    trp,
+    expectedfiles,
+    needfiles,
+    addrevisioncb=None,
+):
     revisions = 0
     files = 0
     progress = repo.ui.makeprogress(
@@ -1808,7 +1871,13 @@
         o = len(fl)
         try:
             deltas = source.deltaiter()
-            if not fl.addgroup(deltas, revmap, trp):
+            added = fl.addgroup(
+                deltas,
+                revmap,
+                trp,
+                addrevisioncb=addrevisioncb,
+            )
+            if not added:
                 raise error.Abort(_(b"received file revlog group is empty"))
         except error.CensoredBaseError as e:
             raise error.Abort(_(b"received delta base is censored: %s") % e)
--- a/mercurial/revlog.py	Mon Feb 15 11:08:28 2021 +0100
+++ b/mercurial/revlog.py	Fri Feb 19 11:24:50 2021 +0100
@@ -3205,3 +3205,54 @@
             )
 
         return d
+
+    def rewrite_sidedata(self, helpers, startrev, endrev):
+        """Generate and store sidedata for revs startrev..endrev (inclusive)
+
+        Does nothing unless this is a revlogv2. New sidedata is appended to
+        the data file, then the index entries are overwritten in place.
+        Aborts if one of the revisions already has sidedata."""
+        if self.version & 0xFFFF != REVLOGV2:
+            return
+        # inline revlogs are not yet supported because they suffer from an
+        # issue when rewriting them (since it's not an append-only operation).
+        # See issue6485.
+        assert not self._inline
+        if not helpers[1] and not helpers[2]:
+            # Nothing to generate or remove
+            return
+
+        new_entries = []
+        # append the new sidedata
+        with self._datafp(b'a+') as fp:
+            # Maybe this bug still exists, see revlog._writeentry
+            fp.seek(0, os.SEEK_END)
+            current_offset = fp.tell()
+            for rev in range(startrev, endrev + 1):
+                entry = self.index[rev]
+                if entry[8] != 0 or entry[9] != 0:
+                    # rewriting entries that already have sidedata is not
+                    # supported yet, because it introduces garbage data in the
+                    # revlog. Check first so no work is wasted on a refusal.
+                    msg = b"Rewriting existing sidedata is not supported yet"
+                    raise error.Abort(msg)
+                new_sidedata = storageutil.run_sidedata_helpers(
+                    store=self, sidedata_helpers=helpers, sidedata={}, rev=rev
+                )
+                serialized_sidedata = sidedatautil.serialize_sidedata(
+                    new_sidedata
+                )
+                entry = entry[:8]
+                entry += (current_offset, len(serialized_sidedata))
+
+                fp.write(serialized_sidedata)
+                new_entries.append(entry)
+                current_offset += len(serialized_sidedata)
+
+        # rewrite the new index entries in place ('w+' would truncate the file)
+        with self._indexfp(b'r+') as fp:
+            fp.seek(startrev * self._io.size)
+            for rev, entry in enumerate(new_entries, startrev):
+                self.index.replace_sidedata_info(rev, entry[8], entry[9])
+                packed = self._io.packentry(entry, self.node, self.version, rev)
+                fp.write(packed)
--- a/tests/test-copies-in-changeset.t	Mon Feb 15 11:08:28 2021 +0100
+++ b/tests/test-copies-in-changeset.t	Fri Feb 19 11:24:50 2021 +0100
@@ -271,13 +271,12 @@
   $ hg ci --amend -m 'copy a to j, v2'
   saved backup bundle to $TESTTMP/repo/.hg/strip-backup/*-*-amend.hg (glob)
   $ hg debugsidedata -c -v -- -1
-  1 sidedata entries (missing-correct-output !)
-   entry-0014 size 24 (missing-correct-output !)
-    '\x00\x00\x00\x02\x00\x00\x00\x00\x01\x00\x00\x00\x00\x06\x00\x00\x00\x02\x00\x00\x00\x00aj' (missing-correct-output !)
+  1 sidedata entries
+   entry-0014 size 24
+    '\x00\x00\x00\x02\x00\x00\x00\x00\x01\x00\x00\x00\x00\x06\x00\x00\x00\x02\x00\x00\x00\x00aj'
 #endif
   $ hg showcopies --config experimental.copies.read-from=filelog-only
-  a -> j (sidedata missing-correct-output !)
-  a -> j (no-sidedata !)
+  a -> j
 The entries should be written to extras even if they're empty (so the client
 won't have to fall back to reading from filelogs)
   $ echo x >> j
@@ -355,8 +354,7 @@
   saved backup bundle to $TESTTMP/rebase-rename/.hg/strip-backup/*-*-rebase.hg (glob)
   $ hg st --change . --copies
   A b
-    a (sidedata missing-correct-output !)
-    a (no-sidedata !)
+    a
   R a
   $ cd ..
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-sidedata-exchange.t	Fri Feb 19 11:24:50 2021 +0100
@@ -0,0 +1,473 @@
+===========================
+Tests for sidedata exchange
+===========================
+
+Check simple exchange behavior
+==============================
+
+Pusher and pushed have sidedata enabled
+---------------------------------------
+
+  $ hg init sidedata-source --config format.exp-use-side-data=yes
+  $ cat << EOF >> sidedata-source/.hg/hgrc
+  > [extensions]
+  > testsidedata=$TESTDIR/testlib/ext-sidedata-5.py
+  > EOF
+  $ hg init sidedata-target --config format.exp-use-side-data=yes
+  $ cat << EOF >> sidedata-target/.hg/hgrc
+  > [extensions]
+  > testsidedata=$TESTDIR/testlib/ext-sidedata-5.py
+  > EOF
+  $ cd sidedata-source
+  $ echo a > a
+  $ echo b > b
+  $ echo c > c
+  $ hg commit -Am "initial"
+  adding a
+  adding b
+  adding c
+  $ echo aa > a
+  $ hg commit -m "other"
+  $ hg push -r . ../sidedata-target
+  pushing to ../sidedata-target
+  searching for changes
+  adding changesets
+  adding manifests
+  adding file changes
+  added 2 changesets with 4 changes to 3 files
+  $ hg -R ../sidedata-target debugsidedata -c 0
+  2 sidedata entries
+   entry-0001 size 4
+   entry-0002 size 32
+  $ hg -R ../sidedata-target debugsidedata -c 1 -v
+  2 sidedata entries
+   entry-0001 size 4
+    '\x00\x00\x00:'
+   entry-0002 size 32
+    '\xa3\xee4v\x99\x85$\x9f\x1f\x8dKe\x0f\xc3\x9d-\xc9\xb5%[\x15=h\xe9\xf2O\xb5\xd9\x1f*\xff\xe5'
+  $ hg -R ../sidedata-target debugsidedata -m 0
+  2 sidedata entries
+   entry-0001 size 4
+   entry-0002 size 32
+  $ hg -R ../sidedata-target debugsidedata -m 1 -v
+  2 sidedata entries
+   entry-0001 size 4
+    '\x00\x00\x00\x81'
+   entry-0002 size 32
+    '-bL\xc5\xa4uu"#\xac\x1b`,\xc0\xbc\x9d\xf5\xac\xf0\x1d\x89)2\xf8N\xb1\x14m\xce\xd7\xbc\xae'
+  $ hg -R ../sidedata-target debugsidedata a 0
+  2 sidedata entries
+   entry-0001 size 4
+   entry-0002 size 32
+  $ hg -R ../sidedata-target debugsidedata a 1 -v
+  2 sidedata entries
+   entry-0001 size 4
+    '\x00\x00\x00\x03'
+   entry-0002 size 32
+    '\xd9\xcd\x81UvL5C\xf1\x0f\xad\x8aH\rt17Fo\x8dU!<\x8e\xae\xfc\xd1/\x06\xd4:\x80'
+  $ cd ..
+
+Puller and pulled have sidedata enabled
+---------------------------------------
+
+  $ rm -rf sidedata-source sidedata-target
+  $ hg init sidedata-source --config format.exp-use-side-data=yes
+  $ cat << EOF >> sidedata-source/.hg/hgrc
+  > [extensions]
+  > testsidedata=$TESTDIR/testlib/ext-sidedata-5.py
+  > EOF
+  $ hg init sidedata-target --config format.exp-use-side-data=yes
+  $ cat << EOF >> sidedata-target/.hg/hgrc
+  > [extensions]
+  > testsidedata=$TESTDIR/testlib/ext-sidedata-5.py
+  > EOF
+  $ cd sidedata-source
+  $ echo a > a
+  $ echo b > b
+  $ echo c > c
+  $ hg commit -Am "initial"
+  adding a
+  adding b
+  adding c
+  $ echo aa > a
+  $ hg commit -m "other"
+  $ hg pull -R ../sidedata-target ../sidedata-source
+  pulling from ../sidedata-source
+  requesting all changes
+  adding changesets
+  adding manifests
+  adding file changes
+  added 2 changesets with 4 changes to 3 files
+  new changesets 05da661850d7:7ec8b4049447
+  (run 'hg update' to get a working copy)
+  $ hg -R ../sidedata-target debugsidedata -c 0
+  2 sidedata entries
+   entry-0001 size 4
+   entry-0002 size 32
+  $ hg -R ../sidedata-target debugsidedata -c 1 -v
+  2 sidedata entries
+   entry-0001 size 4
+    '\x00\x00\x00:'
+   entry-0002 size 32
+    '\xa3\xee4v\x99\x85$\x9f\x1f\x8dKe\x0f\xc3\x9d-\xc9\xb5%[\x15=h\xe9\xf2O\xb5\xd9\x1f*\xff\xe5'
+  $ hg -R ../sidedata-target debugsidedata -m 0
+  2 sidedata entries
+   entry-0001 size 4
+   entry-0002 size 32
+  $ hg -R ../sidedata-target debugsidedata -m 1 -v
+  2 sidedata entries
+   entry-0001 size 4
+    '\x00\x00\x00\x81'
+   entry-0002 size 32
+    '-bL\xc5\xa4uu"#\xac\x1b`,\xc0\xbc\x9d\xf5\xac\xf0\x1d\x89)2\xf8N\xb1\x14m\xce\xd7\xbc\xae'
+  $ hg -R ../sidedata-target debugsidedata a 0
+  2 sidedata entries
+   entry-0001 size 4
+   entry-0002 size 32
+  $ hg -R ../sidedata-target debugsidedata a 1 -v
+  2 sidedata entries
+   entry-0001 size 4
+    '\x00\x00\x00\x03'
+   entry-0002 size 32
+    '\xd9\xcd\x81UvL5C\xf1\x0f\xad\x8aH\rt17Fo\x8dU!<\x8e\xae\xfc\xd1/\x06\xd4:\x80'
+  $ cd ..
+
+Now on to asymmetric configs.
+
+Pusher has sidedata enabled, pushed does not
+--------------------------------------------
+
+  $ rm -rf sidedata-source sidedata-target
+  $ hg init sidedata-source --config format.exp-use-side-data=yes
+  $ cat << EOF >> sidedata-source/.hg/hgrc
+  > [extensions]
+  > testsidedata=$TESTDIR/testlib/ext-sidedata-5.py
+  > EOF
+  $ hg init sidedata-target --config format.exp-use-side-data=no
+  $ cd sidedata-source
+  $ echo a > a
+  $ echo b > b
+  $ echo c > c
+  $ hg commit -Am "initial"
+  adding a
+  adding b
+  adding c
+  $ echo aa > a
+  $ hg commit -m "other"
+  $ hg push -r . ../sidedata-target --traceback
+  pushing to ../sidedata-target
+  searching for changes
+  adding changesets
+  adding manifests
+  adding file changes
+  added 2 changesets with 4 changes to 3 files
+  $ hg -R ../sidedata-target log -G
+  o  changeset:   1:7ec8b4049447
+  |  tag:         tip
+  |  user:        test
+  |  date:        Thu Jan 01 00:00:00 1970 +0000
+  |  summary:     other
+  |
+  o  changeset:   0:05da661850d7
+     user:        test
+     date:        Thu Jan 01 00:00:00 1970 +0000
+     summary:     initial
+  
+
+  $ hg -R ../sidedata-target debugsidedata -c 0
+  $ hg -R ../sidedata-target debugsidedata -c 1 -v
+  $ hg -R ../sidedata-target debugsidedata -m 0
+  $ hg -R ../sidedata-target debugsidedata -m 1 -v
+  $ hg -R ../sidedata-target debugsidedata a 0
+  $ hg -R ../sidedata-target debugsidedata a 1 -v
+  $ cd ..
+
+Pulled has sidedata enabled, puller does not
+--------------------------------------------
+
+  $ rm -rf sidedata-source sidedata-target
+  $ hg init sidedata-source --config format.exp-use-side-data=yes
+  $ cat << EOF >> sidedata-source/.hg/hgrc
+  > [extensions]
+  > testsidedata=$TESTDIR/testlib/ext-sidedata-5.py
+  > EOF
+  $ hg init sidedata-target --config format.exp-use-side-data=no
+  $ cd sidedata-source
+  $ echo a > a
+  $ echo b > b
+  $ echo c > c
+  $ hg commit -Am "initial"
+  adding a
+  adding b
+  adding c
+  $ echo aa > a
+  $ hg commit -m "other"
+  $ hg pull -R ../sidedata-target ../sidedata-source
+  pulling from ../sidedata-source
+  requesting all changes
+  adding changesets
+  adding manifests
+  adding file changes
+  added 2 changesets with 4 changes to 3 files
+  new changesets 05da661850d7:7ec8b4049447
+  (run 'hg update' to get a working copy)
+  $ hg -R ../sidedata-target log -G
+  o  changeset:   1:7ec8b4049447
+  |  tag:         tip
+  |  user:        test
+  |  date:        Thu Jan 01 00:00:00 1970 +0000
+  |  summary:     other
+  |
+  o  changeset:   0:05da661850d7
+     user:        test
+     date:        Thu Jan 01 00:00:00 1970 +0000
+     summary:     initial
+  
+
+  $ hg -R ../sidedata-target debugsidedata -c 0
+  $ hg -R ../sidedata-target debugsidedata -c 1 -v
+  $ hg -R ../sidedata-target debugsidedata -m 0
+  $ hg -R ../sidedata-target debugsidedata -m 1 -v
+  $ hg -R ../sidedata-target debugsidedata a 0
+  $ hg -R ../sidedata-target debugsidedata a 1 -v
+  $ cd ..
+
+
+Check sidedata exchange with on-the-fly generation and removal
+==============================================================
+
+(Push) Target has strict superset of the source
+-----------------------------------------------
+
+  $ hg init source-repo --config format.exp-use-side-data=yes
+  $ hg init target-repo --config format.exp-use-side-data=yes
+  $ cat << EOF >> target-repo/.hg/hgrc
+  > [extensions]
+  > testsidedata=$TESTDIR/testlib/ext-sidedata.py
+  > EOF
+  $ cd source-repo
+  $ echo aaa > a
+  $ hg add a
+  $ hg commit -m a
+  $ echo aaa > b
+  $ hg add b
+  $ hg commit -m b
+  $ echo xxx >> a
+  $ hg commit -m aa
+
+No sidedata is generated in the source
+  $ hg debugsidedata -c 0
+
+Check that sidedata capabilities are advertised
+  $ hg debugcapabilities ../target-repo | grep sidedata
+    exp-wanted-sidedata=1,2
+
+We expect the client to abort the push since it's not capable of generating
+what the server is asking
+  $ hg push -r . ../target-repo
+  pushing to ../target-repo
+  abort: cannot push: required sidedata category not supported by this client: '1'
+  [255]
+
+Add the required capabilities
+  $ cat << EOF >> .hg/hgrc
+  > [extensions]
+  > testsidedata2=$TESTDIR/testlib/ext-sidedata-2.py
+  > EOF
+
+We expect the target to have sidedata that was generated by the source on push
+  $ hg push -r . ../target-repo
+  pushing to ../target-repo
+  searching for changes
+  adding changesets
+  adding manifests
+  adding file changes
+  added 3 changesets with 3 changes to 2 files
+  $ cd ../target-repo
+  $ hg debugsidedata -c 0
+  2 sidedata entries
+   entry-0001 size 4
+   entry-0002 size 32
+  $ hg debugsidedata -c 1 -v
+  2 sidedata entries
+   entry-0001 size 4
+    '\x00\x00\x006'
+   entry-0002 size 32
+    '\x98\t\xf9\xc4v\xf0\xc5P\x90\xf7wRf\xe8\xe27e\xfc\xc1\x93\xa4\x96\xd0\x1d\x97\xaaG\x1d\xd7t\xfa\xde'
+  $ hg debugsidedata -m 2
+  2 sidedata entries
+   entry-0001 size 4
+   entry-0002 size 32
+  $ hg debugsidedata a 1
+  2 sidedata entries
+   entry-0001 size 4
+   entry-0002 size 32
+  $ cd ..
+
+(Push) Difference is not subset/superset
+----------------------------------------
+
+Source has one in common, one missing and one more sidedata category with the
+target.
+
+  $ rm -rf source-repo target-repo
+  $ hg init source-repo --config format.exp-use-side-data=yes
+  $ cat << EOF >> source-repo/.hg/hgrc
+  > [extensions]
+  > testsidedata3=$TESTDIR/testlib/ext-sidedata-3.py
+  > EOF
+  $ hg init target-repo --config format.exp-use-side-data=yes
+  $ cat << EOF >> target-repo/.hg/hgrc
+  > [extensions]
+  > testsidedata4=$TESTDIR/testlib/ext-sidedata-4.py
+  > EOF
+  $ cd source-repo
+  $ echo aaa > a
+  $ hg add a
+  $ hg commit -m a
+  $ echo aaa > b
+  $ hg add b
+  $ hg commit -m b
+  $ echo xxx >> a
+  $ hg commit -m aa
+
+Check that sidedata capabilities are advertised
+  $ hg debugcapabilities . | grep sidedata
+    exp-wanted-sidedata=1,2
+  $ hg debugcapabilities ../target-repo | grep sidedata
+    exp-wanted-sidedata=2,3
+
+Sidedata is generated in the source, but only the right categories (entry-0001 and entry-0002)
+  $ hg debugsidedata -c 0
+  2 sidedata entries
+   entry-0001 size 4
+   entry-0002 size 32
+  $ hg debugsidedata -c 1 -v
+  2 sidedata entries
+   entry-0001 size 4
+    '\x00\x00\x006'
+   entry-0002 size 32
+    '\x98\t\xf9\xc4v\xf0\xc5P\x90\xf7wRf\xe8\xe27e\xfc\xc1\x93\xa4\x96\xd0\x1d\x97\xaaG\x1d\xd7t\xfa\xde'
+  $ hg debugsidedata -m 2
+  2 sidedata entries
+   entry-0001 size 4
+   entry-0002 size 32
+  $ hg debugsidedata a 1
+  2 sidedata entries
+   entry-0001 size 4
+   entry-0002 size 32
+
+
+We expect the target to have sidedata that was generated by the source on push,
+and also removed the sidedata categories that are not supported by the target.
+Namely, we expect entry-0002 (only exchanged) and entry-0003 (generated),
+but not entry-0001.
+
+  $ hg push -r . ../target-repo --traceback
+  pushing to ../target-repo
+  searching for changes
+  adding changesets
+  adding manifests
+  adding file changes
+  added 3 changesets with 3 changes to 2 files
+  $ cd ../target-repo
+  $ hg log -G
+  o  changeset:   2:40f977031323
+  |  tag:         tip
+  |  user:        test
+  |  date:        Thu Jan 01 00:00:00 1970 +0000
+  |  summary:     aa
+  |
+  o  changeset:   1:2707720c6597
+  |  user:        test
+  |  date:        Thu Jan 01 00:00:00 1970 +0000
+  |  summary:     b
+  |
+  o  changeset:   0:7049e48789d7
+     user:        test
+     date:        Thu Jan 01 00:00:00 1970 +0000
+     summary:     a
+  
+  $ hg debugsidedata -c 0
+  2 sidedata entries
+   entry-0002 size 32
+   entry-0003 size 48
+  $ hg debugsidedata -c 1 -v
+  2 sidedata entries
+   entry-0002 size 32
+    '\x98\t\xf9\xc4v\xf0\xc5P\x90\xf7wRf\xe8\xe27e\xfc\xc1\x93\xa4\x96\xd0\x1d\x97\xaaG\x1d\xd7t\xfa\xde'
+   entry-0003 size 48
+    '\x87\xcf\xdfI/\xb5\xed\xeaC\xc1\xf0S\xf3X\x1c\xcc\x00m\xee\xe6#\xc1\xe3\xcaB8Fk\x82e\xfc\xc01\xf6\xb7\xb9\xb3([\xf6D\xa6\xcf\x9b\xea\x11{\x08'
+  $ hg debugsidedata -m 2
+  2 sidedata entries
+   entry-0002 size 32
+   entry-0003 size 48
+  $ hg debugsidedata a 1
+  2 sidedata entries
+   entry-0002 size 32
+   entry-0003 size 48
+  $ cd ..
+
+(Pull) Target has strict superset of the source
+-----------------------------------------------
+
+  $ rm -rf source-repo target-repo
+  $ hg init source-repo --config format.exp-use-side-data=yes
+  $ hg init target-repo --config format.exp-use-side-data=yes
+  $ cat << EOF >> target-repo/.hg/hgrc
+  > [extensions]
+  > testsidedata=$TESTDIR/testlib/ext-sidedata.py
+  > EOF
+  $ cd source-repo
+  $ echo aaa > a
+  $ hg add a
+  $ hg commit -m a
+  $ echo aaa > b
+  $ hg add b
+  $ hg commit -m b
+  $ echo xxx >> a
+  $ hg commit -m aa
+
+No sidedata is generated in the source
+  $ hg debugsidedata -c 0
+
+Check that sidedata capabilities are advertised
+  $ hg debugcapabilities ../target-repo | grep sidedata
+    exp-wanted-sidedata=1,2
+
+  $ cd ../target-repo
+
+Add the required capabilities
+  $ cat << EOF >> .hg/hgrc
+  > [extensions]
+  > testsidedata2=$TESTDIR/testlib/ext-sidedata-2.py
+  > EOF
+
+We expect the target to have sidedata that it generated on-the-fly during pull
+  $ hg pull -r . ../source-repo  --traceback
+  pulling from ../source-repo
+  adding changesets
+  adding manifests
+  adding file changes
+  added 3 changesets with 3 changes to 2 files
+  new changesets 7049e48789d7:40f977031323
+  (run 'hg update' to get a working copy)
+  $ hg debugsidedata -c 0 --traceback
+  2 sidedata entries
+   entry-0001 size 4
+   entry-0002 size 32
+  $ hg debugsidedata -c 1 -v --traceback
+  2 sidedata entries
+   entry-0001 size 4
+    '\x00\x00\x006'
+   entry-0002 size 32
+    '\x98\t\xf9\xc4v\xf0\xc5P\x90\xf7wRf\xe8\xe27e\xfc\xc1\x93\xa4\x96\xd0\x1d\x97\xaaG\x1d\xd7t\xfa\xde'
+  $ hg debugsidedata -m 2
+  2 sidedata entries
+   entry-0001 size 4
+   entry-0002 size 32
+  $ hg debugsidedata a 1
+  2 sidedata entries
+   entry-0001 size 4
+   entry-0002 size 32
+  $ cd ..
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/testlib/ext-sidedata-2.py	Fri Feb 19 11:24:50 2021 +0100
@@ -0,0 +1,50 @@
+# coding: utf8
+# ext-sidedata-2.py - small extension to test (differently) the sidedata logic
+#
+# Simulates a client for a complex sidedata exchange.
+#
+# Copyright 2021 Raphaël Gomès <rgomes@octobus.net>
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from __future__ import absolute_import
+
+import hashlib
+import struct
+
+from mercurial.revlogutils import sidedata as sidedatamod
+
+
+def compute_sidedata_1(repo, revlog, rev, sidedata, text=None):
+    sidedata = sidedata.copy()
+    if text is None:
+        text = revlog.revision(rev)
+    sidedata[sidedatamod.SD_TEST1] = struct.pack('>I', len(text))
+    return sidedata
+
+
+def compute_sidedata_2(repo, revlog, rev, sidedata, text=None):
+    sidedata = sidedata.copy()
+    if text is None:
+        text = revlog.revision(rev)
+    sha256 = hashlib.sha256(text).digest()
+    sidedata[sidedatamod.SD_TEST2] = struct.pack('>32s', sha256)
+    return sidedata
+
+
+def reposetup(ui, repo):
+    # Sidedata keys happen to be the same as the categories, easier for testing.
+    for kind in (b'changelog', b'manifest', b'filelog'):
+        repo.register_sidedata_computer(
+            kind,
+            sidedatamod.SD_TEST1,
+            (sidedatamod.SD_TEST1,),
+            compute_sidedata_1,
+        )
+        repo.register_sidedata_computer(
+            kind,
+            sidedatamod.SD_TEST2,
+            (sidedatamod.SD_TEST2,),
+            compute_sidedata_2,
+        )
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/testlib/ext-sidedata-3.py	Fri Feb 19 11:24:50 2021 +0100
@@ -0,0 +1,88 @@
+# coding: utf8
+# ext-sidedata-3.py - small extension to test (differently still) the sidedata
+# logic
+#
+# Simulates a client for a complex sidedata exchange.
+#
+# Copyright 2021 Raphaël Gomès <rgomes@octobus.net>
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from __future__ import absolute_import
+
+import hashlib
+import struct
+
+from mercurial import (
+    extensions,
+    revlog,
+)
+
+from mercurial.revlogutils import sidedata as sidedatamod
+
+
+def compute_sidedata_1(repo, revlog, rev, sidedata, text=None):
+    sidedata = sidedata.copy()
+    if text is None:
+        text = revlog.revision(rev)
+    sidedata[sidedatamod.SD_TEST1] = struct.pack('>I', len(text))
+    return sidedata
+
+
+def compute_sidedata_2(repo, revlog, rev, sidedata, text=None):
+    sidedata = sidedata.copy()
+    if text is None:
+        text = revlog.revision(rev)
+    sha256 = hashlib.sha256(text).digest()
+    sidedata[sidedatamod.SD_TEST2] = struct.pack('>32s', sha256)
+    return sidedata
+
+
+def compute_sidedata_3(repo, revlog, rev, sidedata, text=None):
+    sidedata = sidedata.copy()
+    if text is None:
+        text = revlog.revision(rev)
+    sha384 = hashlib.sha384(text).digest()
+    sidedata[sidedatamod.SD_TEST3] = struct.pack('>48s', sha384)
+    return sidedata
+
+
+def wrapaddrevision(
+    orig, self, text, transaction, link, p1, p2, *args, **kwargs
+):
+    if kwargs.get('sidedata') is None:
+        kwargs['sidedata'] = {}
+    sd = kwargs['sidedata']
+    sd = compute_sidedata_1(None, self, None, sd, text=text)
+    kwargs['sidedata'] = compute_sidedata_2(None, self, None, sd, text=text)
+    return orig(self, text, transaction, link, p1, p2, *args, **kwargs)
+
+
+def extsetup(ui):
+    extensions.wrapfunction(revlog.revlog, 'addrevision', wrapaddrevision)
+
+
+def reposetup(ui, repo):
+    # Sidedata keys happen to be the same as the categories, easier for testing.
+    for kind in (b'changelog', b'manifest', b'filelog'):
+        repo.register_sidedata_computer(
+            kind,
+            sidedatamod.SD_TEST1,
+            (sidedatamod.SD_TEST1,),
+            compute_sidedata_1,
+        )
+        repo.register_sidedata_computer(
+            kind,
+            sidedatamod.SD_TEST2,
+            (sidedatamod.SD_TEST2,),
+            compute_sidedata_2,
+        )
+        repo.register_sidedata_computer(
+            kind,
+            sidedatamod.SD_TEST3,
+            (sidedatamod.SD_TEST3,),
+            compute_sidedata_3,
+        )
+    repo.register_wanted_sidedata(sidedatamod.SD_TEST1)
+    repo.register_wanted_sidedata(sidedatamod.SD_TEST2)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/testlib/ext-sidedata-4.py	Fri Feb 19 11:24:50 2021 +0100
@@ -0,0 +1,19 @@
+# coding: utf8
+# ext-sidedata-4.py - small extension to test (differently still) the sidedata
+# logic
+#
+# Simulates a server for a complex sidedata exchange.
+#
+# Copyright 2021 Raphaël Gomès <rgomes@octobus.net>
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from __future__ import absolute_import
+
+from mercurial.revlogutils import sidedata
+
+
+def reposetup(ui, repo):
+    repo.register_wanted_sidedata(sidedata.SD_TEST2)
+    repo.register_wanted_sidedata(sidedata.SD_TEST3)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/testlib/ext-sidedata-5.py	Fri Feb 19 11:24:50 2021 +0100
@@ -0,0 +1,81 @@
+# coding: utf8
+# ext-sidedata-5.py - small extension to test (differently still) the sidedata
+# logic
+#
+# Simulates a server for a simple sidedata exchange.
+#
+# Copyright 2021 Raphaël Gomès <rgomes@octobus.net>
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from __future__ import absolute_import
+
+import hashlib
+import struct
+
+from mercurial import (
+    extensions,
+    revlog,
+)
+
+
+from mercurial.revlogutils import sidedata as sidedatamod
+
+
+def compute_sidedata_1(repo, revlog, rev, sidedata, text=None):
+    sidedata = sidedata.copy()
+    if text is None:
+        text = revlog.revision(rev)
+    sidedata[sidedatamod.SD_TEST1] = struct.pack('>I', len(text))
+    return sidedata
+
+
+def compute_sidedata_2(repo, revlog, rev, sidedata, text=None):
+    sidedata = sidedata.copy()
+    if text is None:
+        text = revlog.revision(rev)
+    sha256 = hashlib.sha256(text).digest()
+    sidedata[sidedatamod.SD_TEST2] = struct.pack('>32s', sha256)
+    return sidedata
+
+
+def reposetup(ui, repo):
+    # Sidedata keys happen to be the same as the categories, easier for testing.
+    for kind in (b'changelog', b'manifest', b'filelog'):
+        repo.register_sidedata_computer(
+            kind,
+            sidedatamod.SD_TEST1,
+            (sidedatamod.SD_TEST1,),
+            compute_sidedata_1,
+        )
+        repo.register_sidedata_computer(
+            kind,
+            sidedatamod.SD_TEST2,
+            (sidedatamod.SD_TEST2,),
+            compute_sidedata_2,
+        )
+
+    # The computers are registered above; also declare both categories as
+    # wanted by this repository
+    repo.register_wanted_sidedata(sidedatamod.SD_TEST1)
+    repo.register_wanted_sidedata(sidedatamod.SD_TEST2)
+
+
+def wrapaddrevision(
+    orig, self, text, transaction, link, p1, p2, *args, **kwargs
+):
+    if kwargs.get('sidedata') is None:
+        kwargs['sidedata'] = {}
+    sd = kwargs['sidedata']
+    ## let's store some arbitrary data just for testing
+    # text length
+    sd[sidedatamod.SD_TEST1] = struct.pack('>I', len(text))
+    # and sha2 hashes
+    sha256 = hashlib.sha256(text).digest()
+    sd[sidedatamod.SD_TEST2] = struct.pack('>32s', sha256)
+    return orig(self, text, transaction, link, p1, p2, *args, **kwargs)
+
+
+def extsetup(ui):
+    extensions.wrapfunction(revlog.revlog, 'addrevision', wrapaddrevision)
--- a/tests/testlib/ext-sidedata.py	Mon Feb 15 11:08:28 2021 +0100
+++ b/tests/testlib/ext-sidedata.py	Fri Feb 19 11:24:50 2021 +0100
@@ -1,6 +1,6 @@
 # ext-sidedata.py - small extension to test the sidedata logic
 #
-# Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net)
+# Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
@@ -47,11 +47,12 @@
     if self.version & 0xFFFF != 2:
         return text, sd
     if nodeorrev != nullrev and nodeorrev != nullid:
-        if len(text) != struct.unpack('>I', sd[sidedata.SD_TEST1])[0]:
+        cat1 = sd.get(sidedata.SD_TEST1)
+        if cat1 is not None and len(text) != struct.unpack('>I', cat1)[0]:
             raise RuntimeError('text size mismatch')
-        expected = sd[sidedata.SD_TEST2]
+        expected = sd.get(sidedata.SD_TEST2)
         got = hashlib.sha256(text).digest()
-        if got != expected:
+        if expected is not None and got != expected:
             raise RuntimeError('sha256 mismatch')
     return text, sd
 
@@ -86,3 +87,10 @@
     extensions.wrapfunction(
         upgrade_engine, 'getsidedatacompanion', wrapgetsidedatacompanion
     )
+
+
+def reposetup(ui, repo):
+    # We don't register sidedata computers because we don't care within these
+    # tests
+    repo.register_wanted_sidedata(sidedata.SD_TEST1)
+    repo.register_wanted_sidedata(sidedata.SD_TEST2)