cg4: introduce protocol flag to signify the presence of sidedata
author Raphaël Gomès <rgomes@octobus.net>
Sat, 10 Apr 2021 11:27:40 +0200
changeset 47077 119790e1c67c
parent 47076 08e26ef4ad35
child 47078 223b47235d1c
cg4: introduce protocol flag to signify the presence of sidedata We need a way of signaling whether the current revision has sidedata or not, and re-using the revision flags would waste potential revlog flags and mix two normally independent layers. In this change, we add a single byte at the start of the cg4 delta header to set potential protocol flags. We also reclaim the revlog flag for sidedata, since it is no longer used; in its place now lives the (also experimental) copytracing flag. When generating deltas, apply the `CG_FLAG_SIDEDATA` flag if there is sidedata. When applying the deltas, if said flag is present, the next chunk contains the sidedata. Differential Revision: https://phab.mercurial-scm.org/D10343
hgext/remotefilelog/remotefilelog.py
hgext/sqlitestore.py
mercurial/changegroup.py
mercurial/helptext/internals/changegroups.txt
mercurial/interfaces/repository.py
mercurial/revlog.py
mercurial/revlogutils/constants.py
mercurial/revlogutils/flagutil.py
mercurial/utils/storageutil.py
tests/test-check-interfaces.py
tests/test-help.t
tests/test-lfs-serve.t
--- a/hgext/remotefilelog/remotefilelog.py	Thu Apr 08 16:34:11 2021 +0200
+++ b/hgext/remotefilelog/remotefilelog.py	Sat Apr 10 11:27:40 2021 +0200
@@ -331,6 +331,8 @@
                 delta=delta,
                 # Sidedata is not supported yet
                 sidedata=None,
+                # Protocol flags are not used yet
+                protocol_flags=0,
             )
 
     def revdiff(self, node1, node2):
--- a/hgext/sqlitestore.py	Thu Apr 08 16:34:11 2021 +0200
+++ b/hgext/sqlitestore.py	Sat Apr 10 11:27:40 2021 +0200
@@ -289,6 +289,7 @@
     revision = attr.ib()
     delta = attr.ib()
     sidedata = attr.ib()
+    protocol_flags = attr.ib()
     linknode = attr.ib(default=None)
 
 
--- a/mercurial/changegroup.py	Thu Apr 08 16:34:11 2021 +0200
+++ b/mercurial/changegroup.py	Sat Apr 10 11:27:40 2021 +0200
@@ -34,10 +34,12 @@
 from .interfaces import repository
 from .revlogutils import sidedata as sidedatamod
 from .revlogutils import constants as revlog_constants
+from .utils import storageutil
 
 _CHANGEGROUPV1_DELTA_HEADER = struct.Struct(b"20s20s20s20s")
 _CHANGEGROUPV2_DELTA_HEADER = struct.Struct(b"20s20s20s20s20s")
 _CHANGEGROUPV3_DELTA_HEADER = struct.Struct(b">20s20s20s20s20sH")
+_CHANGEGROUPV4_DELTA_HEADER = struct.Struct(b">B20s20s20s20s20sH")
 
 LFS_REQUIREMENT = b'lfs'
 
@@ -194,7 +196,8 @@
         else:
             deltabase = prevnode
         flags = 0
-        return node, p1, p2, deltabase, cs, flags
+        protocol_flags = 0
+        return node, p1, p2, deltabase, cs, flags, protocol_flags
 
     def deltachunk(self, prevnode):
         l = self._chunklength()
@@ -203,10 +206,9 @@
         headerdata = readexactly(self._stream, self.deltaheadersize)
         header = self.deltaheader.unpack(headerdata)
         delta = readexactly(self._stream, l - self.deltaheadersize)
-        node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
-        # cg4 forward-compat
-        sidedata = {}
-        return (node, p1, p2, cs, deltabase, delta, flags, sidedata)
+        header = self._deltaheader(header, prevnode)
+        node, p1, p2, deltabase, cs, flags, protocol_flags = header
+        return node, p1, p2, cs, deltabase, delta, flags, protocol_flags
 
     def getchunks(self):
         """returns all the chunks contains in the bundle
@@ -597,7 +599,8 @@
     def _deltaheader(self, headertuple, prevnode):
         node, p1, p2, deltabase, cs = headertuple
         flags = 0
-        return node, p1, p2, deltabase, cs, flags
+        protocol_flags = 0
+        return node, p1, p2, deltabase, cs, flags, protocol_flags
 
 
 class cg3unpacker(cg2unpacker):
@@ -615,7 +618,8 @@
 
     def _deltaheader(self, headertuple, prevnode):
         node, p1, p2, deltabase, cs, flags = headertuple
-        return node, p1, p2, deltabase, cs, flags
+        protocol_flags = 0
+        return node, p1, p2, deltabase, cs, flags, protocol_flags
 
     def _unpackmanifests(self, repo, revmap, trp, prog, addrevisioncb=None):
         super(cg3unpacker, self)._unpackmanifests(
@@ -638,18 +642,24 @@
     cg4 streams add support for exchanging sidedata.
     """
 
+    deltaheader = _CHANGEGROUPV4_DELTA_HEADER
+    deltaheadersize = deltaheader.size
     version = b'04'
 
+    def _deltaheader(self, headertuple, prevnode):
+        protocol_flags, node, p1, p2, deltabase, cs, flags = headertuple
+        return node, p1, p2, deltabase, cs, flags, protocol_flags
+
     def deltachunk(self, prevnode):
         res = super(cg4unpacker, self).deltachunk(prevnode)
         if not res:
             return res
 
-        (node, p1, p2, cs, deltabase, delta, flags, _sidedata) = res
+        (node, p1, p2, cs, deltabase, delta, flags, protocol_flags) = res
 
-        sidedata_raw = getchunk(self._stream)
         sidedata = {}
-        if len(sidedata_raw) > 0:
+        if protocol_flags & storageutil.CG_FLAG_SIDEDATA:
+            sidedata_raw = getchunk(self._stream)
             sidedata = sidedatamod.deserialize_sidedata(sidedata_raw)
 
         return node, p1, p2, cs, deltabase, delta, flags, sidedata
@@ -695,10 +705,10 @@
         yield prefix
     yield data
 
-    sidedata = delta.sidedata
-    if sidedata is not None:
+    if delta.protocol_flags & storageutil.CG_FLAG_SIDEDATA:
         # Need a separate chunk for sidedata to be able to differentiate
         # "raw delta" length and sidedata length
+        sidedata = delta.sidedata
         yield chunkheader(len(sidedata))
         yield sidedata
 
@@ -1640,11 +1650,18 @@
     fullnodes=None,
     remote_sidedata=None,
 ):
-    # Same header func as cg3. Sidedata is in a separate chunk from the delta to
-    # differenciate "raw delta" and sidedata.
-    builddeltaheader = lambda d: _CHANGEGROUPV3_DELTA_HEADER.pack(
-        d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags
-    )
+    # Sidedata is in a separate chunk from the delta to differentiate
+    # "raw delta" and sidedata.
+    def builddeltaheader(d):
+        return _CHANGEGROUPV4_DELTA_HEADER.pack(
+            d.protocol_flags,
+            d.node,
+            d.p1node,
+            d.p2node,
+            d.basenode,
+            d.linknode,
+            d.flags,
+        )
 
     return cgpacker(
         repo,
@@ -1930,7 +1947,6 @@
     sd_computers = collections.defaultdict(list)
     # Computers for categories to remove from sidedata
     sd_removers = collections.defaultdict(list)
-
     to_generate = remote_sd_categories - repo._wanted_sidedata
     to_remove = repo._wanted_sidedata - remote_sd_categories
     if pull:
--- a/mercurial/helptext/internals/changegroups.txt	Thu Apr 08 16:34:11 2021 +0200
+++ b/mercurial/helptext/internals/changegroups.txt	Sat Apr 10 11:27:40 2021 +0200
@@ -2,12 +2,13 @@
 the changelog data, root/flat manifest data, treemanifest data, and
 filelogs.
 
-There are 3 versions of changegroups: ``1``, ``2``, and ``3``. From a
+There are 4 versions of changegroups: ``1``, ``2``, ``3`` and ``4``. From a
 high-level, versions ``1`` and ``2`` are almost exactly the same, with the
 only difference being an additional item in the *delta header*. Version
 ``3`` adds support for storage flags in the *delta header* and optionally
 exchanging treemanifests (enabled by setting an option on the
-``changegroup`` part in the bundle2).
+``changegroup`` part in the bundle2). Version ``4`` adds support for exchanging
+sidedata (additional revision metadata not part of the digest).
 
 Changegroups when not exchanging treemanifests consist of 3 logical
 segments::
@@ -74,8 +75,8 @@
 entry (either that the recipient already has, or previously specified in the
 bundle/changegroup).
 
-The *delta header* is different between versions ``1``, ``2``, and
-``3`` of the changegroup format.
+The *delta header* is different between versions ``1``, ``2``, ``3`` and ``4``
+of the changegroup format.
 
 Version 1 (headerlen=80)::
 
@@ -104,6 +105,15 @@
    |            |             |             |            |            |           |
    +------------------------------------------------------------------------------+
 
+Version 4 (headerlen=103)::
+
+   +------------------------------------------------------------------------------+----------+
+   |            |             |             |            |            |           |          |
+   |    node    |   p1 node   |   p2 node   | base node  | link node  |   flags   |  pflags  |
+   | (20 bytes) |  (20 bytes) |  (20 bytes) | (20 bytes) | (20 bytes) | (2 bytes) | (1 byte) |
+   |            |             |             |            |            |           |          |
+   +------------------------------------------------------------------------------+----------+
+
 The *delta data* consists of ``chunklen - 4 - headerlen`` bytes, which contain a
 series of *delta*s, densely packed (no separators). These deltas describe a diff
 from an existing entry (either that the recipient already has, or previously
@@ -140,12 +150,24 @@
    Externally stored. The revision fulltext contains ``key:value`` ``\n``
    delimited metadata defining an object stored elsewhere. Used by the LFS
    extension.
+4096
+   Contains copy information. This revision changes files in a way that could
+   affect copy tracing. This does *not* affect changegroup handling, but is
+   relevant for other parts of Mercurial.
 
 For historical reasons, the integer values are identical to revlog version 1
 per-revision storage flags and correspond to bits being set in this 2-byte
 field. Bits were allocated starting from the most-significant bit, hence the
 reverse ordering and allocation of these flags.
 
+The *pflags* (protocol flags) field holds bitwise flags affecting the protocol
+itself. They are first in the header since they may affect the handling of the
+rest of the fields in a future version. They are defined as such:
+
+1 indicates whether to read a chunk of sidedata (of variable length) right
+  after the revision flags.
+
+
 Changeset Segment
 =================
 
@@ -166,9 +188,9 @@
 Treemanifests Segment
 ---------------------
 
-The *treemanifests segment* only exists in changegroup version ``3``, and
-only if the 'treemanifest' param is part of the bundle2 changegroup part
-(it is not possible to use changegroup version 3 outside of bundle2).
+The *treemanifests segment* only exists in changegroup version ``3`` and ``4``,
+and only if the 'treemanifest' param is part of the bundle2 changegroup part
+(it is not possible to use changegroup version 3 or 4 outside of bundle2).
 Aside from the filenames in the *treemanifests segment* containing a
 trailing ``/`` character, it behaves identically to the *filelogs segment*
 (see below). The final sub-segment is followed by an *empty chunk* (logically,
--- a/mercurial/interfaces/repository.py	Thu Apr 08 16:34:11 2021 +0200
+++ b/mercurial/interfaces/repository.py	Sat Apr 10 11:27:40 2021 +0200
@@ -27,14 +27,12 @@
 REVISION_FLAG_CENSORED = 1 << 15
 REVISION_FLAG_ELLIPSIS = 1 << 14
 REVISION_FLAG_EXTSTORED = 1 << 13
-REVISION_FLAG_SIDEDATA = 1 << 12
-REVISION_FLAG_HASCOPIESINFO = 1 << 11
+REVISION_FLAG_HASCOPIESINFO = 1 << 12
 
 REVISION_FLAGS_KNOWN = (
     REVISION_FLAG_CENSORED
     | REVISION_FLAG_ELLIPSIS
     | REVISION_FLAG_EXTSTORED
-    | REVISION_FLAG_SIDEDATA
     | REVISION_FLAG_HASCOPIESINFO
 )
 
@@ -457,6 +455,13 @@
         """Raw sidedata bytes for the given revision."""
     )
 
+    protocol_flags = interfaceutil.Attribute(
+        """Single byte of integer flags that can influence the protocol.
+
+        This is a bitwise composition of the ``storageutil.CG_FLAG*`` constants.
+        """
+    )
+
 
 class ifilerevisionssequence(interfaceutil.Interface):
     """Contains index data for all revisions of a file.
--- a/mercurial/revlog.py	Thu Apr 08 16:34:11 2021 +0200
+++ b/mercurial/revlog.py	Sat Apr 10 11:27:40 2021 +0200
@@ -55,7 +55,6 @@
     REVIDX_HASCOPIESINFO,
     REVIDX_ISCENSORED,
     REVIDX_RAWTEXT_CHANGING_FLAGS,
-    REVIDX_SIDEDATA,
 )
 from .thirdparty import attr
 from . import (
@@ -98,7 +97,6 @@
 REVLOGV2_FLAGS
 REVIDX_ISCENSORED
 REVIDX_ELLIPSIS
-REVIDX_SIDEDATA
 REVIDX_HASCOPIESINFO
 REVIDX_EXTSTORED
 REVIDX_DEFAULT_FLAGS
@@ -196,6 +194,7 @@
     revision = attr.ib()
     delta = attr.ib()
     sidedata = attr.ib()
+    protocol_flags = attr.ib()
     linknode = attr.ib(default=None)
 
 
--- a/mercurial/revlogutils/constants.py	Thu Apr 08 16:34:11 2021 +0200
+++ b/mercurial/revlogutils/constants.py	Sat Apr 10 11:27:40 2021 +0200
@@ -99,8 +99,6 @@
 REVIDX_ELLIPSIS = repository.REVISION_FLAG_ELLIPSIS
 # revision data is stored externally
 REVIDX_EXTSTORED = repository.REVISION_FLAG_EXTSTORED
-# revision data contains extra metadata not part of the official digest
-REVIDX_SIDEDATA = repository.REVISION_FLAG_SIDEDATA
 # revision changes files in a way that could affect copy tracing.
 REVIDX_HASCOPIESINFO = repository.REVISION_FLAG_HASCOPIESINFO
 REVIDX_DEFAULT_FLAGS = 0
@@ -109,13 +107,10 @@
     REVIDX_ISCENSORED,
     REVIDX_ELLIPSIS,
     REVIDX_EXTSTORED,
-    REVIDX_SIDEDATA,
     REVIDX_HASCOPIESINFO,
 ]
 
 # bitmark for flags that could cause rawdata content change
-REVIDX_RAWTEXT_CHANGING_FLAGS = (
-    REVIDX_ISCENSORED | REVIDX_EXTSTORED | REVIDX_SIDEDATA
-)
+REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
 
 SPARSE_REVLOG_MAX_CHAIN_LENGTH = 1000
--- a/mercurial/revlogutils/flagutil.py	Thu Apr 08 16:34:11 2021 +0200
+++ b/mercurial/revlogutils/flagutil.py	Sat Apr 10 11:27:40 2021 +0200
@@ -18,7 +18,6 @@
     REVIDX_HASCOPIESINFO,
     REVIDX_ISCENSORED,
     REVIDX_RAWTEXT_CHANGING_FLAGS,
-    REVIDX_SIDEDATA,
 )
 
 from .. import error, util
@@ -28,7 +27,6 @@
 REVIDX_ISCENSORED
 REVIDX_ELLIPSIS
 REVIDX_EXTSTORED
-REVIDX_SIDEDATA
 REVIDX_HASCOPIESINFO,
 REVIDX_DEFAULT_FLAGS
 REVIDX_FLAGS_ORDER
--- a/mercurial/utils/storageutil.py	Thu Apr 08 16:34:11 2021 +0200
+++ b/mercurial/utils/storageutil.py	Sat Apr 10 11:27:40 2021 +0200
@@ -28,6 +28,10 @@
 
 _nullhash = hashutil.sha1(sha1nodeconstants.nullid)
 
+# revision data contains extra metadata not part of the official digest
+# Only used in changegroup >= v4.
+CG_FLAG_SIDEDATA = 1
+
 
 def hashrevisionsha1(text, p1, p2):
     """Compute the SHA-1 for revision data and its parents.
@@ -486,7 +490,7 @@
 
                 available.add(rev)
 
-        sidedata = None
+        serialized_sidedata = None
         if sidedata_helpers:
             sidedata = store.sidedata(rev)
             sidedata = run_sidedata_helpers(
@@ -495,18 +499,26 @@
                 sidedata=sidedata,
                 rev=rev,
             )
-            sidedata = sidedatamod.serialize_sidedata(sidedata)
+            if sidedata:
+                serialized_sidedata = sidedatamod.serialize_sidedata(sidedata)
+
+        flags = flagsfn(rev) if flagsfn else 0
+        protocol_flags = 0
+        if serialized_sidedata:
+            # Advertise that sidedata exists to the other side
+            protocol_flags |= CG_FLAG_SIDEDATA
 
         yield resultcls(
             node=node,
             p1node=fnode(p1rev),
             p2node=fnode(p2rev),
             basenode=fnode(baserev),
-            flags=flagsfn(rev) if flagsfn else 0,
+            flags=flags,
             baserevisionsize=baserevisionsize,
             revision=revision,
             delta=delta,
-            sidedata=sidedata,
+            sidedata=serialized_sidedata,
+            protocol_flags=protocol_flags,
         )
 
         prevrev = rev
--- a/tests/test-check-interfaces.py	Thu Apr 08 16:34:11 2021 +0200
+++ b/tests/test-check-interfaces.py	Sat Apr 10 11:27:40 2021 +0200
@@ -282,6 +282,7 @@
         revision=b'',
         sidedata=b'',
         delta=None,
+        protocol_flags=b'',
     )
     checkzobject(rd)
 
--- a/tests/test-help.t	Thu Apr 08 16:34:11 2021 +0200
+++ b/tests/test-help.t	Sat Apr 10 11:27:40 2021 +0200
@@ -1136,12 +1136,13 @@
       the changelog data, root/flat manifest data, treemanifest data, and
       filelogs.
   
-      There are 3 versions of changegroups: "1", "2", and "3". From a high-
+      There are 4 versions of changegroups: "1", "2", "3" and "4". From a high-
       level, versions "1" and "2" are almost exactly the same, with the only
       difference being an additional item in the *delta header*. Version "3"
       adds support for storage flags in the *delta header* and optionally
       exchanging treemanifests (enabled by setting an option on the
-      "changegroup" part in the bundle2).
+      "changegroup" part in the bundle2). Version "4" adds support for
+      exchanging sidedata (additional revision metadata not part of the digest).
   
       Changegroups when not exchanging treemanifests consist of 3 logical
       segments:
@@ -1208,8 +1209,8 @@
       existing entry (either that the recipient already has, or previously
       specified in the bundle/changegroup).
   
-      The *delta header* is different between versions "1", "2", and "3" of the
-      changegroup format.
+      The *delta header* is different between versions "1", "2", "3" and "4" of
+      the changegroup format.
   
       Version 1 (headerlen=80):
   
@@ -1238,6 +1239,15 @@
         |            |             |             |            |            |           |
         +------------------------------------------------------------------------------+
   
+      Version 4 (headerlen=103):
+  
+        +------------------------------------------------------------------------------+----------+
+        |            |             |             |            |            |           |          |
+        |    node    |   p1 node   |   p2 node   | base node  | link node  |   flags   |  pflags  |
+        | (20 bytes) |  (20 bytes) |  (20 bytes) | (20 bytes) | (20 bytes) | (2 bytes) | (1 byte) |
+        |            |             |             |            |            |           |          |
+        +------------------------------------------------------------------------------+----------+
+  
       The *delta data* consists of "chunklen - 4 - headerlen" bytes, which
       contain a series of *delta*s, densely packed (no separators). These deltas
       describe a diff from an existing entry (either that the recipient already
@@ -1278,11 +1288,24 @@
          delimited metadata defining an object stored elsewhere. Used by the LFS
          extension.
   
+      4096
+         Contains copy information. This revision changes files in a way that
+         could affect copy tracing. This does *not* affect changegroup handling,
+         but is relevant for other parts of Mercurial.
+  
       For historical reasons, the integer values are identical to revlog version
       1 per-revision storage flags and correspond to bits being set in this
       2-byte field. Bits were allocated starting from the most-significant bit,
       hence the reverse ordering and allocation of these flags.
   
+      The *pflags* (protocol flags) field holds bitwise flags affecting the
+      protocol itself. They are first in the header since they may affect the
+      handling of the rest of the fields in a future version. They are defined
+      as such:
+  
+      1 indicates whether to read a chunk of sidedata (of variable length) right
+        after the revision flags.
+  
       Changeset Segment
       =================
   
@@ -1303,14 +1326,14 @@
       Treemanifests Segment
       ---------------------
   
-      The *treemanifests segment* only exists in changegroup version "3", and
-      only if the 'treemanifest' param is part of the bundle2 changegroup part
-      (it is not possible to use changegroup version 3 outside of bundle2).
-      Aside from the filenames in the *treemanifests segment* containing a
-      trailing "/" character, it behaves identically to the *filelogs segment*
-      (see below). The final sub-segment is followed by an *empty chunk*
-      (logically, a sub-segment with filename size 0). This denotes the boundary
-      to the *filelogs segment*.
+      The *treemanifests segment* only exists in changegroup version "3" and
+      "4", and only if the 'treemanifest' param is part of the bundle2
+      changegroup part (it is not possible to use changegroup version 3 or 4
+      outside of bundle2). Aside from the filenames in the *treemanifests
+      segment* containing a trailing "/" character, it behaves identically to
+      the *filelogs segment* (see below). The final sub-segment is followed by
+      an *empty chunk* (logically, a sub-segment with filename size 0). This
+      denotes the boundary to the *filelogs segment*.
   
       Filelogs Segment
       ================
@@ -3648,12 +3671,13 @@
   filelogs.
   </p>
   <p>
-  There are 3 versions of changegroups: &quot;1&quot;, &quot;2&quot;, and &quot;3&quot;. From a
+  There are 4 versions of changegroups: &quot;1&quot;, &quot;2&quot;, &quot;3&quot; and &quot;4&quot;. From a
   high-level, versions &quot;1&quot; and &quot;2&quot; are almost exactly the same, with the
   only difference being an additional item in the *delta header*. Version
   &quot;3&quot; adds support for storage flags in the *delta header* and optionally
   exchanging treemanifests (enabled by setting an option on the
-  &quot;changegroup&quot; part in the bundle2).
+  &quot;changegroup&quot; part in the bundle2). Version &quot;4&quot; adds support for exchanging
+  sidedata (additional revision metadata not part of the digest).
   </p>
   <p>
   Changegroups when not exchanging treemanifests consist of 3 logical
@@ -3733,8 +3757,8 @@
   bundle/changegroup).
   </p>
   <p>
-  The *delta header* is different between versions &quot;1&quot;, &quot;2&quot;, and
-  &quot;3&quot; of the changegroup format.
+  The *delta header* is different between versions &quot;1&quot;, &quot;2&quot;, &quot;3&quot; and &quot;4&quot;
+  of the changegroup format.
   </p>
   <p>
   Version 1 (headerlen=80):
@@ -3770,6 +3794,17 @@
   +------------------------------------------------------------------------------+
   </pre>
   <p>
+  Version 4 (headerlen=103):
+  </p>
+  <pre>
+  +------------------------------------------------------------------------------+----------+
+  |            |             |             |            |            |           |          |
+  |    node    |   p1 node   |   p2 node   | base node  | link node  |   flags   |  pflags  |
+  | (20 bytes) |  (20 bytes) |  (20 bytes) | (20 bytes) | (20 bytes) | (2 bytes) | (1 byte) |
+  |            |             |             |            |            |           |          |
+  +------------------------------------------------------------------------------+----------+
+  </pre>
+  <p>
   The *delta data* consists of &quot;chunklen - 4 - headerlen&quot; bytes, which contain a
   series of *delta*s, densely packed (no separators). These deltas describe a diff
   from an existing entry (either that the recipient already has, or previously
@@ -3808,6 +3843,8 @@
    <dd>Ellipsis revision. Revision hash does not match data (likely due to rewritten parents).
    <dt>8192
    <dd>Externally stored. The revision fulltext contains &quot;key:value&quot; &quot;\n&quot; delimited metadata defining an object stored elsewhere. Used by the LFS extension.
+   <dt>4096
+   <dd>Contains copy information. This revision changes files in a way that could affect copy tracing. This does *not* affect changegroup handling, but is relevant for other parts of Mercurial.
   </dl>
   <p>
   For historical reasons, the integer values are identical to revlog version 1
@@ -3815,6 +3852,15 @@
   field. Bits were allocated starting from the most-significant bit, hence the
   reverse ordering and allocation of these flags.
   </p>
+  <p>
+  The *pflags* (protocol flags) field holds bitwise flags affecting the protocol
+  itself. They are first in the header since they may affect the handling of the
+  rest of the fields in a future version. They are defined as such:
+  </p>
+  <dl>
+   <dt>1 indicates whether to read a chunk of sidedata (of variable length) right
+   <dd>after the revision flags.
+  </dl>
   <h2>Changeset Segment</h2>
   <p>
   The *changeset segment* consists of a single *delta group* holding
@@ -3832,9 +3878,9 @@
   </p>
   <h3>Treemanifests Segment</h3>
   <p>
-  The *treemanifests segment* only exists in changegroup version &quot;3&quot;, and
-  only if the 'treemanifest' param is part of the bundle2 changegroup part
-  (it is not possible to use changegroup version 3 outside of bundle2).
+  The *treemanifests segment* only exists in changegroup version &quot;3&quot; and &quot;4&quot;,
+  and only if the 'treemanifest' param is part of the bundle2 changegroup part
+  (it is not possible to use changegroup version 3 or 4 outside of bundle2).
   Aside from the filenames in the *treemanifests segment* containing a
   trailing &quot;/&quot; character, it behaves identically to the *filelogs segment*
   (see below). The final sub-segment is followed by an *empty chunk* (logically,
--- a/tests/test-lfs-serve.t	Thu Apr 08 16:34:11 2021 +0200
+++ b/tests/test-lfs-serve.t	Sat Apr 10 11:27:40 2021 +0200
@@ -355,11 +355,11 @@
   # LFS required- both lfs and non-lfs revlogs have 0x2000 flag
   *** runcommand debugprocessors lfs.bin -R ../server
   registered processor '0x8000'
-  registered processor '0x800'
+  registered processor '0x1000'
   registered processor '0x2000'
   *** runcommand debugprocessors nonlfs2.txt -R ../server
   registered processor '0x8000'
-  registered processor '0x800'
+  registered processor '0x1000'
   registered processor '0x2000'
   *** runcommand config extensions --cwd ../server
   extensions.debugprocessors=$TESTTMP/debugprocessors.py
@@ -368,7 +368,7 @@
   # LFS not enabled- revlogs don't have 0x2000 flag
   *** runcommand debugprocessors nonlfs3.txt
   registered processor '0x8000'
-  registered processor '0x800'
+  registered processor '0x1000'
   *** runcommand config extensions
   extensions.debugprocessors=$TESTTMP/debugprocessors.py
 
@@ -411,11 +411,11 @@
   # LFS enabled- both lfs and non-lfs revlogs have 0x2000 flag
   *** runcommand debugprocessors lfs.bin -R ../server
   registered processor '0x8000'
-  registered processor '0x800'
+  registered processor '0x1000'
   registered processor '0x2000'
   *** runcommand debugprocessors nonlfs2.txt -R ../server
   registered processor '0x8000'
-  registered processor '0x800'
+  registered processor '0x1000'
   registered processor '0x2000'
   *** runcommand config extensions --cwd ../server
   extensions.debugprocessors=$TESTTMP/debugprocessors.py
@@ -424,7 +424,7 @@
   # LFS enabled without requirement- revlogs have 0x2000 flag
   *** runcommand debugprocessors nonlfs3.txt
   registered processor '0x8000'
-  registered processor '0x800'
+  registered processor '0x1000'
   registered processor '0x2000'
   *** runcommand config extensions
   extensions.debugprocessors=$TESTTMP/debugprocessors.py
@@ -433,7 +433,7 @@
   # LFS disabled locally- revlogs don't have 0x2000 flag
   *** runcommand debugprocessors nonlfs.txt -R ../nonlfs
   registered processor '0x8000'
-  registered processor '0x800'
+  registered processor '0x1000'
   *** runcommand config extensions --cwd ../nonlfs
   extensions.debugprocessors=$TESTTMP/debugprocessors.py
   extensions.lfs=!