revlog: recommit 49fd21f32695 with a fix for issue6528
author Joerg Sonnenberger <joerg@bec.de>
Tue, 20 Jul 2021 15:07:10 +0200
changeset 49012 5b65721a75eb
parent 49011 b999edb15f8c
child 49013 186223795e4a
revlog: recommit 49fd21f32695 with a fix for issue6528 `filelog.size` currently special cases two forms of metadata encoding: - copy data via the parent order as flag bit - censor data by peeking into the raw delta All other forms of metadata encoding including the empty metadata block are mishandled. In `basefilectx.cmp` the empty metadata block is explicitly checked to compensate for this. Restore 49fd21f32695, but disable it for filelog, so that the original flag bit use continues to work. Document all this mess for now in preparation of a proper rework. Differential Revision: https://phab.mercurial-scm.org/D11203
mercurial/context.py
mercurial/filelog.py
mercurial/revlog.py
tests/test-narrow-shallow-merges.t
--- a/mercurial/context.py	Fri Mar 18 12:23:47 2022 -0700
+++ b/mercurial/context.py	Tue Jul 20 15:07:10 2021 +0200
@@ -992,6 +992,16 @@
             if self._repo._encodefilterpats:
                 # can't rely on size() because wdir content may be decoded
                 return self._filelog.cmp(self._filenode, fctx.data())
+            # filelog.size() has two special cases:
+            # - censored metadata
+            # - copy/rename tracking
+            # The first is detected by peeking into the delta,
+            # the second is detected by abusing parent order
+            # in the revlog index as flag bit. This leaves files using
+            # the dummy encoding and non-standard meta attributes.
+            # The following check is a special case for the empty
+            # metadata block used if the raw file content starts with '\1\n'.
+            # Cases of arbitrary metadata flags are currently mishandled.
             if self.size() - 4 == fctx.size():
                 # size() can match:
                 # if file data starts with '\1\n', empty metadata block is
--- a/mercurial/filelog.py	Fri Mar 18 12:23:47 2022 -0700
+++ b/mercurial/filelog.py	Tue Jul 20 15:07:10 2021 +0200
@@ -32,6 +32,7 @@
             target=(revlog_constants.KIND_FILELOG, path),
             radix=b'/'.join((b'data', path)),
             censorable=True,
+            canonical_parent_order=False,  # see comment in revlog.py
         )
         # Full name of the user visible file, relative to the repository root.
         # Used by LFS.
@@ -207,6 +208,7 @@
             return 0
 
         # XXX if self.read(node).startswith("\1\n"), this returns (size+4)
+        # XXX See also basefilectx.cmp.
         return self._revlog.size(rev)
 
     def cmp(self, node, text):
--- a/mercurial/revlog.py	Fri Mar 18 12:23:47 2022 -0700
+++ b/mercurial/revlog.py	Tue Jul 20 15:07:10 2021 +0200
@@ -298,6 +298,7 @@
         persistentnodemap=False,
         concurrencychecker=None,
         trypending=False,
+        canonical_parent_order=True,
     ):
         """
         create a revlog object
@@ -373,6 +374,13 @@
 
         self._concurrencychecker = concurrencychecker
 
+        # parent order is supposed to be semantically irrelevant, so we
+        # normally resort parents to ensure that the first parent is non-null,
+        # if there is a non-null parent at all.
+        # filelog abuses the parent order as flag to mark some instances of
+        # meta-encoded files, so allow it to disable this behavior.
+        self.canonical_parent_order = canonical_parent_order
+
     def _init_opts(self):
         """process options (from above/config) to setup associated default revlog mode
 
@@ -898,7 +906,10 @@
                 raise error.WdirUnsupported
             raise
 
-        return entry[5], entry[6]
+        if self.canonical_parent_order and entry[5] == nullrev:
+            return entry[6], entry[5]
+        else:
+            return entry[5], entry[6]
 
     # fast parentrevs(rev) where rev isn't filtered
     _uncheckedparentrevs = parentrevs
@@ -919,7 +930,11 @@
     def parents(self, node):
         i = self.index
         d = i[self.rev(node)]
-        return i[d[5]][7], i[d[6]][7]  # map revisions to nodes inline
+        # inline node() to avoid function call overhead
+        if self.canonical_parent_order and d[5] == nullrev:
+            return i[d[6]][7], i[d[5]][7]
+        else:
+            return i[d[5]][7], i[d[6]][7]
 
     def chainlen(self, rev):
         return self._chaininfo(rev)[0]
--- a/tests/test-narrow-shallow-merges.t	Fri Mar 18 12:23:47 2022 -0700
+++ b/tests/test-narrow-shallow-merges.t	Tue Jul 20 15:07:10 2021 +0200
@@ -179,7 +179,7 @@
   
 
   $ hg log -T '{if(ellipsis,"...")}{node|short} {p1node|short} {p2node|short} {desc}\n' | sort
-  ...2a20009de83e 000000000000 3ac1f5779de3 outside 10
+  ...2a20009de83e 3ac1f5779de3 000000000000 outside 10
   ...3ac1f5779de3 bb96a08b062a 465567bdfb2d merge a/b/c/d 9
   ...8d874d57adea 7ef88b4dd4fa 000000000000 outside 12
   ...b844052e7b3b 000000000000 000000000000 outside 2c