diff mercurial/revlog.py @ 46722:3d740058b467

sidedata: move to new sidedata storage in revlogv2

The current (experimental) sidedata system uses flagprocessors to signify the presence of sidedata and to store/retrieve it from the raw revlog data. This proved to be quite fragile from an exchange perspective and a lot more complex than simply having a dedicated space in the new revlog format. This change does not handle exchange (ironically), so the test for amend - which uses a bundle - is broken. Exchange handling is split out into the next patches.

Differential Revision: https://phab.mercurial-scm.org/D9993
author Raphaël Gomès <rgomes@octobus.net>
date Mon, 18 Jan 2021 11:44:51 +0100
parents 913485776542
children 4cd214c9948d
line wrap: on
line diff
--- a/mercurial/revlog.py	Wed Jan 20 18:35:12 2021 +0100
+++ b/mercurial/revlog.py	Mon Jan 18 11:44:51 2021 +0100
@@ -120,10 +120,10 @@
 
 # Flag processors for REVIDX_ELLIPSIS.
 def ellipsisreadprocessor(rl, text):
-    return text, False, {}
-
-
-def ellipsiswriteprocessor(rl, text, sidedata):
+    return text, False
+
+
+def ellipsiswriteprocessor(rl, text):
     return text, False
 
 
@@ -554,8 +554,6 @@
         if self._mmaplargeindex and b'mmapindexthreshold' in opts:
             mmapindexthreshold = opts[b'mmapindexthreshold']
         self.hassidedata = bool(opts.get(b'side-data', False))
-        if self.hassidedata:
-            self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
         self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
         withsparseread = bool(opts.get(b'with-sparse-read', False))
         # sparse-revlog forces sparse-read
@@ -856,6 +854,11 @@
     def length(self, rev):
         return self.index[rev][1]
 
+    def sidedata_length(self, rev):
+        if self.version & 0xFFFF != REVLOGV2:
+            return 0
+        return self.index[rev][9]
+
     def rawsize(self, rev):
         """return the length of the uncompressed text for a given revision"""
         l = self.index[rev][2]
@@ -917,7 +920,7 @@
     # Derived from index values.
 
     def end(self, rev):
-        return self.start(rev) + self.length(rev)
+        return self.start(rev) + self.length(rev) + self.sidedata_length(rev)
 
     def parents(self, node):
         i = self.index
@@ -1853,7 +1856,7 @@
         elif operation == b'read':
             return flagutil.processflagsread(self, text, flags)
         else:  # write operation
-            return flagutil.processflagswrite(self, text, flags, None)
+            return flagutil.processflagswrite(self, text, flags)
 
     def revision(self, nodeorrev, _df=None, raw=False):
         """return an uncompressed revision of a given node or revision
@@ -1898,10 +1901,17 @@
         # revision or might need to be processed to retrieve the revision.
         rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
 
+        if self.version & 0xFFFF == REVLOGV2:
+            if rev is None:
+                rev = self.rev(node)
+            sidedata = self._sidedata(rev)
+        else:
+            sidedata = {}
+
         if raw and validated:
             # if we don't want to process the raw text and that raw
             # text is cached, we can exit early.
-            return rawtext, {}
+            return rawtext, sidedata
         if rev is None:
             rev = self.rev(node)
         # the revlog's flag for this revision
@@ -1910,20 +1920,14 @@
 
         if validated and flags == REVIDX_DEFAULT_FLAGS:
             # no extra flags set, no flag processor runs, text = rawtext
-            return rawtext, {}
-
-        sidedata = {}
+            return rawtext, sidedata
+
         if raw:
             validatehash = flagutil.processflagsraw(self, rawtext, flags)
             text = rawtext
         else:
-            try:
-                r = flagutil.processflagsread(self, rawtext, flags)
-            except error.SidedataHashError as exc:
-                msg = _(b"integrity check failed on %s:%s sidedata key %d")
-                msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
-                raise error.RevlogError(msg)
-            text, validatehash, sidedata = r
+            r = flagutil.processflagsread(self, rawtext, flags)
+            text, validatehash = r
         if validatehash:
             self.checkhash(text, node, rev=rev)
         if not validated:
@@ -1974,6 +1978,21 @@
         del basetext  # let us have a chance to free memory early
         return (rev, rawtext, False)
 
+    def _sidedata(self, rev):
+        """Return the sidedata for a given revision number."""
+        index_entry = self.index[rev]
+        sidedata_offset = index_entry[8]
+        sidedata_size = index_entry[9]
+
+        if self._inline:
+            sidedata_offset += self._io.size * (1 + rev)
+        if sidedata_size == 0:
+            return {}
+
+        segment = self._getsegment(sidedata_offset, sidedata_size)
+        sidedata = sidedatautil.deserialize_sidedata(segment)
+        return sidedata
+
     def rawdata(self, nodeorrev, _df=None):
         """return an uncompressed raw data of a given node or revision number.
 
@@ -2107,20 +2126,15 @@
 
         if sidedata is None:
             sidedata = {}
-            flags = flags & ~REVIDX_SIDEDATA
         elif not self.hassidedata:
             raise error.ProgrammingError(
                 _(b"trying to add sidedata to a revlog who don't support them")
             )
-        else:
-            flags |= REVIDX_SIDEDATA
 
         if flags:
             node = node or self.hash(text, p1, p2)
 
-        rawtext, validatehash = flagutil.processflagswrite(
-            self, text, flags, sidedata=sidedata
-        )
+        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
 
         # If the flag processor modifies the revision data, ignore any provided
         # cachedelta.
@@ -2153,6 +2167,7 @@
             flags,
             cachedelta=cachedelta,
             deltacomputer=deltacomputer,
+            sidedata=sidedata,
         )
 
     def addrawrevision(
@@ -2166,6 +2181,7 @@
         flags,
         cachedelta=None,
         deltacomputer=None,
+        sidedata=None,
     ):
         """add a raw revision with known flags, node and parents
         useful when reusing a revision not stored in this revlog (ex: received
@@ -2188,6 +2204,7 @@
                 ifh,
                 dfh,
                 deltacomputer=deltacomputer,
+                sidedata=sidedata,
             )
         finally:
             if dfh:
@@ -2281,6 +2298,7 @@
         dfh,
         alwayscache=False,
         deltacomputer=None,
+        sidedata=None,
     ):
         """internal function to add revisions to the log
 
@@ -2350,6 +2368,16 @@
 
         deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
 
+        if sidedata:
+            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
+            sidedata_offset = offset + deltainfo.deltalen
+        else:
+            serialized_sidedata = b""
+            # Don't store the offset if the sidedata is empty; that way
+            # we can easily detect empty sidedata, and such entries will be
+            # no different from ones we manually add.
+            sidedata_offset = 0
+
         e = (
             offset_type(offset, flags),
             deltainfo.deltalen,
@@ -2359,18 +2387,24 @@
             p1r,
             p2r,
             node,
-            0,
-            0,
+            sidedata_offset,
+            len(serialized_sidedata),
         )
 
         if self.version & 0xFFFF != REVLOGV2:
             e = e[:8]
 
         self.index.append(e)
-
         entry = self._io.packentry(e, self.node, self.version, curr)
         self._writeentry(
-            transaction, ifh, dfh, entry, deltainfo.data, link, offset
+            transaction,
+            ifh,
+            dfh,
+            entry,
+            deltainfo.data,
+            link,
+            offset,
+            serialized_sidedata,
         )
 
         rawtext = btext[0]
@@ -2383,7 +2417,9 @@
         self._chainbasecache[curr] = deltainfo.chainbase
         return curr
 
-    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
+    def _writeentry(
+        self, transaction, ifh, dfh, entry, data, link, offset, sidedata
+    ):
         # Files opened in a+ mode have inconsistent behavior on various
         # platforms. Windows requires that a file positioning call be made
         # when the file handle transitions between reads and writes. See
@@ -2407,6 +2443,8 @@
             if data[0]:
                 dfh.write(data[0])
             dfh.write(data[1])
+            if sidedata:
+                dfh.write(sidedata)
             ifh.write(entry)
         else:
             offset += curr * self._io.size
@@ -2414,6 +2452,8 @@
             ifh.write(entry)
             ifh.write(data[0])
             ifh.write(data[1])
+            if sidedata:
+                ifh.write(sidedata)
             self._enforceinlinesize(transaction, ifh)
         nodemaputil.setup_persistent_nodemap(transaction, self)