revlog: introduce a plain compression mode
author Pierre-Yves David <pierre-yves.david@octobus.net>
Mon, 03 May 2021 19:46:25 +0200
changeset 47253 b876f0bf7366
parent 47252 e340b556a13e
child 47254 eac3591abbf4
revlog: introduce a plain compression mode. That mode is simple: it means the chunk contains uncompressed data and can be used directly. Differential Revision: https://phab.mercurial-scm.org/D10650
mercurial/revlog.py
mercurial/revlogutils/constants.py
mercurial/revlogutils/deltas.py
--- a/mercurial/revlog.py	Mon May 03 20:22:34 2021 +0200
+++ b/mercurial/revlog.py	Mon May 03 19:46:25 2021 +0200
@@ -36,6 +36,7 @@
 from .revlogutils.constants import (
     ALL_KINDS,
     COMP_MODE_INLINE,
+    COMP_MODE_PLAIN,
     FEATURES_BY_VERSION,
     FLAG_GENERALDELTA,
     FLAG_INLINE_DATA,
@@ -1757,7 +1758,16 @@
 
         Returns a str holding uncompressed data for the requested revision.
         """
-        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
+        compression_mode = self.index[rev][10]
+        data = self._getsegmentforrevs(rev, rev, df=df)[1]
+        if compression_mode == COMP_MODE_PLAIN:
+            return data
+        elif compression_mode == COMP_MODE_INLINE:
+            return self.decompress(data)
+        else:
+            msg = 'unknown compression mode %d'
+            msg %= compression_mode
+            raise error.RevlogError(msg)
 
     def _chunks(self, revs, df=None, targetsize=None):
         """Obtain decompressed chunks for the specified revisions.
@@ -1810,8 +1820,16 @@
                 if inline:
                     chunkstart += (rev + 1) * iosize
                 chunklength = length(rev)
+                comp_mode = self.index[rev][10]
                 c = buffer(data, chunkstart - offset, chunklength)
-                ladd(decomp(c))
+                if comp_mode == COMP_MODE_PLAIN:
+                    ladd(c)
+                elif comp_mode == COMP_MODE_INLINE:
+                    ladd(decomp(c))
+                else:
+                    msg = 'unknown compression mode %d'
+                    msg %= comp_mode
+                    raise error.RevlogError(msg)
 
         return l
 
@@ -2461,6 +2479,20 @@
 
         deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
 
+        compression_mode = COMP_MODE_INLINE
+        if self._docket is not None:
+            h, d = deltainfo.data
+            if not h and not d:
+                # no data to store at all... declare it uncompressed
+                compression_mode = COMP_MODE_PLAIN
+            elif not h and d[0:1] == b'\0':
+                compression_mode = COMP_MODE_PLAIN
+            elif h == b'u':
+                # we have a more efficient way to declare uncompressed
+                h = b''
+                compression_mode = COMP_MODE_PLAIN
+                deltainfo = deltautil.drop_u_compression(deltainfo)
+
         if sidedata and self.hassidedata:
             serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
             sidedata_offset = offset + deltainfo.deltalen
@@ -2482,7 +2514,7 @@
             node,
             sidedata_offset,
             len(serialized_sidedata),
-            COMP_MODE_INLINE,
+            compression_mode,
         )
 
         self.index.append(e)
--- a/mercurial/revlogutils/constants.py	Mon May 03 20:22:34 2021 +0200
+++ b/mercurial/revlogutils/constants.py	Mon May 03 19:46:25 2021 +0200
@@ -119,6 +119,10 @@
 # These constants are used in revlog version >=2 to denote the compression used
 # for a chunk.
 
+# Chunk uses no compression: the data stored on disk can be used directly as
+# the chunk value, without any header information prefixed.
+COMP_MODE_PLAIN = 0
+
 # Chunk use a compression mode stored "inline" at the start of the chunk
 # itself.  This is the mode always used for revlog version "0" and "1"
 COMP_MODE_INLINE = 2
--- a/mercurial/revlogutils/deltas.py	Mon May 03 20:22:34 2021 +0200
+++ b/mercurial/revlogutils/deltas.py	Mon May 03 19:46:25 2021 +0200
@@ -553,6 +553,24 @@
     snapshotdepth = attr.ib()
 
 
+def drop_u_compression(delta):
+    """turn a "u" (no-compression) delta into a header-less uncompressed one
+
+    This is useful for revlog formats that have a better way to declare
+    """
+    assert delta.data[0] == b'u', delta.data[0]
+    return _deltainfo(
+        delta.distance,
+        delta.deltalen - 1,
+        (b'', delta.data[1]),
+        delta.base,
+        delta.chainbase,
+        delta.chainlen,
+        delta.compresseddeltalen,
+        delta.snapshotdepth,
+    )
+
+
 def isgooddeltainfo(revlog, deltainfo, revinfo):
     """Returns True if the given delta is good. Good means that it is within
     the disk span, disk size, and chain length bounds that we know to be