changeset 47255:ff9fd7107d11

revlog: implement a "default compression" mode The revlog docket now stores a default compression engine. When a chunk uses that compression, a dedicated mode is used in the revlog entry and we can directly route it to the right decompressor. We should probably make PLAIN and DEFAULT the only available modes for revlogv2, but this is something for later. Differential Revision: https://phab.mercurial-scm.org/D10652
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Mon, 03 May 2021 21:13:24 +0200
parents eac3591abbf4
children 2b69555e4875
files mercurial/revlog.py mercurial/revlogutils/constants.py mercurial/revlogutils/docket.py
diffstat 3 files changed, 46 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/revlog.py	Mon May 03 21:04:55 2021 +0200
+++ b/mercurial/revlog.py	Mon May 03 21:13:24 2021 +0200
@@ -35,6 +35,7 @@
 from .pycompat import getattr
 from .revlogutils.constants import (
     ALL_KINDS,
+    COMP_MODE_DEFAULT,
     COMP_MODE_INLINE,
     COMP_MODE_PLAIN,
     FEATURES_BY_VERSION,
@@ -708,6 +709,15 @@
         engine = util.compengines[self._compengine]
         return engine.revlogcompressor(self._compengineopts)
 
+    @util.propertycache
+    def _decompressor(self):
+        """the default decompressor"""
+        if self._docket is None:
+            return None
+        t = self._docket.default_compression_header
+        c = self._get_decompressor(t)
+        return c.decompress
+
     def _indexfp(self):
         """file object for the revlog's index file"""
         return self.opener(self._indexfile, mode=b"r")
@@ -1776,6 +1786,8 @@
         data = self._getsegmentforrevs(rev, rev, df=df)[1]
         if compression_mode == COMP_MODE_PLAIN:
             return data
+        elif compression_mode == COMP_MODE_DEFAULT:
+            return self._decompressor(data)
         elif compression_mode == COMP_MODE_INLINE:
             return self.decompress(data)
         else:
@@ -1829,6 +1841,8 @@
                 return [self._chunk(rev, df=df) for rev in revschunk]
 
             decomp = self.decompress
+            # self._decompressor might be None, but will not be used in that case
+            def_decomp = self._decompressor
             for rev in revschunk:
                 chunkstart = start(rev)
                 if inline:
@@ -1840,6 +1854,8 @@
                     ladd(c)
                 elif comp_mode == COMP_MODE_INLINE:
                     ladd(decomp(c))
+                elif comp_mode == COMP_MODE_DEFAULT:
+                    ladd(def_decomp(c))
                 else:
                     msg = 'unknown compression mode %d'
                     msg %= comp_mode
@@ -2489,8 +2505,12 @@
             if not h and not d:
                 # not data to store at all... declare them uncompressed
                 compression_mode = COMP_MODE_PLAIN
-            elif not h and d[0:1] == b'\0':
-                compression_mode = COMP_MODE_PLAIN
+            elif not h:
+                t = d[0:1]
+                if t == b'\0':
+                    compression_mode = COMP_MODE_PLAIN
+                elif t == self._docket.default_compression_header:
+                    compression_mode = COMP_MODE_DEFAULT
             elif h == b'u':
                 # we have a more efficient way to declare uncompressed
                 h = b''
--- a/mercurial/revlogutils/constants.py	Mon May 03 21:04:55 2021 +0200
+++ b/mercurial/revlogutils/constants.py	Mon May 03 21:13:24 2021 +0200
@@ -123,6 +123,16 @@
 # chunk value. Without any header information prefixed.
 COMP_MODE_PLAIN = 0
 
+# Chunk uses the "default compression" for the revlog (usually defined in the
+# revlog docket). A header is still used.
+#
+# XXX: keeping a header is probably not useful and we should probably drop it.
+#
+# XXX: The value of allowing mixed compression types in a revlog is unclear
+#      and we should consider making PLAIN/DEFAULT the only available modes
+#      for revlog v2, disallowing INLINE mode.
+COMP_MODE_DEFAULT = 1
+
 # Chunk use a compression mode stored "inline" at the start of the chunk
 # itself.  This is the mode always used for revlog version "0" and "1"
 COMP_MODE_INLINE = 2
--- a/mercurial/revlogutils/docket.py	Mon May 03 21:04:55 2021 +0200
+++ b/mercurial/revlogutils/docket.py	Mon May 03 21:13:24 2021 +0200
@@ -21,6 +21,7 @@
 
 from .. import (
     error,
+    util,
 )
 
 from . import (
@@ -36,7 +37,8 @@
 # * 8 bytes: pending size of index-data
 # * 8 bytes: size of data
 # * 8 bytes: pending size of data
-S_HEADER = struct.Struct(constants.INDEX_HEADER.format + 'LLLL')
+# * 1 byte: default compression header
+S_HEADER = struct.Struct(constants.INDEX_HEADER.format + 'LLLLc')
 
 
 class RevlogDocket(object):
@@ -51,6 +53,7 @@
         pending_index_end=0,
         data_end=0,
         pending_data_end=0,
+        default_compression_header=None,
     ):
         self._version_header = version_header
         self._read_only = bool(use_pending)
@@ -71,6 +74,7 @@
         else:
             self._index_end = self._initial_index_end
             self._data_end = self._initial_data_end
+        self.default_compression_header = default_compression_header
 
     def index_filepath(self):
         """file path to the current index file associated to this docket"""
@@ -134,6 +138,7 @@
             self._index_end,
             official_data_end,
             self._data_end,
+            self.default_compression_header,
         )
         return S_HEADER.pack(*data)
 
@@ -142,7 +147,12 @@
     """given a revlog version a new docket object for the given revlog"""
     if (version_header & 0xFFFF) != constants.REVLOGV2:
         return None
-    docket = RevlogDocket(revlog, version_header=version_header)
+    comp = util.compengines[revlog._compengine].revlogheader()
+    docket = RevlogDocket(
+        revlog,
+        version_header=version_header,
+        default_compression_header=comp,
+    )
     docket._dirty = True
     return docket
 
@@ -155,6 +165,7 @@
     pending_index_size = header[2]
     data_size = header[3]
     pending_data_size = header[4]
+    default_compression_header = header[5]
     docket = RevlogDocket(
         revlog,
         use_pending=use_pending,
@@ -163,5 +174,6 @@
         pending_index_end=pending_index_size,
         data_end=data_size,
         pending_data_end=pending_data_size,
+        default_compression_header=default_compression_header,
     )
     return docket