# HG changeset patch # User Pierre-Yves David # Date 1620069204 -7200 # Node ID ff9fd7107d118efc7a1e82f1656a17f3a237f1e2 # Parent eac3591abbf4ee2111d1197f38c73c2e16b17837 revlog: implement a "default compression" mode The revlog docket now stores a default compression engine. When a chunk uses that compression, a dedicated mode is used in the revlog entry and we can directly route it to the right decompressor. We should probably make PLAIN and DEFAULT the only available modes for revlogv2, but this is something for later. Differential Revision: https://phab.mercurial-scm.org/D10652 diff -r eac3591abbf4 -r ff9fd7107d11 mercurial/revlog.py --- a/mercurial/revlog.py Mon May 03 21:04:55 2021 +0200 +++ b/mercurial/revlog.py Mon May 03 21:13:24 2021 +0200 @@ -35,6 +35,7 @@ from .pycompat import getattr from .revlogutils.constants import ( ALL_KINDS, + COMP_MODE_DEFAULT, COMP_MODE_INLINE, COMP_MODE_PLAIN, FEATURES_BY_VERSION, @@ -708,6 +709,15 @@ engine = util.compengines[self._compengine] return engine.revlogcompressor(self._compengineopts) + @util.propertycache + def _decompressor(self): + """the default decompressor""" + if self._docket is None: + return None + t = self._docket.default_compression_header + c = self._get_decompressor(t) + return c.decompress + def _indexfp(self): """file object for the revlog's index file""" return self.opener(self._indexfile, mode=b"r") @@ -1776,6 +1786,8 @@ data = self._getsegmentforrevs(rev, rev, df=df)[1] if compression_mode == COMP_MODE_PLAIN: return data + elif compression_mode == COMP_MODE_DEFAULT: + return self._decompressor(data) elif compression_mode == COMP_MODE_INLINE: return self.decompress(data) else: @@ -1829,6 +1841,8 @@ return [self._chunk(rev, df=df) for rev in revschunk] decomp = self.decompress + # self._decompressor might be None, but will not be used in that case + def_decomp = self._decompressor for rev in revschunk: chunkstart = start(rev) if inline: @@ -1840,6 +1854,8 @@ ladd(c) elif comp_mode == 
COMP_MODE_INLINE: ladd(decomp(c)) + elif comp_mode == COMP_MODE_DEFAULT: + ladd(def_decomp(c)) else: msg = 'unknown compression mode %d' msg %= comp_mode @@ -2489,8 +2505,12 @@ if not h and not d: # not data to store at all... declare them uncompressed compression_mode = COMP_MODE_PLAIN - elif not h and d[0:1] == b'\0': - compression_mode = COMP_MODE_PLAIN + elif not h: + t = d[0:1] + if t == b'\0': + compression_mode = COMP_MODE_PLAIN + elif t == self._docket.default_compression_header: + compression_mode = COMP_MODE_DEFAULT elif h == b'u': # we have a more efficient way to declare uncompressed h = b'' diff -r eac3591abbf4 -r ff9fd7107d11 mercurial/revlogutils/constants.py --- a/mercurial/revlogutils/constants.py Mon May 03 21:04:55 2021 +0200 +++ b/mercurial/revlogutils/constants.py Mon May 03 21:13:24 2021 +0200 @@ -123,6 +123,16 @@ # chunk value. Without any header information prefixed. COMP_MODE_PLAIN = 0 +# Chunk use the "default compression" for the revlog (usually defined in the +# revlog docket). A header is still used. +# +# XXX: keeping a header is probably not useful and we should probably drop it. +# +# XXX: The value of allow mixed type of compression in the revlog is unclear +# and we should consider making PLAIN/DEFAULT the only available mode for +# revlog v2, disallowing INLINE mode. +COMP_MODE_DEFAULT = 1 + # Chunk use a compression mode stored "inline" at the start of the chunk # itself. This is the mode always used for revlog version "0" and "1" COMP_MODE_INLINE = 2 diff -r eac3591abbf4 -r ff9fd7107d11 mercurial/revlogutils/docket.py --- a/mercurial/revlogutils/docket.py Mon May 03 21:04:55 2021 +0200 +++ b/mercurial/revlogutils/docket.py Mon May 03 21:13:24 2021 +0200 @@ -21,6 +21,7 @@ from .. import ( error, + util, ) from . 
import ( @@ -36,7 +37,8 @@ # * 8 bytes: pending size of index-data # * 8 bytes: size of data # * 8 bytes: pending size of data -S_HEADER = struct.Struct(constants.INDEX_HEADER.format + 'LLLL') +# * 1 bytes: default compression header +S_HEADER = struct.Struct(constants.INDEX_HEADER.format + 'LLLLc') class RevlogDocket(object): @@ -51,6 +53,7 @@ pending_index_end=0, data_end=0, pending_data_end=0, + default_compression_header=None, ): self._version_header = version_header self._read_only = bool(use_pending) @@ -71,6 +74,7 @@ else: self._index_end = self._initial_index_end self._data_end = self._initial_data_end + self.default_compression_header = default_compression_header def index_filepath(self): """file path to the current index file associated to this docket""" @@ -134,6 +138,7 @@ self._index_end, official_data_end, self._data_end, + self.default_compression_header, ) return S_HEADER.pack(*data) @@ -142,7 +147,12 @@ """given a revlog version a new docket object for the given revlog""" if (version_header & 0xFFFF) != constants.REVLOGV2: return None - docket = RevlogDocket(revlog, version_header=version_header) + comp = util.compengines[revlog._compengine].revlogheader() + docket = RevlogDocket( + revlog, + version_header=version_header, + default_compression_header=comp, + ) docket._dirty = True return docket @@ -155,6 +165,7 @@ pending_index_size = header[2] data_size = header[3] pending_data_size = header[4] + default_compression_header = header[5] docket = RevlogDocket( revlog, use_pending=use_pending, @@ -163,5 +174,6 @@ pending_index_end=pending_index_size, data_end=data_size, pending_data_end=pending_data_size, + default_compression_header=default_compression_header, ) return docket