--- a/mercurial/revlog.py Mon Oct 23 14:27:07 2023 +0200
+++ b/mercurial/revlog.py Wed Oct 25 02:13:18 2023 +0200
@@ -353,7 +353,9 @@
sidedata_file,
inline,
data_config,
+ feature_config,
chunk_cache,
+ default_compression_header,
):
self.opener = opener
self.index = index
@@ -363,6 +365,9 @@
self.sidedata_file = sidedata_file
self.inline = inline
self.data_config = data_config
+ self.feature_config = feature_config
+
+ self._default_compression_header = default_compression_header
# index
@@ -381,6 +386,9 @@
self.data_config.chunk_cache_size,
)
+ # revlog header -> revlog compressor
+ self._decompressors = {}
+
@property
def index_file(self):
return self.__index_file
@@ -405,6 +413,103 @@
"""the end of the data chunk for this revision"""
return self.start(rev) + self.length(rev)
+ @util.propertycache
+ def _compressor(self):
+ engine = util.compengines[self.feature_config.compression_engine]
+ return engine.revlogcompressor(
+ self.feature_config.compression_engine_options
+ )
+
+ @util.propertycache
+ def _decompressor(self):
+ """the default decompressor"""
+ if self._default_compression_header is None:
+ return None
+ t = self._default_compression_header
+ c = self._get_decompressor(t)
+ return c.decompress
+
+ def _get_decompressor(self, t):
+ try:
+ compressor = self._decompressors[t]
+ except KeyError:
+ try:
+ engine = util.compengines.forrevlogheader(t)
+ compressor = engine.revlogcompressor(
+ self.feature_config.compression_engine_options
+ )
+ self._decompressors[t] = compressor
+ except KeyError:
+ raise error.RevlogError(
+ _(b'unknown compression type %s') % binascii.hexlify(t)
+ )
+ return compressor
+
+ def compress(self, data):
+ """Generate a possibly-compressed representation of data."""
+ if not data:
+ return b'', data
+
+ compressed = self._compressor.compress(data)
+
+ if compressed:
+ # The revlog compressor added the header in the returned data.
+ return b'', compressed
+
+ if data[0:1] == b'\0':
+ return b'', data
+ return b'u', data
+
+ def decompress(self, data):
+ """Decompress a revlog chunk.
+
+ The chunk is expected to begin with a header identifying the
+ format type so it can be routed to an appropriate decompressor.
+ """
+ if not data:
+ return data
+
+ # Revlogs are read much more frequently than they are written and many
+ # chunks only take microseconds to decompress, so performance is
+ # important here.
+ #
+ # We can make a few assumptions about revlogs:
+ #
+ # 1) the majority of chunks will be compressed (as opposed to inline
+ # raw data).
+ # 2) decompressing *any* data will likely be at least 10x slower than
+ # returning raw inline data.
+ # 3) we want to prioritize common and officially supported compression
+ # engines
+ #
+ # It follows that we want to optimize for "decompress compressed data
+ # when encoded with common and officially supported compression engines"
+ # case over "raw data" and "data encoded by less common or non-official
+ # compression engines." That is why we have the inline lookup first
+ # followed by the compengines lookup.
+ #
+ # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
+ # compressed chunks. And this matters for changelog and manifest reads.
+ t = data[0:1]
+
+ if t == b'x':
+ try:
+ return _zlibdecompress(data)
+ except zlib.error as e:
+ raise error.RevlogError(
+ _(b'revlog decompress error: %s')
+ % stringutil.forcebytestr(e)
+ )
+ # '\0' is more common than 'u' so it goes first.
+ elif t == b'\0':
+ return data
+ elif t == b'u':
+ return util.buffer(data, 1)
+
+ compressor = self._get_decompressor(t)
+
+ return compressor.decompress(data)
+
@contextlib.contextmanager
def reading(self):
"""Context manager that keeps data and sidedata files open for reading"""
@@ -1284,12 +1389,15 @@
self.index = index
# revnum -> (chain-length, sum-delta-length)
self._chaininfocache = util.lrucachedict(500)
- # revlog header -> revlog compressor
- self._decompressors = {}
return chunkcache
def _load_inner(self, chunk_cache):
+ if self._docket is None:
+ default_compression_header = None
+ else:
+ default_compression_header = self._docket.default_compression_header
+
self._inner = _InnerRevlog(
opener=self.opener,
index=self.index,
@@ -1298,7 +1406,9 @@
sidedata_file=self._sidedatafile,
inline=self._inline,
data_config=self.data_config,
+ feature_config=self.feature_config,
chunk_cache=chunk_cache,
+ default_compression_header=default_compression_header,
)
def get_revlog(self):
@@ -1319,38 +1429,6 @@
else:
return self.radix
- def _get_decompressor(self, t):
- try:
- compressor = self._decompressors[t]
- except KeyError:
- try:
- engine = util.compengines.forrevlogheader(t)
- compressor = engine.revlogcompressor(
- self.feature_config.compression_engine_options
- )
- self._decompressors[t] = compressor
- except KeyError:
- raise error.RevlogError(
- _(b'unknown compression type %s') % binascii.hexlify(t)
- )
- return compressor
-
- @util.propertycache
- def _compressor(self):
- engine = util.compengines[self.feature_config.compression_engine]
- return engine.revlogcompressor(
- self.feature_config.compression_engine_options
- )
-
- @util.propertycache
- def _decompressor(self):
- """the default decompressor"""
- if self._docket is None:
- return None
- t = self._docket.default_compression_header
- c = self._get_decompressor(t)
- return c.decompress
-
def _datafp(self, mode=b'r'):
"""file object for the revlog's data file"""
return self.opener(self._datafile, mode=mode)
@@ -2272,9 +2350,9 @@
if compression_mode == COMP_MODE_PLAIN:
return data
elif compression_mode == COMP_MODE_DEFAULT:
- return self._decompressor(data)
+ return self._inner._decompressor(data)
elif compression_mode == COMP_MODE_INLINE:
- return self.decompress(data)
+ return self._inner.decompress(data)
else:
msg = b'unknown compression mode %d'
msg %= compression_mode
@@ -2328,9 +2406,9 @@
# 2G on Windows
return [self._chunk(rev) for rev in revschunk]
- decomp = self.decompress
+ decomp = self._inner.decompress
# self._decompressor might be None, but will not be used in that case
- def_decomp = self._decompressor
+ def_decomp = self._inner._decompressor
for rev in revschunk:
chunkstart = start(rev)
if inline:
@@ -2544,9 +2622,9 @@
if comp == COMP_MODE_PLAIN:
segment = comp_segment
elif comp == COMP_MODE_DEFAULT:
- segment = self._decompressor(comp_segment)
+ segment = self._inner._decompressor(comp_segment)
elif comp == COMP_MODE_INLINE:
- segment = self.decompress(comp_segment)
+ segment = self._inner.decompress(comp_segment)
else:
msg = b'unknown compression mode %d'
msg %= comp
@@ -2842,69 +2920,10 @@
)
def compress(self, data):
- """Generate a possibly-compressed representation of data."""
- if not data:
- return b'', data
-
- compressed = self._compressor.compress(data)
-
- if compressed:
- # The revlog compressor added the header in the returned data.
- return b'', compressed
-
- if data[0:1] == b'\0':
- return b'', data
- return b'u', data
+ return self._inner.compress(data)
def decompress(self, data):
- """Decompress a revlog chunk.
-
- The chunk is expected to begin with a header identifying the
- format type so it can be routed to an appropriate decompressor.
- """
- if not data:
- return data
-
- # Revlogs are read much more frequently than they are written and many
- # chunks only take microseconds to decompress, so performance is
- # important here.
- #
- # We can make a few assumptions about revlogs:
- #
- # 1) the majority of chunks will be compressed (as opposed to inline
- # raw data).
- # 2) decompressing *any* data will likely by at least 10x slower than
- # returning raw inline data.
- # 3) we want to prioritize common and officially supported compression
- # engines
- #
- # It follows that we want to optimize for "decompress compressed data
- # when encoded with common and officially supported compression engines"
- # case over "raw data" and "data encoded by less common or non-official
- # compression engines." That is why we have the inline lookup first
- # followed by the compengines lookup.
- #
- # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
- # compressed chunks. And this matters for changelog and manifest reads.
- t = data[0:1]
-
- if t == b'x':
- try:
- return _zlibdecompress(data)
- except zlib.error as e:
- raise error.RevlogError(
- _(b'revlog decompress error: %s')
- % stringutil.forcebytestr(e)
- )
- # '\0' is more common than 'u' so it goes first.
- elif t == b'\0':
- return data
- elif t == b'u':
- return util.buffer(data, 1)
-
- compressor = self._get_decompressor(t)
-
- return compressor.decompress(data)
+ return self._inner.decompress(data)
def _addrevision(
self,
@@ -3029,7 +3048,7 @@
sidedata_compression_mode = COMP_MODE_PLAIN
serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
sidedata_offset = self._docket.sidedata_end
- h, comp_sidedata = self.compress(serialized_sidedata)
+ h, comp_sidedata = self._inner.compress(serialized_sidedata)
if (
h != b'u'
and comp_sidedata[0:1] != b'\0'
@@ -3876,7 +3895,7 @@
sidedata_compression_mode = COMP_MODE_INLINE
if serialized_sidedata and self.feature_config.has_side_data:
sidedata_compression_mode = COMP_MODE_PLAIN
- h, comp_sidedata = self.compress(serialized_sidedata)
+ h, comp_sidedata = self._inner.compress(serialized_sidedata)
if (
h != b'u'
and comp_sidedata[0] != b'\0'
--- a/mercurial/revlogutils/deltas.py Mon Oct 23 14:27:07 2023 +0200
+++ b/mercurial/revlogutils/deltas.py Wed Oct 25 02:13:18 2023 +0200
@@ -1205,7 +1205,7 @@
msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
self._write_debug(msg)
return None
- header, data = revlog.compress(delta)
+ header, data = revlog._inner.compress(delta)
deltalen = len(header) + len(data)
offset = revlog.end(len(revlog) - 1)
dist = deltalen + offset - revlog.start(chainbase)
@@ -1226,7 +1226,7 @@
def _fullsnapshotinfo(self, revinfo, curr):
rawtext = self.buildtext(revinfo)
- data = self.revlog.compress(rawtext)
+ data = self.revlog._inner.compress(rawtext)
compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
deltabase = chainbase = curr
snapshotdepth = 0