revlog: introduce a plain compression mode
That mode is simple: it means the chunk contains uncompressed data that can be
used directly.
Differential Revision: https://phab.mercurial-scm.org/D10650
--- a/mercurial/revlog.py Mon May 03 20:22:34 2021 +0200
+++ b/mercurial/revlog.py Mon May 03 19:46:25 2021 +0200
@@ -36,6 +36,7 @@
from .revlogutils.constants import (
ALL_KINDS,
COMP_MODE_INLINE,
+ COMP_MODE_PLAIN,
FEATURES_BY_VERSION,
FLAG_GENERALDELTA,
FLAG_INLINE_DATA,
@@ -1757,7 +1758,16 @@
Returns a str holding uncompressed data for the requested revision.
"""
- return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
+ compression_mode = self.index[rev][10]
+ data = self._getsegmentforrevs(rev, rev, df=df)[1]
+ if compression_mode == COMP_MODE_PLAIN:
+ return data
+ elif compression_mode == COMP_MODE_INLINE:
+ return self.decompress(data)
+ else:
+ msg = 'unknown compression mode %d'
+ msg %= compression_mode
+ raise error.RevlogError(msg)
def _chunks(self, revs, df=None, targetsize=None):
"""Obtain decompressed chunks for the specified revisions.
@@ -1810,8 +1820,16 @@
if inline:
chunkstart += (rev + 1) * iosize
chunklength = length(rev)
+ comp_mode = self.index[rev][10]
c = buffer(data, chunkstart - offset, chunklength)
- ladd(decomp(c))
+ if comp_mode == COMP_MODE_PLAIN:
+ ladd(c)
+ elif comp_mode == COMP_MODE_INLINE:
+ ladd(decomp(c))
+ else:
+ msg = 'unknown compression mode %d'
+ msg %= comp_mode
+ raise error.RevlogError(msg)
return l
@@ -2461,6 +2479,20 @@
deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
+ compression_mode = COMP_MODE_INLINE
+ if self._docket is not None:
+ h, d = deltainfo.data
+ if not h and not d:
+ # no data to store at all... declare it uncompressed
+ compression_mode = COMP_MODE_PLAIN
+ elif not h and d[0:1] == b'\0':
+ compression_mode = COMP_MODE_PLAIN
+ elif h == b'u':
+ # we have a more efficient way to declare uncompressed
+ h = b''
+ compression_mode = COMP_MODE_PLAIN
+ deltainfo = deltautil.drop_u_compression(deltainfo)
+
if sidedata and self.hassidedata:
serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
sidedata_offset = offset + deltainfo.deltalen
@@ -2482,7 +2514,7 @@
node,
sidedata_offset,
len(serialized_sidedata),
- COMP_MODE_INLINE,
+ compression_mode,
)
self.index.append(e)
--- a/mercurial/revlogutils/constants.py Mon May 03 20:22:34 2021 +0200
+++ b/mercurial/revlogutils/constants.py Mon May 03 19:46:25 2021 +0200
@@ -119,6 +119,10 @@
# These constants are used in revlog version >=2 to denote the compression used
# for a chunk.
+# Chunk uses no compression; the data stored on disk can be used directly as
+# the chunk value, without any header information prefixed.
+COMP_MODE_PLAIN = 0
+
# Chunk use a compression mode stored "inline" at the start of the chunk
# itself. This is the mode always used for revlog version "0" and "1"
COMP_MODE_INLINE = 2
--- a/mercurial/revlogutils/deltas.py Mon May 03 20:22:34 2021 +0200
+++ b/mercurial/revlogutils/deltas.py Mon May 03 19:46:25 2021 +0200
@@ -553,6 +553,24 @@
snapshotdepth = attr.ib()
+def drop_u_compression(delta):
+ """turn a "u" (no-compression) delta into no-compression without header
+
+ This is useful for revlog formats that have a better compression method.
+ """
+ assert delta.data[0] == b'u', delta.data[0]
+ return _deltainfo(
+ delta.distance,
+ delta.deltalen - 1,
+ (b'', delta.data[1]),
+ delta.base,
+ delta.chainbase,
+ delta.chainlen,
+ delta.compresseddeltalen,
+ delta.snapshotdepth,
+ )
+
+
def isgooddeltainfo(revlog, deltainfo, revinfo):
"""Returns True if the given delta is good. Good means that it is within
the disk span, disk size, and chain length bounds that we know to be