Mercurial > hg
changeset 47253:b876f0bf7366
revlog: introduce a plain compression mode
This mode is simple: it means the chunk contains uncompressed data that can be
used directly.
Differential Revision: https://phab.mercurial-scm.org/D10650
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Mon, 03 May 2021 19:46:25 +0200 |
parents | e340b556a13e |
children | eac3591abbf4 |
files | mercurial/revlog.py mercurial/revlogutils/constants.py mercurial/revlogutils/deltas.py |
diffstat | 3 files changed, 57 insertions(+), 3 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/revlog.py Mon May 03 20:22:34 2021 +0200 +++ b/mercurial/revlog.py Mon May 03 19:46:25 2021 +0200 @@ -36,6 +36,7 @@ from .revlogutils.constants import ( ALL_KINDS, COMP_MODE_INLINE, + COMP_MODE_PLAIN, FEATURES_BY_VERSION, FLAG_GENERALDELTA, FLAG_INLINE_DATA, @@ -1757,7 +1758,16 @@ Returns a str holding uncompressed data for the requested revision. """ - return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1]) + compression_mode = self.index[rev][10] + data = self._getsegmentforrevs(rev, rev, df=df)[1] + if compression_mode == COMP_MODE_PLAIN: + return data + elif compression_mode == COMP_MODE_INLINE: + return self.decompress(data) + else: + msg = 'unknown compression mode %d' + msg %= compression_mode + raise error.RevlogError(msg) def _chunks(self, revs, df=None, targetsize=None): """Obtain decompressed chunks for the specified revisions. @@ -1810,8 +1820,16 @@ if inline: chunkstart += (rev + 1) * iosize chunklength = length(rev) + comp_mode = self.index[rev][10] c = buffer(data, chunkstart - offset, chunklength) - ladd(decomp(c)) + if comp_mode == COMP_MODE_PLAIN: + ladd(c) + elif comp_mode == COMP_MODE_INLINE: + ladd(decomp(c)) + else: + msg = 'unknown compression mode %d' + msg %= comp_mode + raise error.RevlogError(msg) return l @@ -2461,6 +2479,20 @@ deltainfo = deltacomputer.finddeltainfo(revinfo, fh) + compression_mode = COMP_MODE_INLINE + if self._docket is not None: + h, d = deltainfo.data + if not h and not d: + # not data to store at all... 
declare them uncompressed + compression_mode = COMP_MODE_PLAIN + elif not h and d[0:1] == b'\0': + compression_mode = COMP_MODE_PLAIN + elif h == b'u': + # we have a more efficient way to declare uncompressed + h = b'' + compression_mode = COMP_MODE_PLAIN + deltainfo = deltautil.drop_u_compression(deltainfo) + if sidedata and self.hassidedata: serialized_sidedata = sidedatautil.serialize_sidedata(sidedata) sidedata_offset = offset + deltainfo.deltalen @@ -2482,7 +2514,7 @@ node, sidedata_offset, len(serialized_sidedata), - COMP_MODE_INLINE, + compression_mode, ) self.index.append(e)
--- a/mercurial/revlogutils/constants.py Mon May 03 20:22:34 2021 +0200 +++ b/mercurial/revlogutils/constants.py Mon May 03 19:46:25 2021 +0200 @@ -119,6 +119,10 @@ # These constants are used in revlog version >=2 to denote the compression used # for a chunk. +# Chunk use no compression, the data stored on disk can be directly use as +# chunk value. Without any header information prefixed. +COMP_MODE_PLAIN = 0 + # Chunk use a compression mode stored "inline" at the start of the chunk # itself. This is the mode always used for revlog version "0" and "1" COMP_MODE_INLINE = 2
--- a/mercurial/revlogutils/deltas.py Mon May 03 20:22:34 2021 +0200 +++ b/mercurial/revlogutils/deltas.py Mon May 03 19:46:25 2021 +0200 @@ -553,6 +553,24 @@ snapshotdepth = attr.ib() +def drop_u_compression(delta): + """turn into a "u" (no-compression) into no-compression without header + + This is useful for revlog format that has better compression method. + """ + assert delta.data[0] == b'u', delta.data[0] + return _deltainfo( + delta.distance, + delta.deltalen - 1, + (b'', delta.data[1]), + delta.base, + delta.chainbase, + delta.chainlen, + delta.compresseddeltalen, + delta.snapshotdepth, + ) + + def isgooddeltainfo(revlog, deltainfo, revinfo): """Returns True if the given delta is good. Good means that it is within the disk span, disk size, and chain length bounds that we know to be