Mercurial > hg
changeset 47256:2b69555e4875
revlog: introduce a compression mode for sidedata in the revlog index
This new index field will be used to record how the sidedata payload is compressed.
Differential Revision: https://phab.mercurial-scm.org/D10653
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Mon, 03 May 2021 21:34:02 +0200 |
parents | ff9fd7107d11 |
children | 87d057137f82 |
files | mercurial/bundlerepo.py mercurial/cext/revlog.c mercurial/pure/parsers.py mercurial/revlog.py mercurial/revlogutils/revlogv0.py mercurial/unionrepo.py tests/test-parseindex2.py |
diffstat | 7 files changed, 72 insertions(+), 22 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/bundlerepo.py Mon May 03 21:13:24 2021 +0200 +++ b/mercurial/bundlerepo.py Mon May 03 21:34:02 2021 +0200 @@ -106,6 +106,7 @@ 0, 0, revlog_constants.COMP_MODE_INLINE, + revlog_constants.COMP_MODE_INLINE, ) self.index.append(e) self.bundlerevs.add(n)
--- a/mercurial/cext/revlog.c Mon May 03 21:13:24 2021 +0200 +++ b/mercurial/cext/revlog.c Mon May 03 21:34:02 2021 +0200 @@ -118,9 +118,9 @@ static int index_find_node(indexObject *self, const char *node); #if LONG_MAX == 0x7fffffffL -static const char *const tuple_format = PY23("Kiiiiiis#KiB", "Kiiiiiiy#KiB"); +static const char *const tuple_format = PY23("Kiiiiiis#KiBB", "Kiiiiiiy#KiBB"); #else -static const char *const tuple_format = PY23("kiiiiiis#kiB", "kiiiiiiy#kiB"); +static const char *const tuple_format = PY23("kiiiiiis#kiBB", "kiiiiiiy#kiBB"); #endif /* A RevlogNG v1 index entry is 64 bytes long. */ @@ -296,7 +296,7 @@ uint64_t offset_flags, sidedata_offset; int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2, sidedata_comp_len; - char data_comp_mode; + char data_comp_mode, sidedata_comp_mode; const char *c_node_id; const char *data; Py_ssize_t length = index_length(self); @@ -339,16 +339,18 @@ sidedata_offset = 0; sidedata_comp_len = 0; data_comp_mode = comp_mode_inline; + sidedata_comp_mode = comp_mode_inline; } else { sidedata_offset = getbe64(data + 64); sidedata_comp_len = getbe32(data + 72); - data_comp_mode = data[76]; + data_comp_mode = data[76] & 3; + sidedata_comp_mode = ((data[76] >> 2) & 3); } return Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2, c_node_id, self->nodelen, sidedata_offset, sidedata_comp_len, - data_comp_mode); + data_comp_mode, sidedata_comp_mode); } /* * Pack header information in binary @@ -449,16 +451,17 @@ { uint64_t offset_flags, sidedata_offset; int rev, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2; - char data_comp_mode; + char data_comp_mode, sidedata_comp_mode; Py_ssize_t c_node_id_len, sidedata_comp_len; const char *c_node_id; + char comp_field; char *data; if (!PyArg_ParseTuple(obj, tuple_format, &offset_flags, &comp_len, &uncomp_len, &base_rev, &link_rev, &parent_1, &parent_2, &c_node_id, &c_node_id_len, &sidedata_offset, 
&sidedata_comp_len, - &data_comp_mode)) { + &data_comp_mode, &sidedata_comp_mode)) { PyErr_SetString(PyExc_TypeError, "11-tuple required"); return NULL; } @@ -467,12 +470,20 @@ PyErr_SetString(PyExc_TypeError, "invalid node"); return NULL; } - if (self->format_version == format_v1 && - data_comp_mode != comp_mode_inline) { - PyErr_Format(PyExc_ValueError, - "invalid data compression mode: %i", - data_comp_mode); - return NULL; + if (self->format_version == format_v1) { + + if (data_comp_mode != comp_mode_inline) { + PyErr_Format(PyExc_ValueError, + "invalid data compression mode: %i", + data_comp_mode); + return NULL; + } + if (sidedata_comp_mode != comp_mode_inline) { + PyErr_Format(PyExc_ValueError, + "invalid sidedata compression mode: %i", + sidedata_comp_mode); + return NULL; + } } if (self->new_length == self->added_length) { @@ -501,7 +512,9 @@ if (self->format_version == format_v2) { putbe64(sidedata_offset, data + 64); putbe32(sidedata_comp_len, data + 72); - data[76] = (char)data_comp_mode; + comp_field = data_comp_mode & 3; + comp_field = comp_field | (sidedata_comp_mode & 3) << 2; + data[76] = comp_field; /* Padding for 96 bytes alignment */ memset(data + 77, 0, self->entry_size - 77); } @@ -2777,9 +2790,9 @@ self->entry_size = v1_entry_size; } - self->nullentry = Py_BuildValue(PY23("iiiiiiis#iiB", "iiiiiiiy#iiB"), 0, - 0, 0, -1, -1, -1, -1, nullid, - self->nodelen, 0, 0, comp_mode_inline); + self->nullentry = Py_BuildValue( + PY23("iiiiiiis#iiBB", "iiiiiiiy#iiBB"), 0, 0, 0, -1, -1, -1, -1, + nullid, self->nodelen, 0, 0, comp_mode_inline, comp_mode_inline); if (!self->nullentry) return -1;
--- a/mercurial/pure/parsers.py Mon May 03 21:13:24 2021 +0200 +++ b/mercurial/pure/parsers.py Mon May 03 21:34:02 2021 +0200 @@ -66,6 +66,7 @@ 0, 0, revlog_constants.COMP_MODE_INLINE, + revlog_constants.COMP_MODE_INLINE, ) @util.propertycache @@ -147,7 +148,12 @@ def _unpack_entry(self, data): r = self.index_format.unpack(data) - r = r + (0, 0, revlog_constants.COMP_MODE_INLINE) + r = r + ( + 0, + 0, + revlog_constants.COMP_MODE_INLINE, + revlog_constants.COMP_MODE_INLINE, + ) return r def pack_header(self, header): @@ -315,10 +321,19 @@ self._extra[rev - self._lgt] = new def _unpack_entry(self, data): - return self.index_format.unpack(data) + data = self.index_format.unpack(data) + entry = data[:10] + data_comp = data[10] & 3 + sidedata_comp = (data[10] & (3 << 2)) >> 2 + return entry + (data_comp, sidedata_comp) def _pack_entry(self, entry): - return self.index_format.pack(*entry[:11]) + data = entry[:10] + data_comp = entry[10] & 3 + sidedata_comp = (entry[11] & 3) << 2 + data += (data_comp | sidedata_comp,) + + return self.index_format.pack(*data) def entry_binary(self, rev): """return the raw binary string representing a revision"""
--- a/mercurial/revlog.py Mon May 03 21:13:24 2021 +0200 +++ b/mercurial/revlog.py Mon May 03 21:34:02 2021 +0200 @@ -345,6 +345,9 @@ (see "COMP_MODE_*" constants for details). For revlog version 0 and 1 this will always be COMP_MODE_INLINE. + [11] side-data compression mode: + two bits that detail the way the sidedata chunk is compressed on disk. + (see "COMP_MODE_*" constants for details) """ _flagserrorclass = error.RevlogError @@ -2517,7 +2520,9 @@ compression_mode = COMP_MODE_PLAIN deltainfo = deltautil.drop_u_compression(deltainfo) + sidedata_compression_mode = COMP_MODE_INLINE if sidedata and self.hassidedata: + sidedata_compression_mode = COMP_MODE_PLAIN serialized_sidedata = sidedatautil.serialize_sidedata(sidedata) sidedata_offset = offset + deltainfo.deltalen else: @@ -2539,6 +2544,7 @@ sidedata_offset, len(serialized_sidedata), compression_mode, + sidedata_compression_mode, ) self.index.append(e)
--- a/mercurial/revlogutils/revlogv0.py Mon May 03 21:13:24 2021 +0200 +++ b/mercurial/revlogutils/revlogv0.py Mon May 03 21:34:02 2021 +0200 @@ -55,6 +55,7 @@ 0, 0, COMP_MODE_INLINE, + COMP_MODE_INLINE, ) @property
--- a/mercurial/unionrepo.py Mon May 03 21:13:24 2021 +0200 +++ b/mercurial/unionrepo.py Mon May 03 21:34:02 2021 +0200 @@ -70,6 +70,7 @@ _sdo, _sds, _dcm, + _sdcm, ) = rev flags = _start & 0xFFFF @@ -105,6 +106,7 @@ 0, # sidedata offset 0, # sidedata size revlog_constants.COMP_MODE_INLINE, + revlog_constants.COMP_MODE_INLINE, ) self.index.append(e) self.bundlerevs.add(n)
--- a/tests/test-parseindex2.py Mon May 03 21:13:24 2021 +0200 +++ b/tests/test-parseindex2.py Mon May 03 21:34:02 2021 +0200 @@ -52,7 +52,12 @@ cache = (0, data) while off <= l: e = struct.unpack(indexformatng, data[off : off + s]) - e = e + (0, 0, constants.COMP_MODE_INLINE) + e = e + ( + 0, + 0, + constants.COMP_MODE_INLINE, + constants.COMP_MODE_INLINE, + ) nodemap[e[7]] = n append(e) n += 1 @@ -62,7 +67,12 @@ else: while off <= l: e = struct.unpack(indexformatng, data[off : off + s]) - e = e + (0, 0, constants.COMP_MODE_INLINE) + e = e + ( + 0, + 0, + constants.COMP_MODE_INLINE, + constants.COMP_MODE_INLINE, + ) nodemap[e[7]] = n append(e) n += 1 @@ -257,6 +267,7 @@ 0, 0, constants.COMP_MODE_INLINE, + constants.COMP_MODE_INLINE, ) index, junk = parsers.parse_index2(data_inlined, True) got = index[-1] @@ -291,6 +302,7 @@ 0, 0, constants.COMP_MODE_INLINE, + constants.COMP_MODE_INLINE, ) index.append(e)