revlog: introduce a compression mode for sidedata in the revlog index
We will use this to support compression of the sidedata payload.
Differential Revision: https://phab.mercurial-scm.org/D10653
--- a/mercurial/bundlerepo.py Mon May 03 21:13:24 2021 +0200
+++ b/mercurial/bundlerepo.py Mon May 03 21:34:02 2021 +0200
@@ -106,6 +106,7 @@
0,
0,
revlog_constants.COMP_MODE_INLINE,
+ revlog_constants.COMP_MODE_INLINE,
)
self.index.append(e)
self.bundlerevs.add(n)
--- a/mercurial/cext/revlog.c Mon May 03 21:13:24 2021 +0200
+++ b/mercurial/cext/revlog.c Mon May 03 21:34:02 2021 +0200
@@ -118,9 +118,9 @@
static int index_find_node(indexObject *self, const char *node);
#if LONG_MAX == 0x7fffffffL
-static const char *const tuple_format = PY23("Kiiiiiis#KiB", "Kiiiiiiy#KiB");
+static const char *const tuple_format = PY23("Kiiiiiis#KiBB", "Kiiiiiiy#KiBB");
#else
-static const char *const tuple_format = PY23("kiiiiiis#kiB", "kiiiiiiy#kiB");
+static const char *const tuple_format = PY23("kiiiiiis#kiBB", "kiiiiiiy#kiBB");
#endif
/* A RevlogNG v1 index entry is 64 bytes long. */
@@ -296,7 +296,7 @@
uint64_t offset_flags, sidedata_offset;
int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
sidedata_comp_len;
- char data_comp_mode;
+ char data_comp_mode, sidedata_comp_mode;
const char *c_node_id;
const char *data;
Py_ssize_t length = index_length(self);
@@ -339,16 +339,18 @@
sidedata_offset = 0;
sidedata_comp_len = 0;
data_comp_mode = comp_mode_inline;
+ sidedata_comp_mode = comp_mode_inline;
} else {
sidedata_offset = getbe64(data + 64);
sidedata_comp_len = getbe32(data + 72);
- data_comp_mode = data[76];
+ data_comp_mode = data[76] & 3;
+ sidedata_comp_mode = ((data[76] >> 2) & 3);
}
return Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
base_rev, link_rev, parent_1, parent_2, c_node_id,
self->nodelen, sidedata_offset, sidedata_comp_len,
- data_comp_mode);
+ data_comp_mode, sidedata_comp_mode);
}
/*
* Pack header information in binary
@@ -449,16 +451,17 @@
{
uint64_t offset_flags, sidedata_offset;
int rev, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
- char data_comp_mode;
+ char data_comp_mode, sidedata_comp_mode;
Py_ssize_t c_node_id_len, sidedata_comp_len;
const char *c_node_id;
+ char comp_field;
char *data;
if (!PyArg_ParseTuple(obj, tuple_format, &offset_flags, &comp_len,
&uncomp_len, &base_rev, &link_rev, &parent_1,
&parent_2, &c_node_id, &c_node_id_len,
&sidedata_offset, &sidedata_comp_len,
- &data_comp_mode)) {
+ &data_comp_mode, &sidedata_comp_mode)) {
PyErr_SetString(PyExc_TypeError, "11-tuple required");
return NULL;
}
@@ -467,12 +470,20 @@
PyErr_SetString(PyExc_TypeError, "invalid node");
return NULL;
}
- if (self->format_version == format_v1 &&
- data_comp_mode != comp_mode_inline) {
- PyErr_Format(PyExc_ValueError,
- "invalid data compression mode: %i",
- data_comp_mode);
- return NULL;
+ if (self->format_version == format_v1) {
+
+ if (data_comp_mode != comp_mode_inline) {
+ PyErr_Format(PyExc_ValueError,
+ "invalid data compression mode: %i",
+ data_comp_mode);
+ return NULL;
+ }
+ if (sidedata_comp_mode != comp_mode_inline) {
+ PyErr_Format(PyExc_ValueError,
+ "invalid sidedata compression mode: %i",
+ sidedata_comp_mode);
+ return NULL;
+ }
}
if (self->new_length == self->added_length) {
@@ -501,7 +512,9 @@
if (self->format_version == format_v2) {
putbe64(sidedata_offset, data + 64);
putbe32(sidedata_comp_len, data + 72);
- data[76] = (char)data_comp_mode;
+ comp_field = data_comp_mode & 3;
+ comp_field = comp_field | (sidedata_comp_mode & 3) << 2;
+ data[76] = comp_field;
/* Padding for 96 bytes alignment */
memset(data + 77, 0, self->entry_size - 77);
}
@@ -2777,9 +2790,9 @@
self->entry_size = v1_entry_size;
}
- self->nullentry = Py_BuildValue(PY23("iiiiiiis#iiB", "iiiiiiiy#iiB"), 0,
- 0, 0, -1, -1, -1, -1, nullid,
- self->nodelen, 0, 0, comp_mode_inline);
+ self->nullentry = Py_BuildValue(
+ PY23("iiiiiiis#iiBB", "iiiiiiiy#iiBB"), 0, 0, 0, -1, -1, -1, -1,
+ nullid, self->nodelen, 0, 0, comp_mode_inline, comp_mode_inline);
if (!self->nullentry)
return -1;
--- a/mercurial/pure/parsers.py Mon May 03 21:13:24 2021 +0200
+++ b/mercurial/pure/parsers.py Mon May 03 21:34:02 2021 +0200
@@ -66,6 +66,7 @@
0,
0,
revlog_constants.COMP_MODE_INLINE,
+ revlog_constants.COMP_MODE_INLINE,
)
@util.propertycache
@@ -147,7 +148,12 @@
def _unpack_entry(self, data):
r = self.index_format.unpack(data)
- r = r + (0, 0, revlog_constants.COMP_MODE_INLINE)
+ r = r + (
+ 0,
+ 0,
+ revlog_constants.COMP_MODE_INLINE,
+ revlog_constants.COMP_MODE_INLINE,
+ )
return r
def pack_header(self, header):
@@ -315,10 +321,19 @@
self._extra[rev - self._lgt] = new
def _unpack_entry(self, data):
- return self.index_format.unpack(data)
+ data = self.index_format.unpack(data)
+ entry = data[:10]
+ data_comp = data[10] & 3
+ sidedata_comp = (data[10] & (3 << 2)) >> 2
+ return entry + (data_comp, sidedata_comp)
def _pack_entry(self, entry):
- return self.index_format.pack(*entry[:11])
+ data = entry[:10]
+ data_comp = entry[10] & 3
+ sidedata_comp = (entry[11] & 3) << 2
+ data += (data_comp | sidedata_comp,)
+
+ return self.index_format.pack(*data)
def entry_binary(self, rev):
"""return the raw binary string representing a revision"""
--- a/mercurial/revlog.py Mon May 03 21:13:24 2021 +0200
+++ b/mercurial/revlog.py Mon May 03 21:34:02 2021 +0200
@@ -345,6 +345,9 @@
(see "COMP_MODE_*" constants for details). For revlog version 0 and
1 this will always be COMP_MODE_INLINE.
+ [11] side-data compression mode:
+ two bits that detail the way the sidedata chunk is compressed on disk.
+ (see "COMP_MODE_*" constants for details)
"""
_flagserrorclass = error.RevlogError
@@ -2517,7 +2520,9 @@
compression_mode = COMP_MODE_PLAIN
deltainfo = deltautil.drop_u_compression(deltainfo)
+ sidedata_compression_mode = COMP_MODE_INLINE
if sidedata and self.hassidedata:
+ sidedata_compression_mode = COMP_MODE_PLAIN
serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
sidedata_offset = offset + deltainfo.deltalen
else:
@@ -2539,6 +2544,7 @@
sidedata_offset,
len(serialized_sidedata),
compression_mode,
+ sidedata_compression_mode,
)
self.index.append(e)
--- a/mercurial/revlogutils/revlogv0.py Mon May 03 21:13:24 2021 +0200
+++ b/mercurial/revlogutils/revlogv0.py Mon May 03 21:34:02 2021 +0200
@@ -55,6 +55,7 @@
0,
0,
COMP_MODE_INLINE,
+ COMP_MODE_INLINE,
)
@property
--- a/mercurial/unionrepo.py Mon May 03 21:13:24 2021 +0200
+++ b/mercurial/unionrepo.py Mon May 03 21:34:02 2021 +0200
@@ -70,6 +70,7 @@
_sdo,
_sds,
_dcm,
+ _sdcm,
) = rev
flags = _start & 0xFFFF
@@ -105,6 +106,7 @@
0, # sidedata offset
0, # sidedata size
revlog_constants.COMP_MODE_INLINE,
+ revlog_constants.COMP_MODE_INLINE,
)
self.index.append(e)
self.bundlerevs.add(n)
--- a/tests/test-parseindex2.py Mon May 03 21:13:24 2021 +0200
+++ b/tests/test-parseindex2.py Mon May 03 21:34:02 2021 +0200
@@ -52,7 +52,12 @@
cache = (0, data)
while off <= l:
e = struct.unpack(indexformatng, data[off : off + s])
- e = e + (0, 0, constants.COMP_MODE_INLINE)
+ e = e + (
+ 0,
+ 0,
+ constants.COMP_MODE_INLINE,
+ constants.COMP_MODE_INLINE,
+ )
nodemap[e[7]] = n
append(e)
n += 1
@@ -62,7 +67,12 @@
else:
while off <= l:
e = struct.unpack(indexformatng, data[off : off + s])
- e = e + (0, 0, constants.COMP_MODE_INLINE)
+ e = e + (
+ 0,
+ 0,
+ constants.COMP_MODE_INLINE,
+ constants.COMP_MODE_INLINE,
+ )
nodemap[e[7]] = n
append(e)
n += 1
@@ -257,6 +267,7 @@
0,
0,
constants.COMP_MODE_INLINE,
+ constants.COMP_MODE_INLINE,
)
index, junk = parsers.parse_index2(data_inlined, True)
got = index[-1]
@@ -291,6 +302,7 @@
0,
0,
constants.COMP_MODE_INLINE,
+ constants.COMP_MODE_INLINE,
)
index.append(e)