revlog: add an `entry_binary` method on index
The revlog index is already responsible for unpacking the binary entries, so it
would be simpler to make it responsible for packing them too. In practice, the C
version of the index is already doing this internally.
We introduce an "entry_binary" method that returns the binary version of an
existing revision. The method currently also needs to take the revlog header to
deal with the "first revision" special case. We will introduce further
refactoring in a later changeset to split that logic out.
Differential Revision: https://phab.mercurial-scm.org/D10508
--- a/mercurial/cext/revlog.c Thu Apr 15 12:08:34 2021 +0200
+++ b/mercurial/cext/revlog.c Thu Apr 08 00:01:11 2021 +0200
@@ -342,6 +342,38 @@
sidedata_offset, sidedata_comp_len);
}
}
+/*
+ * Return the raw binary string representing a revision
+ *
+ * Python-level arguments are (rev, header). `rev` must satisfy
+ * 0 <= rev < index_length(self); otherwise ValueError is raised and NULL is
+ * returned. The result is a bytes object of self->hdrsize bytes. For rev 0,
+ * the start of the entry is overwritten with `header` (via putbe32), since
+ * the first entry of the index also carries the revlog header.
+ */
+static PyObject *index_entry_binary(indexObject *self, PyObject *args)
+{
+	long rev;
+	/* the "I" format unit below stores into an unsigned int */
+	unsigned int header;
+	const char *data;
+	/* scratch copy of the first entry, sized for the largest entry format */
+	char entry[v2_hdrsize];
+
+	Py_ssize_t length = index_length(self);
+
+	if (!PyArg_ParseTuple(args, "lI", &rev, &header)) {
+		return NULL;
+	}
+	if (rev < 0 || rev >= length) {
+		PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
+		             rev);
+		return NULL;
+	}
+
+	data = index_deref(self, rev);
+	if (data == NULL)
+		return NULL;
+	if (rev == 0) {
+		// put the header at the start of the first entry
+		memcpy(entry, data, self->hdrsize);
+		putbe32(header, entry);
+		return PyBytes_FromStringAndSize(entry, self->hdrsize);
+	}
+	return PyBytes_FromStringAndSize(data, self->hdrsize);
+}
/*
* Return the hash of node corresponding to the given rev.
@@ -2859,6 +2891,8 @@
{"shortest", (PyCFunction)index_shortest, METH_VARARGS,
"find length of shortest hex nodeid of a binary ID"},
{"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
+ {"entry_binary", (PyCFunction)index_entry_binary, METH_VARARGS,
+ "return an entry in binary form"},
{NULL} /* Sentinel */
};
--- a/mercurial/pure/parsers.py Thu Apr 15 12:08:34 2021 +0200
+++ b/mercurial/pure/parsers.py Thu Apr 08 00:01:11 2021 +0200
@@ -127,10 +127,24 @@
r = (offset_type(0, gettype(r[0])),) + r[1:]
return r
+ def entry_binary(self, rev, header):
+ """return the raw binary string representing a revision"""
+ entry = self[rev]
+ p = revlog_constants.INDEX_ENTRY_V1.pack(*entry)
+ if rev == 0:
+ v_fmt = revlog_constants.INDEX_HEADER
+ v_bin = v_fmt.pack(header)
+ p = v_bin + p[v_fmt.size :]
+ return p
+
class IndexObject(BaseIndexObject):
def __init__(self, data):
- assert len(data) % self.entry_size == 0
+ assert len(data) % self.entry_size == 0, (
+ len(data),
+ self.entry_size,
+ len(data) % self.entry_size,
+ )
self._data = data
self._lgt = len(data) // self.entry_size
self._extra = []
@@ -272,6 +286,16 @@
msg = b"cannot rewrite entries outside of this transaction"
raise KeyError(msg)
+ def entry_binary(self, rev, header):
+ """return the raw binary string representing a revision"""
+ entry = self[rev]
+ p = revlog_constants.INDEX_ENTRY_V2.pack(*entry)
+ if rev == 0:
+ v_fmt = revlog_constants.INDEX_HEADER
+ v_bin = v_fmt.pack(header)
+ p = v_bin + p[v_fmt.size :]
+ return p
+
class IndexObject2(Index2Mixin, IndexObject):
pass
--- a/mercurial/revlog.py Thu Apr 15 12:08:34 2021 +0200
+++ b/mercurial/revlog.py Thu Apr 08 00:01:11 2021 +0200
@@ -268,6 +268,24 @@
return (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid)
return list.__getitem__(self, i)
+ def entry_binary(self, rev, header):
+ """return the raw binary string representing a revision"""
+ entry = self[rev]
+ if gettype(entry[0]):
+ raise error.RevlogError(
+ _(b'index entry flags need revlog version 1')
+ )
+ e2 = (
+ getoffset(entry[0]),
+ entry[1],
+ entry[3],
+ entry[4],
+ self[entry[5]][7],
+ self[entry[6]][7],
+ entry[7],
+ )
+ return INDEX_ENTRY_V0.pack(*e2)
+
class revlogoldio(object):
def parseindex(self, data, inline):
@@ -298,29 +316,6 @@
index = revlogoldindex(index)
return index, None
- def packentry(self, entry, node, version, rev):
- """return the binary representation of an entry
-
- entry: a tuple containing all the values (see index.__getitem__)
- node: a callback to convert a revision to nodeid
- version: the changelog version
- rev: the revision number
- """
- if gettype(entry[0]):
- raise error.RevlogError(
- _(b'index entry flags need revlog version 1')
- )
- e2 = (
- getoffset(entry[0]),
- entry[1],
- entry[3],
- entry[4],
- node(entry[5]),
- node(entry[6]),
- entry[7],
- )
- return INDEX_ENTRY_V0.pack(*e2)
-
# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
@@ -333,24 +328,12 @@
index, cache = parsers.parse_index2(data, inline)
return index, cache
- def packentry(self, entry, node, version, rev):
- p = INDEX_ENTRY_V1.pack(*entry)
- if rev == 0:
- p = INDEX_HEADER.pack(version) + p[4:]
- return p
-
class revlogv2io(object):
def parseindex(self, data, inline):
index, cache = parsers.parse_index2(data, inline, revlogv2=True)
return index, cache
- def packentry(self, entry, node, version, rev):
- p = INDEX_ENTRY_V2.pack(*entry)
- if rev == 0:
- p = INDEX_HEADER.pack(version) + p[4:]
- return p
-
NodemapRevlogIO = None
@@ -2068,7 +2051,7 @@
self._inline = False
io = self._io
for i in self:
- e = io.packentry(self.index[i], self.node, self.version, i)
+ e = self.index.entry_binary(i, self.version)
fp.write(e)
# the temp file replace the real index when we exit the context
@@ -2390,7 +2373,7 @@
e = e[:8]
self.index.append(e)
- entry = self._io.packentry(e, self.node, self.version, curr)
+ entry = self.index.entry_binary(curr, self.version)
self._writeentry(
transaction,
ifh,
@@ -3243,5 +3226,5 @@
for i, entry in enumerate(new_entries):
rev = startrev + i
self.index.replace_sidedata_info(rev, entry[8], entry[9])
- packed = self._io.packentry(entry, self.node, self.version, rev)
+ packed = self.index.entry_binary(rev, self.version)
fp.write(packed)
--- a/rust/hg-cpython/src/revlog.rs Thu Apr 15 12:08:34 2021 +0200
+++ b/rust/hg-cpython/src/revlog.rs Thu Apr 08 00:01:11 2021 +0200
@@ -172,6 +172,11 @@
self.call_cindex(py, "clearcaches", args, kw)
}
+ /// return the raw binary string representing a revision
+ ///
+ /// Thin wrapper: forwards `args` and `kw` unchanged to the method named
+ /// "entry_binary" through `call_cindex` and returns its result as-is.
+ def entry_binary(&self, *args, **kw) -> PyResult<PyObject> {
+ self.call_cindex(py, "entry_binary", args, kw)
+ }
+
/// get an index entry
def get(&self, *args, **kw) -> PyResult<PyObject> {
self.call_cindex(py, "get", args, kw)