revlog: replace revlog._io.size with a new revlog.index.entry_size
The `revlogio` class is mostly a relic from the past. Once in charge of the full
revlog-related Input/Output code, that class gradually lost responsibilities to
the point where more of the IO is now done by `revlog.index` objects or revlog
objects themselves. I would like to ultimately remove the `revlogio` class; to do
so, I start simple by moving the "entry size" information onto the index. (The
index is already responsible for the binary unpacking, so it knows the size.)
Differential Revision: https://phab.mercurial-scm.org/D10309
--- a/contrib/perf.py Wed Apr 14 20:32:53 2021 +0200
+++ b/contrib/perf.py Mon Apr 05 12:22:25 2021 +0200
@@ -3228,7 +3228,10 @@
start = r.start
length = r.length
inline = r._inline
- iosize = r._io.size
+ try:
+ iosize = r.index.entry_size
+ except AttributeError:
+ iosize = r._io.size
buffer = util.buffer
chunks = []
--- a/mercurial/cext/revlog.c Wed Apr 14 20:32:53 2021 +0200
+++ b/mercurial/cext/revlog.c Mon Apr 05 12:22:25 2021 +0200
@@ -15,6 +15,7 @@
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
+#include <structmember.h>
#include "bitmanipulation.h"
#include "charencode.h"
@@ -2866,6 +2867,12 @@
{NULL} /* Sentinel */
};
+static PyMemberDef index_members[] = { /* data attributes of parsers.index */
+	{"entry_size", T_LONG, offsetof(indexObject, hdrsize), READONLY,
+	 "size of an index entry"}, /* READONLY: Python must not overwrite it */
+	{NULL} /* Sentinel */
+};
+
PyTypeObject HgRevlogIndex_Type = {
PyVarObject_HEAD_INIT(NULL, 0) /* header */
"parsers.index", /* tp_name */
@@ -2895,7 +2902,7 @@
0, /* tp_iter */
0, /* tp_iternext */
index_methods, /* tp_methods */
- 0, /* tp_members */
+ index_members, /* tp_members */
index_getset, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
--- a/mercurial/pure/parsers.py Wed Apr 14 20:32:53 2021 +0200
+++ b/mercurial/pure/parsers.py Mon Apr 05 12:22:25 2021 +0200
@@ -49,11 +49,13 @@
big_int_size = struct.calcsize(b'>Q')
# Size of a C long int, platform independent
int_size = struct.calcsize(b'>i')
- # Size of the entire index format
- index_size = revlog_constants.INDEX_ENTRY_V1.size
# An empty index entry, used as a default value to be overridden, or nullrev
null_item = (0, 0, 0, -1, -1, -1, -1, nullid)
+ @util.propertycache
+ def entry_size(self):
+ return self.index_format.size
+
@property
def nodemap(self):
msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
@@ -116,7 +118,7 @@
data = self._extra[i - self._lgt]
else:
index = self._calculate_index(i)
- data = self._data[index : index + self.index_size]
+ data = self._data[index : index + self.entry_size]
r = self.index_format.unpack(data)
if self._lgt and i == 0:
r = (offset_type(0, gettype(r[0])),) + r[1:]
@@ -125,13 +127,13 @@
class IndexObject(BaseIndexObject):
def __init__(self, data):
- assert len(data) % self.index_size == 0
+ assert len(data) % self.entry_size == 0
self._data = data
- self._lgt = len(data) // self.index_size
+ self._lgt = len(data) // self.entry_size
self._extra = []
def _calculate_index(self, i):
- return i * self.index_size
+ return i * self.entry_size
def __delitem__(self, i):
if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
@@ -140,7 +142,7 @@
self._check_index(i)
self._stripnodes(i)
if i < self._lgt:
- self._data = self._data[: i * self.index_size]
+ self._data = self._data[: i * self.entry_size]
self._lgt = i
self._extra = []
else:
@@ -203,7 +205,7 @@
if lgt is not None:
self._offsets = [0] * lgt
count = 0
- while off <= len(self._data) - self.index_size:
+ while off <= len(self._data) - self.entry_size:
start = off + self.big_int_size
(s,) = struct.unpack(
b'>i',
@@ -212,7 +214,7 @@
if lgt is not None:
self._offsets[count] = off
count += 1
- off += self.index_size + s
+ off += self.entry_size + s
if off != len(self._data):
raise ValueError(b"corrupted data")
return count
@@ -244,7 +246,6 @@
class Index2Mixin(object):
index_format = revlog_constants.INDEX_ENTRY_V2
- index_size = revlog_constants.INDEX_ENTRY_V2.size
null_item = (0, 0, 0, -1, -1, -1, -1, nullid, 0, 0)
def replace_sidedata_info(self, i, sidedata_offset, sidedata_length):
@@ -280,7 +281,7 @@
if lgt is not None:
self._offsets = [0] * lgt
count = 0
- while off <= len(self._data) - self.index_size:
+ while off <= len(self._data) - self.entry_size:
start = off + self.big_int_size
(data_size,) = struct.unpack(
b'>i',
@@ -293,7 +294,7 @@
if lgt is not None:
self._offsets[count] = off
count += 1
- off += self.index_size + data_size + side_data_size
+ off += self.entry_size + data_size + side_data_size
if off != len(self._data):
raise ValueError(b"corrupted data")
return count
--- a/mercurial/revlog.py Wed Apr 14 20:32:53 2021 +0200
+++ b/mercurial/revlog.py Mon Apr 05 12:22:25 2021 +0200
@@ -222,6 +222,8 @@
class revlogoldindex(list):
+ entry_size = INDEX_ENTRY_V0.size
+
@property
def nodemap(self):
msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
@@ -273,11 +275,8 @@
class revlogoldio(object):
- def __init__(self):
- self.size = INDEX_ENTRY_V0.size
-
def parseindex(self, data, inline):
- s = self.size
+ s = INDEX_ENTRY_V0.size
index = []
nodemap = nodemaputil.NodeMap({nullid: nullrev})
n = off = 0
@@ -334,9 +333,6 @@
class revlogio(object):
- def __init__(self):
- self.size = INDEX_ENTRY_V1.size
-
def parseindex(self, data, inline):
# call the C implementation to parse the index data
index, cache = parsers.parse_index2(data, inline)
@@ -350,9 +346,6 @@
class revlogv2io(object):
- def __init__(self):
- self.size = INDEX_ENTRY_V2.size
-
def parseindex(self, data, inline):
index, cache = parsers.parse_index2(data, inline, revlogv2=True)
return index, cache
@@ -1716,8 +1709,8 @@
end = int(iend[0] >> 16) + iend[1]
if self._inline:
- start += (startrev + 1) * self._io.size
- end += (endrev + 1) * self._io.size
+ start += (startrev + 1) * self.index.entry_size
+ end += (endrev + 1) * self.index.entry_size
length = end - start
return start, self._getsegment(start, length, df=df)
@@ -1751,7 +1744,7 @@
start = self.start
length = self.length
inline = self._inline
- iosize = self._io.size
+ iosize = self.index.entry_size
buffer = util.buffer
l = []
@@ -1979,7 +1972,7 @@
sidedata_size = index_entry[9]
if self._inline:
- sidedata_offset += self._io.size * (1 + rev)
+ sidedata_offset += self.index.entry_size * (1 + rev)
if sidedata_size == 0:
return {}
@@ -2079,7 +2072,7 @@
# the temp file replace the real index when we exit the context
# manager
- tr.replace(self.indexfile, trindex * self._io.size)
+ tr.replace(self.indexfile, trindex * self.index.entry_size)
nodemaputil.setup_persistent_nodemap(tr, self)
self._chunkclear()
@@ -2335,12 +2328,12 @@
# offset is "as if" it were in the .d file, so we need to add on
# the size of the entry metadata.
self._concurrencychecker(
- ifh, self.indexfile, offset + curr * self._io.size
+ ifh, self.indexfile, offset + curr * self.index.entry_size
)
else:
# Entries in the .i are a consistent size.
self._concurrencychecker(
- ifh, self.indexfile, curr * self._io.size
+ ifh, self.indexfile, curr * self.index.entry_size
)
self._concurrencychecker(dfh, self.datafile, offset)
@@ -2464,7 +2457,7 @@
dfh.write(sidedata)
ifh.write(entry)
else:
- offset += curr * self._io.size
+ offset += curr * self.index.entry_size
transaction.add(self.indexfile, offset)
ifh.write(entry)
ifh.write(data[0])
@@ -2502,7 +2495,7 @@
if r:
end = self.end(r - 1)
ifh = self._indexfp(b"a+")
- isize = r * self._io.size
+ isize = r * self.index.entry_size
if self._inline:
transaction.add(self.indexfile, end + isize)
dfh = None
@@ -2658,9 +2651,9 @@
end = self.start(rev)
if not self._inline:
transaction.add(self.datafile, end)
- end = rev * self._io.size
+ end = rev * self.index.entry_size
else:
- end += rev * self._io.size
+ end += rev * self.index.entry_size
transaction.add(self.indexfile, end)
@@ -2699,7 +2692,7 @@
f.seek(0, io.SEEK_END)
actual = f.tell()
f.close()
- s = self._io.size
+ s = self.index.entry_size
i = max(0, actual // s)
di = actual - (i * s)
if self._inline:
@@ -3241,7 +3234,7 @@
# rewrite the new index entries
with self._indexfp(b'w+') as fp:
- fp.seek(startrev * self._io.size)
+ fp.seek(startrev * self.index.entry_size)
for i, entry in enumerate(new_entries):
rev = startrev + i
self.index.replace_sidedata_info(rev, entry[8], entry[9])
--- a/rust/hg-cpython/src/revlog.rs Wed Apr 14 20:32:53 2021 +0200
+++ b/rust/hg-cpython/src/revlog.rs Mon Apr 05 12:22:25 2021 +0200
@@ -12,8 +12,8 @@
use cpython::{
buffer::{Element, PyBuffer},
exc::{IndexError, ValueError},
- ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyModule, PyObject,
- PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
+ ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyInt, PyModule,
+ PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
};
use hg::{
nodemap::{Block, NodeMapError, NodeTree},
@@ -285,6 +285,10 @@
self.inner_update_nodemap_data(py, docket, nm_data)
}
+ @property
+ def entry_size(&self) -> PyResult<PyInt> {
+ self.cindex(py).borrow().inner().getattr(py, "entry_size")?.extract::<PyInt>(py)
+ }
});