Mercurial > hg
changeset 46974:3c9208702db3
revlog: replace revlog._io.size with a new revlog.index.entry_size
The `revlogio` class is mostly a relic from the past. Once in charge of the full
revlog related Input/Output code, that class gradually lost responsibilities to
the point where more IO are now done by `revlog.index` objects or revlog objects
themself. I would like to ultimately remove the `revlogio` class, to do so I
start simple with move the "entry size" information on the index. (The index is
already responsible of the binary unpacking, so it knows the size.
Differential Revision: https://phab.mercurial-scm.org/D10309
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Mon, 05 Apr 2021 12:22:25 +0200 |
parents | 92029a43debb |
children | 14ddb1dca2c0 |
files | contrib/perf.py mercurial/cext/revlog.c mercurial/pure/parsers.py mercurial/revlog.py rust/hg-cpython/src/revlog.rs |
diffstat | 5 files changed, 47 insertions(+), 39 deletions(-) [+] |
line wrap: on
line diff
--- a/contrib/perf.py Wed Apr 14 20:32:53 2021 +0200 +++ b/contrib/perf.py Mon Apr 05 12:22:25 2021 +0200 @@ -3228,7 +3228,10 @@ start = r.start length = r.length inline = r._inline - iosize = r._io.size + try: + iosize = r.index.entry_size + except AttributeError: + iosize = r._io.size buffer = util.buffer chunks = []
--- a/mercurial/cext/revlog.c Wed Apr 14 20:32:53 2021 +0200 +++ b/mercurial/cext/revlog.c Mon Apr 05 12:22:25 2021 +0200 @@ -15,6 +15,7 @@ #include <stddef.h> #include <stdlib.h> #include <string.h> +#include <structmember.h> #include "bitmanipulation.h" #include "charencode.h" @@ -2866,6 +2867,12 @@ {NULL} /* Sentinel */ }; +static PyMemberDef index_members[] = { + {"entry_size", T_LONG, offsetof(indexObject, hdrsize), 0, + "size of an index entry"}, + {NULL} /* Sentinel */ +}; + PyTypeObject HgRevlogIndex_Type = { PyVarObject_HEAD_INIT(NULL, 0) /* header */ "parsers.index", /* tp_name */ @@ -2895,7 +2902,7 @@ 0, /* tp_iter */ 0, /* tp_iternext */ index_methods, /* tp_methods */ - 0, /* tp_members */ + index_members, /* tp_members */ index_getset, /* tp_getset */ 0, /* tp_base */ 0, /* tp_dict */
--- a/mercurial/pure/parsers.py Wed Apr 14 20:32:53 2021 +0200 +++ b/mercurial/pure/parsers.py Mon Apr 05 12:22:25 2021 +0200 @@ -49,11 +49,13 @@ big_int_size = struct.calcsize(b'>Q') # Size of a C long int, platform independent int_size = struct.calcsize(b'>i') - # Size of the entire index format - index_size = revlog_constants.INDEX_ENTRY_V1.size # An empty index entry, used as a default value to be overridden, or nullrev null_item = (0, 0, 0, -1, -1, -1, -1, nullid) + @util.propertycache + def entry_size(self): + return self.index_format.size + @property def nodemap(self): msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]" @@ -116,7 +118,7 @@ data = self._extra[i - self._lgt] else: index = self._calculate_index(i) - data = self._data[index : index + self.index_size] + data = self._data[index : index + self.entry_size] r = self.index_format.unpack(data) if self._lgt and i == 0: r = (offset_type(0, gettype(r[0])),) + r[1:] @@ -125,13 +127,13 @@ class IndexObject(BaseIndexObject): def __init__(self, data): - assert len(data) % self.index_size == 0 + assert len(data) % self.entry_size == 0 self._data = data - self._lgt = len(data) // self.index_size + self._lgt = len(data) // self.entry_size self._extra = [] def _calculate_index(self, i): - return i * self.index_size + return i * self.entry_size def __delitem__(self, i): if not isinstance(i, slice) or not i.stop == -1 or i.step is not None: @@ -140,7 +142,7 @@ self._check_index(i) self._stripnodes(i) if i < self._lgt: - self._data = self._data[: i * self.index_size] + self._data = self._data[: i * self.entry_size] self._lgt = i self._extra = [] else: @@ -203,7 +205,7 @@ if lgt is not None: self._offsets = [0] * lgt count = 0 - while off <= len(self._data) - self.index_size: + while off <= len(self._data) - self.entry_size: start = off + self.big_int_size (s,) = struct.unpack( b'>i', @@ -212,7 +214,7 @@ if lgt is not None: self._offsets[count] = off count += 1 - off += self.index_size + s + off += self.entry_size + s if off != len(self._data): raise ValueError(b"corrupted data") return count @@ -244,7 +246,6 @@ class Index2Mixin(object): index_format = revlog_constants.INDEX_ENTRY_V2 - index_size = revlog_constants.INDEX_ENTRY_V2.size null_item = (0, 0, 0, -1, -1, -1, -1, nullid, 0, 0) def replace_sidedata_info(self, i, sidedata_offset, sidedata_length): @@ -280,7 +281,7 @@ if lgt is not None: self._offsets = [0] * lgt count = 0 - while off <= len(self._data) - self.index_size: + while off <= len(self._data) - self.entry_size: start = off + self.big_int_size (data_size,) = struct.unpack( b'>i', @@ -293,7 +294,7 @@ if lgt is not None: self._offsets[count] = off count += 1 - off += self.index_size + data_size + side_data_size + off += self.entry_size + data_size + side_data_size if off != len(self._data): raise ValueError(b"corrupted data") return count
--- a/mercurial/revlog.py Wed Apr 14 20:32:53 2021 +0200 +++ b/mercurial/revlog.py Mon Apr 05 12:22:25 2021 +0200 @@ -222,6 +222,8 @@ class revlogoldindex(list): + entry_size = INDEX_ENTRY_V0.size + @property def nodemap(self): msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]" @@ -273,11 +275,8 @@ class revlogoldio(object): - def __init__(self): - self.size = INDEX_ENTRY_V0.size - def parseindex(self, data, inline): - s = self.size + s = INDEX_ENTRY_V0.size index = [] nodemap = nodemaputil.NodeMap({nullid: nullrev}) n = off = 0 @@ -334,9 +333,6 @@ class revlogio(object): - def __init__(self): - self.size = INDEX_ENTRY_V1.size - def parseindex(self, data, inline): # call the C implementation to parse the index data index, cache = parsers.parse_index2(data, inline) @@ -350,9 +346,6 @@ class revlogv2io(object): - def __init__(self): - self.size = INDEX_ENTRY_V2.size - def parseindex(self, data, inline): index, cache = parsers.parse_index2(data, inline, revlogv2=True) return index, cache @@ -1716,8 +1709,8 @@ end = int(iend[0] >> 16) + iend[1] if self._inline: - start += (startrev + 1) * self._io.size - end += (endrev + 1) * self._io.size + start += (startrev + 1) * self.index.entry_size + end += (endrev + 1) * self.index.entry_size length = end - start return start, self._getsegment(start, length, df=df) @@ -1751,7 +1744,7 @@ start = self.start length = self.length inline = self._inline - iosize = self._io.size + iosize = self.index.entry_size buffer = util.buffer l = [] @@ -1979,7 +1972,7 @@ sidedata_size = index_entry[9] if self._inline: - sidedata_offset += self._io.size * (1 + rev) + sidedata_offset += self.index.entry_size * (1 + rev) if sidedata_size == 0: return {} @@ -2079,7 +2072,7 @@ # the temp file replace the real index when we exit the context # manager - tr.replace(self.indexfile, trindex * self._io.size) + tr.replace(self.indexfile, trindex * self.index.entry_size) nodemaputil.setup_persistent_nodemap(tr, self) self._chunkclear() @@ -2335,12 +2328,12 @@ # offset is "as if" it were in the .d file, so we need to add on # the size of the entry metadata. self._concurrencychecker( - ifh, self.indexfile, offset + curr * self._io.size + ifh, self.indexfile, offset + curr * self.index.entry_size ) else: # Entries in the .i are a consistent size. self._concurrencychecker( - ifh, self.indexfile, curr * self._io.size + ifh, self.indexfile, curr * self.index.entry_size ) self._concurrencychecker(dfh, self.datafile, offset) @@ -2464,7 +2457,7 @@ dfh.write(sidedata) ifh.write(entry) else: - offset += curr * self._io.size + offset += curr * self.index.entry_size transaction.add(self.indexfile, offset) ifh.write(entry) ifh.write(data[0]) @@ -2502,7 +2495,7 @@ if r: end = self.end(r - 1) ifh = self._indexfp(b"a+") - isize = r * self._io.size + isize = r * self.index.entry_size if self._inline: transaction.add(self.indexfile, end + isize) dfh = None @@ -2658,9 +2651,9 @@ end = self.start(rev) if not self._inline: transaction.add(self.datafile, end) - end = rev * self._io.size + end = rev * self.index.entry_size else: - end += rev * self._io.size + end += rev * self.index.entry_size transaction.add(self.indexfile, end) @@ -2699,7 +2692,7 @@ f.seek(0, io.SEEK_END) actual = f.tell() f.close() - s = self._io.size + s = self.index.entry_size i = max(0, actual // s) di = actual - (i * s) if self._inline: @@ -3241,7 +3234,7 @@ # rewrite the new index entries with self._indexfp(b'w+') as fp: - fp.seek(startrev * self._io.size) + fp.seek(startrev * self.index.entry_size) for i, entry in enumerate(new_entries): rev = startrev + i self.index.replace_sidedata_info(rev, entry[8], entry[9])
--- a/rust/hg-cpython/src/revlog.rs Wed Apr 14 20:32:53 2021 +0200 +++ b/rust/hg-cpython/src/revlog.rs Mon Apr 05 12:22:25 2021 +0200 @@ -12,8 +12,8 @@ use cpython::{ buffer::{Element, PyBuffer}, exc::{IndexError, ValueError}, - ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyModule, PyObject, - PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject, + ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyInt, PyModule, + PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject, }; use hg::{ nodemap::{Block, NodeMapError, NodeTree}, @@ -285,6 +285,10 @@ self.inner_update_nodemap_data(py, docket, nm_data) } + @property + def entry_size(&self) -> PyResult<PyInt> { + self.cindex(py).borrow().inner().getattr(py, "entry_size")?.extract::<PyInt>(py) + } });