changeset 46974:3c9208702db3

revlog: replace revlog._io.size with a new revlog.index.entry_size The `revlogio` class is mostly a relic from the past. Once in charge of the full revlog-related Input/Output code, that class gradually lost responsibilities to the point where more IO is now done by `revlog.index` objects or revlog objects themselves. I would like to ultimately remove the `revlogio` class; to do so, I start simple by moving the "entry size" information onto the index. (The index is already responsible for the binary unpacking, so it knows the size.) Differential Revision: https://phab.mercurial-scm.org/D10309
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Mon, 05 Apr 2021 12:22:25 +0200
parents 92029a43debb
children 14ddb1dca2c0
files contrib/perf.py mercurial/cext/revlog.c mercurial/pure/parsers.py mercurial/revlog.py rust/hg-cpython/src/revlog.rs
diffstat 5 files changed, 47 insertions(+), 39 deletions(-) [+]
line wrap: on
line diff
--- a/contrib/perf.py	Wed Apr 14 20:32:53 2021 +0200
+++ b/contrib/perf.py	Mon Apr 05 12:22:25 2021 +0200
@@ -3228,7 +3228,10 @@
         start = r.start
         length = r.length
         inline = r._inline
-        iosize = r._io.size
+        try:
+            iosize = r.index.entry_size
+        except AttributeError:
+            iosize = r._io.size
         buffer = util.buffer
 
         chunks = []
--- a/mercurial/cext/revlog.c	Wed Apr 14 20:32:53 2021 +0200
+++ b/mercurial/cext/revlog.c	Mon Apr 05 12:22:25 2021 +0200
@@ -15,6 +15,7 @@
 #include <stddef.h>
 #include <stdlib.h>
 #include <string.h>
+#include <structmember.h>
 
 #include "bitmanipulation.h"
 #include "charencode.h"
@@ -2866,6 +2867,12 @@
     {NULL} /* Sentinel */
 };
 
+static PyMemberDef index_members[] = {
+    {"entry_size", T_LONG, offsetof(indexObject, hdrsize), 0,
+     "size of an index entry"},
+    {NULL} /* Sentinel */
+};
+
 PyTypeObject HgRevlogIndex_Type = {
     PyVarObject_HEAD_INIT(NULL, 0) /* header */
     "parsers.index",               /* tp_name */
@@ -2895,7 +2902,7 @@
     0,                             /* tp_iter */
     0,                             /* tp_iternext */
     index_methods,                 /* tp_methods */
-    0,                             /* tp_members */
+    index_members,                 /* tp_members */
     index_getset,                  /* tp_getset */
     0,                             /* tp_base */
     0,                             /* tp_dict */
--- a/mercurial/pure/parsers.py	Wed Apr 14 20:32:53 2021 +0200
+++ b/mercurial/pure/parsers.py	Mon Apr 05 12:22:25 2021 +0200
@@ -49,11 +49,13 @@
     big_int_size = struct.calcsize(b'>Q')
     # Size of a C long int, platform independent
     int_size = struct.calcsize(b'>i')
-    # Size of the entire index format
-    index_size = revlog_constants.INDEX_ENTRY_V1.size
     # An empty index entry, used as a default value to be overridden, or nullrev
     null_item = (0, 0, 0, -1, -1, -1, -1, nullid)
 
+    @util.propertycache
+    def entry_size(self):
+        return self.index_format.size
+
     @property
     def nodemap(self):
         msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
@@ -116,7 +118,7 @@
             data = self._extra[i - self._lgt]
         else:
             index = self._calculate_index(i)
-            data = self._data[index : index + self.index_size]
+            data = self._data[index : index + self.entry_size]
         r = self.index_format.unpack(data)
         if self._lgt and i == 0:
             r = (offset_type(0, gettype(r[0])),) + r[1:]
@@ -125,13 +127,13 @@
 
 class IndexObject(BaseIndexObject):
     def __init__(self, data):
-        assert len(data) % self.index_size == 0
+        assert len(data) % self.entry_size == 0
         self._data = data
-        self._lgt = len(data) // self.index_size
+        self._lgt = len(data) // self.entry_size
         self._extra = []
 
     def _calculate_index(self, i):
-        return i * self.index_size
+        return i * self.entry_size
 
     def __delitem__(self, i):
         if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
@@ -140,7 +142,7 @@
         self._check_index(i)
         self._stripnodes(i)
         if i < self._lgt:
-            self._data = self._data[: i * self.index_size]
+            self._data = self._data[: i * self.entry_size]
             self._lgt = i
             self._extra = []
         else:
@@ -203,7 +205,7 @@
         if lgt is not None:
             self._offsets = [0] * lgt
         count = 0
-        while off <= len(self._data) - self.index_size:
+        while off <= len(self._data) - self.entry_size:
             start = off + self.big_int_size
             (s,) = struct.unpack(
                 b'>i',
@@ -212,7 +214,7 @@
             if lgt is not None:
                 self._offsets[count] = off
             count += 1
-            off += self.index_size + s
+            off += self.entry_size + s
         if off != len(self._data):
             raise ValueError(b"corrupted data")
         return count
@@ -244,7 +246,6 @@
 
 class Index2Mixin(object):
     index_format = revlog_constants.INDEX_ENTRY_V2
-    index_size = revlog_constants.INDEX_ENTRY_V2.size
     null_item = (0, 0, 0, -1, -1, -1, -1, nullid, 0, 0)
 
     def replace_sidedata_info(self, i, sidedata_offset, sidedata_length):
@@ -280,7 +281,7 @@
         if lgt is not None:
             self._offsets = [0] * lgt
         count = 0
-        while off <= len(self._data) - self.index_size:
+        while off <= len(self._data) - self.entry_size:
             start = off + self.big_int_size
             (data_size,) = struct.unpack(
                 b'>i',
@@ -293,7 +294,7 @@
             if lgt is not None:
                 self._offsets[count] = off
             count += 1
-            off += self.index_size + data_size + side_data_size
+            off += self.entry_size + data_size + side_data_size
         if off != len(self._data):
             raise ValueError(b"corrupted data")
         return count
--- a/mercurial/revlog.py	Wed Apr 14 20:32:53 2021 +0200
+++ b/mercurial/revlog.py	Mon Apr 05 12:22:25 2021 +0200
@@ -222,6 +222,8 @@
 
 
 class revlogoldindex(list):
+    entry_size = INDEX_ENTRY_V0.size
+
     @property
     def nodemap(self):
         msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
@@ -273,11 +275,8 @@
 
 
 class revlogoldio(object):
-    def __init__(self):
-        self.size = INDEX_ENTRY_V0.size
-
     def parseindex(self, data, inline):
-        s = self.size
+        s = INDEX_ENTRY_V0.size
         index = []
         nodemap = nodemaputil.NodeMap({nullid: nullrev})
         n = off = 0
@@ -334,9 +333,6 @@
 
 
 class revlogio(object):
-    def __init__(self):
-        self.size = INDEX_ENTRY_V1.size
-
     def parseindex(self, data, inline):
         # call the C implementation to parse the index data
         index, cache = parsers.parse_index2(data, inline)
@@ -350,9 +346,6 @@
 
 
 class revlogv2io(object):
-    def __init__(self):
-        self.size = INDEX_ENTRY_V2.size
-
     def parseindex(self, data, inline):
         index, cache = parsers.parse_index2(data, inline, revlogv2=True)
         return index, cache
@@ -1716,8 +1709,8 @@
             end = int(iend[0] >> 16) + iend[1]
 
         if self._inline:
-            start += (startrev + 1) * self._io.size
-            end += (endrev + 1) * self._io.size
+            start += (startrev + 1) * self.index.entry_size
+            end += (endrev + 1) * self.index.entry_size
         length = end - start
 
         return start, self._getsegment(start, length, df=df)
@@ -1751,7 +1744,7 @@
         start = self.start
         length = self.length
         inline = self._inline
-        iosize = self._io.size
+        iosize = self.index.entry_size
         buffer = util.buffer
 
         l = []
@@ -1979,7 +1972,7 @@
         sidedata_size = index_entry[9]
 
         if self._inline:
-            sidedata_offset += self._io.size * (1 + rev)
+            sidedata_offset += self.index.entry_size * (1 + rev)
         if sidedata_size == 0:
             return {}
 
@@ -2079,7 +2072,7 @@
             # the temp file replace the real index when we exit the context
             # manager
 
-        tr.replace(self.indexfile, trindex * self._io.size)
+        tr.replace(self.indexfile, trindex * self.index.entry_size)
         nodemaputil.setup_persistent_nodemap(tr, self)
         self._chunkclear()
 
@@ -2335,12 +2328,12 @@
                 # offset is "as if" it were in the .d file, so we need to add on
                 # the size of the entry metadata.
                 self._concurrencychecker(
-                    ifh, self.indexfile, offset + curr * self._io.size
+                    ifh, self.indexfile, offset + curr * self.index.entry_size
                 )
             else:
                 # Entries in the .i are a consistent size.
                 self._concurrencychecker(
-                    ifh, self.indexfile, curr * self._io.size
+                    ifh, self.indexfile, curr * self.index.entry_size
                 )
                 self._concurrencychecker(dfh, self.datafile, offset)
 
@@ -2464,7 +2457,7 @@
                 dfh.write(sidedata)
             ifh.write(entry)
         else:
-            offset += curr * self._io.size
+            offset += curr * self.index.entry_size
             transaction.add(self.indexfile, offset)
             ifh.write(entry)
             ifh.write(data[0])
@@ -2502,7 +2495,7 @@
         if r:
             end = self.end(r - 1)
         ifh = self._indexfp(b"a+")
-        isize = r * self._io.size
+        isize = r * self.index.entry_size
         if self._inline:
             transaction.add(self.indexfile, end + isize)
             dfh = None
@@ -2658,9 +2651,9 @@
         end = self.start(rev)
         if not self._inline:
             transaction.add(self.datafile, end)
-            end = rev * self._io.size
+            end = rev * self.index.entry_size
         else:
-            end += rev * self._io.size
+            end += rev * self.index.entry_size
 
         transaction.add(self.indexfile, end)
 
@@ -2699,7 +2692,7 @@
             f.seek(0, io.SEEK_END)
             actual = f.tell()
             f.close()
-            s = self._io.size
+            s = self.index.entry_size
             i = max(0, actual // s)
             di = actual - (i * s)
             if self._inline:
@@ -3241,7 +3234,7 @@
 
         # rewrite the new index entries
         with self._indexfp(b'w+') as fp:
-            fp.seek(startrev * self._io.size)
+            fp.seek(startrev * self.index.entry_size)
             for i, entry in enumerate(new_entries):
                 rev = startrev + i
                 self.index.replace_sidedata_info(rev, entry[8], entry[9])
--- a/rust/hg-cpython/src/revlog.rs	Wed Apr 14 20:32:53 2021 +0200
+++ b/rust/hg-cpython/src/revlog.rs	Mon Apr 05 12:22:25 2021 +0200
@@ -12,8 +12,8 @@
 use cpython::{
     buffer::{Element, PyBuffer},
     exc::{IndexError, ValueError},
-    ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyModule, PyObject,
-    PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
+    ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyInt, PyModule,
+    PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
 };
 use hg::{
     nodemap::{Block, NodeMapError, NodeTree},
@@ -285,6 +285,10 @@
         self.inner_update_nodemap_data(py, docket, nm_data)
     }
 
+    @property
+    def entry_size(&self) -> PyResult<PyInt> {
+        self.cindex(py).borrow().inner().getattr(py, "entry_size")?.extract::<PyInt>(py)
+    }
 
 });