revlog: add the glue to use the Rust `InnerRevlog` from Python
authorRaphaël Gomès <rgomes@octobus.net>
Wed, 19 Jun 2024 19:10:49 +0200
changeset 52167 7346f93be7a4
parent 52166 13815c9decd4
child 52168 6b7ffa3f9199
revlog: add the glue to use the Rust `InnerRevlog` from Python The performance of this has been looked at for quite some time, and some workflows are actually quite a bit faster than with the Python + C code. However, we are still (up to 20%) slower in some crucial places like cloning certain repos, log, cat, which makes this an incomplete rewrite. This is mostly due to the high amount of overhead in Python <-> Rust FFI, especially around the VFS code. A future patch series will rewrite the VFS code in pure Rust, which should hopefully get us up to par with current performance, if not better in all important cases. This is a "save state" of sorts, as this is a ton of code, and I don't want to pile up even more things in a single review. Continuing to try to match the current performance will take an extremely long time, if it's not impossible, without the aforementioned VFS work.
contrib/perf.py
mercurial/pure/parsers.py
mercurial/revlog.py
mercurial/revlogutils/rewrite.py
mercurial/statichttprepo.py
mercurial/vfs.py
rust/Cargo.lock
rust/hg-cpython/Cargo.toml
rust/hg-cpython/src/lib.rs
rust/hg-cpython/src/pybytes_deref.rs
rust/hg-cpython/src/revlog.rs
rust/hg-cpython/src/vfs.rs
tests/test-bundle.t
tests/test-contrib-perf.t
tests/test-journal-exists.t
tests/test-permissions.t
tests/test-remotefilelog-bgprefetch.t
tests/test-remotefilelog-prefetch.t
tests/test-repo-compengines.t
tests/test-rust-revlog.py
tests/test-treemanifest.t
tests/test-verify.t
--- a/contrib/perf.py	Wed Jun 19 17:03:13 2024 +0200
+++ b/contrib/perf.py	Wed Jun 19 19:10:49 2024 +0200
@@ -3780,6 +3780,11 @@
 
     rl = cmdutil.openrevlog(repo, b'perfrevlogchunks', file_, opts)
 
+    if rl.uses_rust:
+        raise NotImplementedError(
+            "perfrevlogchunks is not implemented for the Rust revlog"
+        )
+
     # - _chunkraw was renamed to _getsegmentforrevs
     # - _getsegmentforrevs was moved on the inner object
     try:
@@ -3960,6 +3965,10 @@
         raise error.CommandError(b'perfrevlogrevision', b'invalid arguments')
 
     r = cmdutil.openrevlog(repo, b'perfrevlogrevision', file_, opts)
+    if r.uses_rust:
+        raise NotImplementedError(
+            "perfrevlogrevision is not implemented for the Rust revlog"
+        )
 
     # _chunkraw was renamed to _getsegmentforrevs.
     try:
--- a/mercurial/pure/parsers.py	Wed Jun 19 17:03:13 2024 +0200
+++ b/mercurial/pure/parsers.py	Wed Jun 19 19:10:49 2024 +0200
@@ -672,6 +672,9 @@
             r = (offset,) + r[1:]
         return r
 
+    def __delitem__(self, i):
+        raise NotImplementedError()
+
     def _unpack_entry(self, rev, data):
         r = self.index_format.unpack(data)
         r = r + (
--- a/mercurial/revlog.py	Wed Jun 19 17:03:13 2024 +0200
+++ b/mercurial/revlog.py	Wed Jun 19 19:10:49 2024 +0200
@@ -17,7 +17,6 @@
 import binascii
 import collections
 import contextlib
-import functools
 import io
 import os
 import struct
@@ -83,6 +82,7 @@
 if typing.TYPE_CHECKING:
     # noinspection PyPackageRequirements
     import attr
+    from .pure.parsers import BaseIndexObject
 
 from . import (
     ancestor,
@@ -381,7 +381,7 @@
         default_compression_header,
     ):
         self.opener = opener
-        self.index = index
+        self.index: BaseIndexObject = index
 
         self.index_file = index_file
         self.data_file = data_file
@@ -528,7 +528,9 @@
         generaldelta = self.delta_config.general_delta
         # Try C implementation.
         try:
-            return self.index.deltachain(rev, stoprev, generaldelta)
+            return self.index.deltachain(
+                rev, stoprev, generaldelta
+            )  # pytype: disable=attribute-error
         except AttributeError:
             pass
 
@@ -1246,6 +1248,71 @@
         return self.canonical_index_file
 
 
+if typing.TYPE_CHECKING:
+    # Tell Pytype what kind of object we expect
+    ProxyBase = BaseIndexObject
+else:
+    ProxyBase = object
+
+
+class RustIndexProxy(ProxyBase):
+    """Wrapper around the Rust index to fake having direct access to the index.
+
+    Rust enforces xor mutability (one mutable reference XOR 1..n non-mutable),
+    so we can't expose the index from Rust directly, since the `InnerRevlog`
+    already has ownership of the index. This object redirects all calls to the
+    index through the Rust-backed `InnerRevlog` glue which defines all
+    necessary forwarding methods.
+    """
+
+    def __init__(self, inner):
+        # Do not rename as it's being used to access the index from Rust
+        self.inner = inner
+
+    # TODO possibly write all index methods manually to save on overhead?
+    def __getattr__(self, name):
+        return getattr(self.inner, f"_index_{name}")
+
+    # Magic methods need to be defined explicitly
+    def __len__(self):
+        return self.inner._index___len__()
+
+    def __getitem__(self, key):
+        return self.inner._index___getitem__(key)
+
+    def __contains__(self, key):
+        return self.inner._index___contains__(key)
+
+    def __delitem__(self, key):
+        return self.inner._index___delitem__(key)
+
+
+class RustVFSWrapper:
+    """Used to wrap a Python VFS to pass it to Rust to lower the overhead of
+    calling back multiple times into Python.
+    """
+
+    def __init__(self, inner):
+        self.inner = inner
+
+    def __call__(
+        self,
+        path: bytes,
+        mode: bytes = b"rb",
+        atomictemp=False,
+        checkambig=False,
+    ):
+        fd = self.inner.__call__(
+            path=path, mode=mode, atomictemp=atomictemp, checkambig=checkambig
+        )
+        # Information that Rust needs to get ownership of the file that's
+        # being opened.
+        return (os.dup(fd.fileno()), fd._tempname if atomictemp else None)
+
+    def __getattr__(self, name):
+        return getattr(self.inner, name)
+
+
 class revlog:
     """
     the underlying revision storage object
@@ -1358,6 +1425,7 @@
         self._trypending = trypending
         self._try_split = try_split
         self._may_inline = may_inline
+        self.uses_rust = False
         self.opener = opener
         if persistentnodemap:
             self._nodemap_file = nodemaputil.get_nodemap_file(self)
@@ -1392,7 +1460,7 @@
         # Maps rev to chain base rev.
         self._chainbasecache = util.lrucachedict(100)
 
-        self.index = None
+        self.index: Optional[BaseIndexObject] = None
         self._docket = None
         self._nodemap_docket = None
         # Mapping of partial identifiers to full nodes.
@@ -1406,8 +1474,8 @@
         # prevent nesting of addgroup
         self._adding_group = None
 
-        chunk_cache = self._loadindex()
-        self._load_inner(chunk_cache)
+        index, chunk_cache = self._loadindex()
+        self._load_inner(index, chunk_cache)
         self._concurrencychecker = concurrencychecker
 
     def _init_opts(self):
@@ -1707,7 +1775,12 @@
         )
 
         use_rust_index = False
-        if rustrevlog is not None and self._nodemap_file is not None:
+        rust_applicable = self._nodemap_file is not None
+        rust_applicable = rust_applicable or self.target[0] == KIND_FILELOG
+        rust_applicable = rust_applicable and getattr(
+            self.opener, "rust_compatible", True
+        )
+        if rustrevlog is not None and rust_applicable:
             # we would like to use the rust_index in all case, especially
             # because it is necessary for AncestorsIterator and LazyAncestors
             # since the 6.7 cycle.
@@ -1717,6 +1790,9 @@
             # repository.
             use_rust_index = True
 
+            if self._format_version != REVLOGV1:
+                use_rust_index = False
+
         self._parse_index = parse_index_v1
         if self._format_version == REVLOGV0:
             self._parse_index = revlogv0.parse_index_v0
@@ -1726,58 +1802,84 @@
             self._parse_index = parse_index_cl_v2
         elif devel_nodemap:
             self._parse_index = parse_index_v1_nodemap
-        elif use_rust_index:
-            self._parse_index = functools.partial(
-                parse_index_v1_rust, default_header=new_header
-            )
-        try:
-            d = self._parse_index(index_data, self._inline)
-            index, chunkcache = d
-            use_nodemap = (
-                not self._inline
-                and self._nodemap_file is not None
-                and hasattr(index, 'update_nodemap_data')
-            )
-            if use_nodemap:
-                nodemap_data = nodemaputil.persisted_data(self)
-                if nodemap_data is not None:
-                    docket = nodemap_data[0]
-                    if (
-                        len(d[0]) > docket.tip_rev
-                        and d[0][docket.tip_rev][7] == docket.tip_node
-                    ):
-                        # no changelog tampering
-                        self._nodemap_docket = docket
-                        index.update_nodemap_data(*nodemap_data)
-        except (ValueError, IndexError):
-            raise error.RevlogError(
-                _(b"index %s is corrupted") % self.display_id
-            )
-        self.index = index
+
+        if use_rust_index:
+            # Let the Rust code parse its own index
+            index, chunkcache = (index_data, None)
+            self.uses_rust = True
+        else:
+            try:
+                d = self._parse_index(index_data, self._inline)
+                index, chunkcache = d
+                self._register_nodemap_info(index)
+            except (ValueError, IndexError):
+                raise error.RevlogError(
+                    _(b"index %s is corrupted") % self.display_id
+                )
         # revnum -> (chain-length, sum-delta-length)
         self._chaininfocache = util.lrucachedict(500)
 
-        return chunkcache
-
-    def _load_inner(self, chunk_cache):
+        return index, chunkcache
+
+    def _load_inner(self, index, chunk_cache):
         if self._docket is None:
             default_compression_header = None
         else:
             default_compression_header = self._docket.default_compression_header
 
-        self._inner = _InnerRevlog(
-            opener=self.opener,
-            index=self.index,
-            index_file=self._indexfile,
-            data_file=self._datafile,
-            sidedata_file=self._sidedatafile,
-            inline=self._inline,
-            data_config=self.data_config,
-            delta_config=self.delta_config,
-            feature_config=self.feature_config,
-            chunk_cache=chunk_cache,
-            default_compression_header=default_compression_header,
+        if self.uses_rust:
+            self._inner = rustrevlog.InnerRevlog(
+                opener=RustVFSWrapper(self.opener),
+                index_data=index,
+                index_file=self._indexfile,
+                data_file=self._datafile,
+                sidedata_file=self._sidedatafile,
+                inline=self._inline,
+                data_config=self.data_config,
+                delta_config=self.delta_config,
+                feature_config=self.feature_config,
+                chunk_cache=chunk_cache,
+                default_compression_header=default_compression_header,
+                revlog_type=self.target[0],
+            )
+            self.index = RustIndexProxy(self._inner)
+            self._register_nodemap_info(self.index)
+            self.uses_rust = True
+        else:
+            self._inner = _InnerRevlog(
+                opener=self.opener,
+                index=index,
+                index_file=self._indexfile,
+                data_file=self._datafile,
+                sidedata_file=self._sidedatafile,
+                inline=self._inline,
+                data_config=self.data_config,
+                delta_config=self.delta_config,
+                feature_config=self.feature_config,
+                chunk_cache=chunk_cache,
+                default_compression_header=default_compression_header,
+            )
+            self.index = self._inner.index
+
+    def _register_nodemap_info(self, index):
+        use_nodemap = (
+            not self._inline
+            and self._nodemap_file is not None
+            and hasattr(index, 'update_nodemap_data')
         )
+        if use_nodemap:
+            nodemap_data = nodemaputil.persisted_data(self)
+            if nodemap_data is not None:
+                docket = nodemap_data[0]
+                if (
+                    len(index) > docket.tip_rev
+                    and index[docket.tip_rev][7] == docket.tip_node
+                ):
+                    # no changelog tampering
+                    self._nodemap_docket = docket
+                    index.update_nodemap_data(
+                        *nodemap_data
+                    )  # pytype: disable=attribute-error
 
     def get_revlog(self):
         """simple function to mirror API of other not-really-revlog API"""
@@ -1869,7 +1971,9 @@
             nodemap_data = nodemaputil.persisted_data(self)
             if nodemap_data is not None:
                 self._nodemap_docket = nodemap_data[0]
-                self.index.update_nodemap_data(*nodemap_data)
+                self.index.update_nodemap_data(
+                    *nodemap_data
+                )  # pytype: disable=attribute-error
 
     def rev(self, node):
         """return the revision number associated with a <nodeid>"""
@@ -2368,23 +2472,26 @@
     def headrevs(self, revs=None, stop_rev=None):
         if revs is None:
             return self.index.headrevs(None, stop_rev)
-        assert stop_rev is None
         if rustdagop is not None and self.index.rust_ext_compat:
             return rustdagop.headrevs(self.index, revs)
         return dagop.headrevs(revs, self._uncheckedparentrevs)
 
     def headrevsdiff(self, start, stop):
         try:
-            return self.index.headrevsdiff(start, stop)
+            return self.index.headrevsdiff(
+                start, stop
+            )  # pytype: disable=attribute-error
         except AttributeError:
             return dagop.headrevsdiff(self._uncheckedparentrevs, start, stop)
 
     def computephases(self, roots):
-        return self.index.computephasesmapsets(roots)
+        return self.index.computephasesmapsets(
+            roots
+        )  # pytype: disable=attribute-error
 
     def _head_node_ids(self):
         try:
-            return self.index.head_node_ids()
+            return self.index.head_node_ids()  # pytype: disable=attribute-error
         except AttributeError:
             return [self.node(r) for r in self.headrevs()]
 
@@ -2442,7 +2549,9 @@
     def _commonancestorsheads(self, *revs):
         """calculate all the heads of the common ancestors of revs"""
         try:
-            ancs = self.index.commonancestorsheads(*revs)
+            ancs = self.index.commonancestorsheads(
+                *revs
+            )  # pytype: disable=attribute-error
         except (AttributeError, OverflowError):  # C implementation failed
             ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
         return ancs
@@ -2476,7 +2585,7 @@
         try:
             return self.index.reachableroots2(
                 minroot, heads, roots, includepath
-            )
+            )  # pytype: disable=attribute-error
         except AttributeError:
             return dagop._reachablerootspure(
                 self.parentrevs, minroot, roots, heads, includepath
@@ -2487,7 +2596,7 @@
 
         a, b = self.rev(a), self.rev(b)
         try:
-            ancs = self.index.ancestors(a, b)
+            ancs = self.index.ancestors(a, b)  # pytype: disable=attribute-error
         except (AttributeError, OverflowError):
             ancs = ancestor.ancestors(self.parentrevs, a, b)
         if ancs:
@@ -2534,7 +2643,9 @@
         maybewdir = self.nodeconstants.wdirhex.startswith(id)
         ambiguous = False
         try:
-            partial = self.index.partialmatch(id)
+            partial = self.index.partialmatch(
+                id
+            )  # pytype: disable=attribute-error
             if partial and self.hasnode(partial):
                 if maybewdir:
                     # single 'ff...' match in radix tree, ambiguous with wdir
@@ -2636,7 +2747,10 @@
 
         if not getattr(self, 'filteredrevs', None):
             try:
-                length = max(self.index.shortest(node), minlength)
+                shortest = self.index.shortest(
+                    node
+                )  # pytype: disable=attribute-error
+                length = max(shortest, minlength)
                 return disambiguate(hexnode, length)
             except error.RevlogError:
                 if node != self.nodeconstants.wdirid:
@@ -4089,7 +4203,9 @@
             ifh.seek(startrev * self.index.entry_size)
             for i, e in enumerate(new_entries):
                 rev = startrev + i
-                self.index.replace_sidedata_info(rev, *e)
+                self.index.replace_sidedata_info(
+                    rev, *e
+                )  # pytype: disable=attribute-error
                 packed = self.index.entry_binary(rev)
                 if rev == 0 and self._docket is None:
                     header = self._format_flags | self._format_version
--- a/mercurial/revlogutils/rewrite.py	Wed Jun 19 17:03:13 2024 +0200
+++ b/mercurial/revlogutils/rewrite.py	Wed Jun 19 19:10:49 2024 +0200
@@ -136,8 +136,8 @@
         rl.opener.rename(newrl._datafile, rl._datafile)
 
     rl.clearcaches()
-    chunk_cache = rl._loadindex()
-    rl._load_inner(chunk_cache)
+    index, chunk_cache = rl._loadindex()
+    rl._load_inner(index, chunk_cache)
 
 
 def v2_censor(revlog, tr, censor_nodes, tombstone=b''):
@@ -327,7 +327,8 @@
 
     # reload the revlog internal information
     revlog.clearcaches()
-    revlog._loadindex(docket=docket)
+    index, chunk_cache = revlog._loadindex(docket=docket)
+    revlog._load_inner(index, chunk_cache)
 
     @contextlib.contextmanager
     def all_files_opener():
@@ -569,7 +570,8 @@
 
             rl.opener.rename(new_file_path, index_file)
             rl.clearcaches()
-            rl._loadindex()
+            index, chunk_cache = rl._loadindex()
+            rl._load_inner(index, chunk_cache)
         finally:
             util.tryunlink(new_file_path)
 
--- a/mercurial/statichttprepo.py	Wed Jun 19 17:03:13 2024 +0200
+++ b/mercurial/statichttprepo.py	Wed Jun 19 19:10:49 2024 +0200
@@ -219,6 +219,9 @@
         self.store = localrepo.makestore(requirements, self.path, vfsclass)
         self.spath = self.store.path
         self.svfs = self.store.opener
+        # We can't use Rust because the Rust code cannot cope with the
+        # `httprangereader` (yet?)
+        self.svfs.rust_compatible = False
         self.sjoin = self.store.join
         self._filecache = {}
         self.requirements = requirements
--- a/mercurial/vfs.py	Wed Jun 19 17:03:13 2024 +0200
+++ b/mercurial/vfs.py	Wed Jun 19 19:10:49 2024 +0200
@@ -82,6 +82,10 @@
     # encoded vfs (see issue6546)
     _dir_sep: bytes = b'/'
 
+    # Used to disable the Rust `InnerRevlog` in case the VFS is not supported
+    # by the Rust code
+    rust_compatible = True
+
     # TODO: type return, which is util.posixfile wrapped by a proxy
     @abc.abstractmethod
     def __call__(self, path: bytes, mode: bytes = b'rb', **kwargs) -> Any:
--- a/rust/Cargo.lock	Wed Jun 19 17:03:13 2024 +0200
+++ b/rust/Cargo.lock	Wed Jun 19 19:10:49 2024 +0200
@@ -666,6 +666,8 @@
  "hg-core",
  "libc",
  "log",
+ "logging_timer",
+ "python3-sys",
  "stable_deref_trait",
  "vcsgraph",
 ]
--- a/rust/hg-cpython/Cargo.toml	Wed Jun 19 17:03:13 2024 +0200
+++ b/rust/hg-cpython/Cargo.toml	Wed Jun 19 19:10:49 2024 +0200
@@ -17,3 +17,5 @@
 env_logger = "0.9.3"
 stable_deref_trait = "1.2.0"
 vcsgraph = "0.2.0"
+logging_timer = "1.1.0"
+python3-sys = "0.7.1"
--- a/rust/hg-cpython/src/lib.rs	Wed Jun 19 17:03:13 2024 +0200
+++ b/rust/hg-cpython/src/lib.rs	Wed Jun 19 19:10:49 2024 +0200
@@ -47,6 +47,7 @@
 pub mod revlog;
 pub mod update;
 pub mod utils;
+pub mod vfs;
 
 /// Revision as exposed to/from the Python layer.
 ///
--- a/rust/hg-cpython/src/pybytes_deref.rs	Wed Jun 19 17:03:13 2024 +0200
+++ b/rust/hg-cpython/src/pybytes_deref.rs	Wed Jun 19 19:10:49 2024 +0200
@@ -1,4 +1,7 @@
-use cpython::{PyBytes, Python};
+use crate::cpython::buffer::Element;
+use cpython::{
+    buffer::PyBuffer, exc::ValueError, PyBytes, PyErr, PyResult, Python,
+};
 use stable_deref_trait::StableDeref;
 
 /// Safe abstraction over a `PyBytes` together with the `&[u8]` slice
@@ -55,3 +58,67 @@
 // but here sending one to another thread is fine since we ensure it stays
 // valid.
 unsafe impl Send for PyBytesDeref {}
+
+/// Safe abstraction pairing a kept-alive `PyBuffer` with a `&[u8]` view of it.
+/// It also enables using a (wrapped) `PyBuffer` in GIL-unaware generic code.
+pub struct PyBufferDeref {
+    #[allow(unused)]
+    keep_alive: PyBuffer,
+
+    /// Borrows the buffer inside `self.keep_alive`,
+    /// but the borrow-checker cannot express self-referential structs.
+    data: *const [u8],
+}
+
+fn get_buffer<'a>(py: Python, buf: &'a PyBuffer) -> PyResult<&'a [u8]> {
+    let len = buf.item_count();
+
+    let cbuf = buf.buf_ptr();
+    let has_correct_item_size = std::mem::size_of::<u8>() == buf.item_size();
+    let is_valid_buffer = has_correct_item_size
+        && buf.is_c_contiguous()
+        && u8::is_compatible_format(buf.format())
+        && buf.readonly();
+
+    let bytes = if is_valid_buffer {
+        unsafe { std::slice::from_raw_parts(cbuf as *const u8, len) }
+    } else {
+        return Err(PyErr::new::<ValueError, _>(
+            py,
+            "Buffer has an invalid memory representation",
+        ));
+    };
+    Ok(bytes)
+}
+
+impl PyBufferDeref {
+    pub fn new(py: Python, buf: PyBuffer) -> PyResult<Self> {
+        Ok(Self {
+            data: get_buffer(py, &buf)?,
+            keep_alive: buf,
+        })
+    }
+}
+
+impl std::ops::Deref for PyBufferDeref {
+    type Target = [u8];
+
+    fn deref(&self) -> &[u8] {
+        // Safety: the raw pointer is valid as long as the PyBuffer is still
+        // alive, and the returned slice borrows `self`.
+        unsafe { &*self.data }
+    }
+}
+
+unsafe impl StableDeref for PyBufferDeref {}
+
+#[allow(unused)]
+fn static_assert_pybuffer_is_send() {
+    #[allow(clippy::no_effect)]
+    require_send::<PyBuffer>;
+}
+
+// Safety: PyBuffer is Send. Raw pointers are not by default,
+// but here sending one to another thread is fine since we ensure it stays
+// valid.
+unsafe impl Send for PyBufferDeref {}
--- a/rust/hg-cpython/src/revlog.rs	Wed Jun 19 17:03:13 2024 +0200
+++ b/rust/hg-cpython/src/revlog.rs	Wed Jun 19 19:10:49 2024 +0200
@@ -4,32 +4,43 @@
 //
 // This software may be used and distributed according to the terms of the
 // GNU General Public License version 2 or any later version.
+#![allow(non_snake_case)]
 
 use crate::{
     conversion::{rev_pyiter_collect, rev_pyiter_collect_or_else},
+    pybytes_deref::{PyBufferDeref, PyBytesDeref},
     utils::{node_from_py_bytes, node_from_py_object},
+    vfs::PyVfs,
     PyRevision,
 };
 use cpython::{
     buffer::{Element, PyBuffer},
     exc::{IndexError, ValueError},
     ObjectProtocol, PyBool, PyBytes, PyClone, PyDict, PyErr, PyInt, PyList,
-    PyModule, PyObject, PyResult, PySet, PyString, PyTuple, Python,
+    PyModule, PyObject, PyResult, PySet, PyTuple, PyType, Python,
     PythonObject, ToPyObject, UnsafePyLeaked,
 };
 use hg::{
     errors::HgError,
-    index::{
-        IndexHeader, Phase, RevisionDataParams, SnapshotsCache,
-        INDEX_ENTRY_SIZE,
+    index::{Phase, RevisionDataParams, SnapshotsCache, INDEX_ENTRY_SIZE},
+    nodemap::{Block, NodeMapError, NodeTree as CoreNodeTree},
+    revlog::compression::CompressionConfig,
+    revlog::inner_revlog::InnerRevlog as CoreInnerRevlog,
+    revlog::inner_revlog::RevisionBuffer,
+    revlog::options::{
+        RevlogDataConfig, RevlogDeltaConfig, RevlogFeatureConfig,
+        RevlogOpenOptions,
     },
-    nodemap::{Block, NodeMapError, NodeTree as CoreNodeTree},
     revlog::{nodemap::NodeMap, Graph, NodePrefix, RevlogError, RevlogIndex},
-    BaseRevision, Node, Revision, UncheckedRevision, NULL_REVISION,
+    transaction::Transaction,
+    utils::files::{get_bytes_from_path, get_path_from_bytes},
+    BaseRevision, Node, Revision, RevlogType, UncheckedRevision,
+    NULL_REVISION,
 };
 use std::{
-    cell::RefCell,
+    cell::{Cell, RefCell},
     collections::{HashMap, HashSet},
+    sync::OnceLock,
 };
 use vcsgraph::graph::Graph as VCSGraph;
 
@@ -41,12 +52,13 @@
 /// Return a Struct implementing the Graph trait
 pub(crate) fn py_rust_index_to_graph(
     py: Python,
-    index: PyObject,
+    index_proxy: PyObject,
 ) -> PyResult<UnsafePyLeaked<PySharedIndex>> {
-    let midx = index.extract::<Index>(py)?;
-    let leaked = midx.index(py).leak_immutable();
+    let inner_revlog = index_proxy.getattr(py, "inner")?;
+    let inner_revlog = inner_revlog.extract::<InnerRevlog>(py)?;
+    let leaked = inner_revlog.inner(py).leak_immutable();
     // Safety: we don't leak the "faked" reference out of the `UnsafePyLeaked`
-    Ok(unsafe { leaked.map(py, |idx| PySharedIndex { inner: idx }) })
+    Ok(unsafe { leaked.map(py, |idx| PySharedIndex { inner: &idx.index }) })
 }
 
 impl Clone for PySharedIndex {
@@ -91,398 +103,6 @@
     }
 }
 
-py_class!(pub class Index |py| {
-    @shared data index: hg::index::Index;
-    data nt: RefCell<Option<CoreNodeTree>>;
-    data docket: RefCell<Option<PyObject>>;
-    // Holds a reference to the mmap'ed persistent nodemap data
-    data nodemap_mmap: RefCell<Option<PyBuffer>>;
-    // Holds a reference to the mmap'ed persistent index data
-    data index_mmap: RefCell<Option<PyBuffer>>;
-    data head_revs_py_list: RefCell<Option<PyList>>;
-    data head_node_ids_py_list: RefCell<Option<PyList>>;
-
-    def __new__(
-        _cls,
-        data: PyObject,
-        default_header: u32,
-    ) -> PyResult<Self> {
-        Self::new(py, data, default_header)
-    }
-
-    /// Compatibility layer used for Python consumers needing access to the C index
-    ///
-    /// Only use case so far is `scmutil.shortesthexnodeidprefix`,
-    /// that may need to build a custom `nodetree`, based on a specified revset.
-    /// With a Rust implementation of the nodemap, we will be able to get rid of
-    /// this, by exposing our own standalone nodemap class,
-    /// ready to accept `Index`.
-/*    def get_cindex(&self) -> PyResult<PyObject> {
-        Ok(self.cindex(py).borrow().inner().clone_ref(py))
-    }
-*/
-    // Index API involving nodemap, as defined in mercurial/pure/parsers.py
-
-    /// Return Revision if found, raises a bare `error.RevlogError`
-    /// in case of ambiguity, same as C version does
-    def get_rev(&self, node: PyBytes) -> PyResult<Option<PyRevision>> {
-        let opt = self.get_nodetree(py)?.borrow();
-        let nt = opt.as_ref().unwrap();
-        let ridx = &*self.index(py).borrow();
-        let node = node_from_py_bytes(py, &node)?;
-        let rust_rev =
-            nt.find_bin(ridx, node.into()).map_err(|e| nodemap_error(py, e))?;
-        Ok(rust_rev.map(Into::into))
-
-    }
-
-    /// same as `get_rev()` but raises a bare `error.RevlogError` if node
-    /// is not found.
-    ///
-    /// No need to repeat `node` in the exception, `mercurial/revlog.py`
-    /// will catch and rewrap with it
-    def rev(&self, node: PyBytes) -> PyResult<PyRevision> {
-        self.get_rev(py, node)?.ok_or_else(|| revlog_error(py))
-    }
-
-    /// return True if the node exist in the index
-    def has_node(&self, node: PyBytes) -> PyResult<bool> {
-        // TODO OPTIM we could avoid a needless conversion here,
-        // to do when scaffolding for pure Rust switch is removed,
-        // as `get_rev()` currently does the necessary assertions
-        self.get_rev(py, node).map(|opt| opt.is_some())
-    }
-
-    /// find length of shortest hex nodeid of a binary ID
-    def shortest(&self, node: PyBytes) -> PyResult<usize> {
-        let opt = self.get_nodetree(py)?.borrow();
-        let nt = opt.as_ref().unwrap();
-        let idx = &*self.index(py).borrow();
-        match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
-        {
-            Ok(Some(l)) => Ok(l),
-            Ok(None) => Err(revlog_error(py)),
-            Err(e) => Err(nodemap_error(py, e)),
-        }
-    }
-
-    def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> {
-        let opt = self.get_nodetree(py)?.borrow();
-        let nt = opt.as_ref().unwrap();
-        let idx = &*self.index(py).borrow();
-
-        let node_as_string = if cfg!(feature = "python3-sys") {
-            node.cast_as::<PyString>(py)?.to_string(py)?.to_string()
-        }
-        else {
-            let node = node.extract::<PyBytes>(py)?;
-            String::from_utf8_lossy(node.data(py)).to_string()
-        };
-
-        let prefix = NodePrefix::from_hex(&node_as_string)
-            .map_err(|_| PyErr::new::<ValueError, _>(
-                py, format!("Invalid node or prefix '{}'", node_as_string))
-            )?;
-
-        nt.find_bin(idx, prefix)
-            // TODO make an inner API returning the node directly
-            .map(|opt| opt.map(
-                |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes())))
-            .map_err(|e| nodemap_error(py, e))
-
-    }
-
-    /// append an index entry
-    def append(&self, tup: PyTuple) -> PyResult<PyObject> {
-        if tup.len(py) < 8 {
-            // this is better than the panic promised by tup.get_item()
-            return Err(
-                PyErr::new::<IndexError, _>(py, "tuple index out of range"))
-        }
-        let node_bytes = tup.get_item(py, 7).extract(py)?;
-        let node = node_from_py_object(py, &node_bytes)?;
-
-        let rev = self.len(py)? as BaseRevision;
-
-        // This is ok since we will just add the revision to the index
-        let rev = Revision(rev);
-        self.index(py)
-            .borrow_mut()
-            .append(py_tuple_to_revision_data_params(py, tup)?)
-            .unwrap();
-        let idx = &*self.index(py).borrow();
-        self.get_nodetree(py)?.borrow_mut().as_mut().unwrap()
-            .insert(idx, &node, rev)
-            .map_err(|e| nodemap_error(py, e))?;
-        Ok(py.None())
-    }
-
-    def __delitem__(&self, key: PyObject) -> PyResult<()> {
-        // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
-        let start = if let Ok(rev) = key.extract(py) {
-            UncheckedRevision(rev)
-        } else {
-            let start = key.getattr(py, "start")?;
-            UncheckedRevision(start.extract(py)?)
-        };
-        let start = self.index(py)
-            .borrow()
-            .check_revision(start)
-            .ok_or_else(|| {
-                nodemap_error(py, NodeMapError::RevisionNotInIndex(start))
-            })?;
-        self.index(py).borrow_mut().remove(start).unwrap();
-        let mut opt = self.get_nodetree(py)?.borrow_mut();
-        let nt = opt.as_mut().unwrap();
-        nt.invalidate_all();
-        self.fill_nodemap(py, nt)?;
-        Ok(())
-    }
-
-    //
-    // Index methods previously reforwarded to C index (tp_methods)
-    // Same ordering as in revlog.c
-    //
-
-    /// return the gca set of the given revs
-    def ancestors(&self, *args, **_kw) -> PyResult<PyObject> {
-        let rust_res = self.inner_ancestors(py, args)?;
-        Ok(rust_res)
-    }
-
-    /// return the heads of the common ancestors of the given revs
-    def commonancestorsheads(&self, *args, **_kw) -> PyResult<PyObject> {
-        let rust_res = self.inner_commonancestorsheads(py, args)?;
-        Ok(rust_res)
-    }
-
-    /// Clear the index caches and inner py_class data.
-    /// It is Python's responsibility to call `update_nodemap_data` again.
-    def clearcaches(&self) -> PyResult<PyObject> {
-        self.nt(py).borrow_mut().take();
-        self.docket(py).borrow_mut().take();
-        self.nodemap_mmap(py).borrow_mut().take();
-        self.head_revs_py_list(py).borrow_mut().take();
-        self.head_node_ids_py_list(py).borrow_mut().take();
-        self.index(py).borrow().clear_caches();
-        Ok(py.None())
-    }
-
-    /// return the raw binary string representing a revision
-    def entry_binary(&self, *args, **_kw) -> PyResult<PyObject> {
-        let rindex = self.index(py).borrow();
-        let rev = UncheckedRevision(args.get_item(py, 0).extract(py)?);
-        let rust_bytes = rindex.check_revision(rev).and_then(
-            |r| rindex.entry_binary(r))
-            .ok_or_else(|| rev_not_in_index(py, rev))?;
-        let rust_res = PyBytes::new(py, rust_bytes).into_object();
-        Ok(rust_res)
-    }
-
-    /// return a binary packed version of the header
-    def pack_header(&self, *args, **_kw) -> PyResult<PyObject> {
-        let rindex = self.index(py).borrow();
-        let packed = rindex.pack_header(args.get_item(py, 0).extract(py)?);
-        let rust_res = PyBytes::new(py, &packed).into_object();
-        Ok(rust_res)
-    }
-
-    /// compute phases
-    def computephasesmapsets(&self, *args, **_kw) -> PyResult<PyObject> {
-        let py_roots = args.get_item(py, 0).extract::<PyDict>(py)?;
-        let rust_res = self.inner_computephasesmapsets(py, py_roots)?;
-        Ok(rust_res)
-    }
-
-    /// reachableroots
-    def reachableroots2(&self, *args, **_kw) -> PyResult<PyObject> {
-        let rust_res = self.inner_reachableroots2(
-            py,
-            UncheckedRevision(args.get_item(py, 0).extract(py)?),
-            args.get_item(py, 1),
-            args.get_item(py, 2),
-            args.get_item(py, 3).extract(py)?,
-        )?;
-        Ok(rust_res)
-    }
-
-    /// get head revisions
-    def headrevs(&self, *args, **_kw) -> PyResult<PyObject> {
-        let (filtered_revs, stop_rev) = match &args.len(py) {
-             0 => Ok((py.None(), py.None())),
-             1 => Ok((args.get_item(py, 0), py.None())),
-             2 => Ok((args.get_item(py, 0), args.get_item(py, 1))),
-             _ => Err(PyErr::new::<cpython::exc::TypeError, _>(py, "too many arguments")),
-        }?;
-        self.inner_headrevs(py, &filtered_revs, &stop_rev)
-    }
-
-    /// get head nodeids
-    def head_node_ids(&self) -> PyResult<PyObject> {
-        let rust_res = self.inner_head_node_ids(py)?;
-        Ok(rust_res)
-    }
-
-    /// get diff in head revisions
-    def headrevsdiff(&self, *args, **_kw) -> PyResult<PyObject> {
-        let rust_res = self.inner_headrevsdiff(
-          py,
-          &args.get_item(py, 0),
-          &args.get_item(py, 1))?;
-        Ok(rust_res)
-    }
-
-    /// True if the object is a snapshot
-    def issnapshot(&self, *args, **_kw) -> PyResult<bool> {
-        let index = self.index(py).borrow();
-        let result = index
-            .is_snapshot(UncheckedRevision(args.get_item(py, 0).extract(py)?))
-            .map_err(|e| {
-                PyErr::new::<cpython::exc::ValueError, _>(py, e.to_string())
-            })?;
-        Ok(result)
-    }
-
-    /// Gather snapshot data in a cache dict
-    def findsnapshots(&self, *args, **_kw) -> PyResult<PyObject> {
-        let index = self.index(py).borrow();
-        let cache: PyDict = args.get_item(py, 0).extract(py)?;
-        // this methods operates by setting new values in the cache,
-        // hence we will compare results by letting the C implementation
-        // operate over a deepcopy of the cache, and finally compare both
-        // caches.
-        let c_cache = PyDict::new(py);
-        for (k, v) in cache.items(py) {
-            c_cache.set_item(py, k, PySet::new(py, v)?)?;
-        }
-
-        let start_rev = UncheckedRevision(args.get_item(py, 1).extract(py)?);
-        let end_rev = UncheckedRevision(args.get_item(py, 2).extract(py)?);
-        let mut cache_wrapper = PySnapshotsCache{ py, dict: cache };
-        index.find_snapshots(
-            start_rev,
-            end_rev,
-            &mut cache_wrapper,
-        ).map_err(|_| revlog_error(py))?;
-        Ok(py.None())
-    }
-
-    /// determine revisions with deltas to reconstruct fulltext
-    def deltachain(&self, *args, **_kw) -> PyResult<PyObject> {
-        let index = self.index(py).borrow();
-        let rev = args.get_item(py, 0).extract::<BaseRevision>(py)?.into();
-        let stop_rev =
-            args.get_item(py, 1).extract::<Option<BaseRevision>>(py)?;
-        let rev = index.check_revision(rev).ok_or_else(|| {
-            nodemap_error(py, NodeMapError::RevisionNotInIndex(rev))
-        })?;
-        let stop_rev = if let Some(stop_rev) = stop_rev {
-            let stop_rev = UncheckedRevision(stop_rev);
-            Some(index.check_revision(stop_rev).ok_or_else(|| {
-                nodemap_error(py, NodeMapError::RevisionNotInIndex(stop_rev))
-            })?)
-        } else {None};
-        let using_general_delta = args.get_item(py, 2)
-            .extract::<Option<u32>>(py)?
-            .map(|i| i != 0);
-        let (chain, stopped) = index.delta_chain(
-            rev, stop_rev, using_general_delta
-        ).map_err(|e| {
-            PyErr::new::<cpython::exc::ValueError, _>(py, e.to_string())
-        })?;
-
-        let chain: Vec<_> = chain.into_iter().map(|r| r.0).collect();
-        Ok(
-            PyTuple::new(
-                py,
-                &[
-                    chain.into_py_object(py).into_object(),
-                    stopped.into_py_object(py).into_object()
-                ]
-            ).into_object()
-        )
-
-    }
-
-    /// slice planned chunk read to reach a density threshold
-    def slicechunktodensity(&self, *args, **_kw) -> PyResult<PyObject> {
-        let rust_res = self.inner_slicechunktodensity(
-            py,
-            args.get_item(py, 0),
-            args.get_item(py, 1).extract(py)?,
-            args.get_item(py, 2).extract(py)?
-        )?;
-        Ok(rust_res)
-    }
-
-    // index_sequence_methods and index_mapping_methods.
-    //
-    // Since we call back through the high level Python API,
-    // there's no point making a distinction between index_get
-    // and index_getitem.
-    // gracinet 2023: this above is no longer true for the pure Rust impl
-
-    def __len__(&self) -> PyResult<usize> {
-        self.len(py)
-    }
-
-    def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
-        let rust_res = self.inner_getitem(py, key.clone_ref(py))?;
-        Ok(rust_res)
-    }
-
-    def __contains__(&self, item: PyObject) -> PyResult<bool> {
-        // ObjectProtocol does not seem to provide contains(), so
-        // this is an equivalent implementation of the index_contains()
-        // defined in revlog.c
-        match item.extract::<i32>(py) {
-            Ok(rev) => {
-                Ok(rev >= -1 && rev < self.len(py)? as BaseRevision)
-            }
-            Err(_) => {
-                let item_bytes: PyBytes = item.extract(py)?;
-                let rust_res = self.has_node(py, item_bytes)?;
-                Ok(rust_res)
-            }
-        }
-    }
-
-    def nodemap_data_all(&self) -> PyResult<PyBytes> {
-        self.inner_nodemap_data_all(py)
-    }
-
-    def nodemap_data_incremental(&self) -> PyResult<PyObject> {
-        self.inner_nodemap_data_incremental(py)
-    }
-    def update_nodemap_data(
-        &self,
-        docket: PyObject,
-        nm_data: PyObject
-    ) -> PyResult<PyObject> {
-        self.inner_update_nodemap_data(py, docket, nm_data)
-    }
-
-    @property
-    def entry_size(&self) -> PyResult<PyInt> {
-        let rust_res: PyInt = INDEX_ENTRY_SIZE.to_py_object(py);
-        Ok(rust_res)
-    }
-
-    @property
-    def rust_ext_compat(&self) -> PyResult<PyInt> {
-        // will be entirely removed when the Rust index yet useful to
-        // implement in Rust to detangle things when removing `self.cindex`
-        let rust_res: PyInt = 1.to_py_object(py);
-        Ok(rust_res)
-    }
-
-    @property
-    def is_rust(&self) -> PyResult<PyBool> {
-        Ok(false.to_py_object(py))
-    }
-
-});
-
 /// Take a (potentially) mmap'ed buffer, and return the underlying Python
 /// buffer along with the Rust slice into said buffer. We need to keep the
 /// Python buffer around, otherwise we'd get a dangling pointer once the buffer
@@ -538,7 +158,7 @@
         .extract::<PyBytes>(py)?
         .data(py)
         .try_into()
-        .unwrap();
+        .expect("nodeid should be set");
     let flags = (offset_or_flags & 0xFFFF) as u16;
     let data_offset = offset_or_flags >> 16;
     Ok(RevisionDataParams {
@@ -622,35 +242,1168 @@
     }
 }
 
-impl Index {
-    fn new(py: Python, data: PyObject, header: u32) -> PyResult<Self> {
-        // Safety: we keep the buffer around inside the class as `index_mmap`
-        let (buf, bytes) = unsafe { mmap_keeparound(py, data)? };
+// Rust has no static generics (implementing them is hard, presumably
+// because of the separate compilation stages involved).
+// So manually generate all three caches and use them in `with_filelog_cache`.
+static DELTA_CONFIG_CACHE: OnceLock<(PyObject, RevlogDeltaConfig)> =
+    OnceLock::new();
+static DATA_CONFIG_CACHE: OnceLock<(PyObject, RevlogDataConfig)> =
+    OnceLock::new();
+static FEATURE_CONFIG_CACHE: OnceLock<(PyObject, RevlogFeatureConfig)> =
+    OnceLock::new();
+
+/// Cache the first conversion from Python -> Rust config for all filelogs to
+/// save on conversion time when called in a loop.
+fn with_filelog_cache<T: Copy>(
+    py: Python,
+    py_config: &PyObject,
+    revlog_type: RevlogType,
+    cache: &OnceLock<(PyObject, T)>,
+    callback: impl Fn() -> PyResult<T>,
+) -> PyResult<T> {
+    let mut was_cached = false;
+    if revlog_type == RevlogType::Filelog {
+        if let Some((cached_py_config, rust_config)) = cache.get() {
+            was_cached = true;
+            // All filelogs in a given repository *most likely* have the
+            // exact same config, but it's not impossible that some extensions
+            // do some magic with configs or that this code will be used
+            // for longer-running processes. So compare the source `PyObject`
+            // in case the source changed, at the cost of some overhead.
+            // We can't use `py_config.eq(cached_py_config)` because all config
+            // objects are different in Python and `a is b` is false.
+            if py_config.compare(py, cached_py_config)?.is_eq() {
+                return Ok(*rust_config);
+            }
+        }
+    }
+    let config = callback()?;
+    // Avoid taking the lock again if the value was already cached.
+    if !was_cached && revlog_type == RevlogType::Filelog {
+        cache.set((py_config.clone_ref(py), config)).ok();
+    }
+    Ok(config)
+}
+
+fn extract_delta_config(
+    py: Python,
+    py_config: PyObject,
+    revlog_type: RevlogType,
+) -> PyResult<RevlogDeltaConfig> {
+    let get_delta_config = || {
+        let max_deltachain_span = py_config
+            .getattr(py, "max_deltachain_span")?
+            .extract::<i64>(py)?;
+
+        let revlog_delta_config = RevlogDeltaConfig {
+            general_delta: py_config
+                .getattr(py, "general_delta")?
+                .extract(py)?,
+            sparse_revlog: py_config
+                .getattr(py, "sparse_revlog")?
+                .extract(py)?,
+            max_chain_len: py_config
+                .getattr(py, "max_chain_len")?
+                .extract(py)?,
+            max_deltachain_span: if max_deltachain_span < 0 {
+                None
+            } else {
+                Some(max_deltachain_span as u64)
+            },
+            upper_bound_comp: py_config
+                .getattr(py, "upper_bound_comp")?
+                .extract(py)?,
+            delta_both_parents: py_config
+                .getattr(py, "delta_both_parents")?
+                .extract(py)?,
+            candidate_group_chunk_size: py_config
+                .getattr(py, "candidate_group_chunk_size")?
+                .extract(py)?,
+            debug_delta: py_config.getattr(py, "debug_delta")?.extract(py)?,
+            lazy_delta: py_config.getattr(py, "lazy_delta")?.extract(py)?,
+            lazy_delta_base: py_config
+                .getattr(py, "lazy_delta_base")?
+                .extract(py)?,
+        };
+        Ok(revlog_delta_config)
+    };
+    with_filelog_cache(
+        py,
+        &py_config,
+        revlog_type,
+        &DELTA_CONFIG_CACHE,
+        get_delta_config,
+    )
+}
+
+fn extract_data_config(
+    py: Python,
+    py_config: PyObject,
+    revlog_type: RevlogType,
+) -> PyResult<RevlogDataConfig> {
+    let get_data_config = || {
+        Ok(RevlogDataConfig {
+            try_pending: py_config.getattr(py, "try_pending")?.extract(py)?,
+            try_split: py_config.getattr(py, "try_split")?.extract(py)?,
+            check_ambig: py_config.getattr(py, "check_ambig")?.extract(py)?,
+            mmap_large_index: py_config
+                .getattr(py, "mmap_large_index")?
+                .extract(py)?,
+            mmap_index_threshold: py_config
+                .getattr(py, "mmap_index_threshold")?
+                .extract(py)?,
+            chunk_cache_size: py_config
+                .getattr(py, "chunk_cache_size")?
+                .extract(py)?,
+            uncompressed_cache_factor: py_config
+                .getattr(py, "uncompressed_cache_factor")?
+                .extract(py)?,
+            uncompressed_cache_count: py_config
+                .getattr(py, "uncompressed_cache_count")?
+                .extract(py)?,
+            with_sparse_read: py_config
+                .getattr(py, "with_sparse_read")?
+                .extract(py)?,
+            sr_density_threshold: py_config
+                .getattr(py, "sr_density_threshold")?
+                .extract(py)?,
+            sr_min_gap_size: py_config
+                .getattr(py, "sr_min_gap_size")?
+                .extract(py)?,
+            general_delta: py_config
+                .getattr(py, "generaldelta")?
+                .extract(py)?,
+        })
+    };
+
+    with_filelog_cache(
+        py,
+        &py_config,
+        revlog_type,
+        &DATA_CONFIG_CACHE,
+        get_data_config,
+    )
+}
 
-        Self::create_instance(
+fn extract_feature_config(
+    py: Python,
+    py_config: PyObject,
+    revlog_type: RevlogType,
+) -> PyResult<RevlogFeatureConfig> {
+    let get_feature_config = || {
+        let engine_bytes = &py_config
+            .getattr(py, "compression_engine")?
+            .extract::<PyBytes>(py)?;
+        let compression_engine = engine_bytes.data(py);
+        let compression_engine = match compression_engine {
+            b"zlib" => {
+                let compression_options = &py_config
+                    .getattr(py, "compression_engine_options")?
+                    .extract::<PyDict>(py)?;
+                let zlib_level = compression_options
+                    .get_item(py, PyBytes::new(py, &b"zlib.level"[..]));
+                let level = if let Some(level) = zlib_level {
+                    if level.is_none(py) {
+                        None
+                    } else {
+                        Some(level.extract(py)?)
+                    }
+                } else {
+                    None
+                };
+                let mut engine = CompressionConfig::default();
+                if let Some(level) = level {
+                    engine
+                        .set_level(level)
+                        .expect("invalid compression level from Python");
+                }
+                engine
+            }
+            b"zstd" => {
+                let compression_options = &py_config
+                    .getattr(py, "compression_engine_options")?
+                    .extract::<PyDict>(py)?;
+                let zstd_level = compression_options
+                    .get_item(py, PyBytes::new(py, &b"zstd.level"[..]));
+                let level = if let Some(level) = zstd_level {
+                    if level.is_none(py) {
+                        None
+                    } else {
+                        Some(level.extract(py)?)
+                    }
+                } else {
+                    let level = compression_options
+                        .get_item(py, PyBytes::new(py, &b"level"[..]));
+                    if let Some(level) = level {
+                        if level.is_none(py) {
+                            None
+                        } else {
+                            Some(level.extract(py)?)
+                        }
+                    } else {
+                        None
+                    }
+                };
+                CompressionConfig::zstd(level)
+                    .expect("invalid compression level from Python")
+            }
+            b"none" => CompressionConfig::None,
+            e => {
+                return Err(PyErr::new::<ValueError, _>(
+                    py,
+                    format!(
+                        "invalid compression engine {}",
+                        String::from_utf8_lossy(e)
+                    ),
+                ))
+            }
+        };
+        let revlog_feature_config = RevlogFeatureConfig {
+            compression_engine,
+            censorable: py_config.getattr(py, "censorable")?.extract(py)?,
+            has_side_data: py_config
+                .getattr(py, "has_side_data")?
+                .extract(py)?,
+            compute_rank: py_config
+                .getattr(py, "compute_rank")?
+                .extract(py)?,
+            canonical_parent_order: py_config
+                .getattr(py, "canonical_parent_order")?
+                .extract(py)?,
+            enable_ellipsis: py_config
+                .getattr(py, "enable_ellipsis")?
+                .extract(py)?,
+        };
+        Ok(revlog_feature_config)
+    };
+    with_filelog_cache(
+        py,
+        &py_config,
+        revlog_type,
+        &FEATURE_CONFIG_CACHE,
+        get_feature_config,
+    )
+}
+
+fn revlog_error_from_msg(py: Python, e: impl ToString) -> PyErr {
+    let msg = e.to_string();
+
+    match py
+        .import("mercurial.error")
+        .and_then(|m| m.get(py, "RevlogError"))
+    {
+        Err(e) => e,
+        Ok(cls) => {
+            let msg = PyBytes::new(py, msg.as_bytes());
+            PyErr::from_instance(
+                py,
+                cls.call(py, (msg,), None).ok().into_py_object(py),
+            )
+        }
+    }
+}
+
+py_class!(pub class ReadingContextManager |py| {
+    data inner_revlog: RefCell<InnerRevlog>;
+
+    def __enter__(&self) -> PyResult<PyObject> {
+        let res = self.inner_revlog(py)
+            .borrow()
+            .inner(py)
+            .borrow()
+            .enter_reading_context()
+            .map_err(|e| revlog_error_from_msg(py, e));
+        if let Err(e) = res {
+            // `__exit__` is not called from Python if `__enter__` fails
+            self.inner_revlog(py)
+                .borrow()
+                .inner(py)
+                .borrow()
+                .exit_reading_context();
+            return Err(e)
+        }
+        Ok(py.None())
+    }
+
+    def __exit__(
+        &self,
+        ty: Option<PyType>,
+        value: PyObject,
+        traceback: PyObject
+    ) -> PyResult<PyObject> {
+        // unused arguments, keep clippy from complaining without adding
+        // a general rule
+        let _ = ty;
+        let _ = value;
+        let _ = traceback;
+
+        self.inner_revlog(py)
+            .borrow()
+            .inner(py)
+            .borrow()
+            .exit_reading_context();
+        Ok(py.None())
+    }
+});
+
+// Only used from Python *tests*
+py_class!(pub class PyFileHandle |py| {
+    data inner_file: RefCell<std::os::fd::RawFd>;
+
+    def tell(&self) -> PyResult<PyObject> {
+        let locals = PyDict::new(py);
+        locals.set_item(py, "os", py.import("os")?)?;
+        locals.set_item(py, "fd", *self.inner_file(py).borrow())?;
+        let f = py.eval("os.fdopen(fd)", None, Some(&locals))?;
+
+        // Prevent Python from closing the file after garbage collecting.
+        // This is fine since Rust is still holding on to the actual File.
+        // (and also because it's only used in tests).
+        std::mem::forget(f.clone_ref(py));
+
+        locals.set_item(py, "f", f)?;
+        let res = py.eval("f.tell()", None, Some(&locals))?;
+        Ok(res)
+    }
+});
+
+/// Wrapper around a Python transaction object, to keep `hg-core` oblivious
+/// of the fact it's being called from Python.
+pub struct PyTransaction {
+    inner: PyObject,
+}
+
+impl PyTransaction {
+    pub fn new(inner: PyObject) -> Self {
+        Self { inner }
+    }
+}
+
+impl Clone for PyTransaction {
+    fn clone(&self) -> Self {
+        let gil = &Python::acquire_gil();
+        let py = gil.python();
+        Self {
+            inner: self.inner.clone_ref(py),
+        }
+    }
+}
+
+impl Transaction for PyTransaction {
+    fn add(&mut self, file: impl AsRef<std::path::Path>, offset: usize) {
+        let gil = &Python::acquire_gil();
+        let py = gil.python();
+        let file = PyBytes::new(py, &get_bytes_from_path(file.as_ref()));
+        self.inner
+            .call_method(py, "add", (file, offset), None)
+            .expect("transaction add failed");
+    }
+}
+
+py_class!(pub class WritingContextManager |py| {
+    data inner_revlog: RefCell<InnerRevlog>;
+    data transaction: RefCell<PyTransaction>;
+    data data_end: Cell<Option<usize>>;
+
+    def __enter__(&self) -> PyResult<PyObject> {
+        let res = self.inner_revlog(py)
+            .borrow_mut()
+            .inner(py)
+            .borrow_mut()
+            .enter_writing_context(
+                self.data_end(py).get(),
+                &mut *self.transaction(py).borrow_mut()
+            ).map_err(|e| revlog_error_from_msg(py, e));
+        if let Err(e) = res {
+            // `__exit__` is not called from Python if `__enter__` fails
+            self.inner_revlog(py)
+                .borrow_mut()
+                .inner(py)
+                .borrow_mut()
+                .exit_writing_context();
+            return Err(e)
+        }
+        Ok(py.None())
+    }
+
+    def __exit__(
+        &self,
+        ty: Option<PyType>,
+        value: PyObject,
+        traceback: PyObject
+    ) -> PyResult<PyObject> {
+        // unused arguments, keep clippy from complaining without adding
+        // a general rule
+        let _ = ty;
+        let _ = value;
+        let _ = traceback;
+
+        self.inner_revlog(py)
+            .borrow_mut()
+            .inner(py)
+            .borrow_mut()
+            .exit_writing_context();
+        Ok(py.None())
+    }
+});
+
+py_class!(pub class InnerRevlog |py| {
+    @shared data inner: CoreInnerRevlog;
+    data nt: RefCell<Option<CoreNodeTree>>;
+    data docket: RefCell<Option<PyObject>>;
+    // Holds a reference to the mmap'ed persistent nodemap data
+    data nodemap_mmap: RefCell<Option<PyBuffer>>;
+    // Holds a reference to the mmap'ed persistent index data
+    data index_mmap: RefCell<PyBuffer>;
+    data head_revs_py_list: RefCell<Option<PyList>>;
+    data head_node_ids_py_list: RefCell<Option<PyList>>;
+    data revision_cache: RefCell<Option<PyObject>>;
+
+    def __new__(
+        _cls,
+        opener: PyObject,
+        index_data: PyObject,
+        index_file: PyObject,
+        data_file: PyObject,
+        sidedata_file: PyObject,
+        inline: bool,
+        data_config: PyObject,
+        delta_config: PyObject,
+        feature_config: PyObject,
+        chunk_cache: PyObject,
+        default_compression_header: PyObject,
+        revlog_type: usize,
+    ) -> PyResult<Self> {
+        Self::inner_new(
             py,
-            hg::index::Index::new(
-                bytes,
-                IndexHeader::parse(&header.to_be_bytes())
-                    .expect("default header is broken"),
+            opener,
+            index_data,
+            index_file,
+            data_file,
+            sidedata_file,
+            inline,
+            data_config,
+            delta_config,
+            feature_config,
+            chunk_cache,
+            default_compression_header,
+            revlog_type
+        )
+    }
+
+    def clear_cache(&self) -> PyResult<PyObject> {
+        assert!(!self.is_delaying(py)?);
+        self.revision_cache(py).borrow_mut().take();
+        self.inner(py).borrow_mut().clear_cache();
+        Ok(py.None())
+    }
+
+    @property def canonical_index_file(&self) -> PyResult<PyBytes> {
+        let path = self.inner(py).borrow().canonical_index_file();
+        Ok(PyBytes::new(py, &get_bytes_from_path(path)))
+    }
+
+    @property def is_delaying(&self) -> PyResult<bool> {
+        Ok(self.inner(py).borrow().is_delaying())
+    }
+
+    @property def _revisioncache(&self) -> PyResult<PyObject> {
+        let cache = &*self.revision_cache(py).borrow();
+        match cache {
+            None => Ok(py.None()),
+            Some(cache) => {
+                Ok(cache.clone_ref(py))
+            }
+        }
+
+    }
+
+    @property def _writinghandles(&self) -> PyResult<PyObject> {
+        use std::os::fd::AsRawFd;
+
+        let inner = self.inner(py).borrow();
+        let handles = inner.python_writing_handles();
+
+        match handles.as_ref() {
+            None => Ok(py.None()),
+            Some(handles) => {
+                let d_handle = if let Some(d_handle) = &handles.data_handle {
+                    let handle = RefCell::new(d_handle.file.as_raw_fd());
+                    Some(PyFileHandle::create_instance(py, handle)?)
+                } else {
+                    None
+                };
+                let handle =
+                    RefCell::new(handles.index_handle.file.as_raw_fd());
+                Ok(
+                    (
+                        PyFileHandle::create_instance(py, handle)?,
+                        d_handle,
+                        py.None(),  // Sidedata handle
+
+                    ).to_py_object(py).into_object()
+                )
+            }
+        }
+
+    }
+
+    @_revisioncache.setter def set_revision_cache(
+        &self,
+        value: Option<PyObject>
+    ) -> PyResult<()> {
+        *self.revision_cache(py).borrow_mut() = value.clone_ref(py);
+        match value {
+            None => {
+                // This means the property has been deleted, *not* that the
+                // property has been set to `None`. Whatever happens is up
+                // to the implementation. Here we just set it to `None`.
+                self
+                    .inner(py)
+                    .borrow()
+                    .last_revision_cache
+                    .lock()
+                    .expect("lock should not be held")
+                    .take();
+            },
+            Some(tuple) => {
+                if tuple.is_none(py) {
+                    self
+                        .inner(py)
+                        .borrow()
+                        .last_revision_cache
+                        .lock()
+                        .expect("lock should not be held")
+                        .take();
+                    return Ok(())
+                }
+                let node = tuple.get_item(py, 0)?.extract::<PyBytes>(py)?;
+                let node = node_from_py_bytes(py, &node)?;
+                let rev = tuple.get_item(py, 1)?.extract::<BaseRevision>(py)?;
+                // Ok because Python only sets this if the revision has been
+                // checked
+                let rev = Revision(rev);
+                let data = tuple.get_item(py, 2)?.extract::<PyBytes>(py)?;
+                let inner = self.inner(py).borrow();
+                let mut last_revision_cache = inner
+                    .last_revision_cache
+                    .lock()
+                    .expect("lock should not be held");
+                *last_revision_cache =
+                    Some((node, rev, Box::new(PyBytesDeref::new(py, data))));
+            }
+        }
+        Ok(())
+    }
+
+    @property def inline(&self) -> PyResult<bool> {
+        Ok(self.inner(py).borrow().is_inline())
+    }
+
+    @inline.setter def set_inline(
+        &self,
+        value: Option<PyObject>
+    ) -> PyResult<()> {
+        if let Some(v) = value {
+            self.inner(py).borrow_mut().inline = v.extract(py)?;
+        };
+        Ok(())
+    }
+
+    @property def index_file(&self) -> PyResult<PyBytes> {
+        Ok(
+            PyBytes::new(
+                py,
+                &get_bytes_from_path(&self.inner(py).borrow().index_file)
             )
-            .map_err(|e| {
-                revlog_error_with_msg(py, e.to_string().as_bytes())
-            })?,
-            RefCell::new(None),
-            RefCell::new(None),
-            RefCell::new(None),
-            RefCell::new(Some(buf)),
-            RefCell::new(None),
-            RefCell::new(None),
         )
     }
 
+    @index_file.setter def set_index_file(
+        &self,
+        value: Option<PyObject>
+    ) -> PyResult<()> {
+        let path = get_path_from_bytes(
+            value
+                .expect("don't delete the index path")
+                .extract::<PyBytes>(py)?
+                .data(py)
+        ).to_owned();
+        self.inner(py).borrow_mut().index_file = path;
+        Ok(())
+    }
+
+    @property def is_writing(&self) -> PyResult<bool> {
+        Ok(self.inner(py).borrow().is_writing())
+    }
+
+    @property def is_open(&self) -> PyResult<bool> {
+        Ok(self.inner(py).borrow().is_open())
+    }
+
+    def issnapshot(&self, rev: PyRevision) -> PyResult<bool> {
+        self.inner_issnapshot(py, UncheckedRevision(rev.0))
+    }
+
+    def _deltachain(&self, *args, **kw) -> PyResult<PyObject> {
+        let inner = self.inner(py).borrow();
+        let general_delta = inner.index.uses_generaldelta();
+        let args = PyTuple::new(
+            py,
+            &[
+                args.get_item(py, 0),
+                kw.and_then(|d| d.get_item(py, "stoprev")).to_py_object(py),
+                general_delta.to_py_object(py).into_object(),
+            ]
+        );
+        self._index_deltachain(py, &args, kw)
+    }
+
+    def compress(&self, data: PyObject) -> PyResult<PyTuple> {
+        let inner = self.inner(py).borrow();
+        let py_buffer = PyBuffer::get(py, &data)?;
+        let deref = PyBufferDeref::new(py, py_buffer)?;
+        let compressed = inner.compress(&deref)
+        .map_err(|e| revlog_error_from_msg(py, e))?;
+        let compressed = compressed.as_deref();
+        let header = if compressed.is_some() {
+            PyBytes::new(py, &b""[..])
+        } else {
+            PyBytes::new(py, &b"u"[..])
+        };
+        Ok(
+            (
+                header,
+                PyBytes::new(py, compressed.unwrap_or(&deref))
+            ).to_py_object(py)
+        )
+    }
+
+    def reading(&self) -> PyResult<ReadingContextManager> {
+        ReadingContextManager::create_instance(
+            py,
+            RefCell::new(self.clone_ref(py)),
+        )
+    }
+
+    def writing(
+        &self,
+        transaction: PyObject,
+        data_end: Option<usize>,
+        sidedata_end: Option<usize>,
+    ) -> PyResult<WritingContextManager> {
+        // Silence unused argument (only relevant for changelog v2)
+        let _ = sidedata_end;
+        WritingContextManager::create_instance(
+            py,
+            RefCell::new(self.clone_ref(py)),
+            RefCell::new(PyTransaction::new(transaction)),
+            Cell::new(data_end)
+        )
+    }
+
+    def split_inline(
+        &self,
+        _tr: PyObject,
+        header: i32,
+        new_index_file_path: Option<PyObject>
+    ) -> PyResult<PyBytes> {
+        let mut inner = self.inner(py).borrow_mut();
+        let new_index_file_path = match new_index_file_path {
+            Some(path) => {
+                let path = path.extract::<PyBytes>(py)?;
+                Some(get_path_from_bytes(path.data(py)).to_owned())
+            },
+            None => None,
+        };
+        let header = hg::index::IndexHeader::parse(&header.to_be_bytes());
+        let header = header.expect("invalid header bytes");
+        let path = inner
+            .split_inline(header, new_index_file_path)
+            .map_err(|e| revlog_error_from_msg(py, e))?;
+        Ok(PyBytes::new(py, &get_bytes_from_path(path)))
+    }
+
+    def get_segment_for_revs(
+        &self,
+        startrev: PyRevision,
+        endrev: PyRevision,
+    ) -> PyResult<PyTuple> {
+        let inner = self.inner(py).borrow();
+        let (offset, data) = inner
+            .get_segment_for_revs(Revision(startrev.0), Revision(endrev.0))
+            .map_err(|e| revlog_error_from_msg(py, e))?;
+        let data = PyBytes::new(py, &data);
+        Ok((offset, data).to_py_object(py))
+    }
+
+    def raw_text(
+        &self,
+        _node: PyObject,
+        rev: PyRevision
+    ) -> PyResult<PyBytes> {
+        let inner = self.inner(py).borrow();
+        let mut py_bytes = PyBytes::new(py, &[]);
+        inner
+            .raw_text(Revision(rev.0), |size, f| {
+                py_bytes = with_pybytes_buffer(py, size, f)?;
+                Ok(())
+            }).map_err(|e| revlog_error_from_msg(py, e))?;
+        Ok(py_bytes)
+    }
+
+    def _chunk(
+        &self,
+        rev: PyRevision,
+    ) -> PyResult<PyBytes> {
+        let inner = self.inner(py).borrow();
+        let chunk = inner
+            .chunk_for_rev(Revision(rev.0))
+            .map_err(|e| revlog_error_from_msg(py, e))?;
+        let chunk = PyBytes::new(py, &chunk);
+        Ok(chunk)
+    }
+
+    def write_entry(
+        &self,
+        transaction: PyObject,
+        entry: PyObject,
+        data: PyTuple,
+        _link: PyObject,
+        offset: usize,
+        _sidedata: PyObject,
+        _sidedata_offset: PyInt,
+        index_end: Option<u64>,
+        data_end: Option<u64>,
+        _sidedata_end: Option<PyInt>,
+    ) -> PyResult<PyTuple> {
+        let mut inner = self.inner(py).borrow_mut();
+        let transaction = PyTransaction::new(transaction);
+        let py_bytes = entry.extract(py)?;
+        let entry = PyBytesDeref::new(py, py_bytes);
+        let header = data.get_item(py, 0).extract::<PyBytes>(py)?;
+        let header = header.data(py);
+        let data = data.get_item(py, 1);
+        let py_bytes = data.extract(py)?;
+        let data = PyBytesDeref::new(py, py_bytes);
+        Ok(
+            inner.write_entry(
+                transaction,
+                &entry,
+                (header, &data),
+                offset,
+                index_end,
+                data_end
+            ).map_err(|e| revlog_error_from_msg(py, e))?
+             .to_py_object(py)
+        )
+    }
+
+    def delay(&self) -> PyResult<Option<PyBytes>> {
+        let path = self.inner(py)
+            .borrow_mut()
+            .delay()
+            .map_err(|e| revlog_error_from_msg(py, e))?;
+        Ok(path.map(|p| PyBytes::new(py, &get_bytes_from_path(p))))
+    }
+
+    def write_pending(&self) -> PyResult<PyTuple> {
+        let (path, any_pending) = self.inner(py)
+            .borrow_mut()
+            .write_pending()
+            .map_err(|e| revlog_error_from_msg(py, e))?;
+        let maybe_path = match path {
+            Some(path) => {
+                PyBytes::new(py, &get_bytes_from_path(path)).into_object()
+            },
+            None => {
+                py.None()
+            }
+        };
+        Ok(
+            (
+                maybe_path,
+                any_pending
+            ).to_py_object(py)
+        )
+    }
+
+    def finalize_pending(&self) -> PyResult<PyBytes> {
+        let path = self.inner(py)
+            .borrow_mut()
+            .finalize_pending()
+            .map_err(|e| revlog_error_from_msg(py, e))?;
+        Ok(PyBytes::new(py, &get_bytes_from_path(path)))
+    }
+
+    // -- forwarded index methods --
+
+    def _index_get_rev(&self, node: PyBytes) -> PyResult<Option<PyRevision>> {
+        let opt = self.get_nodetree(py)?.borrow();
+        let nt = opt.as_ref().expect("nodetree should be set");
+        let ridx = &self.inner(py).borrow().index;
+        let node = node_from_py_bytes(py, &node)?;
+        let rust_rev =
+            nt.find_bin(ridx, node.into()).map_err(|e| nodemap_error(py, e))?;
+        Ok(rust_rev.map(Into::into))
+    }
+
+    /// same as `_index_get_rev()` but raises a bare `error.RevlogError` if node
+    /// is not found.
+    ///
+    /// No need to repeat `node` in the exception, `mercurial/revlog.py`
+    /// will catch and rewrap with it
+    def _index_rev(&self, node: PyBytes) -> PyResult<PyRevision> {
+        self._index_get_rev(py, node)?.ok_or_else(|| revlog_error(py))
+    }
+
+    /// return True if the node exist in the index
+    def _index_has_node(&self, node: PyBytes) -> PyResult<bool> {
+        // TODO OPTIM we could avoid a needless conversion here,
+        // to do when scaffolding for pure Rust switch is removed,
+        // as `_index_get_rev()` currently does the necessary assertions
+        self._index_get_rev(py, node).map(|opt| opt.is_some())
+    }
+
+    /// find length of shortest hex nodeid of a binary ID
+    def _index_shortest(&self, node: PyBytes) -> PyResult<usize> {
+        let opt = self.get_nodetree(py)?.borrow();
+        let nt = opt.as_ref().expect("nodetree should be set");
+        let idx = &self.inner(py).borrow().index;
+        match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
+        {
+            Ok(Some(l)) => Ok(l),
+            Ok(None) => Err(revlog_error(py)),
+            Err(e) => Err(nodemap_error(py, e)),
+        }
+    }
+
+    def _index_partialmatch(
+        &self,
+        node: PyObject
+    ) -> PyResult<Option<PyBytes>> {
+        let opt = self.get_nodetree(py)?.borrow();
+        let nt = opt.as_ref().expect("nodetree should be set");
+        let idx = &self.inner(py).borrow().index;
+
+        let node = node.extract::<PyBytes>(py)?;
+        let node_as_string = String::from_utf8_lossy(node.data(py));
+
+        let prefix = NodePrefix::from_hex(node_as_string.to_string())
+            .map_err(|_| PyErr::new::<ValueError, _>(
+                py, format!("Invalid node or prefix '{}'", node_as_string))
+            )?;
+
+        nt.find_bin(idx, prefix)
+            // TODO make an inner API returning the node directly
+            .map(|opt| opt.map(|rev| {
+                    PyBytes::new(
+                        py,
+                        idx.node(rev).expect("node should exist").as_bytes()
+                    )
+            }))
+            .map_err(|e| nodemap_error(py, e))
+
+    }
+
+    /// append an index entry
+    def _index_append(&self, tup: PyTuple) -> PyResult<PyObject> {
+        if tup.len(py) < 8 {
+            // this is better than the panic promised by tup.get_item()
+            return Err(
+                PyErr::new::<IndexError, _>(py, "tuple index out of range"))
+        }
+        let node_bytes = tup.get_item(py, 7).extract(py)?;
+        let node = node_from_py_object(py, &node_bytes)?;
+
+        let rev = self.len(py)? as BaseRevision;
+
+        // This is ok since we will just add the revision to the index
+        let rev = Revision(rev);
+        self.inner(py)
+            .borrow_mut()
+            .index
+            .append(py_tuple_to_revision_data_params(py, tup)?)
+            .map_err(|e| revlog_error_from_msg(py, e))?;
+        let idx = &self.inner(py).borrow().index;
+        self.get_nodetree(py)?
+            .borrow_mut()
+            .as_mut()
+            .expect("nodetree should be set")
+            .insert(idx, &node, rev)
+            .map_err(|e| nodemap_error(py, e))?;
+        Ok(py.None())
+    }
+
+    def _index___delitem__(&self, key: PyObject) -> PyResult<PyObject> {
+        // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
+        let start = if let Ok(rev) = key.extract(py) {
+            UncheckedRevision(rev)
+        } else {
+            let start = key.getattr(py, "start")?;
+            UncheckedRevision(start.extract(py)?)
+        };
+        let mut borrow = self.inner(py).borrow_mut();
+        let start = borrow
+            .index
+            .check_revision(start)
+            .ok_or_else(|| {
+                nodemap_error(py, NodeMapError::RevisionNotInIndex(start))
+            })?;
+        borrow.index
+            .remove(start)
+            .map_err(|e| revlog_error_from_msg(py, e))?;
+        drop(borrow);
+        let mut opt = self.get_nodetree(py)?.borrow_mut();
+        let nt = opt.as_mut().expect("nodetree should be set");
+        nt.invalidate_all();
+        self.fill_nodemap(py, nt)?;
+        Ok(py.None())
+    }
+
+    /// return the gca set of the given revs
+    def _index_ancestors(&self, *args, **_kw) -> PyResult<PyObject> {
+        let rust_res = self.inner_ancestors(py, args)?;
+        Ok(rust_res)
+    }
+
+    /// return the heads of the common ancestors of the given revs
+    def _index_commonancestorsheads(
+        &self,
+        *args,
+        **_kw
+    ) -> PyResult<PyObject> {
+        let rust_res = self.inner_commonancestorsheads(py, args)?;
+        Ok(rust_res)
+    }
+
+    /// Clear the index caches and inner py_class data.
+    /// It is Python's responsibility to call `update_nodemap_data` again.
+    def _index_clearcaches(&self) -> PyResult<PyObject> {
+        self.nt(py).borrow_mut().take();
+        self.docket(py).borrow_mut().take();
+        self.nodemap_mmap(py).borrow_mut().take();
+        self.head_revs_py_list(py).borrow_mut().take();
+        self.head_node_ids_py_list(py).borrow_mut().take();
+        self.inner(py).borrow_mut().index.clear_caches();
+        Ok(py.None())
+    }
+
+    /// return the raw binary string representing a revision
+    def _index_entry_binary(&self, *args, **_kw) -> PyResult<PyObject> {
+        let rindex = &self.inner(py).borrow().index;
+        let rev = UncheckedRevision(args.get_item(py, 0).extract(py)?);
+        let rust_bytes = rindex.check_revision(rev).and_then(
+            |r| rindex.entry_binary(r)).ok_or_else(|| rev_not_in_index(py, rev)
+        )?;
+        let rust_res = PyBytes::new(py, rust_bytes).into_object();
+        Ok(rust_res)
+    }
+
+
+    /// return a binary packed version of the header
+    def _index_pack_header(&self, *args, **_kw) -> PyResult<PyObject> {
+        let rindex = &self.inner(py).borrow().index;
+        let packed = rindex.pack_header(args.get_item(py, 0).extract(py)?);
+        let rust_res = PyBytes::new(py, &packed).into_object();
+        Ok(rust_res)
+    }
+
+    /// compute phases
+    def _index_computephasesmapsets(
+        &self,
+        *args,
+        **_kw
+    ) -> PyResult<PyObject> {
+        let py_roots = args.get_item(py, 0).extract::<PyDict>(py)?;
+        let rust_res = self.inner_computephasesmapsets(py, py_roots)?;
+        Ok(rust_res)
+    }
+
+    /// reachableroots
+    def _index_reachableroots2(&self, *args, **_kw) -> PyResult<PyObject> {
+        let rust_res = self.inner_reachableroots2(
+            py,
+            UncheckedRevision(args.get_item(py, 0).extract(py)?),
+            args.get_item(py, 1),
+            args.get_item(py, 2),
+            args.get_item(py, 3).extract(py)?,
+        )?;
+        Ok(rust_res)
+    }
+
+    /// get head revisions
+    def _index_headrevs(&self, *args, **_kw) -> PyResult<PyObject> {
+        let (filtered_revs, stop_rev) = match &args.len(py) {
+             0 => Ok((py.None(), py.None())),
+             1 => Ok((args.get_item(py, 0), py.None())),
+             2 => Ok((args.get_item(py, 0), args.get_item(py, 1))),
+             _ => Err(PyErr::new::<cpython::exc::TypeError, _>(py, "too many arguments")),
+        }?;
+        self.inner_headrevs(py, &filtered_revs, &stop_rev)
+    }
+
+    /// get head nodeids
+    def _index_head_node_ids(&self) -> PyResult<PyObject> {
+        let rust_res = self.inner_head_node_ids(py)?;
+        Ok(rust_res)
+    }
+
+    /// get diff in head revisions
+    def _index_headrevsdiff(&self, *args, **_kw) -> PyResult<PyObject> {
+        let rust_res = self.inner_headrevsdiff(
+          py,
+          &args.get_item(py, 0),
+          &args.get_item(py, 1))?;
+        Ok(rust_res)
+    }
+
+    /// True if the object is a snapshot
+    def _index_issnapshot(&self, *args, **_kw) -> PyResult<bool> {
+        let rev = UncheckedRevision(args.get_item(py, 0).extract(py)?);
+        self.inner_issnapshot(py, rev)
+    }
+
+    /// Gather snapshot data in a cache dict
+    def _index_findsnapshots(&self, *args, **_kw) -> PyResult<PyObject> {
+        let index = &self.inner(py).borrow().index;
+        let cache: PyDict = args.get_item(py, 0).extract(py)?;
+        // this method operates by setting new values in the cache,
+        // hence we will compare results by letting the C implementation
+        // operate over a deepcopy of the cache, and finally compare both
+        // caches.
+        let c_cache = PyDict::new(py);
+        for (k, v) in cache.items(py) {
+            c_cache.set_item(py, k, PySet::new(py, v)?)?;
+        }
+
+        let start_rev = UncheckedRevision(args.get_item(py, 1).extract(py)?);
+        let end_rev = UncheckedRevision(args.get_item(py, 2).extract(py)?);
+        let mut cache_wrapper = PySnapshotsCache{ py, dict: cache };
+        index.find_snapshots(
+            start_rev,
+            end_rev,
+            &mut cache_wrapper,
+        ).map_err(|_| revlog_error(py))?;
+        Ok(py.None())
+    }
+
+    /// determine revisions with deltas to reconstruct fulltext
+    def _index_deltachain(&self, *args, **_kw) -> PyResult<PyObject> {
+        let index = &self.inner(py).borrow().index;
+        let rev = args.get_item(py, 0).extract::<BaseRevision>(py)?.into();
+        let stop_rev =
+            args.get_item(py, 1).extract::<Option<BaseRevision>>(py)?;
+        let rev = index.check_revision(rev).ok_or_else(|| {
+            nodemap_error(py, NodeMapError::RevisionNotInIndex(rev))
+        })?;
+        let stop_rev = if let Some(stop_rev) = stop_rev {
+            let stop_rev = UncheckedRevision(stop_rev);
+            Some(index.check_revision(stop_rev).ok_or_else(|| {
+                nodemap_error(py, NodeMapError::RevisionNotInIndex(stop_rev))
+            })?)
+        } else {None};
+        let using_general_delta = args.get_item(py, 2)
+            .extract::<Option<u32>>(py)?
+            .map(|i| i != 0);
+        let (chain, stopped) = index.delta_chain(
+            rev, stop_rev, using_general_delta
+        ).map_err(|e| {
+            PyErr::new::<cpython::exc::ValueError, _>(py, e.to_string())
+        })?;
+
+        let chain: Vec<_> = chain.into_iter().map(|r| r.0).collect();
+        Ok(
+            PyTuple::new(
+                py,
+                &[
+                    chain.into_py_object(py).into_object(),
+                    stopped.into_py_object(py).into_object()
+                ]
+            ).into_object()
+        )
+    }
+
+    /// slice planned chunk read to reach a density threshold
+    def _index_slicechunktodensity(&self, *args, **_kw) -> PyResult<PyObject> {
+        let rust_res = self.inner_slicechunktodensity(
+            py,
+            args.get_item(py, 0),
+            args.get_item(py, 1).extract(py)?,
+            args.get_item(py, 2).extract(py)?
+        )?;
+        Ok(rust_res)
+    }
+
+    def _index___len__(&self) -> PyResult<usize> {
+        self.len(py)
+    }
+
+    def _index___getitem__(&self, key: PyObject) -> PyResult<PyObject> {
+        let rust_res = self.inner_getitem(py, key.clone_ref(py))?;
+        Ok(rust_res)
+    }
+
+    def _index___contains__(&self, item: PyObject) -> PyResult<bool> {
+        // ObjectProtocol does not seem to provide contains(), so
+        // this is an equivalent implementation of the index_contains()
+        // defined in revlog.c
+        match item.extract::<i32>(py) {
+            Ok(rev) => {
+                Ok(rev >= -1 && rev < self.len(py)? as BaseRevision)
+            }
+            Err(_) => {
+                let item_bytes: PyBytes = item.extract(py)?;
+                let rust_res = self._index_has_node(py, item_bytes)?;
+                Ok(rust_res)
+            }
+        }
+    }
+
+    def _index_nodemap_data_all(&self) -> PyResult<PyBytes> {
+        self.inner_nodemap_data_all(py)
+    }
+
+    def _index_nodemap_data_incremental(&self) -> PyResult<PyObject> {
+        self.inner_nodemap_data_incremental(py)
+    }
+
+    def _index_update_nodemap_data(
+        &self,
+        docket: PyObject,
+        nm_data: PyObject
+    ) -> PyResult<PyObject> {
+        self.inner_update_nodemap_data(py, docket, nm_data)
+    }
+
+    @property
+    def _index_entry_size(&self) -> PyResult<PyInt> {
+        let rust_res: PyInt = INDEX_ENTRY_SIZE.to_py_object(py);
+        Ok(rust_res)
+    }
+
+    @property
+    def _index_rust_ext_compat(&self) -> PyResult<PyInt> {
+    // Compatibility scaffolding only: kept while the Python side still
+    // needs it; to be entirely removed along with `self.cindex`.
+        let rust_res: PyInt = 1.to_py_object(py);
+        Ok(rust_res)
+    }
+
+    @property
+    def _index_is_rust(&self) -> PyResult<PyBool> {
+        Ok(false.to_py_object(py))
+    }
+
+
+});
+
+/// Index methods forwarded from the former standalone Rust index object
+impl InnerRevlog {
     fn len(&self, py: Python) -> PyResult<usize> {
-        let rust_index_len = self.index(py).borrow().len();
+        let rust_index_len = self.inner(py).borrow().index.len();
         Ok(rust_index_len)
     }
-
     /// This is scaffolding at this point, but it could also become
     /// a way to start a persistent nodemap or perform a
     /// vacuum / repack operation
@@ -659,11 +1412,11 @@
         py: Python,
         nt: &mut CoreNodeTree,
     ) -> PyResult<PyObject> {
-        let index = self.index(py).borrow();
+        let index = &self.inner(py).borrow().index;
         for r in 0..self.len(py)? {
             let rev = Revision(r as BaseRevision);
             // in this case node() won't ever return None
-            nt.insert(&*index, index.node(rev).unwrap(), rev)
+            nt.insert(index, index.node(rev).expect("node should exist"), rev)
                 .map_err(|e| nodemap_error(py, e))?
         }
         Ok(py.None())
@@ -684,7 +1437,11 @@
 
     /// Returns the full nodemap bytes to be written as-is to disk
     fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> {
-        let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap();
+        let nodemap = self
+            .get_nodetree(py)?
+            .borrow_mut()
+            .take()
+            .expect("nodetree should exist");
         let (readonly, bytes) = nodemap.into_readonly_and_added_bytes();
 
         // If there's anything readonly, we need to build the data again from
@@ -717,7 +1474,11 @@
             None => return Ok(py.None()),
         };
 
-        let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap();
+        let node_tree = self
+            .get_nodetree(py)?
+            .borrow_mut()
+            .take()
+            .expect("nodetree should exist");
         let masked_blocks = node_tree.masked_readonly_blocks();
         let (_, data) = node_tree.into_readonly_and_added_bytes();
         let changed = masked_blocks * std::mem::size_of::<Block>();
@@ -747,7 +1508,7 @@
             .extract::<BaseRevision>(py)?
             .into();
         self.docket(py).borrow_mut().replace(docket.clone_ref(py));
-        let idx = self.index(py).borrow();
+        let idx = &self.inner(py).borrow().index;
         let data_tip = idx.check_revision(data_tip).ok_or_else(|| {
             nodemap_error(py, NodeMapError::RevisionNotInIndex(data_tip))
         })?;
@@ -756,7 +1517,7 @@
         for r in (data_tip.0 + 1)..current_tip as BaseRevision {
             let rev = Revision(r);
             // in this case node() won't ever return None
-            nt.insert(&*idx, idx.node(rev).unwrap(), rev)
+            nt.insert(idx, idx.node(rev).expect("node should exist"), rev)
                 .map_err(|e| nodemap_error(py, e))?
         }
 
@@ -766,7 +1527,7 @@
     }
 
     fn inner_getitem(&self, py: Python, key: PyObject) -> PyResult<PyObject> {
-        let idx = self.index(py).borrow();
+        let idx = &self.inner(py).borrow().index;
         Ok(match key.extract::<BaseRevision>(py) {
             Ok(key_as_int) => {
                 let entry_params = if key_as_int == NULL_REVISION.0 {
@@ -786,15 +1547,17 @@
                 revision_data_params_to_py_tuple(py, entry_params)
                     .into_object()
             }
-            _ => self.get_rev(py, key.extract::<PyBytes>(py)?)?.map_or_else(
-                || py.None(),
-                |py_rev| py_rev.into_py_object(py).into_object(),
-            ),
+            _ => self
+                ._index_get_rev(py, key.extract::<PyBytes>(py)?)?
+                .map_or_else(
+                    || py.None(),
+                    |py_rev| py_rev.into_py_object(py).into_object(),
+                ),
         })
     }
 
     fn inner_head_node_ids(&self, py: Python) -> PyResult<PyObject> {
-        let index = &*self.index(py).borrow();
+        let index = &self.inner(py).borrow().index;
 
         // We don't use the shortcut here, as it's actually slower to loop
         // through the cached `PyList` than to re-do the whole computation for
@@ -826,7 +1589,7 @@
         filtered_revs: &PyObject,
         stop_rev: &PyObject,
     ) -> PyResult<PyObject> {
-        let index = &*self.index(py).borrow();
+        let index = &self.inner(py).borrow().index;
         let stop_rev = if stop_rev.is_none(py) {
             None
         } else {
@@ -899,7 +1662,7 @@
     ) -> PyResult<PyObject> {
         let begin = begin.extract::<BaseRevision>(py)?;
         let end = end.extract::<BaseRevision>(py)?;
-        let index = &*self.index(py).borrow();
+        let index = &self.inner(py).borrow().index;
         let begin =
             Self::check_revision(index, UncheckedRevision(begin - 1), py)?;
         let end = Self::check_revision(index, UncheckedRevision(end - 1), py)?;
@@ -918,7 +1681,7 @@
         new_heads: &[Revision],
         py: Python<'_>,
     ) -> PyList {
-        let index = self.index(py).borrow();
+        let index = &self.inner(py).borrow().index;
         let as_vec: Vec<PyObject> = new_heads
             .iter()
             .map(|r| {
@@ -958,7 +1721,7 @@
         py: Python,
         py_revs: &PyTuple,
     ) -> PyResult<PyObject> {
-        let index = &*self.index(py).borrow();
+        let index = &self.inner(py).borrow().index;
         let revs: Vec<_> = rev_pyiter_collect(py, py_revs.as_object(), index)?;
         let as_vec: Vec<_> = index
             .ancestors(&revs)
@@ -974,7 +1737,7 @@
         py: Python,
         py_revs: &PyTuple,
     ) -> PyResult<PyObject> {
-        let index = &*self.index(py).borrow();
+        let index = &self.inner(py).borrow().index;
         let revs: Vec<_> = rev_pyiter_collect(py, py_revs.as_object(), index)?;
         let as_vec: Vec<_> = index
             .common_ancestor_heads(&revs)
@@ -990,7 +1753,7 @@
         py: Python,
         py_roots: PyDict,
     ) -> PyResult<PyObject> {
-        let index = &*self.index(py).borrow();
+        let index = &self.inner(py).borrow().index;
         let roots: Result<HashMap<Phase, Vec<Revision>>, PyErr> = py_roots
             .items_list(py)
             .iter(py)
@@ -1037,7 +1800,7 @@
         target_density: f64,
         min_gap_size: usize,
     ) -> PyResult<PyObject> {
-        let index = &*self.index(py).borrow();
+        let index = &self.inner(py).borrow().index;
         let revs: Vec<_> = rev_pyiter_collect(py, &revs, index)?;
         let as_nested_vec =
             index.slice_chunk_to_density(&revs, target_density, min_gap_size);
@@ -1069,7 +1832,7 @@
         roots: PyObject,
         include_path: bool,
     ) -> PyResult<PyObject> {
-        let index = &*self.index(py).borrow();
+        let index = &self.inner(py).borrow().index;
         let heads = rev_pyiter_collect_or_else(py, &heads, index, |_rev| {
             PyErr::new::<IndexError, _>(py, "head out of range")
         })?;
@@ -1091,6 +1854,84 @@
             .collect();
         Ok(PyList::new(py, &as_vec).into_object())
     }
+    fn inner_issnapshot(
+        &self,
+        py: Python,
+        rev: UncheckedRevision,
+    ) -> PyResult<bool> {
+        let inner = &self.inner(py).borrow();
+        let index = &self.inner(py).borrow().index;
+        let rev = index
+            .check_revision(rev)
+            .ok_or_else(|| rev_not_in_index(py, rev))?;
+        let result = inner.is_snapshot(rev).map_err(|e| {
+            PyErr::new::<cpython::exc::ValueError, _>(py, e.to_string())
+        })?;
+        Ok(result)
+    }
+}
+
+impl InnerRevlog {
+    pub fn inner_new(
+        py: Python,
+        opener: PyObject,
+        index_data: PyObject,
+        index_file: PyObject,
+        data_file: PyObject,
+        _sidedata_file: PyObject,
+        inline: bool,
+        data_config: PyObject,
+        delta_config: PyObject,
+        feature_config: PyObject,
+        _chunk_cache: PyObject,
+        _default_compression_header: PyObject,
+        revlog_type: usize,
+    ) -> PyResult<Self> {
+        let vfs = Box::new(PyVfs::new(py, opener)?);
+        let index_file =
+            get_path_from_bytes(index_file.extract::<PyBytes>(py)?.data(py))
+                .to_owned();
+        let data_file =
+            get_path_from_bytes(data_file.extract::<PyBytes>(py)?.data(py))
+                .to_owned();
+        let revlog_type = RevlogType::try_from(revlog_type)
+            .map_err(|e| revlog_error_from_msg(py, e))?;
+        let data_config = extract_data_config(py, data_config, revlog_type)?;
+        let delta_config =
+            extract_delta_config(py, delta_config, revlog_type)?;
+        let feature_config =
+            extract_feature_config(py, feature_config, revlog_type)?;
+        let options = RevlogOpenOptions::new(
+            inline,
+            data_config,
+            delta_config,
+            feature_config,
+        );
+        // Safety: we keep the buffer around inside the class as `index_mmap`
+        let (buf, bytes) = unsafe { mmap_keeparound(py, index_data)? };
+        let index = hg::index::Index::new(bytes, options.index_header())
+            .map_err(|e| revlog_error_from_msg(py, e))?;
+        let core = CoreInnerRevlog::new(
+            vfs,
+            index,
+            index_file,
+            data_file,
+            data_config,
+            delta_config,
+            feature_config,
+        );
+        Self::create_instance(
+            py,
+            core,
+            RefCell::new(None),
+            RefCell::new(None),
+            RefCell::new(None),
+            RefCell::new(buf),
+            RefCell::new(None),
+            RefCell::new(None),
+            RefCell::new(None),
+        )
+    }
 }
 
 py_class!(pub class NodeTree |py| {
@@ -1111,7 +1952,7 @@
     /// (generation-based guard), same as iterating on a `dict` that has
     /// been meanwhile mutated.
     def is_invalidated(&self) -> PyResult<bool> {
-        let leaked = self.index(py).borrow();
+        let leaked = &self.index(py).borrow();
         // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
         let result = unsafe { leaked.try_borrow(py) };
         // two cases for result to be an error:
@@ -1123,7 +1964,7 @@
     }
 
     def insert(&self, rev: PyRevision) -> PyResult<PyObject> {
-        let leaked = self.index(py).borrow();
+        let leaked = &self.index(py).borrow();
         // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
         let index = &*unsafe { leaked.try_borrow(py)? };
 
@@ -1135,7 +1976,7 @@
             return Err(rev_not_in_index(py, rev.into()))
         }
 
-        let entry = index.inner.get_entry(rev).unwrap();
+        let entry = index.inner.get_entry(rev).expect("entry should exist");
         let mut nt = self.nt(py).borrow_mut();
         nt.insert(index, entry.hash(), rev).map_err(|e| nodemap_error(py, e))?;
 
@@ -1158,7 +1999,7 @@
             )?;
 
         let nt = self.nt(py).borrow();
-        let leaked = self.index(py).borrow();
+        let leaked = &self.index(py).borrow();
         // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
         let index = &*unsafe { leaked.try_borrow(py)? };
 
@@ -1170,7 +2011,7 @@
 
     def shortest(&self, node: PyBytes) -> PyResult<usize> {
         let nt = self.nt(py).borrow();
-        let leaked = self.index(py).borrow();
+        let leaked = &self.index(py).borrow();
         // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
         let idx = &*unsafe { leaked.try_borrow(py)? };
         match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
@@ -1182,6 +2023,120 @@
     }
 });
 
+fn panic_after_error(_py: Python) -> ! {
+    unsafe {
+        python3_sys::PyErr_Print();
+    }
+    panic!("Python API called failed");
+}
+
+/// # Safety
+///
+/// Don't call this. Its only caller is taken from `PyO3`.
+unsafe fn cast_from_owned_ptr_or_panic<T>(
+    py: Python,
+    p: *mut python3_sys::PyObject,
+) -> T
+where
+    T: cpython::PythonObjectWithCheckedDowncast,
+{
+    if p.is_null() {
+        panic_after_error(py);
+    } else {
+        PyObject::from_owned_ptr(py, p).cast_into(py).unwrap()
+    }
+}
+
+fn with_pybytes_buffer<F>(
+    py: Python,
+    len: usize,
+    init: F,
+) -> Result<PyBytes, RevlogError>
+where
+    F: FnOnce(
+        &mut dyn RevisionBuffer<Target = PyBytes>,
+    ) -> Result<(), RevlogError>,
+{
+    // Largely inspired by code in PyO3
+    // https://pyo3.rs/main/doc/pyo3/types/struct.PyBytes.html#method.new_bound_with
+    unsafe {
+        let pyptr = python3_sys::PyBytes_FromStringAndSize(
+            std::ptr::null(),
+            len as python3_sys::Py_ssize_t,
+        );
+        let pybytes = cast_from_owned_ptr_or_panic::<PyBytes>(py, pyptr);
+        let buffer: *mut u8 = python3_sys::PyBytes_AsString(pyptr).cast();
+        debug_assert!(!buffer.is_null());
+        let mut rev_buf = PyRevisionBuffer::new(pybytes, buffer, len);
+        // Initialise the bytestring in init
+        // If init returns an Err, the buffer is deallocated by `pybytes`
+        init(&mut rev_buf).map(|_| rev_buf.finish())
+    }
+}
+
+/// Wrapper around a Python-provided buffer into which the revision contents
+/// will be written. Done for speed in order to save a large allocation + copy.
+struct PyRevisionBuffer {
+    py_bytes: PyBytes,
+    _buf: *mut u8,
+    len: usize,
+    current_buf: *mut u8,
+    current_len: usize,
+}
+
+impl PyRevisionBuffer {
+    /// # Safety
+    ///
+    /// `buf` should be the start of the allocated bytes of `bytes`, and `len`
+    /// exactly the length of said allocated bytes.
+    #[inline]
+    unsafe fn new(bytes: PyBytes, buf: *mut u8, len: usize) -> Self {
+        Self {
+            py_bytes: bytes,
+            _buf: buf,
+            len,
+            current_len: 0,
+            current_buf: buf,
+        }
+    }
+
+    /// Number of bytes that have been copied to. Will be different to the
+    /// total allocated length of the buffer unless the revision is done being
+    /// written.
+    #[inline]
+    fn current_len(&self) -> usize {
+        self.current_len
+    }
+}
+
+impl RevisionBuffer for PyRevisionBuffer {
+    type Target = PyBytes;
+
+    #[inline]
+    fn extend_from_slice(&mut self, slice: &[u8]) {
+        assert!(self.current_len + slice.len() <= self.len);
+        unsafe {
+            // We cannot use `copy_from_nonoverlapping` since it's *possible*
+            // to create a slice from the same Python memory region using
+            // [`PyBytesDeref`]. It is probable that LLVM has an optimization anyway.
+            self.current_buf.copy_from(slice.as_ptr(), slice.len());
+            self.current_buf = self.current_buf.add(slice.len());
+        }
+        self.current_len += slice.len()
+    }
+
+    #[inline]
+    fn finish(self) -> Self::Target {
+        // catch unzeroed bytes before it becomes undefined behavior
+        assert_eq!(
+            self.current_len(),
+            self.len,
+            "not enough bytes read for revision"
+        );
+        self.py_bytes
+    }
+}
+
 fn revlog_error(py: Python) -> PyErr {
     match py
         .import("mercurial.error")
@@ -1195,21 +2150,6 @@
     }
 }
 
-fn revlog_error_with_msg(py: Python, msg: &[u8]) -> PyErr {
-    match py
-        .import("mercurial.error")
-        .and_then(|m| m.get(py, "RevlogError"))
-    {
-        Err(e) => e,
-        Ok(cls) => PyErr::from_instance(
-            py,
-            cls.call(py, (PyBytes::new(py, msg),), None)
-                .ok()
-                .into_py_object(py),
-        ),
-    }
-}
-
 fn graph_error(py: Python, _err: hg::GraphError) -> PyErr {
     // ParentOutOfRange is currently the only alternative
     // in `hg::GraphError`. The C index always raises this simple ValueError.
@@ -1249,8 +2189,8 @@
     m.add(py, "__package__", package)?;
     m.add(py, "__doc__", "RevLog - Rust implementations")?;
 
-    m.add_class::<Index>(py)?;
     m.add_class::<NodeTree>(py)?;
+    m.add_class::<InnerRevlog>(py)?;
 
     let sys = PyModule::import(py, "sys")?;
     let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rust/hg-cpython/src/vfs.rs	Wed Jun 19 19:10:49 2024 +0200
@@ -0,0 +1,289 @@
+use std::{
+    cell::Cell,
+    fs::File,
+    io::Error,
+    os::fd::{AsRawFd, FromRawFd},
+    path::{Path, PathBuf},
+};
+
+use cpython::{
+    ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyInt, PyObject,
+    PyResult, PyTuple, Python, PythonObject, ToPyObject,
+};
+use hg::{
+    errors::{HgError, IoResultExt},
+    exit_codes,
+    utils::files::{get_bytes_from_path, get_path_from_bytes},
+    vfs::Vfs,
+};
+
+/// Wrapper around a Python VFS object to call back into Python from `hg-core`.
+pub struct PyVfs {
+    inner: PyObject,
+}
+
+impl Clone for PyVfs {
+    fn clone(&self) -> Self {
+        let gil = &Python::acquire_gil();
+        let py = gil.python();
+        Self {
+            inner: self.inner.clone_ref(py),
+        }
+    }
+}
+
+impl PyVfs {
+    pub fn new(_py: Python, py_vfs: PyObject) -> PyResult<Self> {
+        Ok(Self { inner: py_vfs })
+    }
+
+    fn inner_open(
+        &self,
+        filename: &Path,
+        create: bool,
+        check_ambig: bool,
+        atomic_temp: bool,
+        write: bool,
+    ) -> Result<(File, Option<PathBuf>), HgError> {
+        let gil = &Python::acquire_gil();
+        let py = gil.python();
+        let mode = if atomic_temp {
+            PyBytes::new(py, b"w")
+        } else if create {
+            PyBytes::new(py, b"w+")
+        } else if write {
+            PyBytes::new(py, b"r+")
+        } else {
+            PyBytes::new(py, b"rb")
+        };
+        let res = self.inner.call(
+            py,
+            (
+                PyBytes::new(py, &get_bytes_from_path(filename)),
+                mode,
+                atomic_temp,
+                check_ambig,
+            ),
+            None,
+        );
+        match res {
+            Ok(tup) => {
+                let tup = tup
+                    .extract::<PyTuple>(py)
+                    .map_err(|e| vfs_error("vfs did not return a tuple", e))?;
+                let fileno = tup.get_item(py, 0).extract(py).map_err(|e| {
+                    vfs_error("vfs did not return a valid fileno", e)
+                })?;
+                let temp_name = tup.get_item(py, 1);
+                // Safety: this must be a valid owned file descriptor, and
+                // Python has just given it to us, it will only exist here now
+                let file = unsafe { File::from_raw_fd(fileno) };
+                let temp_name = if atomic_temp {
+                    Some(
+                        get_path_from_bytes(
+                            temp_name
+                                .extract::<PyBytes>(py)
+                                .map_err(|e| vfs_error("invalid tempname", e))?
+                                .data(py),
+                        )
+                        .to_owned(),
+                    )
+                } else {
+                    None
+                };
+                Ok((file, temp_name))
+            }
+            Err(mut e) => {
+                // TODO surely there is a better way of comparing
+                if e.instance(py).get_type(py).name(py) == "FileNotFoundError"
+                {
+                    return Err(HgError::IoError {
+                        error: Error::new(
+                            std::io::ErrorKind::NotFound,
+                            e.instance(py).to_string(),
+                        ),
+                        context: hg::errors::IoErrorContext::ReadingFile(
+                            filename.to_owned(),
+                        ),
+                    });
+                }
+                Err(vfs_error("failed to call opener", e))
+            }
+        }
+    }
+}
+
+fn vfs_error(reason: impl Into<String>, mut error: PyErr) -> HgError {
+    let gil = &Python::acquire_gil();
+    let py = gil.python();
+    HgError::abort(
+        format!("{}: {}", reason.into(), error.instance(py)),
+        exit_codes::ABORT,
+        None,
+    )
+}
+
+py_class!(pub class PyFile |py| {
+    data number: Cell<i32>;
+
+    def fileno(&self) -> PyResult<PyInt> {
+        Ok(self.number(py).get().to_py_object(py))
+    }
+});
+
+impl Vfs for PyVfs {
+    fn open(&self, filename: &Path) -> Result<File, HgError> {
+        self.inner_open(filename, false, false, false, true)
+            .map(|(f, _)| f)
+    }
+    fn open_read(&self, filename: &Path) -> Result<File, HgError> {
+        self.inner_open(filename, false, false, false, false)
+            .map(|(f, _)| f)
+    }
+
+    fn open_check_ambig(
+        &self,
+        filename: &Path,
+    ) -> Result<std::fs::File, HgError> {
+        self.inner_open(filename, false, true, false, true)
+            .map(|(f, _)| f)
+    }
+
+    fn create(&self, filename: &Path) -> Result<std::fs::File, HgError> {
+        self.inner_open(filename, true, false, false, true)
+            .map(|(f, _)| f)
+    }
+
+    fn create_atomic(
+        &self,
+        filename: &Path,
+        check_ambig: bool,
+    ) -> Result<hg::vfs::AtomicFile, HgError> {
+        self.inner_open(filename, true, false, true, true).map(
+            |(fp, temp_name)| {
+                hg::vfs::AtomicFile::new(
+                    fp,
+                    check_ambig,
+                    temp_name.expect("temp name should exist"),
+                    filename.to_owned(),
+                )
+            },
+        )
+    }
+
+    fn file_size(&self, file: &File) -> Result<u64, HgError> {
+        let gil = &Python::acquire_gil();
+        let py = gil.python();
+        let raw_fd = file.as_raw_fd();
+        let py_fd = PyFile::create_instance(py, Cell::new(raw_fd))
+            .expect("create_instance cannot fail");
+        let fstat = self
+            .inner
+            .call_method(py, "fstat", (py_fd,), None)
+            .map_err(|e| {
+                vfs_error(format!("failed to fstat fd '{}'", raw_fd), e)
+            })?;
+        fstat
+            .getattr(py, "st_size")
+            .map(|v| {
+                v.extract(py).map_err(|e| {
+                    vfs_error(format!("invalid size for fd '{}'", raw_fd), e)
+                })
+            })
+            .map_err(|e| {
+                vfs_error(format!("failed to get size of fd '{}'", raw_fd), e)
+            })?
+    }
+
+    fn exists(&self, filename: &Path) -> bool {
+        let gil = &Python::acquire_gil();
+        let py = gil.python();
+        self.inner
+            .call_method(
+                py,
+                "exists",
+                (PyBytes::new(py, &get_bytes_from_path(filename)),),
+                None,
+            )
+            .unwrap_or_else(|_| false.into_py_object(py).into_object())
+            .extract(py)
+            .unwrap()
+    }
+
+    fn unlink(&self, filename: &Path) -> Result<(), HgError> {
+        let gil = &Python::acquire_gil();
+        let py = gil.python();
+        if let Err(e) = self.inner.call_method(
+            py,
+            "unlink",
+            (PyBytes::new(py, &get_bytes_from_path(filename)),),
+            None,
+        ) {
+            return Err(vfs_error(
+                format!("failed to unlink '{}'", filename.display()),
+                e,
+            ));
+        }
+        Ok(())
+    }
+
+    fn rename(
+        &self,
+        from: &Path,
+        to: &Path,
+        check_ambig: bool,
+    ) -> Result<(), HgError> {
+        let gil = &Python::acquire_gil();
+        let py = gil.python();
+        let kwargs = PyDict::new(py);
+        kwargs
+            .set_item(py, "checkambig", check_ambig)
+            .map_err(|e| vfs_error("dict setitem failed", e))?;
+        if let Err(e) = self.inner.call_method(
+            py,
+            "rename",
+            (
+                PyBytes::new(py, &get_bytes_from_path(from)),
+                PyBytes::new(py, &get_bytes_from_path(to)),
+            ),
+            Some(&kwargs),
+        ) {
+            let msg = format!(
+                "failed to rename '{}' to '{}'",
+                from.display(),
+                to.display()
+            );
+            return Err(vfs_error(msg, e));
+        }
+        Ok(())
+    }
+
+    fn copy(&self, from: &Path, to: &Path) -> Result<(), HgError> {
+        let gil = &Python::acquire_gil();
+        let py = gil.python();
+        let from = self
+            .inner
+            .call_method(
+                py,
+                "join",
+                (PyBytes::new(py, &get_bytes_from_path(from)),),
+                None,
+            )
+            .unwrap();
+        let from = from.extract::<PyBytes>(py).unwrap();
+        let from = get_path_from_bytes(from.data(py));
+        let to = self
+            .inner
+            .call_method(
+                py,
+                "join",
+                (PyBytes::new(py, &get_bytes_from_path(to)),),
+                None,
+            )
+            .unwrap();
+        let to = to.extract::<PyBytes>(py).unwrap();
+        let to = get_path_from_bytes(to.data(py));
+        std::fs::copy(from, to).when_writing_file(to)?;
+        Ok(())
+    }
+}
--- a/tests/test-bundle.t	Wed Jun 19 17:03:13 2024 +0200
+++ b/tests/test-bundle.t	Wed Jun 19 19:10:49 2024 +0200
@@ -294,7 +294,7 @@
   packed.hg: size=2865, sha1=353d10311f4befa195d9a1ca4b8e26518115c702 (no-rust !)
   0000: 48 47 53 31 55 4e 00 00 00 00 00 00 00 06 00 00 |HGS1UN..........| (no-rust !)
   0010: 00 00 00 00 0a 69 00 3b 67 65 6e 65 72 61 6c 64 |.....i.;generald| (no-rust !)
-  packed.hg: size=3181, sha1=b202787710a1c109246554be589506cd2916acb7 (rust !)
+  packed.hg: size=3181, sha1=3e865df183d388222969c5b19c844dd8697c85c6 (rust !)
   0000: 48 47 53 31 55 4e 00 00 00 00 00 00 00 09 00 00 |HGS1UN..........| (rust !)
   0010: 00 00 00 00 0b 67 00 3b 67 65 6e 65 72 61 6c 64 |.....g.;generald| (rust !)
   0020: 65 6c 74 61 2c 72 65 76 6c 6f 67 2d 63 6f 6d 70 |elta,revlog-comp|
--- a/tests/test-contrib-perf.t	Wed Jun 19 17:03:13 2024 +0200
+++ b/tests/test-contrib-perf.t	Wed Jun 19 19:10:49 2024 +0200
@@ -283,8 +283,15 @@
 #if reporevlogstore
   $ hg perfrevlogrevisions .hg/store/data/a.i
 #endif
+
+#if no-rust
+Cannot test in Rust because these are highly invasive and expect a certain
+structure from Python code.
+
   $ hg perfrevlogrevision -m 0
   $ hg perfrevlogchunks -c
+#endif
+
   $ hg perfrevrange
   $ hg perfrevset 'all()'
   $ hg perfstartup
--- a/tests/test-journal-exists.t	Wed Jun 19 17:03:13 2024 +0200
+++ b/tests/test-journal-exists.t	Wed Jun 19 19:10:49 2024 +0200
@@ -45,12 +45,21 @@
   $ hg bundle -qa repo.hg
   $ chmod -w foo/.hg/store/00changelog.i
 
+#if rust
+  $ hg -R foo unbundle repo.hg
+  adding changesets
+  transaction abort!
+  rollback completed
+  abort: failed to call opener: [Errno 13] $EACCES$: b'$TESTTMP/repo/foo/.hg/store/.00changelog.i-*' (glob)
+  [50]
+#else
   $ hg -R foo unbundle repo.hg
   adding changesets
   transaction abort!
   rollback completed
   abort: $EACCES$: '$TESTTMP/repo/foo/.hg/store/.00changelog.i-*' (glob)
   [255]
+#endif
 
   $ if test -f foo/.hg/store/journal; then echo 'journal exists :-('; fi
 #endif
--- a/tests/test-permissions.t	Wed Jun 19 17:03:13 2024 +0200
+++ b/tests/test-permissions.t	Wed Jun 19 19:10:49 2024 +0200
@@ -34,10 +34,16 @@
   $ chmod -w .hg/store/data/a.i
 
   $ echo barber > a
+#if rust
+  $ hg commit -m "2"
+  abort: failed to call opener: [Errno 13] $EACCES$: b'$TESTTMP/t/.hg/store/data/a.i'
+  [50]
+#else
   $ hg commit -m "2"
   trouble committing a!
   abort: $EACCES$: '$TESTTMP/t/.hg/store/data/a.i'
   [255]
+#endif
 
   $ chmod -w .
 
--- a/tests/test-remotefilelog-bgprefetch.t	Wed Jun 19 17:03:13 2024 +0200
+++ b/tests/test-remotefilelog-bgprefetch.t	Wed Jun 19 19:10:49 2024 +0200
@@ -33,8 +33,8 @@
   transferred 776 bytes in * seconds (*/sec) (glob) (no-zstd !)
   3 files to transfer, 784 bytes of data (zstd no-rust !)
   transferred 784 bytes in * seconds (*/sec) (glob) (zstd no-rust !)
-  5 files to transfer, 910 bytes of data (rust !)
-  transferred 910 bytes in * seconds (*/sec) (glob) (rust !)
+  5 files to transfer, 911 bytes of data (rust !)
+  transferred 911 bytes in * seconds (*/sec) (glob) (rust !)
   searching for changes
   no changes found
 
--- a/tests/test-remotefilelog-prefetch.t	Wed Jun 19 17:03:13 2024 +0200
+++ b/tests/test-remotefilelog-prefetch.t	Wed Jun 19 19:10:49 2024 +0200
@@ -26,8 +26,8 @@
   transferred 528 bytes in * seconds (* */sec) (glob) (no-zstd !)
   3 files to transfer, 532 bytes of data (zstd no-rust !)
   transferred 532 bytes in * seconds (* */sec) (glob) (zstd no-rust !)
-  5 files to transfer, 658 bytes of data (zstd rust !)
-  transferred 658 bytes in * seconds (*/sec) (glob) (zstd rust !)
+  5 files to transfer, 659 bytes of data (zstd rust !)
+  transferred 659 bytes in * seconds (*/sec) (glob) (zstd rust !)
   searching for changes
   no changes found
   $ cd shallow
@@ -172,8 +172,8 @@
   transferred 528 bytes in * seconds * (glob) (no-zstd !)
   3 files to transfer, 532 bytes of data (zstd no-rust !)
   transferred 532 bytes in * seconds (* */sec) (glob) (zstd no-rust !)
-  5 files to transfer, 658 bytes of data (zstd rust !)
-  transferred 658 bytes in * seconds (*/sec) (glob) (zstd rust !)
+  5 files to transfer, 659 bytes of data (zstd rust !)
+  transferred 659 bytes in * seconds (*/sec) (glob) (zstd rust !)
   searching for changes
   no changes found
   updating to branch default
--- a/tests/test-repo-compengines.t	Wed Jun 19 17:03:13 2024 +0200
+++ b/tests/test-repo-compengines.t	Wed Jun 19 19:10:49 2024 +0200
@@ -194,9 +194,11 @@
   > done
 
   $ $RUNTESTDIR/f -s zstd-*/.hg/store/data/*
-  zstd-level-1/.hg/store/data/a.i: size=4114
+  zstd-level-1/.hg/store/data/a.i: size=4114 (no-rust !)
+  zstd-level-1/.hg/store/data/a.i: size=4112 (rust !)
   zstd-level-22/.hg/store/data/a.i: size=4091
-  zstd-level-default/\.hg/store/data/a\.i: size=(4094|4102) (re)
+  zstd-level-default/\.hg/store/data/a\.i: size=(4094|4102) (re) (no-rust !)
+  zstd-level-default/.hg/store/data/a.i: size=4097 (rust !)
 
 Test error cases
 
--- a/tests/test-rust-revlog.py	Wed Jun 19 17:03:13 2024 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,90 +0,0 @@
-import struct
-import unittest
-
-from mercurial.node import hex
-
-try:
-    from mercurial import rustext
-
-    rustext.__name__  # trigger immediate actual import
-except ImportError:
-    rustext = None
-else:
-    from mercurial.rustext import revlog
-
-    # this would fail already without appropriate ancestor.__package__
-    from mercurial.rustext.ancestor import LazyAncestors
-
-from mercurial.testing import revlog as revlogtesting
-
-header = struct.unpack(">I", revlogtesting.data_non_inlined[:4])[0]
-
-
-@unittest.skipIf(
-    rustext is None,
-    "rustext module revlog relies on is not available",
-)
-class RustRevlogIndexTest(revlogtesting.RevlogBasedTestBase):
-    def test_heads(self):
-        idx = self.parseindex()
-        rustidx = revlog.Index(revlogtesting.data_non_inlined, header)
-        self.assertEqual(rustidx.headrevs(), idx.headrevs())
-
-    def test_len(self):
-        idx = self.parseindex()
-        rustidx = revlog.Index(revlogtesting.data_non_inlined, header)
-        self.assertEqual(len(rustidx), len(idx))
-
-    def test_ancestors(self):
-        rustidx = revlog.Index(revlogtesting.data_non_inlined, header)
-        lazy = LazyAncestors(rustidx, [3], 0, True)
-        # we have two more references to the index:
-        # - in its inner iterator for __contains__ and __bool__
-        # - in the LazyAncestors instance itself (to spawn new iterators)
-        self.assertTrue(2 in lazy)
-        self.assertTrue(bool(lazy))
-        self.assertEqual(list(lazy), [3, 2, 1, 0])
-        # a second time to validate that we spawn new iterators
-        self.assertEqual(list(lazy), [3, 2, 1, 0])
-
-        # let's check bool for an empty one
-        self.assertFalse(LazyAncestors(rustidx, [0], 0, False))
-
-
-@unittest.skipIf(
-    rustext is None,
-    "rustext module revlog relies on is not available",
-)
-class RustRevlogNodeTreeClassTest(revlogtesting.RustRevlogBasedTestBase):
-    def test_standalone_nodetree(self):
-        idx = self.parserustindex()
-        nt = revlog.NodeTree(idx)
-        for i in range(4):
-            nt.insert(i)
-
-        bin_nodes = [entry[7] for entry in idx]
-        hex_nodes = [hex(n) for n in bin_nodes]
-
-        for i, node in enumerate(hex_nodes):
-            self.assertEqual(nt.prefix_rev_lookup(node), i)
-            self.assertEqual(nt.prefix_rev_lookup(node[:5]), i)
-
-        # all 4 revisions in idx (standard data set) have different
-        # first nybbles in their Node IDs,
-        # hence `nt.shortest()` should return 1 for them, except when
-        # the leading nybble is 0 (ambiguity with NULL_NODE)
-        for i, (bin_node, hex_node) in enumerate(zip(bin_nodes, hex_nodes)):
-            shortest = nt.shortest(bin_node)
-            expected = 2 if hex_node[0] == ord('0') else 1
-            self.assertEqual(shortest, expected)
-            self.assertEqual(nt.prefix_rev_lookup(hex_node[:shortest]), i)
-
-        # test invalidation (generation poisoning) detection
-        del idx[3]
-        self.assertTrue(nt.is_invalidated())
-
-
-if __name__ == '__main__':
-    import silenttestrunner
-
-    silenttestrunner.main(__name__)
--- a/tests/test-treemanifest.t	Wed Jun 19 17:03:13 2024 +0200
+++ b/tests/test-treemanifest.t	Wed Jun 19 19:10:49 2024 +0200
@@ -869,11 +869,13 @@
   > done
   $ hg ci -m 'have some content'
   $ f -s .hg/store/00manifest.*
-  .hg/store/00manifest.i: size=798 (no-pure !)
-  .hg/store/00manifest.i: size=784 (pure !)
+  .hg/store/00manifest.i: size=798 (no-pure no-rust !)
+  .hg/store/00manifest.i: size=800 (rust !)
+  .hg/store/00manifest.i: size=784 (pure no-rust !)
   $ f -s .hg/store/meta/dir/00manifest*
-  .hg/store/meta/dir/00manifest.i: size=556 (no-pure !)
-  .hg/store/meta/dir/00manifest.i: size=544 (pure !)
+  .hg/store/meta/dir/00manifest.i: size=556 (no-pure no-rust !)
+  .hg/store/meta/dir/00manifest.i: size=557 (rust !)
+  .hg/store/meta/dir/00manifest.i: size=544 (pure no-rust !)
   $ hg debugupgraderepo --config format.revlog-compression=none --config experimental.treemanifest=yes --run --quiet --no-backup
   upgrade will perform the following actions:
   
--- a/tests/test-verify.t	Wed Jun 19 17:03:13 2024 +0200
+++ b/tests/test-verify.t	Wed Jun 19 19:10:49 2024 +0200
@@ -321,7 +321,8 @@
   $ cat start b > .hg/store/data/a.i
 
   $ hg verify -q
-   a@1: broken revlog! (index a is corrupted)
+   a@1: broken revlog! (index a is corrupted) (no-rust !)
+   a@1: broken revlog! (abort: unexpected inline revlog length) (rust !)
   warning: orphan data file 'data/a.i'
   not checking dirstate because of previous errors
   1 warnings encountered!