Mercurial > hg

--- a/contrib/perf.py	Wed Jun 19 17:03:13 2024 +0200
+++ b/contrib/perf.py	Wed Jun 19 19:10:49 2024 +0200
@@ -3780,6 +3780,11 @@

     rl = cmdutil.openrevlog(repo, b'perfrevlogchunks', file_, opts)

+    if rl.uses_rust:
+        raise NotImplementedError(
+            "perfrevlogchunks is not implemented for the Rust revlog"
+        )
+
     # - _chunkraw was renamed to _getsegmentforrevs
     # - _getsegmentforrevs was moved on the inner object
     try:
@@ -3960,6 +3965,10 @@
         raise error.CommandError(b'perfrevlogrevision', b'invalid arguments')

     r = cmdutil.openrevlog(repo, b'perfrevlogrevision', file_, opts)
+    if r.uses_rust:
+        raise NotImplementedError(
+            "perfrevlogrevision is not implemented for the Rust revlog"
+        )

     # _chunkraw was renamed to _getsegmentforrevs.
     try:
--- a/mercurial/pure/parsers.py	Wed Jun 19 17:03:13 2024 +0200
+++ b/mercurial/pure/parsers.py	Wed Jun 19 19:10:49 2024 +0200
@@ -672,6 +672,9 @@
             r = (offset,) + r[1:]
         return r

+    def __delitem__(self, i):
+        raise NotImplementedError()
+
     def _unpack_entry(self, rev, data):
         r = self.index_format.unpack(data)
         r = r + (
--- a/mercurial/revlog.py	Wed Jun 19 17:03:13 2024 +0200
+++ b/mercurial/revlog.py	Wed Jun 19 19:10:49 2024 +0200
@@ -17,7 +17,6 @@
 import binascii
 import collections
 import contextlib
-import functools
 import io
 import os
 import struct
@@ -83,6 +82,7 @@
 if typing.TYPE_CHECKING:
     # noinspection PyPackageRequirements
     import attr
+    from .pure.parsers import BaseIndexObject

 from . import (
     ancestor,
@@ -381,7 +381,7 @@
         default_compression_header,
     ):
         self.opener = opener
-        self.index = index
+        self.index: BaseIndexObject = index

         self.index_file = index_file
         self.data_file = data_file
@@ -528,7 +528,9 @@
         generaldelta = self.delta_config.general_delta
         # Try C implementation.
         try:
-            return self.index.deltachain(rev, stoprev, generaldelta)
+            return self.index.deltachain(
+                rev, stoprev, generaldelta
+            )  # pytype: disable=attribute-error
         except AttributeError:
             pass

@@ -1246,6 +1248,71 @@
         return self.canonical_index_file


+if typing.TYPE_CHECKING:
+    # Tell Pytype what kind of object we expect
+    ProxyBase = BaseIndexObject
+else:
+    ProxyBase = object
+
+
+class RustIndexProxy(ProxyBase):
+    """Wrapper around the Rust index to fake having direct access to the index.
+
+    Rust enforces xor mutability (one mutable reference XOR 1..n non-mutable),
+    so we can't expose the index from Rust directly, since the `InnerRevlog`
+    already has ownership of the index. This object redirects all calls to the
+    index through the Rust-backed `InnerRevlog` glue which defines all
+    necessary forwarding methods.
+    """
+
+    def __init__(self, inner):
+        # Do not rename as it's being used to access the index from Rust
+        self.inner = inner
+
+    # TODO possibly write all index methods manually to save on overhead?
+    def __getattr__(self, name):
+        return getattr(self.inner, f"_index_{name}")
+
+    # Magic methods need to be defined explicitly
+    def __len__(self):
+        return self.inner._index___len__()
+
+    def __getitem__(self, key):
+        return self.inner._index___getitem__(key)
+
+    def __contains__(self, key):
+        return self.inner._index___contains__(key)
+
+    def __delitem__(self, key):
+        return self.inner._index___delitem__(key)
+
+
+class RustVFSWrapper:
+    """Used to wrap a Python VFS to pass it to Rust to lower the overhead of
+    calling back multiple times into Python.
+    """
+
+    def __init__(self, inner):
+        self.inner = inner
+
+    def __call__(
+        self,
+        path: bytes,
+        mode: bytes = b"rb",
+        atomictemp=False,
+        checkambig=False,
+    ):
+        fd = self.inner.__call__(
+            path=path, mode=mode, atomictemp=atomictemp, checkambig=checkambig
+        )
+        # Information that Rust needs to get ownership of the file that's
+        # being opened.
+        return (os.dup(fd.fileno()), fd._tempname if atomictemp else None)
+
+    def __getattr__(self, name):
+        return getattr(self.inner, name)
+
+
 class revlog:
     """
     the underlying revision storage object
@@ -1358,6 +1425,7 @@
         self._trypending = trypending
         self._try_split = try_split
         self._may_inline = may_inline
+        self.uses_rust = False
         self.opener = opener
         if persistentnodemap:
             self._nodemap_file = nodemaputil.get_nodemap_file(self)
@@ -1392,7 +1460,7 @@
         # Maps rev to chain base rev.
         self._chainbasecache = util.lrucachedict(100)

-        self.index = None
+        self.index: Optional[BaseIndexObject] = None
         self._docket = None
         self._nodemap_docket = None
         # Mapping of partial identifiers to full nodes.
@@ -1406,8 +1474,8 @@
         # prevent nesting of addgroup
         self._adding_group = None

-        chunk_cache = self._loadindex()
-        self._load_inner(chunk_cache)
+        index, chunk_cache = self._loadindex()
+        self._load_inner(index, chunk_cache)
         self._concurrencychecker = concurrencychecker

     def _init_opts(self):
@@ -1707,7 +1775,12 @@
         )

         use_rust_index = False
-        if rustrevlog is not None and self._nodemap_file is not None:
+        rust_applicable = self._nodemap_file is not None
+        rust_applicable = rust_applicable or self.target[0] == KIND_FILELOG
+        rust_applicable = rust_applicable and getattr(
+            self.opener, "rust_compatible", True
+        )
+        if rustrevlog is not None and rust_applicable:
             # we would like to use the rust_index in all case, especially
             # because it is necessary for AncestorsIterator and LazyAncestors
             # since the 6.7 cycle.
@@ -1717,6 +1790,9 @@
             # repository.
             use_rust_index = True

+            if self._format_version != REVLOGV1:
+                use_rust_index = False
+
         self._parse_index = parse_index_v1
         if self._format_version == REVLOGV0:
             self._parse_index = revlogv0.parse_index_v0
@@ -1726,58 +1802,84 @@
             self._parse_index = parse_index_cl_v2
         elif devel_nodemap:
             self._parse_index = parse_index_v1_nodemap
-        elif use_rust_index:
-            self._parse_index = functools.partial(
-                parse_index_v1_rust, default_header=new_header
-            )
-        try:
-            d = self._parse_index(index_data, self._inline)
-            index, chunkcache = d
-            use_nodemap = (
-                not self._inline
-                and self._nodemap_file is not None
-                and hasattr(index, 'update_nodemap_data')
-            )
-            if use_nodemap:
-                nodemap_data = nodemaputil.persisted_data(self)
-                if nodemap_data is not None:
-                    docket = nodemap_data[0]
-                    if (
-                        len(d[0]) > docket.tip_rev
-                        and d[0][docket.tip_rev][7] == docket.tip_node
-                    ):
-                        # no changelog tampering
-                        self._nodemap_docket = docket
-                        index.update_nodemap_data(*nodemap_data)
-        except (ValueError, IndexError):
-            raise error.RevlogError(
-                _(b"index %s is corrupted") % self.display_id
-            )
-        self.index = index
+
+        if use_rust_index:
+            # Let the Rust code parse its own index
+            index, chunkcache = (index_data, None)
+            self.uses_rust = True
+        else:
+            try:
+                d = self._parse_index(index_data, self._inline)
+                index, chunkcache = d
+                self._register_nodemap_info(index)
+            except (ValueError, IndexError):
+                raise error.RevlogError(
+                    _(b"index %s is corrupted") % self.display_id
+                )
         # revnum -> (chain-length, sum-delta-length)
         self._chaininfocache = util.lrucachedict(500)

-        return chunkcache
-
-    def _load_inner(self, chunk_cache):
+        return index, chunkcache
+
+    def _load_inner(self, index, chunk_cache):
         if self._docket is None:
             default_compression_header = None
         else:
             default_compression_header = self._docket.default_compression_header

-        self._inner = _InnerRevlog(
-            opener=self.opener,
-            index=self.index,
-            index_file=self._indexfile,
-            data_file=self._datafile,
-            sidedata_file=self._sidedatafile,
-            inline=self._inline,
-            data_config=self.data_config,
-            delta_config=self.delta_config,
-            feature_config=self.feature_config,
-            chunk_cache=chunk_cache,
-            default_compression_header=default_compression_header,
+        if self.uses_rust:
+            self._inner = rustrevlog.InnerRevlog(
+                opener=RustVFSWrapper(self.opener),
+                index_data=index,
+                index_file=self._indexfile,
+                data_file=self._datafile,
+                sidedata_file=self._sidedatafile,
+                inline=self._inline,
+                data_config=self.data_config,
+                delta_config=self.delta_config,
+                feature_config=self.feature_config,
+                chunk_cache=chunk_cache,
+                default_compression_header=default_compression_header,
+                revlog_type=self.target[0],
+            )
+            self.index = RustIndexProxy(self._inner)
+            self._register_nodemap_info(self.index)
+            self.uses_rust = True
+        else:
+            self._inner = _InnerRevlog(
+                opener=self.opener,
+                index=index,
+                index_file=self._indexfile,
+                data_file=self._datafile,
+                sidedata_file=self._sidedatafile,
+                inline=self._inline,
+                data_config=self.data_config,
+                delta_config=self.delta_config,
+                feature_config=self.feature_config,
+                chunk_cache=chunk_cache,
+                default_compression_header=default_compression_header,
+            )
+            self.index = self._inner.index
+
+    def _register_nodemap_info(self, index):
+        use_nodemap = (
+            not self._inline
+            and self._nodemap_file is not None
+            and hasattr(index, 'update_nodemap_data')
         )
+        if use_nodemap:
+            nodemap_data = nodemaputil.persisted_data(self)
+            if nodemap_data is not None:
+                docket = nodemap_data[0]
+                if (
+                    len(index) > docket.tip_rev
+                    and index[docket.tip_rev][7] == docket.tip_node
+                ):
+                    # no changelog tampering
+                    self._nodemap_docket = docket
+                    index.update_nodemap_data(
+                        *nodemap_data
+                    )  # pytype: disable=attribute-error

     def get_revlog(self):
         """simple function to mirror API of other not-really-revlog API"""
@@ -1869,7 +1971,9 @@
             nodemap_data = nodemaputil.persisted_data(self)
             if nodemap_data is not None:
                 self._nodemap_docket = nodemap_data[0]
-                self.index.update_nodemap_data(*nodemap_data)
+                self.index.update_nodemap_data(
+                    *nodemap_data
+                )  # pytype: disable=attribute-error

     def rev(self, node):
         """return the revision number associated with a <nodeid>"""
@@ -2368,23 +2472,26 @@
     def headrevs(self, revs=None, stop_rev=None):
         if revs is None:
             return self.index.headrevs(None, stop_rev)
-        assert stop_rev is None
         if rustdagop is not None and self.index.rust_ext_compat:
             return rustdagop.headrevs(self.index, revs)
         return dagop.headrevs(revs, self._uncheckedparentrevs)

     def headrevsdiff(self, start, stop):
         try:
-            return self.index.headrevsdiff(start, stop)
+            return self.index.headrevsdiff(
+                start, stop
+            )  # pytype: disable=attribute-error
         except AttributeError:
             return dagop.headrevsdiff(self._uncheckedparentrevs, start, stop)

     def computephases(self, roots):
-        return self.index.computephasesmapsets(roots)
+        return self.index.computephasesmapsets(
+            roots
+        )  # pytype: disable=attribute-error

     def _head_node_ids(self):
         try:
-            return self.index.head_node_ids()
+            return self.index.head_node_ids()  # pytype: disable=attribute-error
         except AttributeError:
             return [self.node(r) for r in self.headrevs()]

@@ -2442,7 +2549,9 @@
     def _commonancestorsheads(self, *revs):
         """calculate all the heads of the common ancestors of revs"""
         try:
-            ancs = self.index.commonancestorsheads(*revs)
+            ancs = self.index.commonancestorsheads(
+                *revs
+            )  # pytype: disable=attribute-error
         except (AttributeError, OverflowError):  # C implementation failed
             ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
         return ancs
@@ -2476,7 +2585,7 @@
         try:
             return self.index.reachableroots2(
                 minroot, heads, roots, includepath
-            )
+            )  # pytype: disable=attribute-error
         except AttributeError:
             return dagop._reachablerootspure(
                 self.parentrevs, minroot, roots, heads, includepath
@@ -2487,7 +2596,7 @@

         a, b = self.rev(a), self.rev(b)
         try:
-            ancs = self.index.ancestors(a, b)
+            ancs = self.index.ancestors(a, b)  # pytype: disable=attribute-error
         except (AttributeError, OverflowError):
             ancs = ancestor.ancestors(self.parentrevs, a, b)
         if ancs:
@@ -2534,7 +2643,9 @@
         maybewdir = self.nodeconstants.wdirhex.startswith(id)
         ambiguous = False
         try:
-            partial = self.index.partialmatch(id)
+            partial = self.index.partialmatch(
+                id
+            )  # pytype: disable=attribute-error
             if partial and self.hasnode(partial):
                 if maybewdir:
                     # single 'ff...' match in radix tree, ambiguous with wdir
@@ -2636,7 +2747,10 @@

         if not getattr(self, 'filteredrevs', None):
             try:
-                length = max(self.index.shortest(node), minlength)
+                shortest = self.index.shortest(
+                    node
+                )  # pytype: disable=attribute-error
+                length = max(shortest, minlength)
                 return disambiguate(hexnode, length)
             except error.RevlogError:
                 if node != self.nodeconstants.wdirid:
@@ -4089,7 +4203,9 @@
             ifh.seek(startrev * self.index.entry_size)
             for i, e in enumerate(new_entries):
                 rev = startrev + i
-                self.index.replace_sidedata_info(rev, *e)
+                self.index.replace_sidedata_info(
+                    rev, *e
+                )  # pytype: disable=attribute-error
                 packed = self.index.entry_binary(rev)
                 if rev == 0 and self._docket is None:
                     header = self._format_flags | self._format_version
--- a/mercurial/revlogutils/rewrite.py	Wed Jun 19 17:03:13 2024 +0200
+++ b/mercurial/revlogutils/rewrite.py	Wed Jun 19 19:10:49 2024 +0200
@@ -136,8 +136,8 @@
         rl.opener.rename(newrl._datafile, rl._datafile)

     rl.clearcaches()
-    chunk_cache = rl._loadindex()
-    rl._load_inner(chunk_cache)
+    index, chunk_cache = rl._loadindex()
+    rl._load_inner(index, chunk_cache)


 def v2_censor(revlog, tr, censor_nodes, tombstone=b''):
@@ -327,7 +327,8 @@

     # reload the revlog internal information
     revlog.clearcaches()
-    revlog._loadindex(docket=docket)
+    index, chunk_cache = revlog._loadindex(docket=docket)
+    revlog._load_inner(index, chunk_cache)

     @contextlib.contextmanager
     def all_files_opener():
@@ -569,7 +570,8 @@

             rl.opener.rename(new_file_path, index_file)
             rl.clearcaches()
-            rl._loadindex()
+            index, chunk_cache = rl._loadindex()
+            rl._load_inner(index, chunk_cache)
         finally:
             util.tryunlink(new_file_path)
--- a/mercurial/statichttprepo.py	Wed Jun 19 17:03:13 2024 +0200
+++ b/mercurial/statichttprepo.py	Wed Jun 19 19:10:49 2024 +0200
@@ -219,6 +219,9 @@
         self.store = localrepo.makestore(requirements, self.path, vfsclass)
         self.spath = self.store.path
         self.svfs = self.store.opener
+        # We can't use Rust because the Rust code cannot cope with the
+        # `httprangereader` (yet?)
+        self.svfs.rust_compatible = False
         self.sjoin = self.store.join
         self._filecache = {}
         self.requirements = requirements
--- a/mercurial/vfs.py	Wed Jun 19 17:03:13 2024 +0200
+++ b/mercurial/vfs.py	Wed Jun 19 19:10:49 2024 +0200
@@ -82,6 +82,10 @@
     # encoded vfs (see issue6546)
     _dir_sep: bytes = b'/'

+    # Used to disable the Rust `InnerRevlog` in case the VFS is not supported
+    # by the Rust code
+    rust_compatible = True
+
     # TODO: type return, which is util.posixfile wrapped by a proxy
     @abc.abstractmethod
     def __call__(self, path: bytes, mode: bytes = b'rb', **kwargs) -> Any:
--- a/rust/Cargo.lock	Wed Jun 19 17:03:13 2024 +0200
+++ b/rust/Cargo.lock	Wed Jun 19 19:10:49 2024 +0200
@@ -666,6 +666,8 @@
  "hg-core",
  "libc",
  "log",
+ "logging_timer",
+ "python3-sys",
  "stable_deref_trait",
  "vcsgraph",
 ]
--- a/rust/hg-cpython/Cargo.toml	Wed Jun 19 17:03:13 2024 +0200
+++ b/rust/hg-cpython/Cargo.toml	Wed Jun 19 19:10:49 2024 +0200
@@ -17,3 +17,5 @@
 env_logger = "0.9.3"
 stable_deref_trait = "1.2.0"
 vcsgraph = "0.2.0"
+logging_timer = "1.1.0"
+python3-sys = "0.7.1"
--- a/rust/hg-cpython/src/lib.rs	Wed Jun 19 17:03:13 2024 +0200
+++ b/rust/hg-cpython/src/lib.rs	Wed Jun 19 19:10:49 2024 +0200
@@ -47,6 +47,7 @@
 pub mod revlog;
 pub mod update;
 pub mod utils;
+pub mod vfs;

 /// Revision as exposed to/from the Python layer.
 ///
--- a/rust/hg-cpython/src/pybytes_deref.rs	Wed Jun 19 17:03:13 2024 +0200
+++ b/rust/hg-cpython/src/pybytes_deref.rs	Wed Jun 19 19:10:49 2024 +0200
@@ -1,4 +1,7 @@
-use cpython::{PyBytes, Python};
+use crate::cpython::buffer::Element;
+use cpython::{
+    buffer::PyBuffer, exc::ValueError, PyBytes, PyErr, PyResult, Python,
+};
 use stable_deref_trait::StableDeref;

 /// Safe abstraction over a `PyBytes` together with the `&[u8]` slice
@@ -55,3 +58,67 @@
 // but here sending one to another thread is fine since we ensure it stays
 // valid.
 unsafe impl Send for PyBytesDeref {}
+
+/// Abstraction over a `PyBuffer` together with the `&[u8]` slice borrowing it.
+/// It also enables using a (wrapped) `PyBuffer` in GIL-unaware generic code.
+pub struct PyBufferDeref {
+    #[allow(unused)]
+    keep_alive: PyBuffer,
+
+    /// Borrows the buffer inside `self.keep_alive`,
+    /// but the borrow-checker cannot express self-referential structs.
+    data: *const [u8],
+}
+
+fn get_buffer<'a>(py: Python, buf: &'a PyBuffer) -> PyResult<&'a [u8]> {
+    let len = buf.item_count();
+
+    let cbuf = buf.buf_ptr();
+    let has_correct_item_size = std::mem::size_of::<u8>() == buf.item_size();
+    let is_valid_buffer = has_correct_item_size
+        && buf.is_c_contiguous()
+        && u8::is_compatible_format(buf.format())
+        && buf.readonly();
+
+    let bytes = if is_valid_buffer {
+        unsafe { std::slice::from_raw_parts(cbuf as *const u8, len) }
+    } else {
+        return Err(PyErr::new::<ValueError, _>(
+            py,
+            "Buffer has an invalid memory representation",
+        ));
+    };
+    Ok(bytes)
+}
+
+impl PyBufferDeref {
+    pub fn new(py: Python, buf: PyBuffer) -> PyResult<Self> {
+        Ok(Self {
+            data: get_buffer(py, &buf)?,
+            keep_alive: buf,
+        })
+    }
+}
+
+impl std::ops::Deref for PyBufferDeref {
+    type Target = [u8];
+
+    fn deref(&self) -> &[u8] {
+        // Safety: the raw pointer is valid as long as the PyBuffer is still
+        // alive, and the returned slice borrows `self`.
+        unsafe { &*self.data }
+    }
+}
+
+unsafe impl StableDeref for PyBufferDeref {}
+
+#[allow(unused)]
+fn static_assert_pybuffer_is_send() {
+    #[allow(clippy::no_effect)]
+    require_send::<PyBuffer>;
+}
+
+// Safety: PyBuffer is Send. Raw pointers are not by default,
+// but here sending one to another thread is fine since we ensure it stays
+// valid.
+unsafe impl Send for PyBufferDeref {}
--- a/rust/hg-cpython/src/revlog.rs	Wed Jun 19 17:03:13 2024 +0200
+++ b/rust/hg-cpython/src/revlog.rs	Wed Jun 19 19:10:49 2024 +0200
@@ -4,32 +4,43 @@
 //
 // This software may be used and distributed according to the terms of the
 // GNU General Public License version 2 or any later version.
+#![allow(non_snake_case)]

 use crate::{
     conversion::{rev_pyiter_collect, rev_pyiter_collect_or_else},
+    pybytes_deref::{PyBufferDeref, PyBytesDeref},
     utils::{node_from_py_bytes, node_from_py_object},
+    vfs::PyVfs,
     PyRevision,
 };
 use cpython::{
     buffer::{Element, PyBuffer},
     exc::{IndexError, ValueError},
     ObjectProtocol, PyBool, PyBytes, PyClone, PyDict, PyErr, PyInt, PyList,
-    PyModule, PyObject, PyResult, PySet, PyString, PyTuple, Python,
+    PyModule, PyObject, PyResult, PySet, PyTuple, PyType, Python,
     PythonObject, ToPyObject, UnsafePyLeaked,
 };
 use hg::{
     errors::HgError,
-    index::{
-        IndexHeader, Phase, RevisionDataParams, SnapshotsCache,
-        INDEX_ENTRY_SIZE,
+    index::{Phase, RevisionDataParams, SnapshotsCache, INDEX_ENTRY_SIZE},
+    nodemap::{Block, NodeMapError, NodeTree as CoreNodeTree},
+    revlog::compression::CompressionConfig,
+    revlog::inner_revlog::InnerRevlog as CoreInnerRevlog,
+    revlog::inner_revlog::RevisionBuffer,
+    revlog::options::{
+        RevlogDataConfig, RevlogDeltaConfig, RevlogFeatureConfig,
+        RevlogOpenOptions,
     },
-    nodemap::{Block, NodeMapError, NodeTree as CoreNodeTree},
     revlog::{nodemap::NodeMap, Graph, NodePrefix, RevlogError, RevlogIndex},
-    BaseRevision, Node, Revision, UncheckedRevision, NULL_REVISION,
+    transaction::Transaction,
+    utils::files::{get_bytes_from_path, get_path_from_bytes},
+    BaseRevision, Node, Revision, RevlogType, UncheckedRevision,
+    NULL_REVISION,
 };
 use std::{
-    cell::RefCell,
+    cell::{Cell, RefCell},
     collections::{HashMap, HashSet},
+    sync::OnceLock,
 };
 use vcsgraph::graph::Graph as VCSGraph;

@@ -41,12 +52,13 @@
 /// Return a Struct implementing the Graph trait
 pub(crate) fn py_rust_index_to_graph(
     py: Python,
-    index: PyObject,
+    index_proxy: PyObject,
 ) -> PyResult<UnsafePyLeaked<PySharedIndex>> {
-    let midx = index.extract::<Index>(py)?;
-    let leaked = midx.index(py).leak_immutable();
+    let inner_revlog = index_proxy.getattr(py, "inner")?;
+    let inner_revlog = inner_revlog.extract::<InnerRevlog>(py)?;
+    let leaked = inner_revlog.inner(py).leak_immutable();
     // Safety: we don't leak the "faked" reference out of the `UnsafePyLeaked`
-    Ok(unsafe { leaked.map(py, |idx| PySharedIndex { inner: idx }) })
+    Ok(unsafe { leaked.map(py, |idx| PySharedIndex { inner: &idx.index }) })
 }

 impl Clone for PySharedIndex {
@@ -91,398 +103,6 @@
     }
 }

-py_class!(pub class Index |py| {
-    @shared data index: hg::index::Index;
-    data nt: RefCell<Option<CoreNodeTree>>;
-    data docket: RefCell<Option<PyObject>>;
-    // Holds a reference to the mmap'ed persistent nodemap data
-    data nodemap_mmap: RefCell<Option<PyBuffer>>;
-    // Holds a reference to the mmap'ed persistent index data
-    data index_mmap: RefCell<Option<PyBuffer>>;
-    data head_revs_py_list: RefCell<Option<PyList>>;
-    data head_node_ids_py_list: RefCell<Option<PyList>>;
-
-    def __new__(
-        _cls,
-        data: PyObject,
-        default_header: u32,
-    ) -> PyResult<Self> {
-        Self::new(py, data, default_header)
-    }
-
-    /// Compatibility layer used for Python consumers needing access to the C index
-    ///
-    /// Only use case so far is `scmutil.shortesthexnodeidprefix`,
-    /// that may need to build a custom `nodetree`, based on a specified revset.
-    /// With a Rust implementation of the nodemap, we will be able to get rid of
-    /// this, by exposing our own standalone nodemap class,
-    /// ready to accept `Index`.
-/*    def get_cindex(&self) -> PyResult<PyObject> {
-        Ok(self.cindex(py).borrow().inner().clone_ref(py))
-    }
-*/
-    // Index API involving nodemap, as defined in mercurial/pure/parsers.py
-
-    /// Return Revision if found, raises a bare `error.RevlogError`
-    /// in case of ambiguity, same as C version does
-    def get_rev(&self, node: PyBytes) -> PyResult<Option<PyRevision>> {
-        let opt = self.get_nodetree(py)?.borrow();
-        let nt = opt.as_ref().unwrap();
-        let ridx = &*self.index(py).borrow();
-        let node = node_from_py_bytes(py, &node)?;
-        let rust_rev =
-            nt.find_bin(ridx, node.into()).map_err(|e| nodemap_error(py, e))?;
-        Ok(rust_rev.map(Into::into))
-
-    }
-
-    /// same as `get_rev()` but raises a bare `error.RevlogError` if node
-    /// is not found.
-    ///
-    /// No need to repeat `node` in the exception, `mercurial/revlog.py`
-    /// will catch and rewrap with it
-    def rev(&self, node: PyBytes) -> PyResult<PyRevision> {
-        self.get_rev(py, node)?.ok_or_else(|| revlog_error(py))
-    }
-
-    /// return True if the node exist in the index
-    def has_node(&self, node: PyBytes) -> PyResult<bool> {
-        // TODO OPTIM we could avoid a needless conversion here,
-        // to do when scaffolding for pure Rust switch is removed,
-        // as `get_rev()` currently does the necessary assertions
-        self.get_rev(py, node).map(|opt| opt.is_some())
-    }
-
-    /// find length of shortest hex nodeid of a binary ID
-    def shortest(&self, node: PyBytes) -> PyResult<usize> {
-        let opt = self.get_nodetree(py)?.borrow();
-        let nt = opt.as_ref().unwrap();
-        let idx = &*self.index(py).borrow();
-        match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
-        {
-            Ok(Some(l)) => Ok(l),
-            Ok(None) => Err(revlog_error(py)),
-            Err(e) => Err(nodemap_error(py, e)),
-        }
-    }
-
-    def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> {
-        let opt = self.get_nodetree(py)?.borrow();
-        let nt = opt.as_ref().unwrap();
-        let idx = &*self.index(py).borrow();
-
-        let node_as_string = if cfg!(feature = "python3-sys") {
-            node.cast_as::<PyString>(py)?.to_string(py)?.to_string()
-        }
-        else {
-            let node = node.extract::<PyBytes>(py)?;
-            String::from_utf8_lossy(node.data(py)).to_string()
-        };
-
-        let prefix = NodePrefix::from_hex(&node_as_string)
-            .map_err(|_| PyErr::new::<ValueError, _>(
-                py, format!("Invalid node or prefix '{}'", node_as_string))
-            )?;
-
-        nt.find_bin(idx, prefix)
-            // TODO make an inner API returning the node directly
-            .map(|opt| opt.map(
-                |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes())))
-            .map_err(|e| nodemap_error(py, e))
-
-    }
-
-    /// append an index entry
-    def append(&self, tup: PyTuple) -> PyResult<PyObject> {
-        if tup.len(py) < 8 {
-            // this is better than the panic promised by tup.get_item()
-            return Err(
-                PyErr::new::<IndexError, _>(py, "tuple index out of range"))
-        }
-        let node_bytes = tup.get_item(py, 7).extract(py)?;
-        let node = node_from_py_object(py, &node_bytes)?;
-
-        let rev = self.len(py)? as BaseRevision;
-
-        // This is ok since we will just add the revision to the index
-        let rev = Revision(rev);
-        self.index(py)
-            .borrow_mut()
-            .append(py_tuple_to_revision_data_params(py, tup)?)
-            .unwrap();
-        let idx = &*self.index(py).borrow();
-        self.get_nodetree(py)?.borrow_mut().as_mut().unwrap()
-            .insert(idx, &node, rev)
-            .map_err(|e| nodemap_error(py, e))?;
-        Ok(py.None())
-    }
-
-    def __delitem__(&self, key: PyObject) -> PyResult<()> {
-        // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
-        let start = if let Ok(rev) = key.extract(py) {
-            UncheckedRevision(rev)
-        } else {
-            let start = key.getattr(py, "start")?;
-            UncheckedRevision(start.extract(py)?)
-        };
-        let start = self.index(py)
-            .borrow()
-            .check_revision(start)
-            .ok_or_else(|| {
-                nodemap_error(py, NodeMapError::RevisionNotInIndex(start))
-            })?;
-        self.index(py).borrow_mut().remove(start).unwrap();
-        let mut opt = self.get_nodetree(py)?.borrow_mut();
-        let nt = opt.as_mut().unwrap();
-        nt.invalidate_all();
-        self.fill_nodemap(py, nt)?;
-        Ok(())
-    }
-
-    //
-    // Index methods previously reforwarded to C index (tp_methods)
-    // Same ordering as in revlog.c
-    //
-
-    /// return the gca set of the given revs
-    def ancestors(&self, *args, **_kw) -> PyResult<PyObject> {
-        let rust_res = self.inner_ancestors(py, args)?;
-        Ok(rust_res)
-    }
-
-    /// return the heads of the common ancestors of the given revs
-    def commonancestorsheads(&self, *args, **_kw) -> PyResult<PyObject> {
-        let rust_res = self.inner_commonancestorsheads(py, args)?;
-        Ok(rust_res)
-    }
-
-    /// Clear the index caches and inner py_class data.
-    /// It is Python's responsibility to call `update_nodemap_data` again.
-    def clearcaches(&self) -> PyResult<PyObject> {
-        self.nt(py).borrow_mut().take();
-        self.docket(py).borrow_mut().take();
-        self.nodemap_mmap(py).borrow_mut().take();
-        self.head_revs_py_list(py).borrow_mut().take();
-        self.head_node_ids_py_list(py).borrow_mut().take();
-        self.index(py).borrow().clear_caches();
-        Ok(py.None())
-    }
-
-    /// return the raw binary string representing a revision
-    def entry_binary(&self, *args, **_kw) -> PyResult<PyObject> {
-        let rindex = self.index(py).borrow();
-        let rev = UncheckedRevision(args.get_item(py, 0).extract(py)?);
-        let rust_bytes = rindex.check_revision(rev).and_then(
-            |r| rindex.entry_binary(r))
-            .ok_or_else(|| rev_not_in_index(py, rev))?;
-        let rust_res = PyBytes::new(py, rust_bytes).into_object();
-        Ok(rust_res)
-    }
-
-    /// return a binary packed version of the header
-    def pack_header(&self, *args, **_kw) -> PyResult<PyObject> {
-        let rindex = self.index(py).borrow();
-        let packed = rindex.pack_header(args.get_item(py, 0).extract(py)?);
-        let rust_res = PyBytes::new(py, &packed).into_object();
-        Ok(rust_res)
-    }
-
-    /// compute phases
-    def computephasesmapsets(&self, *args, **_kw) -> PyResult<PyObject> {
-        let py_roots = args.get_item(py, 0).extract::<PyDict>(py)?;
-        let rust_res = self.inner_computephasesmapsets(py, py_roots)?;
-        Ok(rust_res)
-    }
-
-    /// reachableroots
-    def reachableroots2(&self, *args, **_kw) -> PyResult<PyObject> {
-        let rust_res = self.inner_reachableroots2(
-            py,
-            UncheckedRevision(args.get_item(py, 0).extract(py)?),
-            args.get_item(py, 1),
-            args.get_item(py, 2),
-            args.get_item(py, 3).extract(py)?,
-        )?;
-        Ok(rust_res)
-    }
-
-    /// get head revisions
-    def headrevs(&self, *args, **_kw) -> PyResult<PyObject> {
-        let (filtered_revs, stop_rev) = match &args.len(py) {
-             0 => Ok((py.None(), py.None())),
-             1 => Ok((args.get_item(py, 0), py.None())),
-             2 => Ok((args.get_item(py, 0), args.get_item(py, 1))),
-             _ => Err(PyErr::new::<cpython::exc::TypeError, _>(py, "too many arguments")),
-        }?;
-        self.inner_headrevs(py, &filtered_revs, &stop_rev)
-    }
-
-    /// get head nodeids
-    def head_node_ids(&self) -> PyResult<PyObject> {
-        let rust_res = self.inner_head_node_ids(py)?;
-        Ok(rust_res)
-    }
-
-    /// get diff in head revisions
-    def headrevsdiff(&self, *args, **_kw) -> PyResult<PyObject> {
-        let rust_res = self.inner_headrevsdiff(
-          py,
-          &args.get_item(py, 0),
-          &args.get_item(py, 1))?;
-        Ok(rust_res)
-    }
-
-    /// True if the object is a snapshot
-    def issnapshot(&self, *args, **_kw) -> PyResult<bool> {
-        let index = self.index(py).borrow();
-        let result = index
-            .is_snapshot(UncheckedRevision(args.get_item(py, 0).extract(py)?))
-            .map_err(|e| {
-                PyErr::new::<cpython::exc::ValueError, _>(py, e.to_string())
-            })?;
-        Ok(result)
-    }
-
-    /// Gather snapshot data in a cache dict
-    def findsnapshots(&self, *args, **_kw) -> PyResult<PyObject> {
-        let index = self.index(py).borrow();
-        let cache: PyDict = args.get_item(py, 0).extract(py)?;
-        // this methods operates by setting new values in the cache,
-        // hence we will compare results by letting the C implementation
-        // operate over a deepcopy of the cache, and finally compare both
-        // caches.
-        let c_cache = PyDict::new(py);
-        for (k, v) in cache.items(py) {
-            c_cache.set_item(py, k, PySet::new(py, v)?)?;
-        }
-
-        let start_rev = UncheckedRevision(args.get_item(py, 1).extract(py)?);
-        let end_rev = UncheckedRevision(args.get_item(py, 2).extract(py)?);
-        let mut cache_wrapper = PySnapshotsCache{ py, dict: cache };
-        index.find_snapshots(
-            start_rev,
-            end_rev,
-            &mut cache_wrapper,
-        ).map_err(|_| revlog_error(py))?;
-        Ok(py.None())
-    }
-
-    /// determine revisions with deltas to reconstruct fulltext
-    def deltachain(&self, *args, **_kw) -> PyResult<PyObject> {
-        let index = self.index(py).borrow();
-        let rev = args.get_item(py, 0).extract::<BaseRevision>(py)?.into();
-        let stop_rev =
-            args.get_item(py, 1).extract::<Option<BaseRevision>>(py)?;
-        let rev = index.check_revision(rev).ok_or_else(|| {
-            nodemap_error(py, NodeMapError::RevisionNotInIndex(rev))
-        })?;
-        let stop_rev = if let Some(stop_rev) = stop_rev {
-            let stop_rev = UncheckedRevision(stop_rev);
-            Some(index.check_revision(stop_rev).ok_or_else(|| {
-                nodemap_error(py, NodeMapError::RevisionNotInIndex(stop_rev))
-            })?)
-        } else {None};
-        let using_general_delta = args.get_item(py, 2)
-            .extract::<Option<u32>>(py)?
-            .map(|i| i != 0);
-        let (chain, stopped) = index.delta_chain(
-            rev, stop_rev, using_general_delta
-        ).map_err(|e| {
-            PyErr::new::<cpython::exc::ValueError, _>(py, e.to_string())
-        })?;
-
-        let chain: Vec<_> = chain.into_iter().map(|r| r.0).collect();
-        Ok(
-            PyTuple::new(
-                py,
-                &[
-                    chain.into_py_object(py).into_object(),
-                    stopped.into_py_object(py).into_object()
-                ]
-            ).into_object()
-        )
-
-    }
-
-    /// slice planned chunk read to reach a density threshold
-    def slicechunktodensity(&self, *args, **_kw) -> PyResult<PyObject> {
-        let rust_res = self.inner_slicechunktodensity(
-            py,
-            args.get_item(py, 0),
-            args.get_item(py, 1).extract(py)?,
-            args.get_item(py, 2).extract(py)?
-        )?;
-        Ok(rust_res)
-    }
-
-    // index_sequence_methods and index_mapping_methods.
-    //
-    // Since we call back through the high level Python API,
-    // there's no point making a distinction between index_get
-    // and index_getitem.
-    // gracinet 2023: this above is no longer true for the pure Rust impl
-
-    def __len__(&self) -> PyResult<usize> {
-        self.len(py)
-    }
-
-    def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
-        let rust_res = self.inner_getitem(py, key.clone_ref(py))?;
-        Ok(rust_res)
-    }
-
-    def __contains__(&self, item: PyObject) -> PyResult<bool> {
-        // ObjectProtocol does not seem to provide contains(), so
-        // this is an equivalent implementation of the index_contains()
-        // defined in revlog.c
-        match item.extract::<i32>(py) {
-            Ok(rev) => {
-                Ok(rev >= -1 && rev < self.len(py)? as BaseRevision)
-            }
-            Err(_) => {
-                let item_bytes: PyBytes = item.extract(py)?;
-                let rust_res = self.has_node(py, item_bytes)?;
-                Ok(rust_res)
-            }
-        }
-    }
-
-    def nodemap_data_all(&self) -> PyResult<PyBytes> {
-        self.inner_nodemap_data_all(py)
-    }
-
-    def nodemap_data_incremental(&self) -> PyResult<PyObject> {
-        self.inner_nodemap_data_incremental(py)
-    }
-    def update_nodemap_data(
-        &self,
-        docket: PyObject,
-        nm_data: PyObject
-    ) -> PyResult<PyObject> {
-        self.inner_update_nodemap_data(py, docket, nm_data)
-    }
-
-    @property
-    def entry_size(&self) -> PyResult<PyInt> {
-        let rust_res: PyInt = INDEX_ENTRY_SIZE.to_py_object(py);
-        Ok(rust_res)
-    }
-
-    @property
-    def rust_ext_compat(&self) -> PyResult<PyInt> {
-        // will be entirely removed when the Rust index yet useful to
-        // implement in Rust to detangle things when removing `self.cindex`
-        let rust_res: PyInt = 1.to_py_object(py);
-        Ok(rust_res)
-    }
-
-    @property
-    def is_rust(&self) -> PyResult<PyBool> {
-        Ok(false.to_py_object(py))
-    }
-
-});
-
 /// Take a (potentially) mmap'ed buffer, and return the underlying Python
 /// buffer along with the Rust slice into said buffer. We need to keep the
 /// Python buffer around, otherwise we'd get a dangling pointer once the buffer
@@ -538,7 +158,7 @@
         .extract::<PyBytes>(py)?
         .data(py)
         .try_into()
-        .unwrap();
+        .expect("nodeid should be set");
     let flags = (offset_or_flags & 0xFFFF) as u16;
     let data_offset = offset_or_flags >> 16;
     Ok(RevisionDataParams {
@@ -622,35 +242,1168 @@
     }
 }

-impl Index {
-    fn new(py: Python, data: PyObject, header: u32) -> PyResult<Self> {
-        // Safety: we keep the buffer around inside the class as `index_mmap`
-        let (buf, bytes) = unsafe { mmap_keeparound(py, data)? };
+// Rust has no generic statics, so one `OnceLock<(PyObject, T)>` cannot be
+// shared by all config types. Each of the three config structs therefore
+// gets its own hand-written cache, all consumed by `with_filelog_cache`.
+static DELTA_CONFIG_CACHE: OnceLock<(PyObject, RevlogDeltaConfig)> =
+    OnceLock::new();
+static DATA_CONFIG_CACHE: OnceLock<(PyObject, RevlogDataConfig)> =
+    OnceLock::new();
+static FEATURE_CONFIG_CACHE: OnceLock<(PyObject, RevlogFeatureConfig)> =
+    OnceLock::new();
+
+/// Convert a Python config object to its Rust counterpart with `callback`.
+///
+/// For filelogs, the first successful conversion is stored in `cache` and
+/// reused while the incoming Python config compares equal to the stored one.
+fn with_filelog_cache<T: Copy>(
+    py: Python,
+    py_config: &PyObject,
+    revlog_type: RevlogType,
+    cache: &OnceLock<(PyObject, T)>,
+    callback: impl Fn() -> PyResult<T>,
+) -> PyResult<T> {
+    // Only filelogs ever consult or fill the cache.
+    let is_filelog = revlog_type == RevlogType::Filelog;
+    let cached = if is_filelog { cache.get() } else { None };
+    if let Some((cached_py_config, rust_config)) = cached {
+        // All filelogs of a repository *most likely* share the exact same
+        // config, but an extension may do some magic with configs, or this
+        // code may run in a longer-lived process: compare with the source
+        // `PyObject` in case it changed, at the cost of some overhead.
+        // `py_config.eq(cached_py_config)` cannot be used, because config
+        // objects are all distinct in Python and `a is b` is false.
+        if py_config.compare(py, cached_py_config)?.is_eq() {
+            return Ok(*rust_config);
+        }
+    }
+    let config = callback()?;
+    // Do not call the lock unnecessarily if it has already been set.
+    if is_filelog && cached.is_none() {
+        cache.set((py_config.clone_ref(py), config)).ok();
+    }
+    Ok(config)
+}
+
+/// Build the Rust `RevlogDeltaConfig` out of its Python counterpart.
+///
+/// Each field is read from the attribute of the same name on `py_config`,
+/// and any Python error raised while doing so is propagated.
+///
+/// The conversion is routed through `with_filelog_cache`, backed by
+/// `DELTA_CONFIG_CACHE`.
+fn extract_delta_config(
+    py: Python,
+    py_config: PyObject,
+    revlog_type: RevlogType,
+) -> PyResult<RevlogDeltaConfig> {
+    let get_delta_config = || {
+        // Fetch one attribute of `py_config` and convert it to the type
+        // expected where the macro is used, bailing out on a Python error.
+        macro_rules! attr {
+            ($name:literal) => {
+                py_config.getattr(py, $name)?.extract(py)?
+            };
+        }
+
+        // A negative span on the Python side is represented by `None`.
+        let span: i64 = attr!("max_deltachain_span");
+        let max_deltachain_span = if span < 0 {
+            None
+        } else {
+            Some(span as u64)
+        };
+
+        Ok(RevlogDeltaConfig {
+            general_delta: attr!("general_delta"),
+            sparse_revlog: attr!("sparse_revlog"),
+            max_chain_len: attr!("max_chain_len"),
+            max_deltachain_span,
+            upper_bound_comp: attr!("upper_bound_comp"),
+            delta_both_parents: attr!("delta_both_parents"),
+            candidate_group_chunk_size: attr!("candidate_group_chunk_size"),
+            debug_delta: attr!("debug_delta"),
+            lazy_delta: attr!("lazy_delta"),
+            lazy_delta_base: attr!("lazy_delta_base"),
+        })
+    };
+    with_filelog_cache(
+        py,
+        &py_config,
+        revlog_type,
+        &DELTA_CONFIG_CACHE,
+        get_delta_config,
+    )
+}
+
+/// Build the Rust `RevlogDataConfig` out of its Python counterpart.
+///
+/// Each field is read from the attribute of the same name on `py_config`
+/// (except `general_delta`, read from `generaldelta`), and any Python error
+/// raised while doing so is propagated.
+///
+/// The conversion is routed through `with_filelog_cache`, backed by
+/// `DATA_CONFIG_CACHE`.
+fn extract_data_config(
+    py: Python,
+    py_config: PyObject,
+    revlog_type: RevlogType,
+) -> PyResult<RevlogDataConfig> {
+    let get_data_config = || {
+        // Fetch one attribute of `py_config` and convert it to the type
+        // expected where the macro is used, bailing out on a Python error.
+        macro_rules! attr {
+            ($name:literal) => {
+                py_config.getattr(py, $name)?.extract(py)?
+            };
+        }
+
+        Ok(RevlogDataConfig {
+            try_pending: attr!("try_pending"),
+            try_split: attr!("try_split"),
+            check_ambig: attr!("check_ambig"),
+            mmap_large_index: attr!("mmap_large_index"),
+            mmap_index_threshold: attr!("mmap_index_threshold"),
+            chunk_cache_size: attr!("chunk_cache_size"),
+            uncompressed_cache_factor: attr!("uncompressed_cache_factor"),
+            uncompressed_cache_count: attr!("uncompressed_cache_count"),
+            with_sparse_read: attr!("with_sparse_read"),
+            sr_density_threshold: attr!("sr_density_threshold"),
+            sr_min_gap_size: attr!("sr_min_gap_size"),
+            // The Python attribute is spelled without an underscore.
+            general_delta: attr!("generaldelta"),
+        })
+    };
+
+    // For filelogs, this may reuse the result of an earlier conversion.
+    with_filelog_cache(
+        py,
+        &py_config,
+        revlog_type,
+        &DATA_CONFIG_CACHE,
+        get_data_config,
+    )
+}

-        Self::create_instance(
+/// Build the Rust `RevlogFeatureConfig` out of its Python counterpart.
+///
+/// `compression_engine` must be `b"zlib"`, `b"zstd"` or `b"none"`, any other
+/// value results in a `ValueError`. The compression level is looked up in
+/// `compression_engine_options`: under `zlib.level` for zlib, and for zstd
+/// under `zstd.level`, then under `level` if the former key is missing.
+///
+/// The conversion is routed through `with_filelog_cache`, backed by
+/// `FEATURE_CONFIG_CACHE`.
+///
+/// # Panics
+///
+/// If the compression config rejects the level received from Python.
+fn extract_feature_config(
+    py: Python,
+    py_config: PyObject,
+    revlog_type: RevlogType,
+) -> PyResult<RevlogFeatureConfig> {
+    let get_feature_config = || {
+        let engine_name = &py_config
+            .getattr(py, "compression_engine")?
+            .extract::<PyBytes>(py)?;
+        let compression_engine = match engine_name.data(py) {
+            b"zlib" => {
+                let options = &py_config
+                    .getattr(py, "compression_engine_options")?
+                    .extract::<PyDict>(py)?;
+                let py_level = options
+                    .get_item(py, PyBytes::new(py, &b"zlib.level"[..]));
+                // A missing key and a `None` value both mean "no level".
+                let level = match py_level {
+                    Some(level) if !level.is_none(py) => {
+                        Some(level.extract(py)?)
+                    }
+                    _ => None,
+                };
+                let mut engine = CompressionConfig::default();
+                if let Some(level) = level {
+                    engine
+                        .set_level(level)
+                        .expect("invalid compression level from Python");
+                }
+                engine
+            }
+            b"zstd" => {
+                let options = &py_config
+                    .getattr(py, "compression_engine_options")?
+                    .extract::<PyDict>(py)?;
+                // `zstd.level` takes precedence whenever the key exists,
+                // even with a `None` value: the generic `level` key is
+                // only consulted when `zstd.level` is missing.
+                let py_level = options
+                    .get_item(py, PyBytes::new(py, &b"zstd.level"[..]))
+                    .or_else(|| {
+                        options.get_item(py, PyBytes::new(py, &b"level"[..]))
+                    });
+                let level = match py_level {
+                    Some(level) if !level.is_none(py) => {
+                        Some(level.extract(py)?)
+                    }
+                    _ => None,
+                };
+                CompressionConfig::zstd(level)
+                    .expect("invalid compression level from Python")
+            }
+            b"none" => CompressionConfig::None,
+            other => {
+                let msg = format!(
+                    "invalid compression engine {}",
+                    String::from_utf8_lossy(other)
+                );
+                return Err(PyErr::new::<ValueError, _>(py, msg));
+            }
+        };
+        Ok(RevlogFeatureConfig {
+            compression_engine,
+            censorable: py_config.getattr(py, "censorable")?.extract(py)?,
+            has_side_data: py_config
+                .getattr(py, "has_side_data")?
+                .extract(py)?,
+            compute_rank: py_config
+                .getattr(py, "compute_rank")?
+                .extract(py)?,
+            canonical_parent_order: py_config
+                .getattr(py, "canonical_parent_order")?
+                .extract(py)?,
+            enable_ellipsis: py_config
+                .getattr(py, "enable_ellipsis")?
+                .extract(py)?,
+        })
+    };
+    with_filelog_cache(
+        py,
+        &py_config,
+        revlog_type,
+        &FEATURE_CONFIG_CACHE,
+        get_feature_config,
+    )
+}
+
+/// Build a `mercurial.error.RevlogError` whose message is `e`, as bytes.
+/// If the class cannot be imported or instantiated, the Python error raised
+/// by that failure is returned instead.
+fn revlog_error_from_msg(py: Python, e: impl ToString) -> PyErr {
+    let msg = e.to_string();
+    let instance = py
+        .import("mercurial.error")
+        .and_then(|m| m.get(py, "RevlogError"))
+        .and_then(|cls| {
+            let msg = PyBytes::new(py, msg.as_bytes());
+            cls.call(py, (msg,), None)
+        });
+    match instance {
+        Err(e) => e,
+        Ok(instance) => PyErr::from_instance(py, instance),
+    }
+}
+
+// Context manager handed out by `InnerRevlog.reading()`: the wrapped revlog
+// enters its reading context in `__enter__` and leaves it in `__exit__`.
+py_class!(pub class ReadingContextManager |py| {
+    data inner_revlog: RefCell<InnerRevlog>;
+
+    // Enter the reading context of the wrapped revlog.
+    //
+    // On failure, the reading context is exited again and the error is
+    // converted with `revlog_error_from_msg`.
+    def __enter__(&self) -> PyResult<PyObject> {
+        let revlog = self.inner_revlog(py).borrow();
+        let inner = revlog.inner(py).borrow();
+        if let Err(e) = inner.enter_reading_context() {
+            let e = revlog_error_from_msg(py, e);
+            // Python does not call `__exit__` when `__enter__` fails, so
+            // the cleanup has to happen here.
+            inner.exit_reading_context();
+            return Err(e);
+        }
+        Ok(py.None())
+    }
+
+    // Exit the reading context of the wrapped revlog.
+    def __exit__(
+        &self,
+        ty: Option<PyType>,
+        value: PyObject,
+        traceback: PyObject
+    ) -> PyResult<PyObject> {
+        // The exception details handed over by Python are not used; the
+        // bindings below keep clippy from complaining without adding a
+        // general rule.
+        let _ = ty;
+        let _ = value;
+        let _ = traceback;
+
+        let revlog = self.inner_revlog(py).borrow();
+        let inner = revlog.inner(py).borrow();
+        inner.exit_reading_context();
+        Ok(py.None())
+    }
+});
+
+// Exposes a raw file descriptor to Python. Only used from Python *tests*
+py_class!(pub class PyFileHandle |py| {
+    data inner_file: RefCell<std::os::fd::RawFd>;
+
+    def tell(&self) -> PyResult<PyObject> {
+        let locals = PyDict::new(py);
+        locals.set_item(py, "os", py.import("os")?)?;
+        locals.set_item(py, "fd", *self.inner_file(py).borrow())?;
+        let f = py.eval("os.fdopen(fd)", None, Some(&locals))?;
+
+        // Leak one reference to `f`, which prevents Python from closing the
+        // file after garbage collecting. This is fine since Rust is still
+        // holding on to the actual File (and it's only used in tests).
+        std::mem::forget(f.clone_ref(py));
+
+        locals.set_item(py, "f", f)?;
+        let res = py.eval("f.tell()", None, Some(&locals))?;
+        Ok(res)
+    }
+});
+
+/// Wrapper around a Python transaction object, so that `hg-core` can use it
+/// (through the `Transaction` trait) without knowing it is called from Python.
+pub struct PyTransaction {
+    inner: PyObject,
+}
+
+impl PyTransaction {
+    pub fn new(inner: PyObject) -> Self {
+        Self { inner }
+    }
+}
+
+impl Clone for PyTransaction {
+    /// Take a new reference to the same Python transaction object.
+    fn clone(&self) -> Self {
+        let gil = Python::acquire_gil();
+        let py = gil.python();
+        let inner = self.inner.clone_ref(py);
+        Self { inner }
+    }
+}
+
+impl Transaction for PyTransaction {
+    /// Forward to the `add` method of the Python transaction.
+    fn add(&mut self, file: impl AsRef<std::path::Path>, offset: usize) {
+        let gil = Python::acquire_gil();
+        let py = gil.python();
+        let path = PyBytes::new(py, &get_bytes_from_path(file.as_ref()));
+        let res = self.inner.call_method(py, "add", (path, offset), None);
+        res.expect("transaction add failed");
+    }
+}
+
+// Context manager handed out by `InnerRevlog.writing()`: the wrapped revlog
+// enters its writing context in `__enter__` and leaves it in `__exit__`.
+py_class!(pub class WritingContextManager |py| {
+    data inner_revlog: RefCell<InnerRevlog>;
+    data transaction: RefCell<PyTransaction>;
+    data data_end: Cell<Option<usize>>;
+
+    // Enter the writing context, handing it `data_end` and the transaction.
+    // On failure, the writing context is exited again and the error is
+    // converted with `revlog_error_from_msg`.
+    def __enter__(&self) -> PyResult<PyObject> {
+        let revlog = self.inner_revlog(py).borrow_mut();
+        let mut inner = revlog.inner(py).borrow_mut();
+        let data_end = self.data_end(py).get();
+        let mut transaction = self.transaction(py).borrow_mut();
+        let enter_result =
+            inner.enter_writing_context(data_end, &mut *transaction);
+        if let Err(e) = enter_result {
+            let e = revlog_error_from_msg(py, e);
+            // Python does not call `__exit__` when `__enter__` fails, so
+            // the cleanup has to happen here.
+            inner.exit_writing_context();
+            return Err(e);
+        }
+        Ok(py.None())
+    }
+
+    // Exit the writing context of the wrapped revlog.
+    def __exit__(
+        &self,
+        ty: Option<PyType>,
+        value: PyObject,
+        traceback: PyObject
+    ) -> PyResult<PyObject> {
+        // The exception details handed over by Python are not used; the
+        // bindings below keep clippy from complaining without adding a
+        // general rule.
+        let _ = ty;
+        let _ = value;
+        let _ = traceback;
+
+        let revlog = self.inner_revlog(py).borrow_mut();
+        revlog.inner(py).borrow_mut().exit_writing_context();
+        Ok(py.None())
+    }
+});
+
+py_class!(pub class InnerRevlog |py| {
+    @shared data inner: CoreInnerRevlog;
+    data nt: RefCell<Option<CoreNodeTree>>;
+    data docket: RefCell<Option<PyObject>>;
+    // Holds a reference to the mmap'ed persistent nodemap data
+    data nodemap_mmap: RefCell<Option<PyBuffer>>;
+    // Holds a reference to the mmap'ed persistent index data
+    data index_mmap: RefCell<PyBuffer>;
+    data head_revs_py_list: RefCell<Option<PyList>>;
+    data head_node_ids_py_list: RefCell<Option<PyList>>;
+    data revision_cache: RefCell<Option<PyObject>>;
+
+    def __new__(
+        _cls,
+        opener: PyObject,
+        index_data: PyObject,
+        index_file: PyObject,
+        data_file: PyObject,
+        sidedata_file: PyObject,
+        inline: bool,
+        data_config: PyObject,
+        delta_config: PyObject,
+        feature_config: PyObject,
+        chunk_cache: PyObject,
+        default_compression_header: PyObject,
+        revlog_type: usize,
+    ) -> PyResult<Self> {
+        Self::inner_new(
             py,
-            hg::index::Index::new(
-                bytes,
-                IndexHeader::parse(&header.to_be_bytes())
-                    .expect("default header is broken"),
+            opener,
+            index_data,
+            index_file,
+            data_file,
+            sidedata_file,
+            inline,
+            data_config,
+            delta_config,
+            feature_config,
+            chunk_cache,
+            default_compression_header,
+            revlog_type
+        )
+    }
+
+    def clear_cache(&self) -> PyResult<PyObject> {
+        assert!(!self.is_delaying(py)?);
+        self.revision_cache(py).borrow_mut().take();
+        self.inner(py).borrow_mut().clear_cache();
+        Ok(py.None())
+    }
+
+    @property def canonical_index_file(&self) -> PyResult<PyBytes> {
+        let path = self.inner(py).borrow().canonical_index_file();
+        Ok(PyBytes::new(py, &get_bytes_from_path(path)))
+    }
+
+    @property def is_delaying(&self) -> PyResult<bool> {
+        Ok(self.inner(py).borrow().is_delaying())
+    }
+
+    @property def _revisioncache(&self) -> PyResult<PyObject> {
+        // Hand out a new reference to the Python object held in
+        // `revision_cache`, or `None` when there is none.
+        let cache = self.revision_cache(py).borrow();
+        let value = match cache.as_ref() {
+            Some(cached) => cached.clone_ref(py),
+            None => py.None(),
+        };
+        Ok(value)
+    }
+
+    @property def _writinghandles(&self) -> PyResult<PyObject> {
+        use std::os::fd::AsRawFd;
+
+        // Expose the handles reported by `python_writing_handles` as an
+        // `(index, data, sidedata)` tuple of Python objects, or as `None`.
+        let inner = self.inner(py).borrow();
+        let maybe_handles = inner.python_writing_handles();
+        let handles = match maybe_handles.as_ref() {
+            Some(handles) => handles,
+            None => return Ok(py.None()),
+        };
+
+        // Wrap a raw file descriptor for consumption by Python.
+        let wrap = |fd| PyFileHandle::create_instance(py, RefCell::new(fd));
+
+        // The data handle is optional, the index handle is always there.
+        let data_handle = match &handles.data_handle {
+            Some(handle) => Some(wrap(handle.file.as_raw_fd())?),
+            None => None,
+        };
+        let index_handle = wrap(handles.index_handle.file.as_raw_fd())?;
+
+        let py_handles = (
+            index_handle,
+            data_handle,
+            py.None(), // Sidedata handle
+        );
+        Ok(py_handles.to_py_object(py).into_object())
+    }
+
+    @_revisioncache.setter def set_revision_cache(
+        &self,
+        value: Option<PyObject>
+    ) -> PyResult<()> {
+        // Keep the Python object itself, so that the `_revisioncache`
+        // getter can hand it back.
+        *self.revision_cache(py).borrow_mut() = value.clone_ref(py);
+
+        // Empty the cache of the core revlog.
+        let clear_core_cache = || {
+            self
+                .inner(py)
+                .borrow()
+                .last_revision_cache
+                .lock()
+                .expect("lock should not be held")
+                .take();
+        };
+
+        match value {
+            // This means the property has been deleted, *not* that the
+            // property has been set to `None`. Whatever happens is up
+            // to the implementation. Here we just empty the cache.
+            None => clear_core_cache(),
+            // The property has been set to Python's `None`.
+            Some(tuple) if tuple.is_none(py) => clear_core_cache(),
+            // Otherwise, mirror the `(node, rev, data)` tuple into the cache.
+            Some(tuple) => {
+                let node = tuple.get_item(py, 0)?.extract::<PyBytes>(py)?;
+                let node = node_from_py_bytes(py, &node)?;
+                let rev = tuple.get_item(py, 1)?.extract::<BaseRevision>(py)?;
+                // Ok because Python only sets this if the revision has been
+                // checked
+                let rev = Revision(rev);
+                let data = tuple.get_item(py, 2)?.extract::<PyBytes>(py)?;
+
+                let inner = self.inner(py).borrow();
+                let mut last_revision_cache = inner
+                    .last_revision_cache
+                    .lock()
+                    .expect("lock should not be held");
+                *last_revision_cache =
+                    Some((node, rev, Box::new(PyBytesDeref::new(py, data))));
+            }
+        }
+        Ok(())
+    }
+
+    @property def inline(&self) -> PyResult<bool> {
+        Ok(self.inner(py).borrow().is_inline())
+    }
+
+    @inline.setter def set_inline(
+        &self,
+        value: Option<PyObject>
+    ) -> PyResult<()> {
+        if let Some(v) = value {
+            self.inner(py).borrow_mut().inline = v.extract(py)?;
+        };
+        Ok(())
+    }
+
+    @property def index_file(&self) -> PyResult<PyBytes> {
+        Ok(
+            PyBytes::new(
+                py,
+                &get_bytes_from_path(&self.inner(py).borrow().index_file)
             )
-            .map_err(|e| {
-                revlog_error_with_msg(py, e.to_string().as_bytes())
-            })?,
-            RefCell::new(None),
-            RefCell::new(None),
-            RefCell::new(None),
-            RefCell::new(Some(buf)),
-            RefCell::new(None),
-            RefCell::new(None),
         )
     }

+    @index_file.setter def set_index_file(
+        &self,
+        value: Option<PyObject>
+    ) -> PyResult<()> {
+        let path = get_path_from_bytes(
+            value
+                .expect("don't delete the index path")
+                .extract::<PyBytes>(py)?
+                .data(py)
+        ).to_owned();
+        self.inner(py).borrow_mut().index_file = path;
+        Ok(())
+    }
+
+    @property def is_writing(&self) -> PyResult<bool> {
+        Ok(self.inner(py).borrow().is_writing())
+    }
+
+    @property def is_open(&self) -> PyResult<bool> {
+        Ok(self.inner(py).borrow().is_open())
+    }
+
+    def issnapshot(&self, rev: PyRevision) -> PyResult<bool> {
+        self.inner_issnapshot(py, UncheckedRevision(rev.0))
+    }
+
+    def _deltachain(&self, *args, **kw) -> PyResult<PyObject> {
+        // Forward to `_index_deltachain` as `(rev, stoprev, generaldelta)`.
+        let inner = self.inner(py).borrow();
+        let general_delta = inner.index.uses_generaldelta();
+        let rev = args.get_item(py, 0);
+        let stop_rev = kw.and_then(|d| d.get_item(py, "stoprev"));
+        let items = [
+            rev,
+            stop_rev.to_py_object(py),
+            general_delta.to_py_object(py).into_object(),
+        ];
+        self._index_deltachain(py, &PyTuple::new(py, &items), kw)
+    }
+
+    def compress(&self, data: PyObject) -> PyResult<PyTuple> {
+        // Returns a `(header, data)` pair of bytes; a compression failure
+        // is converted with `revlog_error_from_msg`.
+        let inner = self.inner(py).borrow();
+        let py_buffer = PyBuffer::get(py, &data)?;
+        let raw = PyBufferDeref::new(py, py_buffer)?;
+        let compressed = inner
+            .compress(&raw)
+            .map_err(|e| revlog_error_from_msg(py, e))?;
+        // Without a compressed form, the input is handed back as is, behind
+        // a `u` header; a compressed form comes with an empty header.
+        let (header, payload) = match compressed.as_deref() {
+            Some(compressed) => (&b""[..], compressed),
+            None => (&b"u"[..], &*raw),
+        };
+        let header = PyBytes::new(py, header);
+        let payload = PyBytes::new(py, payload);
+        Ok((header, payload).to_py_object(py))
+    }
+
+    def reading(&self) -> PyResult<ReadingContextManager> {
+        ReadingContextManager::create_instance(
+            py,
+            RefCell::new(self.clone_ref(py)),
+        )
+    }
+
+    def writing(
+        &self,
+        transaction: PyObject,
+        data_end: Option<usize>,
+        sidedata_end: Option<usize>,
+    ) -> PyResult<WritingContextManager> {
+        // Silence unused argument (only relevant for changelog v2)
+        let _ = sidedata_end;
+        let revlog = RefCell::new(self.clone_ref(py));
+        let transaction = RefCell::new(PyTransaction::new(transaction));
+        let data_end = Cell::new(data_end);
+        WritingContextManager::create_instance(
+            py, revlog, transaction, data_end,
+        )
+    }
+
+    def split_inline(
+        &self,
+        _tr: PyObject,
+        header: i32,
+        new_index_file_path: Option<PyObject>
+    ) -> PyResult<PyBytes> {
+        let mut inner = self.inner(py).borrow_mut();
+        // The optional new path arrives as Python bytes.
+        let new_index_file_path = new_index_file_path
+            .map(|path| -> PyResult<_> {
+                let path = path.extract::<PyBytes>(py)?;
+                Ok(get_path_from_bytes(path.data(py)).to_owned())
+            })
+            .transpose()?;
+        let header = hg::index::IndexHeader::parse(&header.to_be_bytes())
+            .expect("invalid header bytes");
+        let path = inner
+            .split_inline(header, new_index_file_path)
+            .map_err(|e| revlog_error_from_msg(py, e))?;
+        Ok(PyBytes::new(py, &get_bytes_from_path(path)))
+    }
+
+    def get_segment_for_revs(
+        &self,
+        startrev: PyRevision,
+        endrev: PyRevision,
+    ) -> PyResult<PyTuple> {
+        let inner = self.inner(py).borrow();
+        let (offset, data) = inner
+            .get_segment_for_revs(Revision(startrev.0), Revision(endrev.0))
+            .map_err(|e| revlog_error_from_msg(py, e))?;
+        let data = PyBytes::new(py, &data);
+        Ok((offset, data).to_py_object(py))
+    }
+
+    /// Return the raw text of revision `rev` as Python `bytes`.
+    ///
+    /// `_node` is accepted but not used here. Errors from the core revlog
+    /// are converted with `revlog_error_from_msg`.
+    def raw_text(
+        &self,
+        _node: PyObject,
+        rev: PyRevision
+    ) -> PyResult<PyBytes> {
+        let inner = self.inner(py).borrow();
+        // Placeholder, overwritten by the callback below
+        let mut py_bytes = PyBytes::new(py, &[]);
+        inner
+            .raw_text(Revision(rev.0), |size, f| {
+                // The core revlog tells us the `size` to allocate and gives
+                // us `f` to fill the buffer with
+                py_bytes = with_pybytes_buffer(py, size, f)?;
+                Ok(())
+            }).map_err(|e| revlog_error_from_msg(py, e))?;
+        Ok(py_bytes)
+    }
+
+    /// Return what the core revlog's `chunk_for_rev` yields for `rev`,
+    /// copied into Python `bytes`.
+    def _chunk(
+        &self,
+        rev: PyRevision,
+    ) -> PyResult<PyBytes> {
+        let revlog = self.inner(py).borrow();
+        let raw_chunk = revlog
+            .chunk_for_rev(Revision(rev.0))
+            .map_err(|e| revlog_error_from_msg(py, e))?;
+        Ok(PyBytes::new(py, &raw_chunk))
+    }
+
+    /// Forward a new entry to the core revlog's `write_entry` and return
+    /// its result converted to a Python tuple.
+    ///
+    /// `entry` is extracted as a bytes-like value. `data` is a tuple whose
+    /// first item is extracted as `bytes` (named `header` below) and whose
+    /// second item is the bytes payload. The underscore-prefixed parameters
+    /// are accepted but not used here.
+    def write_entry(
+        &self,
+        transaction: PyObject,
+        entry: PyObject,
+        data: PyTuple,
+        _link: PyObject,
+        offset: usize,
+        _sidedata: PyObject,
+        _sidedata_offset: PyInt,
+        index_end: Option<u64>,
+        data_end: Option<u64>,
+        _sidedata_end: Option<PyInt>,
+    ) -> PyResult<PyTuple> {
+        let mut inner = self.inner(py).borrow_mut();
+        let transaction = PyTransaction::new(transaction);
+        // Borrow the entry bytes through `PyBytesDeref`
+        let py_bytes = entry.extract(py)?;
+        let entry = PyBytesDeref::new(py, py_bytes);
+        // First item of `data`
+        let header = data.get_item(py, 0).extract::<PyBytes>(py)?;
+        let header = header.data(py);
+        // Second item of `data`
+        let data = data.get_item(py, 1);
+        let py_bytes = data.extract(py)?;
+        let data = PyBytesDeref::new(py, py_bytes);
+        Ok(
+            inner.write_entry(
+                transaction,
+                &entry,
+                (header, &data),
+                offset,
+                index_end,
+                data_end
+            ).map_err(|e| revlog_error_from_msg(py, e))?
+             .to_py_object(py)
+        )
+    }
+
+    /// Call the core revlog's `delay` and return the path it may yield as
+    /// Python `bytes` (`None` when it yields no path).
+    def delay(&self) -> PyResult<Option<PyBytes>> {
+        let delayed = self.inner(py).borrow_mut().delay();
+        let maybe_path =
+            delayed.map_err(|e| revlog_error_from_msg(py, e))?;
+        Ok(maybe_path
+            .map(|path| PyBytes::new(py, &get_bytes_from_path(path))))
+    }
+
+    /// Call the core revlog's `write_pending` and return a
+    /// `(path or None, any_pending)` Python tuple, the path being `bytes`.
+    def write_pending(&self) -> PyResult<PyTuple> {
+        let (path, any_pending) = self.inner(py)
+            .borrow_mut()
+            .write_pending()
+            .map_err(|e| revlog_error_from_msg(py, e))?;
+        let maybe_path = path.map_or_else(
+            || py.None(),
+            |p| PyBytes::new(py, &get_bytes_from_path(p)).into_object(),
+        );
+        Ok((maybe_path, any_pending).to_py_object(py))
+    }
+
+    /// Call the core revlog's `finalize_pending` and return the path it
+    /// yields as Python `bytes`.
+    def finalize_pending(&self) -> PyResult<PyBytes> {
+        let finalized = self.inner(py).borrow_mut().finalize_pending();
+        let final_path =
+            finalized.map_err(|e| revlog_error_from_msg(py, e))?;
+        Ok(PyBytes::new(py, &get_bytes_from_path(final_path)))
+    }
+
+    // -- forwarded index methods --
+
+    /// Look `node` up in the nodetree and return its revision, or `None`
+    /// if the nodetree does not know it.
+    def _index_get_rev(&self, node: PyBytes) -> PyResult<Option<PyRevision>> {
+        let maybe_nt = self.get_nodetree(py)?.borrow();
+        let nodetree = maybe_nt.as_ref().expect("nodetree should be set");
+        let index = &self.inner(py).borrow().index;
+        let binary_node = node_from_py_bytes(py, &node)?;
+        let found = nodetree
+            .find_bin(index, binary_node.into())
+            .map_err(|e| nodemap_error(py, e))?;
+        Ok(found.map(Into::into))
+    }
+
+    /// Variant of `_index_get_rev()` that raises a bare `error.RevlogError`
+    /// instead of returning `None` when the node is not found.
+    ///
+    /// The exception does not repeat `node`: `mercurial/revlog.py`
+    /// catches it and rewraps it with the node.
+    def _index_rev(&self, node: PyBytes) -> PyResult<PyRevision> {
+        match self._index_get_rev(py, node)? {
+            Some(rev) => Ok(rev),
+            None => Err(revlog_error(py)),
+        }
+    }
+
+    /// return True if the node exists in the index
+    def _index_has_node(&self, node: PyBytes) -> PyResult<bool> {
+        // TODO OPTIM a needless conversion could be avoided here once the
+        // scaffolding for the pure Rust switch is removed; for now
+        // `_index_get_rev()` performs the necessary assertions
+        let found = self._index_get_rev(py, node)?;
+        Ok(found.is_some())
+    }
+
+    /// find length of shortest hex nodeid of a binary ID
+    ///
+    /// Raises a bare `error.RevlogError` when the nodetree yields no length.
+    def _index_shortest(&self, node: PyBytes) -> PyResult<usize> {
+        let maybe_nt = self.get_nodetree(py)?.borrow();
+        let nodetree = maybe_nt.as_ref().expect("nodetree should be set");
+        let index = &self.inner(py).borrow().index;
+        let binary_node = node_from_py_bytes(py, &node)?;
+        nodetree
+            .unique_prefix_len_node(index, &binary_node)
+            .map_err(|e| nodemap_error(py, e))?
+            .ok_or_else(|| revlog_error(py))
+    }
+
+    /// Resolve a hexadecimal node prefix, given as `bytes`, to the full
+    /// binary node, or `None` if the nodetree finds no match.
+    ///
+    /// Raises `ValueError` if the prefix cannot be parsed as hexadecimal.
+    def _index_partialmatch(
+        &self,
+        node: PyObject
+    ) -> PyResult<Option<PyBytes>> {
+        let maybe_nt = self.get_nodetree(py)?.borrow();
+        let nodetree = maybe_nt.as_ref().expect("nodetree should be set");
+        let index = &self.inner(py).borrow().index;
+
+        let node = node.extract::<PyBytes>(py)?;
+        let node_as_string = String::from_utf8_lossy(node.data(py));
+
+        let prefix = match NodePrefix::from_hex(node_as_string.to_string()) {
+            Ok(prefix) => prefix,
+            Err(_) => {
+                return Err(PyErr::new::<ValueError, _>(
+                    py,
+                    format!("Invalid node or prefix '{}'", node_as_string),
+                ))
+            }
+        };
+
+        // TODO make an inner API returning the node directly
+        let found = nodetree
+            .find_bin(index, prefix)
+            .map_err(|e| nodemap_error(py, e))?;
+        Ok(found.map(|rev| {
+            let full_node = index.node(rev).expect("node should exist");
+            PyBytes::new(py, full_node.as_bytes())
+        }))
+    }
+
+    /// append an index entry
+    ///
+    /// `tup` must hold at least 8 items (`IndexError` otherwise); item 7 is
+    /// read as the node. The entry is appended to the index first, then the
+    /// node is inserted in the nodetree under the new revision number.
+    def _index_append(&self, tup: PyTuple) -> PyResult<PyObject> {
+        if tup.len(py) < 8 {
+            // this is better than the panic promised by tup.get_item()
+            return Err(
+                PyErr::new::<IndexError, _>(py, "tuple index out of range"))
+        }
+        let node_bytes = tup.get_item(py, 7).extract(py)?;
+        let node = node_from_py_object(py, &node_bytes)?;
+
+        // The new revision number is the length before appending
+        let rev = self.len(py)? as BaseRevision;
+
+        // This is ok since we will just add the revision to the index
+        let rev = Revision(rev);
+        self.inner(py)
+            .borrow_mut()
+            .index
+            .append(py_tuple_to_revision_data_params(py, tup)?)
+            .map_err(|e| revlog_error_from_msg(py, e))?;
+        // The mutable borrow above ended with its statement; the index is
+        // borrowed again, immutably, for the nodetree insertion
+        let idx = &self.inner(py).borrow().index;
+        self.get_nodetree(py)?
+            .borrow_mut()
+            .as_mut()
+            .expect("nodetree should be set")
+            .insert(idx, &node, rev)
+            .map_err(|e| nodemap_error(py, e))?;
+        Ok(py.None())
+    }
+
+    /// Remove revisions from the index, then rebuild the nodetree.
+    ///
+    /// `key` is either an integer revision or an object with a `start`
+    /// attribute (a slice); only the start is read and passed to the
+    /// index's `remove`.
+    def _index___delitem__(&self, key: PyObject) -> PyResult<PyObject> {
+        // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
+        let start = if let Ok(rev) = key.extract(py) {
+            UncheckedRevision(rev)
+        } else {
+            let start = key.getattr(py, "start")?;
+            UncheckedRevision(start.extract(py)?)
+        };
+        let mut borrow = self.inner(py).borrow_mut();
+        let start = borrow
+            .index
+            .check_revision(start)
+            .ok_or_else(|| {
+                nodemap_error(py, NodeMapError::RevisionNotInIndex(start))
+            })?;
+        borrow.index
+            .remove(start)
+            .map_err(|e| revlog_error_from_msg(py, e))?;
+        // Release the mutable borrow before the nodetree is refilled below
+        // (presumably `fill_nodemap` borrows the index again -- confirm)
+        drop(borrow);
+        let mut opt = self.get_nodetree(py)?.borrow_mut();
+        let nt = opt.as_mut().expect("nodetree should be set");
+        // Start over from an invalidated nodetree and refill it
+        nt.invalidate_all();
+        self.fill_nodemap(py, nt)?;
+        Ok(py.None())
+    }
+
+    /// return the gca set of the given revs
+    ///
+    /// The revisions are the positional arguments; keyword arguments are
+    /// ignored.
+    def _index_ancestors(&self, *args, **_kw) -> PyResult<PyObject> {
+        self.inner_ancestors(py, args)
+    }
+
+    /// return the heads of the common ancestors of the given revs
+    ///
+    /// The revisions are the positional arguments; keyword arguments are
+    /// ignored.
+    def _index_commonancestorsheads(
+        &self,
+        *args,
+        **_kw
+    ) -> PyResult<PyObject> {
+        self.inner_commonancestorsheads(py, args)
+    }
+
+    /// Clear the index caches and inner py_class data.
+    /// It is Python's responsibility to call `update_nodemap_data` again.
+    def _index_clearcaches(&self) -> PyResult<PyObject> {
+        // Empty every cached slot held by this object: nodetree, docket,
+        // nodemap mmap and the two cached head lists
+        self.nt(py).borrow_mut().take();
+        self.docket(py).borrow_mut().take();
+        self.nodemap_mmap(py).borrow_mut().take();
+        self.head_revs_py_list(py).borrow_mut().take();
+        self.head_node_ids_py_list(py).borrow_mut().take();
+        // Then let the core index clear its own caches
+        self.inner(py).borrow_mut().index.clear_caches();
+        Ok(py.None())
+    }
+
+    /// return the raw binary string representing a revision
+    ///
+    /// The revision is the first positional argument. An error built by
+    /// `rev_not_in_index` is raised if it is not a valid revision or if the
+    /// index has no binary entry for it.
+    def _index_entry_binary(&self, *args, **_kw) -> PyResult<PyObject> {
+        let rindex = &self.inner(py).borrow().index;
+        let rev = UncheckedRevision(args.get_item(py, 0).extract(py)?);
+        let rust_bytes = rindex
+            .check_revision(rev)
+            .and_then(|checked| rindex.entry_binary(checked))
+            .ok_or_else(|| rev_not_in_index(py, rev))?;
+        Ok(PyBytes::new(py, rust_bytes).into_object())
+    }
+
+
+    /// return a binary packed version of the header
+    ///
+    /// The header value is the first positional argument.
+    def _index_pack_header(&self, *args, **_kw) -> PyResult<PyObject> {
+        let rindex = &self.inner(py).borrow().index;
+        let header = args.get_item(py, 0).extract(py)?;
+        let packed = rindex.pack_header(header);
+        Ok(PyBytes::new(py, &packed).into_object())
+    }
+
+    /// compute phases
+    ///
+    /// The first positional argument must be a `dict` of phase roots.
+    def _index_computephasesmapsets(
+        &self,
+        *args,
+        **_kw
+    ) -> PyResult<PyObject> {
+        let roots_arg = args.get_item(py, 0);
+        let py_roots = roots_arg.extract::<PyDict>(py)?;
+        self.inner_computephasesmapsets(py, py_roots)
+    }
+
+    /// reachableroots
+    ///
+    /// Takes four positional arguments, forwarded in order to
+    /// `inner_reachableroots2`: a revision, two Python objects passed
+    /// through as-is, and a boolean.
+    def _index_reachableroots2(&self, *args, **_kw) -> PyResult<PyObject> {
+        let first_rev = UncheckedRevision(args.get_item(py, 0).extract(py)?);
+        let second = args.get_item(py, 1);
+        let third = args.get_item(py, 2);
+        let include_path: bool = args.get_item(py, 3).extract(py)?;
+        self.inner_reachableroots2(
+            py,
+            first_rev,
+            second,
+            third,
+            include_path,
+        )
+    }
+
+    /// get head revisions
+    ///
+    /// Takes up to two optional positional arguments, the filtered
+    /// revisions and the stop revision; a missing one is replaced by `None`.
+    /// More than two arguments raise `TypeError`.
+    def _index_headrevs(&self, *args, **_kw) -> PyResult<PyObject> {
+        let nargs = args.len(py);
+        if nargs > 2 {
+            return Err(PyErr::new::<cpython::exc::TypeError, _>(
+                py,
+                "too many arguments",
+            ));
+        }
+        let filtered_revs = if nargs >= 1 {
+            args.get_item(py, 0)
+        } else {
+            py.None()
+        };
+        let stop_rev = if nargs == 2 {
+            args.get_item(py, 1)
+        } else {
+            py.None()
+        };
+        self.inner_headrevs(py, &filtered_revs, &stop_rev)
+    }
+
+    /// get head nodeids
+    ///
+    /// Thin forwarder to `inner_head_node_ids`.
+    def _index_head_node_ids(&self) -> PyResult<PyObject> {
+        self.inner_head_node_ids(py)
+    }
+
+    /// get diff in head revisions
+    ///
+    /// The two positional arguments are forwarded, in order, to
+    /// `inner_headrevsdiff`.
+    def _index_headrevsdiff(&self, *args, **_kw) -> PyResult<PyObject> {
+        let first = args.get_item(py, 0);
+        let second = args.get_item(py, 1);
+        self.inner_headrevsdiff(py, &first, &second)
+    }
+
+    /// True if the object is a snapshot
+    ///
+    /// The revision to test is the first positional argument.
+    def _index_issnapshot(&self, *args, **_kw) -> PyResult<bool> {
+        let rev_arg = args.get_item(py, 0);
+        let unchecked = UncheckedRevision(rev_arg.extract(py)?);
+        self.inner_issnapshot(py, unchecked)
+    }
+
+    /// Gather snapshot data in a cache dict
+    ///
+    /// Positional arguments: the cache `dict` to fill, the start revision
+    /// and the end revision. The dict is updated in place and `None` is
+    /// returned. Any error from the core index is raised as a bare
+    /// `error.RevlogError`.
+    def _index_findsnapshots(&self, *args, **_kw) -> PyResult<PyObject> {
+        let index = &self.inner(py).borrow().index;
+        let cache: PyDict = args.get_item(py, 0).extract(py)?;
+        // The deep copy of `cache` that used to be made here was only
+        // needed to compare against the C implementation; it was never
+        // read, so it is gone.
+        let start_rev = UncheckedRevision(args.get_item(py, 1).extract(py)?);
+        let end_rev = UncheckedRevision(args.get_item(py, 2).extract(py)?);
+        // this method operates by setting new values in the cache, through
+        // this wrapper
+        let mut cache_wrapper = PySnapshotsCache{ py, dict: cache };
+        index.find_snapshots(
+            start_rev,
+            end_rev,
+            &mut cache_wrapper,
+        ).map_err(|_| revlog_error(py))?;
+        Ok(py.None())
+    }
+
+    /// determine revisions with deltas to reconstruct fulltext
+    ///
+    /// Positional arguments: the revision, an optional stop revision
+    /// (`None` for no stop) and an optional integer flag for general delta
+    /// (`None`, or an integer where non-zero means true).
+    ///
+    /// Returns a `(chain, stopped)` tuple where `chain` is a list of
+    /// revision numbers. Raises `ValueError` if the core index fails to
+    /// compute the chain.
+    def _index_deltachain(&self, *args, **_kw) -> PyResult<PyObject> {
+        let index = &self.inner(py).borrow().index;
+        let rev = args.get_item(py, 0).extract::<BaseRevision>(py)?.into();
+        let stop_rev =
+            args.get_item(py, 1).extract::<Option<BaseRevision>>(py)?;
+        // Both revisions are validated against the index before use
+        let rev = index.check_revision(rev).ok_or_else(|| {
+            nodemap_error(py, NodeMapError::RevisionNotInIndex(rev))
+        })?;
+        let stop_rev = if let Some(stop_rev) = stop_rev {
+            let stop_rev = UncheckedRevision(stop_rev);
+            Some(index.check_revision(stop_rev).ok_or_else(|| {
+                nodemap_error(py, NodeMapError::RevisionNotInIndex(stop_rev))
+            })?)
+        } else {None};
+        // `None` stays `None`, any integer is turned into a boolean
+        let using_general_delta = args.get_item(py, 2)
+            .extract::<Option<u32>>(py)?
+            .map(|i| i != 0);
+        let (chain, stopped) = index.delta_chain(
+            rev, stop_rev, using_general_delta
+        ).map_err(|e| {
+            PyErr::new::<cpython::exc::ValueError, _>(py, e.to_string())
+        })?;
+
+        // Unwrap the revisions into plain integers for Python
+        let chain: Vec<_> = chain.into_iter().map(|r| r.0).collect();
+        Ok(
+            PyTuple::new(
+                py,
+                &[
+                    chain.into_py_object(py).into_object(),
+                    stopped.into_py_object(py).into_object()
+                ]
+            ).into_object()
+        )
+    }
+
+    /// slice planned chunk read to reach a density threshold
+    ///
+    /// Positional arguments: the revisions, the target density and the
+    /// minimum gap size.
+    def _index_slicechunktodensity(&self, *args, **_kw) -> PyResult<PyObject> {
+        let revs = args.get_item(py, 0);
+        let target_density: f64 = args.get_item(py, 1).extract(py)?;
+        let min_gap_size: usize = args.get_item(py, 2).extract(py)?;
+        self.inner_slicechunktodensity(
+            py,
+            revs,
+            target_density,
+            min_gap_size,
+        )
+    }
+
+    /// Return the length reported by `self.len()`.
+    def _index___len__(&self) -> PyResult<usize> {
+        self.len(py)
+    }
+
+    /// Index lookup, forwarded to `inner_getitem` with a new reference to
+    /// `key`.
+    def _index___getitem__(&self, key: PyObject) -> PyResult<PyObject> {
+        self.inner_getitem(py, key.clone_ref(py))
+    }
+
+    /// Membership test accepting either a revision number or a node.
+    ///
+    /// An integer is contained when it lies between -1 (included) and the
+    /// length (excluded); anything else must be `bytes` and is looked up as
+    /// a node.
+    def _index___contains__(&self, item: PyObject) -> PyResult<bool> {
+        // ObjectProtocol does not seem to provide contains(), so
+        // this is an equivalent implementation of the index_contains()
+        // defined in revlog.c
+        if let Ok(rev) = item.extract::<i32>(py) {
+            return Ok(rev >= -1 && rev < self.len(py)? as BaseRevision);
+        }
+        let node: PyBytes = item.extract(py)?;
+        self._index_has_node(py, node)
+    }
+
+    /// Thin forwarder to `inner_nodemap_data_all`.
+    def _index_nodemap_data_all(&self) -> PyResult<PyBytes> {
+        self.inner_nodemap_data_all(py)
+    }
+
+    /// Thin forwarder to `inner_nodemap_data_incremental`.
+    def _index_nodemap_data_incremental(&self) -> PyResult<PyObject> {
+        self.inner_nodemap_data_incremental(py)
+    }
+
+    /// Thin forwarder to `inner_update_nodemap_data`, passing `docket` and
+    /// `nm_data` through unchanged.
+    def _index_update_nodemap_data(
+        &self,
+        docket: PyObject,
+        nm_data: PyObject
+    ) -> PyResult<PyObject> {
+        self.inner_update_nodemap_data(py, docket, nm_data)
+    }
+
+    // Read-only property exposing `INDEX_ENTRY_SIZE` as a Python integer
+    @property
+    def _index_entry_size(&self) -> PyResult<PyInt> {
+        Ok(INDEX_ENTRY_SIZE.to_py_object(py))
+    }
+
+    // Read-only property that always evaluates to the integer 1
+    @property
+    def _index_rust_ext_compat(&self) -> PyResult<PyInt> {
+        // NOTE(review): the original wording of this comment was garbled.
+        // Presumably: this will be removed entirely at some point, yet it is
+        // useful to have it implemented in Rust to detangle things when
+        // removing `self.cindex` -- confirm.
+        let rust_res: PyInt = 1.to_py_object(py);
+        Ok(rust_res)
+    }
+
+    // Read-only property that always evaluates to `False`.
+    // NOTE(review): `false` looks surprising for something named `is_rust`
+    // on a Rust object -- confirm it is intended.
+    @property
+    def _index_is_rust(&self) -> PyResult<PyBool> {
+        Ok(false.to_py_object(py))
+    }
+
+
+});
+
+/// Forwarded index methods?
+impl InnerRevlog {
     fn len(&self, py: Python) -> PyResult<usize> {
-        let rust_index_len = self.index(py).borrow().len();
+        let rust_index_len = self.inner(py).borrow().index.len();
         Ok(rust_index_len)
     }
-
     /// This is scaffolding at this point, but it could also become
     /// a way to start a persistent nodemap or perform a
     /// vacuum / repack operation
@@ -659,11 +1412,11 @@
         py: Python,
         nt: &mut CoreNodeTree,
     ) -> PyResult<PyObject> {
-        let index = self.index(py).borrow();
+        let index = &self.inner(py).borrow().index;
         for r in 0..self.len(py)? {
             let rev = Revision(r as BaseRevision);
             // in this case node() won't ever return None
-            nt.insert(&*index, index.node(rev).unwrap(), rev)
+            nt.insert(index, index.node(rev).expect("node should exist"), rev)
                 .map_err(|e| nodemap_error(py, e))?
         }
         Ok(py.None())
@@ -684,7 +1437,11 @@

     /// Returns the full nodemap bytes to be written as-is to disk
     fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> {
-        let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap();
+        let nodemap = self
+            .get_nodetree(py)?
+            .borrow_mut()
+            .take()
+            .expect("nodetree should exist");
         let (readonly, bytes) = nodemap.into_readonly_and_added_bytes();

         // If there's anything readonly, we need to build the data again from
@@ -717,7 +1474,11 @@
             None => return Ok(py.None()),
         };

-        let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap();
+        let node_tree = self
+            .get_nodetree(py)?
+            .borrow_mut()
+            .take()
+            .expect("nodetree should exist");
         let masked_blocks = node_tree.masked_readonly_blocks();
         let (_, data) = node_tree.into_readonly_and_added_bytes();
         let changed = masked_blocks * std::mem::size_of::<Block>();
@@ -747,7 +1508,7 @@
             .extract::<BaseRevision>(py)?
             .into();
         self.docket(py).borrow_mut().replace(docket.clone_ref(py));
-        let idx = self.index(py).borrow();
+        let idx = &self.inner(py).borrow().index;
         let data_tip = idx.check_revision(data_tip).ok_or_else(|| {
             nodemap_error(py, NodeMapError::RevisionNotInIndex(data_tip))
         })?;
@@ -756,7 +1517,7 @@
         for r in (data_tip.0 + 1)..current_tip as BaseRevision {
             let rev = Revision(r);
             // in this case node() won't ever return None
-            nt.insert(&*idx, idx.node(rev).unwrap(), rev)
+            nt.insert(idx, idx.node(rev).expect("node should exist"), rev)
                 .map_err(|e| nodemap_error(py, e))?
         }

@@ -766,7 +1527,7 @@
     }

     fn inner_getitem(&self, py: Python, key: PyObject) -> PyResult<PyObject> {
-        let idx = self.index(py).borrow();
+        let idx = &self.inner(py).borrow().index;
         Ok(match key.extract::<BaseRevision>(py) {
             Ok(key_as_int) => {
                 let entry_params = if key_as_int == NULL_REVISION.0 {
@@ -786,15 +1547,17 @@
                 revision_data_params_to_py_tuple(py, entry_params)
                     .into_object()
             }
-            _ => self.get_rev(py, key.extract::<PyBytes>(py)?)?.map_or_else(
-                || py.None(),
-                |py_rev| py_rev.into_py_object(py).into_object(),
-            ),
+            _ => self
+                ._index_get_rev(py, key.extract::<PyBytes>(py)?)?
+                .map_or_else(
+                    || py.None(),
+                    |py_rev| py_rev.into_py_object(py).into_object(),
+                ),
         })
     }

     fn inner_head_node_ids(&self, py: Python) -> PyResult<PyObject> {
-        let index = &*self.index(py).borrow();
+        let index = &self.inner(py).borrow().index;

         // We don't use the shortcut here, as it's actually slower to loop
         // through the cached `PyList` than to re-do the whole computation for
@@ -826,7 +1589,7 @@
         filtered_revs: &PyObject,
         stop_rev: &PyObject,
     ) -> PyResult<PyObject> {
-        let index = &*self.index(py).borrow();
+        let index = &self.inner(py).borrow().index;
         let stop_rev = if stop_rev.is_none(py) {
             None
         } else {
@@ -899,7 +1662,7 @@
     ) -> PyResult<PyObject> {
         let begin = begin.extract::<BaseRevision>(py)?;
         let end = end.extract::<BaseRevision>(py)?;
-        let index = &*self.index(py).borrow();
+        let index = &self.inner(py).borrow().index;
         let begin =
             Self::check_revision(index, UncheckedRevision(begin - 1), py)?;
         let end = Self::check_revision(index, UncheckedRevision(end - 1), py)?;
@@ -918,7 +1681,7 @@
         new_heads: &[Revision],
         py: Python<'_>,
     ) -> PyList {
-        let index = self.index(py).borrow();
+        let index = &self.inner(py).borrow().index;
         let as_vec: Vec<PyObject> = new_heads
             .iter()
             .map(|r| {
@@ -958,7 +1721,7 @@
         py: Python,
         py_revs: &PyTuple,
     ) -> PyResult<PyObject> {
-        let index = &*self.index(py).borrow();
+        let index = &self.inner(py).borrow().index;
         let revs: Vec<_> = rev_pyiter_collect(py, py_revs.as_object(), index)?;
         let as_vec: Vec<_> = index
             .ancestors(&revs)
@@ -974,7 +1737,7 @@
         py: Python,
         py_revs: &PyTuple,
     ) -> PyResult<PyObject> {
-        let index = &*self.index(py).borrow();
+        let index = &self.inner(py).borrow().index;
         let revs: Vec<_> = rev_pyiter_collect(py, py_revs.as_object(), index)?;
         let as_vec: Vec<_> = index
             .common_ancestor_heads(&revs)
@@ -990,7 +1753,7 @@
         py: Python,
         py_roots: PyDict,
     ) -> PyResult<PyObject> {
-        let index = &*self.index(py).borrow();
+        let index = &self.inner(py).borrow().index;
         let roots: Result<HashMap<Phase, Vec<Revision>>, PyErr> = py_roots
             .items_list(py)
             .iter(py)
@@ -1037,7 +1800,7 @@
         target_density: f64,
         min_gap_size: usize,
     ) -> PyResult<PyObject> {
-        let index = &*self.index(py).borrow();
+        let index = &self.inner(py).borrow().index;
         let revs: Vec<_> = rev_pyiter_collect(py, &revs, index)?;
         let as_nested_vec =
             index.slice_chunk_to_density(&revs, target_density, min_gap_size);
@@ -1069,7 +1832,7 @@
         roots: PyObject,
         include_path: bool,
     ) -> PyResult<PyObject> {
-        let index = &*self.index(py).borrow();
+        let index = &self.inner(py).borrow().index;
         let heads = rev_pyiter_collect_or_else(py, &heads, index, |_rev| {
             PyErr::new::<IndexError, _>(py, "head out of range")
         })?;
@@ -1091,6 +1854,84 @@
             .collect();
         Ok(PyList::new(py, &as_vec).into_object())
     }
+    /// Validate `rev` against the index, then ask the core revlog whether
+    /// it is a snapshot.
+    ///
+    /// An invalid revision yields the error built by `rev_not_in_index`; a
+    /// failure of the snapshot test itself is raised as `ValueError`.
+    fn inner_issnapshot(
+        &self,
+        py: Python,
+        rev: UncheckedRevision,
+    ) -> PyResult<bool> {
+        let revlog = self.inner(py).borrow();
+        let checked_rev = revlog
+            .index
+            .check_revision(rev)
+            .ok_or_else(|| rev_not_in_index(py, rev))?;
+        let is_snapshot = revlog.is_snapshot(checked_rev).map_err(|e| {
+            PyErr::new::<cpython::exc::ValueError, _>(py, e.to_string())
+        })?;
+        Ok(is_snapshot)
+    }
+}
+
+impl InnerRevlog {
+    /// Build an `InnerRevlog` from the objects handed over by Python.
+    ///
+    /// `opener` is wrapped in a `PyVfs`, `index_file` and `data_file` are
+    /// `bytes` paths, the three `*_config` objects are converted by the
+    /// `extract_*_config` helpers and `revlog_type` is converted with
+    /// `RevlogType::try_from`. The underscore-prefixed parameters are
+    /// accepted but not used here.
+    ///
+    /// Errors from the revlog type conversion or from parsing the index are
+    /// converted with `revlog_error_from_msg`.
+    pub fn inner_new(
+        py: Python,
+        opener: PyObject,
+        index_data: PyObject,
+        index_file: PyObject,
+        data_file: PyObject,
+        _sidedata_file: PyObject,
+        inline: bool,
+        data_config: PyObject,
+        delta_config: PyObject,
+        feature_config: PyObject,
+        _chunk_cache: PyObject,
+        _default_compression_header: PyObject,
+        revlog_type: usize,
+    ) -> PyResult<Self> {
+        let vfs = Box::new(PyVfs::new(py, opener)?);
+        // Python `bytes` paths to owned Rust paths
+        let index_file =
+            get_path_from_bytes(index_file.extract::<PyBytes>(py)?.data(py))
+                .to_owned();
+        let data_file =
+            get_path_from_bytes(data_file.extract::<PyBytes>(py)?.data(py))
+                .to_owned();
+        let revlog_type = RevlogType::try_from(revlog_type)
+            .map_err(|e| revlog_error_from_msg(py, e))?;
+        // Python config objects to their Rust counterparts
+        let data_config = extract_data_config(py, data_config, revlog_type)?;
+        let delta_config =
+            extract_delta_config(py, delta_config, revlog_type)?;
+        let feature_config =
+            extract_feature_config(py, feature_config, revlog_type)?;
+        // Only used below to derive the index header
+        let options = RevlogOpenOptions::new(
+            inline,
+            data_config,
+            delta_config,
+            feature_config,
+        );
+        // Safety: we keep the buffer around inside the class as `index_mmap`
+        let (buf, bytes) = unsafe { mmap_keeparound(py, index_data)? };
+        let index = hg::index::Index::new(bytes, options.index_header())
+            .map_err(|e| revlog_error_from_msg(py, e))?;
+        let core = CoreInnerRevlog::new(
+            vfs,
+            index,
+            index_file,
+            data_file,
+            data_config,
+            delta_config,
+            feature_config,
+        );
+        // All data slots start empty except the one keeping `buf` alive
+        Self::create_instance(
+            py,
+            core,
+            RefCell::new(None),
+            RefCell::new(None),
+            RefCell::new(None),
+            RefCell::new(buf),
+            RefCell::new(None),
+            RefCell::new(None),
+            RefCell::new(None),
+        )
+    }
 }

 py_class!(pub class NodeTree |py| {
@@ -1111,7 +1952,7 @@
     /// (generation-based guard), same as iterating on a `dict` that has
     /// been meanwhile mutated.
     def is_invalidated(&self) -> PyResult<bool> {
-        let leaked = self.index(py).borrow();
+        let leaked = &self.index(py).borrow();
         // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
         let result = unsafe { leaked.try_borrow(py) };
         // two cases for result to be an error:
@@ -1123,7 +1964,7 @@
     }

     def insert(&self, rev: PyRevision) -> PyResult<PyObject> {
-        let leaked = self.index(py).borrow();
+        let leaked = &self.index(py).borrow();
         // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
         let index = &*unsafe { leaked.try_borrow(py)? };

@@ -1135,7 +1976,7 @@
             return Err(rev_not_in_index(py, rev.into()))
         }

-        let entry = index.inner.get_entry(rev).unwrap();
+        let entry = index.inner.get_entry(rev).expect("entry should exist");
         let mut nt = self.nt(py).borrow_mut();
         nt.insert(index, entry.hash(), rev).map_err(|e| nodemap_error(py, e))?;

@@ -1158,7 +1999,7 @@
             )?;

         let nt = self.nt(py).borrow();
-        let leaked = self.index(py).borrow();
+        let leaked = &self.index(py).borrow();
         // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
         let index = &*unsafe { leaked.try_borrow(py)? };

@@ -1170,7 +2011,7 @@

     def shortest(&self, node: PyBytes) -> PyResult<usize> {
         let nt = self.nt(py).borrow();
-        let leaked = self.index(py).borrow();
+        let leaked = &self.index(py).borrow();
         // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
         let idx = &*unsafe { leaked.try_borrow(py)? };
         match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
@@ -1182,6 +2023,120 @@
     }
 });

+/// Print the current Python error with `PyErr_Print`, then panic.
+///
+/// Never returns. `_py` is not used beyond its presence in the signature.
+fn panic_after_error(_py: Python) -> ! {
+    unsafe {
+        python3_sys::PyErr_Print();
+    }
+    panic!("Python API call failed");
+}
+
+/// Take ownership of the object behind `p` and downcast it to `T`.
+///
+/// Panics, after printing the pending Python error, if `p` is null. Also
+/// panics if the object cannot be downcast to `T`.
+///
+/// # Safety
+///
+/// Don't call this. Its only caller is taken from `PyO3`.
+///
+/// `p` is handed to `PyObject::from_owned_ptr`, so it must be either null
+/// or a valid pointer whose ownership the caller gives up.
+unsafe fn cast_from_owned_ptr_or_panic<T>(
+    py: Python,
+    p: *mut python3_sys::PyObject,
+) -> T
+where
+    T: cpython::PythonObjectWithCheckedDowncast,
+{
+    if p.is_null() {
+        panic_after_error(py);
+    } else {
+        PyObject::from_owned_ptr(py, p)
+            .cast_into(py)
+            .expect("object should be of the requested type")
+    }
+}
+
+/// Allocate a Python `bytes` object of `len` bytes and let `init` fill it
+/// through a [`RevisionBuffer`], avoiding an intermediate allocation.
+///
+/// Returns the filled `PyBytes`, or the error returned by `init`.
+/// Panics if Python fails to allocate the object, or (in `finish`) if `init`
+/// succeeded without writing exactly `len` bytes.
+fn with_pybytes_buffer<F>(
+    py: Python,
+    len: usize,
+    init: F,
+) -> Result<PyBytes, RevlogError>
+where
+    F: FnOnce(
+        &mut dyn RevisionBuffer<Target = PyBytes>,
+    ) -> Result<(), RevlogError>,
+{
+    // Largely inspired by code in PyO3
+    // https://pyo3.rs/main/doc/pyo3/types/struct.pybytes#method.new_bound_with
+    unsafe {
+        // A null source pointer asks for an uninitialized `bytes` object
+        let pyptr = python3_sys::PyBytes_FromStringAndSize(
+            std::ptr::null(),
+            len as python3_sys::Py_ssize_t,
+        );
+        // `pybytes` now owns the reference; `pyptr` is only reused below to
+        // get at the internal buffer
+        let pybytes = cast_from_owned_ptr_or_panic::<PyBytes>(py, pyptr);
+        let buffer: *mut u8 = python3_sys::PyBytes_AsString(pyptr).cast();
+        debug_assert!(!buffer.is_null());
+        let mut rev_buf = PyRevisionBuffer::new(pybytes, buffer, len);
+        // Initialise the bytestring in init
+        // If init returns an Err, the buffer is released when `rev_buf`,
+        // which now owns `pybytes`, is dropped
+        init(&mut rev_buf).map(|_| rev_buf.finish())
+    }
+}
+
+/// Wrapper around a Python-provided buffer into which the revision contents
+/// will be written. Done for speed in order to save a large allocation + copy.
+struct PyRevisionBuffer {
+    // The Python `bytes` object being filled; returned by `finish`
+    py_bytes: PyBytes,
+    // Start of the allocated bytes of `py_bytes` (currently never read)
+    _buf: *mut u8,
+    // Total length of the allocated bytes
+    len: usize,
+    // Write cursor: where the next slice will be copied
+    current_buf: *mut u8,
+    // Number of bytes written so far
+    current_len: usize,
+}
+
+impl PyRevisionBuffer {
+    /// Wrap `bytes` with an empty write cursor positioned at `buf`.
+    ///
+    /// # Safety
+    ///
+    /// `buf` should be the start of the allocated bytes of `bytes`, and `len`
+    /// exactly the length of said allocated bytes.
+    #[inline]
+    unsafe fn new(bytes: PyBytes, buf: *mut u8, len: usize) -> Self {
+        // Nothing has been written yet: the cursor sits at the very start
+        let written = 0;
+        let cursor = buf;
+        Self {
+            py_bytes: bytes,
+            _buf: buf,
+            len,
+            current_buf: cursor,
+            current_len: written,
+        }
+    }
+
+    /// How many bytes have been copied so far. Differs from the total
+    /// allocated length of the buffer until the revision has been fully
+    /// written.
+    #[inline]
+    fn current_len(&self) -> usize {
+        self.current_len
+    }
+}
+
+impl RevisionBuffer for PyRevisionBuffer {
+    type Target = PyBytes;
+
+    /// Copy `slice` at the write cursor and advance the cursor.
+    ///
+    /// Panics if this would write past the allocated length.
+    #[inline]
+    fn extend_from_slice(&mut self, slice: &[u8]) {
+        // Bounds check guarding the raw pointer writes below
+        assert!(self.current_len + slice.len() <= self.len);
+        unsafe {
+            // We cannot use `copy_from_nonoverlapping` since it's *possible*
+            // to create a slice from the same Python memory region using
+            // [`PyBytesDeref`]. Probable that LLVM has an optimization anyway?
+            self.current_buf.copy_from(slice.as_ptr(), slice.len());
+            self.current_buf = self.current_buf.add(slice.len());
+        }
+        self.current_len += slice.len()
+    }
+
+    /// Consume the buffer and return the filled `PyBytes`.
+    ///
+    /// Panics unless exactly `len` bytes have been written.
+    #[inline]
+    fn finish(self) -> Self::Target {
+        // catch unzeroed bytes before it becomes undefined behavior
+        assert_eq!(
+            self.current_len(),
+            self.len,
+            "not enough bytes read for revision"
+        );
+        self.py_bytes
+    }
+}
+
 fn revlog_error(py: Python) -> PyErr {
     match py
         .import("mercurial.error")
@@ -1195,21 +2150,6 @@
     }
 }

-fn revlog_error_with_msg(py: Python, msg: &[u8]) -> PyErr {
-    match py
-        .import("mercurial.error")
-        .and_then(|m| m.get(py, "RevlogError"))
-    {
-        Err(e) => e,
-        Ok(cls) => PyErr::from_instance(
-            py,
-            cls.call(py, (PyBytes::new(py, msg),), None)
-                .ok()
-                .into_py_object(py),
-        ),
-    }
-}
-
 fn graph_error(py: Python, _err: hg::GraphError) -> PyErr {
     // ParentOutOfRange is currently the only alternative
     // in `hg::GraphError`. The C index always raises this simple ValueError.
@@ -1249,8 +2189,8 @@
     m.add(py, "__package__", package)?;
     m.add(py, "__doc__", "RevLog - Rust implementations")?;

-    m.add_class::<Index>(py)?;
     m.add_class::<NodeTree>(py)?;
+    m.add_class::<InnerRevlog>(py)?;

     let sys = PyModule::import(py, "sys")?;
     let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rust/hg-cpython/src/vfs.rs	Wed Jun 19 19:10:49 2024 +0200
@@ -0,0 +1,289 @@
+use std::{
+    cell::Cell,
+    fs::File,
+    io::Error,
+    os::fd::{AsRawFd, FromRawFd},
+    path::{Path, PathBuf},
+};
+
+use cpython::{
+    ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyInt, PyObject,
+    PyResult, PyTuple, Python, PythonObject, ToPyObject,
+};
+use hg::{
+    errors::{HgError, IoResultExt},
+    exit_codes,
+    utils::files::{get_bytes_from_path, get_path_from_bytes},
+    vfs::Vfs,
+};
+
+/// Wrapper around a Python VFS object to call back into Python from `hg-core`.
+pub struct PyVfs {
+    inner: PyObject, // the wrapped Python object
+}
+
+impl Clone for PyVfs {
+    fn clone(&self) -> Self {
+        // `clone_ref` needs a `Python` token, so take the GIL to get one.
+        let gil = Python::acquire_gil();
+        let py = gil.python();
+        let inner = self.inner.clone_ref(py);
+        Self { inner }
+    }
+}
+
+impl PyVfs {
+    /// Wraps `py_vfs` without validating it. Always returns `Ok`, and `_py`
+    /// is unused.
+    pub fn new(_py: Python, py_vfs: PyObject) -> PyResult<Self> {
+        Ok(Self { inner: py_vfs })
+    }
+
+    /// Calls the Python vfs with `(filename, mode, atomic_temp, check_ambig)`
+    /// and turns the `(fileno, temp_name)` tuple it returns into a [`File`],
+    /// plus the temporary path when `atomic_temp` is set.
+    ///
+    /// A Python `FileNotFoundError` becomes an [`HgError::IoError`] of kind
+    /// `NotFound` (with a `ReadingFile` context); any other failure becomes
+    /// an abort built by `vfs_error`.
+    fn inner_open(
+        &self,
+        filename: &Path,
+        create: bool,
+        check_ambig: bool,
+        atomic_temp: bool,
+        write: bool,
+    ) -> Result<(File, Option<PathBuf>), HgError> {
+        let gil = &Python::acquire_gil();
+        let py = gil.python();
+        // Precedence matters: `atomic_temp` wins over `create` over `write`.
+        let mode: &[u8] = match (atomic_temp, create, write) {
+            (true, _, _) => b"w",
+            (false, true, _) => b"w+",
+            (false, false, true) => b"r+",
+            (false, false, false) => b"rb",
+        };
+        // Positional arguments passed to the Python callable.
+        let args = (
+            PyBytes::new(py, &get_bytes_from_path(filename)),
+            PyBytes::new(py, mode),
+            atomic_temp,
+            check_ambig,
+        );
+        let tup = match self.inner.call(py, args, None) {
+            Ok(tup) => tup,
+            Err(mut e) => {
+                // TODO surely there is a better way of comparing
+                if e.instance(py).get_type(py).name(py) == "FileNotFoundError"
+                {
+                    return Err(HgError::IoError {
+                        error: Error::new(
+                            std::io::ErrorKind::NotFound,
+                            e.instance(py).to_string(),
+                        ),
+                        context: hg::errors::IoErrorContext::ReadingFile(
+                            filename.to_owned(),
+                        ),
+                    });
+                }
+                // Every other Python exception is reported as an abort.
+                return Err(vfs_error("failed to call opener", e));
+            }
+        };
+        // Success: unpack the `(fileno, temp_name)` pair.
+        let tup = tup
+            .extract::<PyTuple>(py)
+            .map_err(|e| vfs_error("vfs did not return a tuple", e))?;
+        let fileno = tup
+            .get_item(py, 0)
+            .extract(py)
+            .map_err(|e| vfs_error("vfs did not return a valid fileno", e))?;
+        let temp_name = tup.get_item(py, 1);
+        // Safety: this must be a valid owned file descriptor, and
+        // Python has just given it to us, it will only exist here now
+        let file = unsafe { File::from_raw_fd(fileno) };
+        if !atomic_temp {
+            // The second tuple item is only read for atomic temp files.
+            return Ok((file, None));
+        }
+        let temp_name = temp_name
+            .extract::<PyBytes>(py)
+            .map_err(|e| vfs_error("invalid tempname", e))?;
+        Ok((file, Some(get_path_from_bytes(temp_name.data(py)).to_owned())))
+    }
+}
+
+fn vfs_error(reason: impl Into<String>, mut error: PyErr) -> HgError {
+    let gil = &Python::acquire_gil();
+    let py = gil.python();
+    HgError::abort(
+        format!("{}: {}", reason.into(), error.instance(py)),
+        exit_codes::ABORT,
+        None,
+    )
+}
+
+py_class!(pub class PyFile |py| {
+    data number: Cell<i32>; // value returned as-is by `fileno()`
+
+    def fileno(&self) -> PyResult<PyInt> {
+        Ok(self.number(py).get().to_py_object(py))
+    }
+});
+
+impl Vfs for PyVfs {
+    /// Opens an existing file for reading and writing (mode `r+`).
+    fn open(&self, filename: &Path) -> Result<File, HgError> {
+        self.inner_open(filename, false, false, false, true)
+            .map(|(f, _)| f)
+    }
+    /// Opens an existing file read-only (mode `rb`).
+    fn open_read(&self, filename: &Path) -> Result<File, HgError> {
+        self.inner_open(filename, false, false, false, false)
+            .map(|(f, _)| f)
+    }
+
+    /// Same as `open`, but passes `check_ambig = true` to the Python vfs.
+    fn open_check_ambig(
+        &self,
+        filename: &Path,
+    ) -> Result<std::fs::File, HgError> {
+        self.inner_open(filename, false, true, false, true)
+            .map(|(f, _)| f)
+    }
+
+    /// Opens the file with mode `w+`.
+    fn create(&self, filename: &Path) -> Result<std::fs::File, HgError> {
+        self.inner_open(filename, true, false, false, true)
+            .map(|(f, _)| f)
+    }
+
+    /// Opens a temporary file (mode `w`) and wraps it in an `AtomicFile`.
+    fn create_atomic(
+        &self,
+        filename: &Path,
+        check_ambig: bool,
+    ) -> Result<hg::vfs::AtomicFile, HgError> {
+        self.inner_open(filename, true, false, true, true).map(
+            |(fp, temp_name)| {
+                hg::vfs::AtomicFile::new(
+                    fp,
+                    check_ambig,
+                    temp_name.expect("temp name should exist"),
+                    filename.to_owned(),
+                )
+            },
+        )
+    }
+
+    /// Size of `file`, read from `st_size` of the Python vfs `fstat` result.
+    fn file_size(&self, file: &File) -> Result<u64, HgError> {
+        let gil = &Python::acquire_gil();
+        let py = gil.python();
+        let raw_fd = file.as_raw_fd();
+        let py_fd = PyFile::create_instance(py, Cell::new(raw_fd))
+            .expect("create_instance cannot fail");
+        let fstat = self
+            .inner
+            .call_method(py, "fstat", (py_fd,), None)
+            .map_err(|e| {
+                vfs_error(format!("failed to fstat fd '{}'", raw_fd), e)
+            })?;
+        let size = fstat.getattr(py, "st_size").map_err(|e| {
+            vfs_error(format!("failed to get size of fd '{}'", raw_fd), e)
+        })?;
+        size.extract(py).map_err(|e| {
+            vfs_error(format!("invalid size for fd '{}'", raw_fd), e)
+        })
+    }
+
+    /// Asks the Python vfs whether `filename` exists. Any failure, whether
+    /// raised by the call or by a non-`bool` result, is reported as `false`.
+    fn exists(&self, filename: &Path) -> bool {
+        let gil = &Python::acquire_gil();
+        let py = gil.python();
+        self.inner
+            .call_method(
+                py,
+                "exists",
+                (PyBytes::new(py, &get_bytes_from_path(filename)),),
+                None,
+            )
+            .and_then(|v| v.extract(py))
+            .unwrap_or(false)
+    }
+
+    /// Removes `filename` through the Python vfs `unlink` method.
+    fn unlink(&self, filename: &Path) -> Result<(), HgError> {
+        let gil = &Python::acquire_gil();
+        let py = gil.python();
+        if let Err(e) = self.inner.call_method(
+            py,
+            "unlink",
+            (PyBytes::new(py, &get_bytes_from_path(filename)),),
+            None,
+        ) {
+            return Err(vfs_error(
+                format!("failed to unlink '{}'", filename.display()),
+                e,
+            ));
+        }
+        Ok(())
+    }
+
+    /// Renames `from` to `to` through the Python vfs `rename` method.
+    fn rename(
+        &self,
+        from: &Path,
+        to: &Path,
+        check_ambig: bool,
+    ) -> Result<(), HgError> {
+        let gil = &Python::acquire_gil();
+        let py = gil.python();
+        let kwargs = PyDict::new(py);
+        kwargs
+            .set_item(py, "checkambig", check_ambig)
+            .map_err(|e| vfs_error("dict setitem failed", e))?;
+        if let Err(e) = self.inner.call_method(
+            py,
+            "rename",
+            (
+                PyBytes::new(py, &get_bytes_from_path(from)),
+                PyBytes::new(py, &get_bytes_from_path(to)),
+            ),
+            Some(&kwargs),
+        ) {
+            let msg = format!(
+                "failed to rename '{}' to '{}'",
+                from.display(),
+                to.display()
+            );
+            return Err(vfs_error(msg, e));
+        }
+        Ok(())
+    }
+
+    /// Copies `from` to `to`, both resolved with the Python vfs `join`.
+    fn copy(&self, from: &Path, to: &Path) -> Result<(), HgError> {
+        let gil = &Python::acquire_gil();
+        let py = gil.python();
+        let join = |path: &Path| -> Result<PyBytes, HgError> {
+            self.inner
+                .call_method(
+                    py,
+                    "join",
+                    (PyBytes::new(py, &get_bytes_from_path(path)),),
+                    None,
+                )
+                .map_err(|e| vfs_error("failed to call vfs.join", e))?
+                .extract::<PyBytes>(py)
+                .map_err(|e| vfs_error("vfs.join did not return bytes", e))
+        };
+        let from = join(from)?;
+        let from = get_path_from_bytes(from.data(py));
+        let to = join(to)?;
+        let to = get_path_from_bytes(to.data(py));
+        std::fs::copy(from, to).when_writing_file(to)?;
+        Ok(())
+    }
+}
--- a/tests/test-bundle.t	Wed Jun 19 17:03:13 2024 +0200
+++ b/tests/test-bundle.t	Wed Jun 19 19:10:49 2024 +0200
@@ -294,7 +294,7 @@
   packed.hg: size=2865, sha1=353d10311f4befa195d9a1ca4b8e26518115c702 (no-rust !)
   0000: 48 47 53 31 55 4e 00 00 00 00 00 00 00 06 00 00 |HGS1UN..........| (no-rust !)
   0010: 00 00 00 00 0a 69 00 3b 67 65 6e 65 72 61 6c 64 |.....i.;generald| (no-rust !)
-  packed.hg: size=3181, sha1=b202787710a1c109246554be589506cd2916acb7 (rust !)
+  packed.hg: size=3181, sha1=3e865df183d388222969c5b19c844dd8697c85c6 (rust !)
   0000: 48 47 53 31 55 4e 00 00 00 00 00 00 00 09 00 00 |HGS1UN..........| (rust !)
   0010: 00 00 00 00 0b 67 00 3b 67 65 6e 65 72 61 6c 64 |.....g.;generald| (rust !)
   0020: 65 6c 74 61 2c 72 65 76 6c 6f 67 2d 63 6f 6d 70 |elta,revlog-comp|
--- a/tests/test-contrib-perf.t	Wed Jun 19 17:03:13 2024 +0200
+++ b/tests/test-contrib-perf.t	Wed Jun 19 19:10:49 2024 +0200
@@ -283,8 +283,15 @@
 #if reporevlogstore
   $ hg perfrevlogrevisions .hg/store/data/a.i
 #endif
+
+#if no-rust
+Cannot test in Rust because these are highly invasive and expect a certain
+structure from Python code.
+
   $ hg perfrevlogrevision -m 0
   $ hg perfrevlogchunks -c
+#endif
+
   $ hg perfrevrange
   $ hg perfrevset 'all()'
   $ hg perfstartup
--- a/tests/test-journal-exists.t	Wed Jun 19 17:03:13 2024 +0200
+++ b/tests/test-journal-exists.t	Wed Jun 19 19:10:49 2024 +0200
@@ -45,12 +45,21 @@
   $ hg bundle -qa repo.hg
   $ chmod -w foo/.hg/store/00changelog.i

+#if rust
+  $ hg -R foo unbundle repo.hg
+  adding changesets
+  transaction abort!
+  rollback completed
+  abort: failed to call opener: [Errno 13] $EACCES$: b'$TESTTMP/repo/foo/.hg/store/.00changelog.i-*' (glob)
+  [50]
+#else
   $ hg -R foo unbundle repo.hg
   adding changesets
   transaction abort!
   rollback completed
   abort: $EACCES$: '$TESTTMP/repo/foo/.hg/store/.00changelog.i-*' (glob)
   [255]
+#endif

   $ if test -f foo/.hg/store/journal; then echo 'journal exists :-('; fi
 #endif
--- a/tests/test-permissions.t	Wed Jun 19 17:03:13 2024 +0200
+++ b/tests/test-permissions.t	Wed Jun 19 19:10:49 2024 +0200
@@ -34,10 +34,16 @@
   $ chmod -w .hg/store/data/a.i

   $ echo barber > a
+#if rust
+  $ hg commit -m "2"
+  abort: failed to call opener: [Errno 13] $EACCES$: b'$TESTTMP/t/.hg/store/data/a.i'
+  [50]
+#else
   $ hg commit -m "2"
   trouble committing a!
   abort: $EACCES$: '$TESTTMP/t/.hg/store/data/a.i'
   [255]
+#endif

   $ chmod -w .
--- a/tests/test-remotefilelog-bgprefetch.t	Wed Jun 19 17:03:13 2024 +0200
+++ b/tests/test-remotefilelog-bgprefetch.t	Wed Jun 19 19:10:49 2024 +0200
@@ -33,8 +33,8 @@
   transferred 776 bytes in * seconds (*/sec) (glob) (no-zstd !)
   3 files to transfer, 784 bytes of data (zstd no-rust !)
   transferred 784 bytes in * seconds (*/sec) (glob) (zstd no-rust !)
-  5 files to transfer, 910 bytes of data (rust !)
-  transferred 910 bytes in * seconds (*/sec) (glob) (rust !)
+  5 files to transfer, 911 bytes of data (rust !)
+  transferred 911 bytes in * seconds (*/sec) (glob) (rust !)
   searching for changes
   no changes found
--- a/tests/test-remotefilelog-prefetch.t	Wed Jun 19 17:03:13 2024 +0200
+++ b/tests/test-remotefilelog-prefetch.t	Wed Jun 19 19:10:49 2024 +0200
@@ -26,8 +26,8 @@
   transferred 528 bytes in * seconds (* */sec) (glob) (no-zstd !)
   3 files to transfer, 532 bytes of data (zstd no-rust !)
   transferred 532 bytes in * seconds (* */sec) (glob) (zstd no-rust !)
-  5 files to transfer, 658 bytes of data (zstd rust !)
-  transferred 658 bytes in * seconds (*/sec) (glob) (zstd rust !)
+  5 files to transfer, 659 bytes of data (zstd rust !)
+  transferred 659 bytes in * seconds (*/sec) (glob) (zstd rust !)
   searching for changes
   no changes found
   $ cd shallow
@@ -172,8 +172,8 @@
   transferred 528 bytes in * seconds * (glob) (no-zstd !)
   3 files to transfer, 532 bytes of data (zstd no-rust !)
   transferred 532 bytes in * seconds (* */sec) (glob) (zstd no-rust !)
-  5 files to transfer, 658 bytes of data (zstd rust !)
-  transferred 658 bytes in * seconds (*/sec) (glob) (zstd rust !)
+  5 files to transfer, 659 bytes of data (zstd rust !)
+  transferred 659 bytes in * seconds (*/sec) (glob) (zstd rust !)
   searching for changes
   no changes found
   updating to branch default
--- a/tests/test-repo-compengines.t	Wed Jun 19 17:03:13 2024 +0200
+++ b/tests/test-repo-compengines.t	Wed Jun 19 19:10:49 2024 +0200
@@ -194,9 +194,11 @@
   > done

   $ $RUNTESTDIR/f -s zstd-*/.hg/store/data/*
-  zstd-level-1/.hg/store/data/a.i: size=4114
+  zstd-level-1/.hg/store/data/a.i: size=4114 (no-rust !)
+  zstd-level-1/.hg/store/data/a.i: size=4112 (rust !)
   zstd-level-22/.hg/store/data/a.i: size=4091
-  zstd-level-default/\.hg/store/data/a\.i: size=(4094|4102) (re)
+  zstd-level-default/\.hg/store/data/a\.i: size=(4094|4102) (re) (no-rust !)
+  zstd-level-default/.hg/store/data/a.i: size=4097 (rust !)

 Test error cases
--- a/tests/test-rust-revlog.py	Wed Jun 19 17:03:13 2024 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,90 +0,0 @@
-import struct
-import unittest
-
-from mercurial.node import hex
-
-try:
-    from mercurial import rustext
-
-    rustext.__name__  # trigger immediate actual import
-except ImportError:
-    rustext = None
-else:
-    from mercurial.rustext import revlog
-
-    # this would fail already without appropriate ancestor.__package__
-    from mercurial.rustext.ancestor import LazyAncestors
-
-from mercurial.testing import revlog as revlogtesting
-
-header = struct.unpack(">I", revlogtesting.data_non_inlined[:4])[0]
-
-
-@unittest.skipIf(
-    rustext is None,
-    "rustext module revlog relies on is not available",
-)
-class RustRevlogIndexTest(revlogtesting.RevlogBasedTestBase):
-    def test_heads(self):
-        idx = self.parseindex()
-        rustidx = revlog.Index(revlogtesting.data_non_inlined, header)
-        self.assertEqual(rustidx.headrevs(), idx.headrevs())
-
-    def test_len(self):
-        idx = self.parseindex()
-        rustidx = revlog.Index(revlogtesting.data_non_inlined, header)
-        self.assertEqual(len(rustidx), len(idx))
-
-    def test_ancestors(self):
-        rustidx = revlog.Index(revlogtesting.data_non_inlined, header)
-        lazy = LazyAncestors(rustidx, [3], 0, True)
-        # we have two more references to the index:
-        # - in its inner iterator for __contains__ and __bool__
-        # - in the LazyAncestors instance itself (to spawn new iterators)
-        self.assertTrue(2 in lazy)
-        self.assertTrue(bool(lazy))
-        self.assertEqual(list(lazy), [3, 2, 1, 0])
-        # a second time to validate that we spawn new iterators
-        self.assertEqual(list(lazy), [3, 2, 1, 0])
-
-        # let's check bool for an empty one
-        self.assertFalse(LazyAncestors(rustidx, [0], 0, False))
-
-
-@unittest.skipIf(
-    rustext is None,
-    "rustext module revlog relies on is not available",
-)
-class RustRevlogNodeTreeClassTest(revlogtesting.RustRevlogBasedTestBase):
-    def test_standalone_nodetree(self):
-        idx = self.parserustindex()
-        nt = revlog.NodeTree(idx)
-        for i in range(4):
-            nt.insert(i)
-
-        bin_nodes = [entry[7] for entry in idx]
-        hex_nodes = [hex(n) for n in bin_nodes]
-
-        for i, node in enumerate(hex_nodes):
-            self.assertEqual(nt.prefix_rev_lookup(node), i)
-            self.assertEqual(nt.prefix_rev_lookup(node[:5]), i)
-
-        # all 4 revisions in idx (standard data set) have different
-        # first nybbles in their Node IDs,
-        # hence `nt.shortest()` should return 1 for them, except when
-        # the leading nybble is 0 (ambiguity with NULL_NODE)
-        for i, (bin_node, hex_node) in enumerate(zip(bin_nodes, hex_nodes)):
-            shortest = nt.shortest(bin_node)
-            expected = 2 if hex_node[0] == ord('0') else 1
-            self.assertEqual(shortest, expected)
-            self.assertEqual(nt.prefix_rev_lookup(hex_node[:shortest]), i)
-
-        # test invalidation (generation poisoning) detection
-        del idx[3]
-        self.assertTrue(nt.is_invalidated())
-
-
-if __name__ == '__main__':
-    import silenttestrunner
-
-    silenttestrunner.main(__name__)
--- a/tests/test-treemanifest.t	Wed Jun 19 17:03:13 2024 +0200
+++ b/tests/test-treemanifest.t	Wed Jun 19 19:10:49 2024 +0200
@@ -869,11 +869,13 @@
   > done
   $ hg ci -m 'have some content'
   $ f -s .hg/store/00manifest.*
-  .hg/store/00manifest.i: size=798 (no-pure !)
-  .hg/store/00manifest.i: size=784 (pure !)
+  .hg/store/00manifest.i: size=798 (no-pure no-rust !)
+  .hg/store/00manifest.i: size=800 (rust !)
+  .hg/store/00manifest.i: size=784 (pure no-rust !)
   $ f -s .hg/store/meta/dir/00manifest*
-  .hg/store/meta/dir/00manifest.i: size=556 (no-pure !)
-  .hg/store/meta/dir/00manifest.i: size=544 (pure !)
+  .hg/store/meta/dir/00manifest.i: size=556 (no-pure no-rust !)
+  .hg/store/meta/dir/00manifest.i: size=557 (rust !)
+  .hg/store/meta/dir/00manifest.i: size=544 (pure no-rust !)
   $ hg debugupgraderepo --config format.revlog-compression=none --config experimental.treemanifest=yes --run --quiet --no-backup
   upgrade will perform the following actions:
--- a/tests/test-verify.t	Wed Jun 19 17:03:13 2024 +0200
+++ b/tests/test-verify.t	Wed Jun 19 19:10:49 2024 +0200
@@ -321,7 +321,8 @@
   $ cat start b > .hg/store/data/a.i

   $ hg verify -q
-   a@1: broken revlog! (index a is corrupted)
+   a@1: broken revlog! (index a is corrupted) (no-rust !)
+   a@1: broken revlog! (abort: unexpected inline revlog length) (rust !)
   warning: orphan data file 'data/a.i'
   not checking dirstate because of previous errors
   1 warnings encountered!