changeset 52282:51a350a22d0c

branching: merge stable into default
author Raphaël Gomès <rgomes@octobus.net>
date Wed, 20 Nov 2024 15:53:19 +0100
parents 65d516db7309 (diff) 854e2b9bca57 (current diff)
children fad30cb98579
files rust/rhg/src/commands/status.rs
diffstat 50 files changed, 6247 insertions(+), 1766 deletions(-)
--- a/contrib/perf.py	Wed Nov 20 15:38:57 2024 +0100
+++ b/contrib/perf.py	Wed Nov 20 15:53:19 2024 +0100
@@ -3780,6 +3780,11 @@
 
     rl = cmdutil.openrevlog(repo, b'perfrevlogchunks', file_, opts)
 
+    if rl.uses_rust:
+        raise NotImplementedError(
+            "perfrevlogchunks is not implemented for the Rust revlog"
+        )
+
     # - _chunkraw was renamed to _getsegmentforrevs
     # - _getsegmentforrevs was moved on the inner object
     try:
@@ -3960,6 +3965,10 @@
         raise error.CommandError(b'perfrevlogrevision', b'invalid arguments')
 
     r = cmdutil.openrevlog(repo, b'perfrevlogrevision', file_, opts)
+    if r.uses_rust:
+        raise NotImplementedError(
+            "perfrevlogrevision is not implemented for the Rust revlog"
+        )
 
     # _chunkraw was renamed to _getsegmentforrevs.
     try:
--- a/mercurial/changelog.py	Wed Nov 20 15:38:57 2024 +0100
+++ b/mercurial/changelog.py	Wed Nov 20 15:53:19 2024 +0100
@@ -324,6 +324,7 @@
 
             self._format_flags &= ~revlog.FLAG_GENERALDELTA
             self.delta_config.general_delta = False
+            self.data_config.generaldelta = False
 
         # Delta chains for changelogs tend to be very small because entries
         # tend to be small and don't delta well with each other. So disable delta
--- a/mercurial/pure/parsers.py	Wed Nov 20 15:38:57 2024 +0100
+++ b/mercurial/pure/parsers.py	Wed Nov 20 15:53:19 2024 +0100
@@ -672,6 +672,9 @@
             r = (offset,) + r[1:]
         return r
 
+    def __delitem__(self, i):
+        raise NotImplementedError()
+
     def _unpack_entry(self, rev, data):
         r = self.index_format.unpack(data)
         r = r + (
--- a/mercurial/revlog.py	Wed Nov 20 15:38:57 2024 +0100
+++ b/mercurial/revlog.py	Wed Nov 20 15:53:19 2024 +0100
@@ -17,7 +17,6 @@
 import binascii
 import collections
 import contextlib
-import functools
 import io
 import os
 import struct
@@ -83,6 +82,7 @@
 if typing.TYPE_CHECKING:
     # noinspection PyPackageRequirements
     import attr
+    from .pure.parsers import BaseIndexObject
 
 from . import (
     ancestor,
@@ -381,7 +381,7 @@
         default_compression_header,
     ):
         self.opener = opener
-        self.index = index
+        self.index: BaseIndexObject = index
 
         self.index_file = index_file
         self.data_file = data_file
@@ -528,7 +528,9 @@
         generaldelta = self.delta_config.general_delta
         # Try C implementation.
         try:
-            return self.index.deltachain(rev, stoprev, generaldelta)
+            return self.index.deltachain(
+                rev, stoprev, generaldelta
+            )  # pytype: disable=attribute-error
         except AttributeError:
             pass
 
@@ -1246,6 +1248,71 @@
         return self.canonical_index_file
 
 
+if typing.TYPE_CHECKING:
+    # Tell Pytype what kind of object we expect
+    ProxyBase = BaseIndexObject
+else:
+    ProxyBase = object
+
+
+class RustIndexProxy(ProxyBase):
+    """Wrapper around the Rust index to fake having direct access to the index.
+
+    Rust enforces xor mutability (one mutable reference XOR 1..n non-mutable),
+    so we can't expose the index from Rust directly, since the `InnerRevlog`
+    already has ownership of the index. This object redirects all calls to the
+    index through the Rust-backed `InnerRevlog` glue which defines all
+    necessary forwarding methods.
+    """
+
+    def __init__(self, inner):
+        # Do not rename as it's being used to access the index from Rust
+        self.inner = inner
+
+    # TODO possibly write all index methods manually to save on overhead?
+    def __getattr__(self, name):
+        return getattr(self.inner, f"_index_{name}")
+
+    # Magic methods need to be defined explicitly
+    def __len__(self):
+        return self.inner._index___len__()
+
+    def __getitem__(self, key):
+        return self.inner._index___getitem__(key)
+
+    def __contains__(self, key):
+        return self.inner._index___contains__(key)
+
+    def __delitem__(self, key):
+        return self.inner._index___delitem__(key)
+
+
+class RustVFSWrapper:
+    """Used to wrap a Python VFS to pass it to Rust to lower the overhead of
+    calling back multiple times into Python.
+    """
+
+    def __init__(self, inner):
+        self.inner = inner
+
+    def __call__(
+        self,
+        path: bytes,
+        mode: bytes = b"rb",
+        atomictemp=False,
+        checkambig=False,
+    ):
+        fd = self.inner.__call__(
+            path=path, mode=mode, atomictemp=atomictemp, checkambig=checkambig
+        )
+        # Information that Rust needs to get ownership of the file that's
+        # being opened.
+        return (os.dup(fd.fileno()), fd._tempname if atomictemp else None)
+
+    def __getattr__(self, name):
+        return getattr(self.inner, name)
+
+
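The two classes above rely on a pair of Python idioms worth spelling out: a base class picked under `typing.TYPE_CHECKING` so that pytype sees `BaseIndexObject` while the runtime sees a bare `object`, and `__getattr__` forwarding so that every index method resolves to an `_index_*` method on the Rust-backed `InnerRevlog`. A minimal, runnable sketch of the same pattern; `IndexLike` and `FakeInner` are hypothetical stand-ins, not Mercurial classes:

import typing

class IndexLike:
    """Hypothetical stand-in for BaseIndexObject."""
    def headrevs(self): ...

if typing.TYPE_CHECKING:
    ProxyBase = IndexLike  # type checkers see the index methods
else:
    ProxyBase = object  # at runtime everything goes through __getattr__

class IndexProxy(ProxyBase):
    def __init__(self, inner):
        self.inner = inner

    def __getattr__(self, name):
        # Regular attribute lookups are redirected to `_index_<name>`
        return getattr(self.inner, f"_index_{name}")

    def __len__(self):
        # Special methods are looked up on the type, bypassing
        # __getattr__, so each one has to be forwarded explicitly.
        return self.inner._index___len__()

class FakeInner:
    """Hypothetical stand-in for the Rust InnerRevlog glue."""
    def _index_headrevs(self):
        return [0, 1]

    def _index___len__(self):
        return 2

proxy = IndexProxy(FakeInner())
assert proxy.headrevs() == [0, 1]
assert len(proxy) == 2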
 class revlog:
     """
     the underlying revision storage object
@@ -1358,6 +1425,7 @@
         self._trypending = trypending
         self._try_split = try_split
         self._may_inline = may_inline
+        self.uses_rust = False
         self.opener = opener
         if persistentnodemap:
             self._nodemap_file = nodemaputil.get_nodemap_file(self)
@@ -1392,7 +1460,7 @@
         # Maps rev to chain base rev.
         self._chainbasecache = util.lrucachedict(100)
 
-        self.index = None
+        self.index: Optional[BaseIndexObject] = None
         self._docket = None
         self._nodemap_docket = None
         # Mapping of partial identifiers to full nodes.
@@ -1406,8 +1474,8 @@
         # prevent nesting of addgroup
         self._adding_group = None
 
-        chunk_cache = self._loadindex()
-        self._load_inner(chunk_cache)
+        index, chunk_cache = self._loadindex()
+        self._load_inner(index, chunk_cache)
         self._concurrencychecker = concurrencychecker
 
     def _init_opts(self):
@@ -1647,6 +1715,7 @@
             self.delta_config.general_delta = features[b'generaldelta'](
                 self._format_flags
             )
+            self.data_config.generaldelta = self.delta_config.general_delta
             self.feature_config.has_side_data = features[b'sidedata']
 
             if not features[b'docket']:
@@ -1677,6 +1746,7 @@
             self._inline = False
             # generaldelta implied by version 2 revlogs.
             self.delta_config.general_delta = True
+            self.data_config.generaldelta = True
             # the logic for persistent nodemap will be dealt with within the
             # main docket, so disable it for now.
             self._nodemap_file = None
@@ -1705,7 +1775,12 @@
         )
 
         use_rust_index = False
-        if rustrevlog is not None and self._nodemap_file is not None:
+        rust_applicable = self._nodemap_file is not None
+        rust_applicable = rust_applicable or self.target[0] == KIND_FILELOG
+        rust_applicable = rust_applicable and getattr(
+            self.opener, "rust_compatible", True
+        )
+        if rustrevlog is not None and rust_applicable:
             # we would like to use the rust_index in all cases, especially
             # because it is necessary for AncestorsIterator and LazyAncestors
             # since the 6.7 cycle.
@@ -1715,6 +1790,20 @@
             # repository.
             use_rust_index = True
 
+            if self._format_version != REVLOGV1:
+                use_rust_index = False
+
+        if hasattr(self.opener, "fncache"):
+            vfs = self.opener.vfs
+            if not self.opener.uses_dotencode:
+                use_rust_index = False
+            if not isinstance(vfs, vfsmod.vfs):
+                # Be cautious, since we don't support other VFS classes
+                use_rust_index = False
+        else:
+            # Rust only supports repos with fncache
+            use_rust_index = False
+
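Collected in one place, the gating introduced above boils down to a single predicate (on top of the `rustrevlog` extension module being importable at all). A hedged restatement, not the actual implementation, that mirrors the checks from the diff and assumes `mercurial.vfs` and `mercurial.revlogutils.constants` are importable:

from mercurial import vfs as vfsmod
from mercurial.revlogutils.constants import KIND_FILELOG, REVLOGV1

def rust_index_applicable(rl):
    """Sketch of the conditions under which the Rust index is enabled."""
    # Worth enabling for revlogs with a nodemap, and for filelogs.
    applicable = rl._nodemap_file is not None or rl.target[0] == KIND_FILELOG
    # The opener must not have opted out (see statichttprepo below).
    applicable = applicable and getattr(rl.opener, "rust_compatible", True)
    # Only the v1 format is supported by the Rust index.
    applicable = applicable and rl._format_version == REVLOGV1
    # Rust only supports fncache repos using dotencode on a plain vfs.
    return (
        applicable
        and hasattr(rl.opener, "fncache")
        and rl.opener.uses_dotencode
        and isinstance(rl.opener.vfs, vfsmod.vfs)
    )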
         self._parse_index = parse_index_v1
         if self._format_version == REVLOGV0:
             self._parse_index = revlogv0.parse_index_v0
@@ -1724,58 +1813,103 @@
             self._parse_index = parse_index_cl_v2
         elif devel_nodemap:
             self._parse_index = parse_index_v1_nodemap
-        elif use_rust_index:
-            self._parse_index = functools.partial(
-                parse_index_v1_rust, default_header=new_header
-            )
-        try:
-            d = self._parse_index(index_data, self._inline)
-            index, chunkcache = d
-            use_nodemap = (
-                not self._inline
-                and self._nodemap_file is not None
-                and hasattr(index, 'update_nodemap_data')
-            )
-            if use_nodemap:
-                nodemap_data = nodemaputil.persisted_data(self)
-                if nodemap_data is not None:
-                    docket = nodemap_data[0]
-                    if (
-                        len(d[0]) > docket.tip_rev
-                        and d[0][docket.tip_rev][7] == docket.tip_node
-                    ):
-                        # no changelog tampering
-                        self._nodemap_docket = docket
-                        index.update_nodemap_data(*nodemap_data)
-        except (ValueError, IndexError):
-            raise error.RevlogError(
-                _(b"index %s is corrupted") % self.display_id
-            )
-        self.index = index
+
+        if use_rust_index:
+            # Let the Rust code parse its own index
+            index, chunkcache = (index_data, None)
+            self.uses_rust = True
+        else:
+            try:
+                d = self._parse_index(index_data, self._inline)
+                index, chunkcache = d
+                self._register_nodemap_info(index)
+            except (ValueError, IndexError):
+                raise error.RevlogError(
+                    _(b"index %s is corrupted") % self.display_id
+                )
         # revnum -> (chain-length, sum-delta-length)
         self._chaininfocache = util.lrucachedict(500)
 
-        return chunkcache
-
-    def _load_inner(self, chunk_cache):
+        return index, chunkcache
+
+    def _load_inner(self, index, chunk_cache):
         if self._docket is None:
             default_compression_header = None
         else:
             default_compression_header = self._docket.default_compression_header
 
-        self._inner = _InnerRevlog(
-            opener=self.opener,
-            index=self.index,
-            index_file=self._indexfile,
-            data_file=self._datafile,
-            sidedata_file=self._sidedatafile,
-            inline=self._inline,
-            data_config=self.data_config,
-            delta_config=self.delta_config,
-            feature_config=self.feature_config,
-            chunk_cache=chunk_cache,
-            default_compression_header=default_compression_header,
+        if self.uses_rust:
+            vfs_is_readonly = False
+            fncache = None
+
+            if hasattr(self.opener, "vfs"):
+                vfs = self.opener
+                if isinstance(vfs, vfsmod.readonlyvfs):
+                    vfs_is_readonly = True
+                    vfs = vfs.vfs
+                fncache = vfs.fncache
+                vfs = vfs.vfs
+            else:
+                vfs = self.opener
+
+            vfs_base = vfs.base
+            assert fncache is not None, "Rust only supports repos with fncache"
+
+            self._inner = rustrevlog.InnerRevlog(
+                vfs_base=vfs_base,
+                fncache=fncache,
+                vfs_is_readonly=vfs_is_readonly,
+                index_data=index,
+                index_file=self._indexfile,
+                data_file=self._datafile,
+                sidedata_file=self._sidedatafile,
+                inline=self._inline,
+                data_config=self.data_config,
+                delta_config=self.delta_config,
+                feature_config=self.feature_config,
+                chunk_cache=chunk_cache,
+                default_compression_header=default_compression_header,
+                revlog_type=self.target[0],
+                use_persistent_nodemap=self._nodemap_file is not None,
+            )
+            self.index = RustIndexProxy(self._inner)
+            self._register_nodemap_info(self.index)
+            self.uses_rust = True
+        else:
+            self._inner = _InnerRevlog(
+                opener=self.opener,
+                index=index,
+                index_file=self._indexfile,
+                data_file=self._datafile,
+                sidedata_file=self._sidedatafile,
+                inline=self._inline,
+                data_config=self.data_config,
+                delta_config=self.delta_config,
+                feature_config=self.feature_config,
+                chunk_cache=chunk_cache,
+                default_compression_header=default_compression_header,
+            )
+            self.index = self._inner.index
+
+    def _register_nodemap_info(self, index):
+        use_nodemap = (
+            not self._inline
+            and self._nodemap_file is not None
+            and hasattr(index, 'update_nodemap_data')
         )
+        if use_nodemap:
+            nodemap_data = nodemaputil.persisted_data(self)
+            if nodemap_data is not None:
+                docket = nodemap_data[0]
+                if (
+                    len(index) > docket.tip_rev
+                    and index[docket.tip_rev][7] == docket.tip_node
+                ):
+                    # no changelog tampering
+                    self._nodemap_docket = docket
+                    index.update_nodemap_data(
+                        *nodemap_data
+                    )  # pytype: disable=attribute-error
 
     def get_revlog(self):
         """simple function to mirror API of other not-really-revlog API"""
@@ -1867,7 +2001,9 @@
             nodemap_data = nodemaputil.persisted_data(self)
             if nodemap_data is not None:
                 self._nodemap_docket = nodemap_data[0]
-                self.index.update_nodemap_data(*nodemap_data)
+                self.index.update_nodemap_data(
+                    *nodemap_data
+                )  # pytype: disable=attribute-error
 
     def rev(self, node):
         """return the revision number associated with a <nodeid>"""
@@ -2366,23 +2502,26 @@
     def headrevs(self, revs=None, stop_rev=None):
         if revs is None:
             return self.index.headrevs(None, stop_rev)
-        assert stop_rev is None
         if rustdagop is not None and self.index.rust_ext_compat:
             return rustdagop.headrevs(self.index, revs)
         return dagop.headrevs(revs, self._uncheckedparentrevs)
 
     def headrevsdiff(self, start, stop):
         try:
-            return self.index.headrevsdiff(start, stop)
+            return self.index.headrevsdiff(
+                start, stop
+            )  # pytype: disable=attribute-error
         except AttributeError:
             return dagop.headrevsdiff(self._uncheckedparentrevs, start, stop)
 
     def computephases(self, roots):
-        return self.index.computephasesmapsets(roots)
+        return self.index.computephasesmapsets(
+            roots
+        )  # pytype: disable=attribute-error
 
     def _head_node_ids(self):
         try:
-            return self.index.head_node_ids()
+            return self.index.head_node_ids()  # pytype: disable=attribute-error
         except AttributeError:
             return [self.node(r) for r in self.headrevs()]
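These hunks all repeat one idiom: try the optimized method on the index, fall back to the pure-Python DAG implementation on AttributeError, and silence pytype at the call site because the `BaseIndexObject` annotation does not declare the optional C/Rust-only methods. A sketch of the idiom, modeled on `_commonancestorsheads` below and assuming the `mercurial` package is importable:

from mercurial import ancestor

def common_ancestors_heads(index, parentrevs, *revs):
    """Fast path on the native index, pure-Python fallback otherwise."""
    try:
        # Only the C/Rust index implements this, hence the directive.
        return index.commonancestorsheads(
            *revs
        )  # pytype: disable=attribute-error
    except (AttributeError, OverflowError):
        # Recompute from the parent revisions in pure Python.
        return ancestor.commonancestorsheads(parentrevs, *revs)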
 
@@ -2440,7 +2579,9 @@
     def _commonancestorsheads(self, *revs):
         """calculate all the heads of the common ancestors of revs"""
         try:
-            ancs = self.index.commonancestorsheads(*revs)
+            ancs = self.index.commonancestorsheads(
+                *revs
+            )  # pytype: disable=attribute-error
         except (AttributeError, OverflowError):  # C implementation failed
             ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
         return ancs
@@ -2474,7 +2615,7 @@
         try:
             return self.index.reachableroots2(
                 minroot, heads, roots, includepath
-            )
+            )  # pytype: disable=attribute-error
         except AttributeError:
             return dagop._reachablerootspure(
                 self.parentrevs, minroot, roots, heads, includepath
@@ -2485,7 +2626,7 @@
 
         a, b = self.rev(a), self.rev(b)
         try:
-            ancs = self.index.ancestors(a, b)
+            ancs = self.index.ancestors(a, b)  # pytype: disable=attribute-error
         except (AttributeError, OverflowError):
             ancs = ancestor.ancestors(self.parentrevs, a, b)
         if ancs:
@@ -2532,7 +2673,9 @@
         maybewdir = self.nodeconstants.wdirhex.startswith(id)
         ambiguous = False
         try:
-            partial = self.index.partialmatch(id)
+            partial = self.index.partialmatch(
+                id
+            )  # pytype: disable=attribute-error
             if partial and self.hasnode(partial):
                 if maybewdir:
                     # single 'ff...' match in radix tree, ambiguous with wdir
@@ -2634,7 +2777,10 @@
 
         if not getattr(self, 'filteredrevs', None):
             try:
-                length = max(self.index.shortest(node), minlength)
+                shortest = self.index.shortest(
+                    node
+                )  # pytype: disable=attribute-error
+                length = max(shortest, minlength)
                 return disambiguate(hexnode, length)
             except error.RevlogError:
                 if node != self.nodeconstants.wdirid:
@@ -4087,7 +4233,9 @@
             ifh.seek(startrev * self.index.entry_size)
             for i, e in enumerate(new_entries):
                 rev = startrev + i
-                self.index.replace_sidedata_info(rev, *e)
+                self.index.replace_sidedata_info(
+                    rev, *e
+                )  # pytype: disable=attribute-error
                 packed = self.index.entry_binary(rev)
                 if rev == 0 and self._docket is None:
                     header = self._format_flags | self._format_version
--- a/mercurial/revlogutils/rewrite.py	Wed Nov 20 15:38:57 2024 +0100
+++ b/mercurial/revlogutils/rewrite.py	Wed Nov 20 15:53:19 2024 +0100
@@ -136,8 +136,8 @@
         rl.opener.rename(newrl._datafile, rl._datafile)
 
     rl.clearcaches()
-    chunk_cache = rl._loadindex()
-    rl._load_inner(chunk_cache)
+    index, chunk_cache = rl._loadindex()
+    rl._load_inner(index, chunk_cache)
 
 
 def v2_censor(revlog, tr, censor_nodes, tombstone=b''):
@@ -327,7 +327,8 @@
 
     # reload the revlog internal information
     revlog.clearcaches()
-    revlog._loadindex(docket=docket)
+    index, chunk_cache = revlog._loadindex(docket=docket)
+    revlog._load_inner(index, chunk_cache)
 
     @contextlib.contextmanager
     def all_files_opener():
@@ -569,7 +570,8 @@
 
             rl.opener.rename(new_file_path, index_file)
             rl.clearcaches()
-            rl._loadindex()
+            index, chunk_cache = rl._loadindex()
+            rl._load_inner(index, chunk_cache)
         finally:
             util.tryunlink(new_file_path)
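Since `_loadindex` now returns the `(index, chunk_cache)` pair instead of installing the inner revlog itself, every reload site pairs it with `_load_inner`. A hedged helper consolidating the idiom used three times in this file; the helper name is hypothetical:

def _reload_revlog(rl, docket=None):
    """Drop caches, then rebuild the index and the inner revlog (sketch)."""
    rl.clearcaches()
    if docket is None:
        index, chunk_cache = rl._loadindex()
    else:
        index, chunk_cache = rl._loadindex(docket=docket)
    rl._load_inner(index, chunk_cache)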
 
--- a/mercurial/statichttprepo.py	Wed Nov 20 15:38:57 2024 +0100
+++ b/mercurial/statichttprepo.py	Wed Nov 20 15:53:19 2024 +0100
@@ -219,6 +219,9 @@
         self.store = localrepo.makestore(requirements, self.path, vfsclass)
         self.spath = self.store.path
         self.svfs = self.store.opener
+        # We can't use Rust because the Rust code cannot cope with the
+        # `httprangereader` (yet?)
+        self.svfs.rust_compatible = False
         self.sjoin = self.store.join
         self._filecache = {}
         self.requirements = requirements
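`rust_compatible` defaults to True on the VFS base class (see the mercurial/vfs.py hunk below) and is read via `getattr(opener, "rust_compatible", True)` in `_loadindex`, so any opener can opt out exactly as the static HTTP repository does here. A minimal runnable sketch with a hypothetical opener class:

class RangeReaderVFS:
    """Hypothetical opener that the Rust code cannot drive."""

    # Opt out of the Rust InnerRevlog fast path.
    rust_compatible = False

def uses_rust_path(opener):
    # Mirrors the revlog._loadindex check: a missing attribute means
    # compatible, an explicit False disables the Rust index.
    return getattr(opener, "rust_compatible", True)

assert uses_rust_path(RangeReaderVFS()) is False
assert uses_rust_path(object()) is True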
--- a/mercurial/store.py	Wed Nov 20 15:38:57 2024 +0100
+++ b/mercurial/store.py	Wed Nov 20 15:53:19 2024 +0100
@@ -998,12 +998,16 @@
         # set of new additions to fncache
         self.addls = set()
 
+    @property
+    def is_loaded(self):
+        return self.entries is not None
+
     def ensureloaded(self, warn=None):
         """read the fncache file if not already read.
 
         If the file on disk is corrupted, raise. If warn is provided,
         warn and keep going instead."""
-        if self.entries is None:
+        if not self.is_loaded:
             self._load(warn)
 
     def _load(self, warn=None):
@@ -1058,7 +1062,7 @@
 
     def write(self, tr):
         if self._dirty:
-            assert self.entries is not None
+            assert self.is_loaded
             self.entries = self.entries | self.addls
             self.addls = set()
             tr.addbackup(b'fncache')
@@ -1083,13 +1087,13 @@
     def add(self, fn):
         if fn in self._ignores:
             return
-        if self.entries is None:
+        if not self.is_loaded:
             self._load()
         if fn not in self.entries:
             self.addls.add(fn)
 
     def remove(self, fn):
-        if self.entries is None:
+        if not self.is_loaded:
             self._load()
         if fn in self.addls:
             self.addls.remove(fn)
@@ -1103,12 +1107,12 @@
     def __contains__(self, fn):
         if fn in self.addls:
             return True
-        if self.entries is None:
+        if not self.is_loaded:
             self._load()
         return fn in self.entries
 
     def __iter__(self):
-        if self.entries is None:
+        if not self.is_loaded:
             self._load()
         return iter(self.entries | self.addls)
 
@@ -1116,8 +1120,9 @@
 class _fncachevfs(vfsmod.proxyvfs):
     def __init__(self, vfs, fnc, encode):
         vfsmod.proxyvfs.__init__(self, vfs)
-        self.fncache = fnc
+        self.fncache: fncache = fnc
         self.encode = encode
+        self.uses_dotencode = encode is _pathencode
 
     def __call__(self, path, mode=b'r', *args, **kw):
         encoded = self.encode(path)
@@ -1128,7 +1133,7 @@
         ):
             # do not trigger a fncache load when adding a file that already is
             # known to exist.
-            notload = self.fncache.entries is None and (
+            notload = not self.fncache.is_loaded and (
                 # if the file has size zero, it should be considered as missing.
                 # Such zero-size files are the result of truncation when a
                 # transaction is aborted.
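The new `is_loaded` property replaces the scattered `self.entries is None` checks with one explicit lazy-load guard. A self-contained toy model of the pattern; the real fncache reads and decodes `.hg/store/fncache` in `_load`:

class LazySet:
    """Toy illustration of fncache's lazy loading and is_loaded guard."""

    def __init__(self):
        self.entries = None  # nothing read from disk yet
        self.addls = set()  # additions staged before the next write

    @property
    def is_loaded(self):
        return self.entries is not None

    def _load(self):
        # The real fncache parses the on-disk file here.
        self.entries = set()

    def add(self, fn):
        if not self.is_loaded:
            self._load()
        if fn not in self.entries:
            self.addls.add(fn)

    def __contains__(self, fn):
        if fn in self.addls:
            return True
        if not self.is_loaded:
            self._load()
        return fn in self.entries

c = LazySet()
assert not c.is_loaded
c.add(b"data/foo.i")
assert c.is_loaded and b"data/foo.i" in c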
--- a/mercurial/vfs.py	Wed Nov 20 15:38:57 2024 +0100
+++ b/mercurial/vfs.py	Wed Nov 20 15:53:19 2024 +0100
@@ -82,6 +82,10 @@
     # encoded vfs (see issue6546)
     _dir_sep: bytes = b'/'
 
+    # Used to disable the Rust `InnerRevlog` in case the VFS is not supported
+    # by the Rust code
+    rust_compatible = True
+
     # TODO: type return, which is util.posixfile wrapped by a proxy
     @abc.abstractmethod
     def __call__(self, path: bytes, mode: bytes = b'rb', **kwargs) -> Any:
--- a/rust/Cargo.lock	Wed Nov 20 15:38:57 2024 +0100
+++ b/rust/Cargo.lock	Wed Nov 20 15:53:19 2024 +0100
@@ -15,6 +15,7 @@
 checksum = "bf6ccdb167abbf410dcb915cabd428929d7f6a04980b54a11f26a39f1c7f7107"
 dependencies = [
  "cfg-if",
+ "getrandom 0.2.8",
  "once_cell",
  "version_check",
 ]
@@ -509,14 +510,14 @@
 
 [[package]]
 name = "filetime"
-version = "0.2.25"
+version = "0.2.23"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "35c0522e981e68cbfa8c3f978441a5f34b30b96e146b33cd3359176b50fe8586"
+checksum = "1ee447700ac8aa0b2f2bd7bc4462ad686ba06baa6727ac149a2d6277f0d240fd"
 dependencies = [
  "cfg-if",
  "libc",
- "libredox",
- "windows-sys 0.59.0",
+ "redox_syscall 0.4.1",
+ "windows-sys 0.52.0",
 ]
 
 [[package]]
@@ -596,9 +597,9 @@
 
 [[package]]
 name = "hashbrown"
-version = "0.13.1"
+version = "0.13.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "33ff8ae62cd3a9102e5637afc8452c55acf3844001bd5374e0b0bd7b6616c038"
+checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e"
 dependencies = [
  "ahash",
  "rayon",
@@ -642,7 +643,7 @@
  "filetime",
  "flate2",
  "format-bytes",
- "hashbrown 0.13.1",
+ "hashbrown 0.13.2",
  "home",
  "im-rc",
  "indicatif",
@@ -660,6 +661,7 @@
  "rayon",
  "regex",
  "same-file",
+ "schnellru",
  "self_cell",
  "serde",
  "sha-1 0.10.0",
@@ -681,6 +683,8 @@
  "hg-core",
  "libc",
  "log",
+ "logging_timer",
+ "python3-sys",
  "stable_deref_trait",
  "vcsgraph",
 ]
@@ -823,7 +827,6 @@
 dependencies = [
  "bitflags 2.6.0",
  "libc",
- "redox_syscall 0.5.3",
 ]
 
 [[package]]
@@ -1220,11 +1223,11 @@
 
 [[package]]
 name = "redox_syscall"
-version = "0.5.3"
+version = "0.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2a908a6e00f1fdd0dfd9c0eb08ce85126f6d8bbda50017e74bc4a4b7d4a926a4"
+checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa"
 dependencies = [
- "bitflags 2.6.0",
+ "bitflags 1.3.2",
 ]
 
 [[package]]
@@ -1312,6 +1315,17 @@
 ]
 
 [[package]]
+name = "schnellru"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c9a8ef13a93c54d20580de1e5c413e624e53121d42fc7e2c11d10ef7f8b02367"
+dependencies = [
+ "ahash",
+ "cfg-if",
+ "hashbrown 0.13.2",
+]
+
+[[package]]
 name = "scopeguard"
 version = "1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
--- a/rust/hg-core/Cargo.toml	Wed Nov 20 15:38:57 2024 +0100
+++ b/rust/hg-core/Cargo.toml	Wed Nov 20 15:53:19 2024 +0100
@@ -43,6 +43,7 @@
 once_cell = "1.16.0"
 bitvec = "1.0.1"
 chrono = "0.4.34"
+schnellru = "0.2.1"
 dyn-clone = "1.0.16"
 filetime = "0.2.23"
 uuid = { version = "1.10", features = ["v4"] }
--- a/rust/hg-core/examples/nodemap/index.rs	Wed Nov 20 15:38:57 2024 +0100
+++ b/rust/hg-core/examples/nodemap/index.rs	Wed Nov 20 15:53:19 2024 +0100
@@ -4,6 +4,7 @@
 // GNU General Public License version 2 or any later version.
 
 //! Minimal `RevlogIndex`, readable from standard Mercurial file format
+use hg::revlog::RevlogIndex;
 use hg::*;
 use memmap2::*;
 use std::fs::File;
--- a/rust/hg-core/src/errors.rs	Wed Nov 20 15:38:57 2024 +0100
+++ b/rust/hg-core/src/errors.rs	Wed Nov 20 15:53:19 2024 +0100
@@ -68,6 +68,10 @@
         from: std::path::PathBuf,
         to: std::path::PathBuf,
     },
+    CopyingFile {
+        from: std::path::PathBuf,
+        to: std::path::PathBuf,
+    },
     /// `std::fs::canonicalize`
     CanonicalizingPath(std::path::PathBuf),
     /// `std::env::current_dir`
@@ -150,6 +154,12 @@
                 from.display(),
                 to.display()
             ),
+            IoErrorContext::CopyingFile { from, to } => write!(
+                f,
+                "when copying {} to {}",
+                from.display(),
+                to.display()
+            ),
             IoErrorContext::CanonicalizingPath(path) => {
                 write!(f, "when canonicalizing {}", path.display())
             }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rust/hg-core/src/fncache.rs	Wed Nov 20 15:53:19 2024 +0100
@@ -0,0 +1,26 @@
+use std::path::Path;
+
+use dyn_clone::DynClone;
+
+/// The FnCache stores the list of most files contained in the store and is
+/// used for stream/copy clones.
+///
+/// It keeps track of the name of "all" indexes and data files for all revlogs.
+/// The names are relative to the store roots and are stored before any
+/// encoding or path compression.
+///
+/// Despite its name, the FnCache is *NOT* a cache: it keeps track of
+/// information that is not easily available elsewhere. It has no mechanism
+/// for detecting that it isn't up to date, and de-synchronization with the actual
+/// contents of the repository will lead to a corrupted clone and possibly
+/// other corruption during maintenance operations.
+/// Strictly speaking, it could be recomputed by looking at the contents of all
+/// manifests AND actual store files on disk, however that is a
+/// prohibitively expensive operation.
+pub trait FnCache: Sync + Send + DynClone {
+    /// Whether the fncache was loaded from disk
+    fn is_loaded(&self) -> bool;
+    /// Add a path to be tracked in the fncache
+    fn add(&self, path: &Path);
+    // TODO add more methods once we start doing more with the FnCache
+}
--- a/rust/hg-core/src/lib.rs	Wed Nov 20 15:38:57 2024 +0100
+++ b/rust/hg-core/src/lib.rs	Wed Nov 20 15:53:19 2024 +0100
@@ -14,6 +14,7 @@
 pub mod dirstate_tree;
 pub mod discovery;
 pub mod exit_codes;
+pub mod fncache;
 pub mod requirements;
 pub mod testing; // unconditionally built, for use from integration tests
 pub use dirstate::{
@@ -29,7 +30,12 @@
 pub mod matchers;
 pub mod repo;
 pub mod revlog;
-pub use revlog::*;
+// Export very common types to make discovery easier
+pub use revlog::{
+    BaseRevision, Graph, GraphError, Node, NodePrefix, Revision,
+    UncheckedRevision, NULL_NODE, NULL_NODE_ID, NULL_REVISION,
+    WORKING_DIRECTORY_HEX, WORKING_DIRECTORY_REVISION,
+};
 pub mod checkexec;
 pub mod config;
 pub mod lock;
@@ -37,6 +43,7 @@
 pub mod operations;
 pub mod progress;
 pub mod revset;
+pub mod transaction;
 pub mod update;
 pub mod utils;
 pub mod vfs;
--- a/rust/hg-core/src/lock.rs	Wed Nov 20 15:38:57 2024 +0100
+++ b/rust/hg-core/src/lock.rs	Wed Nov 20 15:53:19 2024 +0100
@@ -2,9 +2,11 @@
 
 use crate::errors::HgError;
 use crate::errors::HgResultExt;
+use crate::vfs::Vfs;
 use crate::vfs::VfsImpl;
 use std::io;
 use std::io::ErrorKind;
+use std::path::Path;
 
 #[derive(derive_more::From)]
 pub enum LockError {
@@ -65,7 +67,7 @@
         if !lock_should_be_broken(&lock_data) {
             return Err(LockError::AlreadyHeld);
         }
-        Ok(hg_vfs.remove_file(lock_filename)?)
+        Ok(hg_vfs.unlink(Path::new(lock_filename))?)
     })?
 }
 
@@ -99,7 +101,7 @@
 }
 
 fn unlock(hg_vfs: &VfsImpl, lock_filename: &str) -> Result<(), HgError> {
-    hg_vfs.remove_file(lock_filename)
+    hg_vfs.unlink(Path::new(lock_filename))
 }
 
 /// Return whether the process that is/was holding the lock is known not to be
--- a/rust/hg-core/src/logging.rs	Wed Nov 20 15:38:57 2024 +0100
+++ b/rust/hg-core/src/logging.rs	Wed Nov 20 15:53:19 2024 +0100
@@ -1,6 +1,7 @@
 use crate::errors::{HgError, HgResultExt, IoErrorContext, IoResultExt};
-use crate::vfs::VfsImpl;
+use crate::vfs::{Vfs, VfsImpl};
 use std::io::Write;
+use std::path::Path;
 
 /// A utility to append to a log file with the given name, and optionally
 /// rotate it after it reaches a certain maximum size.
@@ -64,15 +65,20 @@
                 for i in (1..self.max_files).rev() {
                     self.vfs
                         .rename(
-                            format!("{}.{}", self.name, i),
-                            format!("{}.{}", self.name, i + 1),
+                            Path::new(&format!("{}.{}", self.name, i)),
+                            Path::new(&format!("{}.{}", self.name, i + 1)),
+                            false,
                         )
                         .io_not_found_as_none()?;
                 }
                 // Then rename `{name}` to `{name}.1`. This is the
                 // previously-opened `file`.
                 self.vfs
-                    .rename(self.name, format!("{}.1", self.name))
+                    .rename(
+                        Path::new(&self.name),
+                        Path::new(&format!("{}.1", self.name)),
+                        false,
+                    )
                     .io_not_found_as_none()?;
                 // Finally, create a new `{name}` file and replace our `file`
                 // handle.
@@ -87,9 +93,7 @@
 #[test]
 fn test_rotation() {
     let temp = tempfile::tempdir().unwrap();
-    let vfs = VfsImpl {
-        base: temp.path().to_owned(),
-    };
+    let vfs = VfsImpl::new(temp.path().to_owned(), false);
     let logger = LogFile::new(vfs.clone(), "log")
         .max_size(Some(3))
         .max_files(2);
--- a/rust/hg-core/src/operations/cat.rs	Wed Nov 20 15:38:57 2024 +0100
+++ b/rust/hg-core/src/operations/cat.rs	Wed Nov 20 15:53:19 2024 +0100
@@ -12,8 +12,8 @@
 use crate::utils::hg_path::HgPath;
 
 use crate::errors::HgError;
-use crate::manifest::Manifest;
-use crate::manifest::ManifestEntry;
+use crate::revlog::manifest::Manifest;
+use crate::revlog::manifest::ManifestEntry;
 use itertools::put_back;
 use itertools::PutBack;
 use std::cmp::Ordering;
--- a/rust/hg-core/src/operations/debugdata.rs	Wed Nov 20 15:38:57 2024 +0100
+++ b/rust/hg-core/src/operations/debugdata.rs	Wed Nov 20 15:53:19 2024 +0100
@@ -6,9 +6,10 @@
 // GNU General Public License version 2 or any later version.
 
 use crate::errors::HgError;
+use crate::exit_codes;
 use crate::repo::Repo;
-use crate::revlog::Revlog;
-use crate::{exit_codes, RevlogError, RevlogType};
+use crate::revlog::options::default_revlog_options;
+use crate::revlog::{Revlog, RevlogError, RevlogType};
 
 /// Dump the contents data of a revision.
 pub fn debug_data(
@@ -31,7 +32,11 @@
         &repo.store_vfs(),
         index_file,
         None,
-        repo.default_revlog_options(RevlogType::Changelog)?,
+        default_revlog_options(
+            repo.config(),
+            repo.requirements(),
+            RevlogType::Changelog,
+        )?,
     )?;
     let rev =
         crate::revset::resolve_rev_number_or_hex_prefix(revset, &revlog)?;
--- a/rust/hg-core/src/repo.rs	Wed Nov 20 15:38:57 2024 +0100
+++ b/rust/hg-core/src/repo.rs	Wed Nov 20 15:53:19 2024 +0100
@@ -1,4 +1,3 @@
-use crate::changelog::Changelog;
 use crate::config::{Config, ConfigError, ConfigParseError};
 use crate::dirstate::DirstateParents;
 use crate::dirstate_tree::dirstate_map::{
@@ -9,24 +8,19 @@
 use crate::errors::HgResultExt;
 use crate::errors::{HgError, IoResultExt};
 use crate::lock::{try_with_lock_no_wait, LockError};
-use crate::manifest::{Manifest, Manifestlog};
-use crate::requirements::{
-    CHANGELOGV2_REQUIREMENT, DIRSTATE_TRACKED_HINT_V1,
-    GENERALDELTA_REQUIREMENT, NODEMAP_REQUIREMENT, REVLOGV1_REQUIREMENT,
-    REVLOGV2_REQUIREMENT,
-};
+use crate::requirements::DIRSTATE_TRACKED_HINT_V1;
+use crate::revlog::changelog::Changelog;
 use crate::revlog::filelog::Filelog;
-use crate::revlog::RevlogError;
+use crate::revlog::manifest::{Manifest, Manifestlog};
+use crate::revlog::options::default_revlog_options;
+use crate::revlog::{RevlogError, RevlogType};
 use crate::utils::debug::debug_wait_for_file_or_print;
 use crate::utils::files::get_path_from_bytes;
 use crate::utils::hg_path::HgPath;
 use crate::utils::SliceExt;
-use crate::vfs::{is_dir, is_file, VfsImpl};
-use crate::{
-    exit_codes, requirements, NodePrefix, RevlogDataConfig, RevlogDeltaConfig,
-    RevlogFeatureConfig, RevlogType, RevlogVersionOptions, UncheckedRevision,
-};
-use crate::{DirstateError, RevlogOpenOptions};
+use crate::vfs::{is_dir, is_file, Vfs, VfsImpl};
+use crate::DirstateError;
+use crate::{exit_codes, requirements, NodePrefix, UncheckedRevision};
 use std::cell::{Ref, RefCell, RefMut};
 use std::collections::HashSet;
 use std::io::Seek;
@@ -151,9 +145,7 @@
         let mut repo_config_files =
             vec![dot_hg.join("hgrc"), dot_hg.join("hgrc-not-shared")];
 
-        let hg_vfs = VfsImpl {
-            base: dot_hg.to_owned(),
-        };
+        let hg_vfs = VfsImpl::new(dot_hg.to_owned(), false);
         let mut reqs = requirements::load_if_exists(&hg_vfs)?;
         let relative =
             reqs.contains(requirements::RELATIVE_SHARED_REQUIREMENT);
@@ -195,9 +187,10 @@
 
             store_path = shared_path.join("store");
 
-            let source_is_share_safe = requirements::load(VfsImpl {
-                base: shared_path.to_owned(),
-            })?
+            let source_is_share_safe = requirements::load(VfsImpl::new(
+                shared_path.to_owned(),
+                true,
+            ))?
             .contains(requirements::SHARESAFE_REQUIREMENT);
 
             if share_safe != source_is_share_safe {
@@ -209,9 +202,10 @@
             }
         }
         if share_safe {
-            reqs.extend(requirements::load(VfsImpl {
-                base: store_path.to_owned(),
-            })?);
+            reqs.extend(requirements::load(VfsImpl::new(
+                store_path.to_owned(),
+                true,
+            ))?);
         }
 
         let repo_config = if std::env::var_os("HGRCSKIPREPO").is_none() {
@@ -252,23 +246,17 @@
     /// For accessing repository files (in `.hg`), except for the store
     /// (`.hg/store`).
     pub fn hg_vfs(&self) -> VfsImpl {
-        VfsImpl {
-            base: self.dot_hg.to_owned(),
-        }
+        VfsImpl::new(self.dot_hg.to_owned(), false)
     }
 
     /// For accessing repository store files (in `.hg/store`)
     pub fn store_vfs(&self) -> VfsImpl {
-        VfsImpl {
-            base: self.store.to_owned(),
-        }
+        VfsImpl::new(self.store.to_owned(), false)
     }
 
     /// For accessing the working copy
     pub fn working_directory_vfs(&self) -> VfsImpl {
-        VfsImpl {
-            base: self.working_directory.to_owned(),
-        }
+        VfsImpl::new(self.working_directory.to_owned(), false)
     }
 
     pub fn try_with_wlock_no_wait<R>(
@@ -577,7 +565,11 @@
     fn new_changelog(&self) -> Result<Changelog, HgError> {
         Changelog::open(
             &self.store_vfs(),
-            self.default_revlog_options(RevlogType::Changelog)?,
+            default_revlog_options(
+                self.config(),
+                self.requirements(),
+                RevlogType::Changelog,
+            )?,
         )
     }
 
@@ -592,7 +584,11 @@
     fn new_manifestlog(&self) -> Result<Manifestlog, HgError> {
         Manifestlog::open(
             &self.store_vfs(),
-            self.default_revlog_options(RevlogType::Manifestlog)?,
+            default_revlog_options(
+                self.config(),
+                self.requirements(),
+                RevlogType::Manifestlog,
+            )?,
         )
     }
 
@@ -642,7 +638,11 @@
         Filelog::open(
             self,
             path,
-            self.default_revlog_options(RevlogType::Filelog)?,
+            default_revlog_options(
+                self.config(),
+                self.requirements(),
+                RevlogType::Filelog,
+            )?,
         )
     }
     /// Write to disk any updates that were made through `dirstate_map_mut`.
@@ -787,55 +787,11 @@
         if let Some(uuid) = old_uuid_to_remove {
             // Remove the old data file after the new docket pointing to the
             // new data file was written.
-            vfs.remove_file(format!("dirstate.{}", uuid))?;
+            vfs.unlink(Path::new(&format!("dirstate.{}", uuid)))?;
         }
         Ok(())
     }
 
-    pub fn default_revlog_options(
-        &self,
-        revlog_type: RevlogType,
-    ) -> Result<RevlogOpenOptions, HgError> {
-        let requirements = self.requirements();
-        let is_changelog = revlog_type == RevlogType::Changelog;
-        let version = if is_changelog
-            && requirements.contains(CHANGELOGV2_REQUIREMENT)
-        {
-            let compute_rank = self
-                .config()
-                .get_bool(b"experimental", b"changelog-v2.compute-rank")?;
-            RevlogVersionOptions::ChangelogV2 { compute_rank }
-        } else if requirements.contains(REVLOGV2_REQUIREMENT) {
-            RevlogVersionOptions::V2
-        } else if requirements.contains(REVLOGV1_REQUIREMENT) {
-            RevlogVersionOptions::V1 {
-                general_delta: requirements.contains(GENERALDELTA_REQUIREMENT),
-                inline: !is_changelog,
-            }
-        } else {
-            RevlogVersionOptions::V0
-        };
-        Ok(RevlogOpenOptions {
-            version,
-            // We don't need to dance around the slow path like in the Python
-            // implementation since we know we have access to the fast code.
-            use_nodemap: requirements.contains(NODEMAP_REQUIREMENT),
-            delta_config: RevlogDeltaConfig::new(
-                self.config(),
-                self.requirements(),
-                revlog_type,
-            )?,
-            data_config: RevlogDataConfig::new(
-                self.config(),
-                self.requirements(),
-            )?,
-            feature_config: RevlogFeatureConfig::new(
-                self.config(),
-                requirements,
-            )?,
-        })
-    }
-
     pub fn node(&self, rev: UncheckedRevision) -> Option<crate::Node> {
         self.changelog()
             .ok()
--- a/rust/hg-core/src/revlog/changelog.rs	Wed Nov 20 15:38:57 2024 +0100
+++ b/rust/hg-core/src/revlog/changelog.rs	Wed Nov 20 15:53:19 2024 +0100
@@ -14,7 +14,9 @@
 use crate::revlog::{Revlog, RevlogEntry, RevlogError};
 use crate::utils::hg_path::HgPath;
 use crate::vfs::VfsImpl;
-use crate::{Graph, GraphError, RevlogOpenOptions, UncheckedRevision};
+use crate::{Graph, GraphError, UncheckedRevision};
+
+use super::options::RevlogOpenOptions;
 
 /// A specialized `Revlog` to work with changelog data format.
 pub struct Changelog {
@@ -84,7 +86,7 @@
     }
 
     pub fn get_index(&self) -> &Index {
-        &self.revlog.index
+        self.revlog.index()
     }
 }
 
@@ -364,10 +366,7 @@
     let timezone = FixedOffset::west_opt(timezone_secs)
         .ok_or_else(|| HgError::corrupted("timezone offset out of bounds"))?;
 
-    Ok(DateTime::from_naive_utc_and_offset(
-        timestamp_utc.naive_utc(),
-        timezone,
-    ))
+    Ok(timestamp_utc.with_timezone(&timezone))
 }
 
 /// Attempt to parse the given string as floating-point timestamp, and
@@ -504,10 +503,7 @@
 mod tests {
     use super::*;
     use crate::vfs::VfsImpl;
-    use crate::{
-        RevlogDataConfig, RevlogDeltaConfig, RevlogFeatureConfig,
-        NULL_REVISION,
-    };
+    use crate::NULL_REVISION;
     use pretty_assertions::assert_eq;
 
     #[test]
@@ -566,23 +562,11 @@
     fn test_data_from_rev_null() -> Result<(), RevlogError> {
         // an empty revlog will be enough for this case
         let temp = tempfile::tempdir().unwrap();
-        let vfs = VfsImpl {
-            base: temp.path().to_owned(),
-        };
+        let vfs = VfsImpl::new(temp.path().to_owned(), false);
         std::fs::write(temp.path().join("foo.i"), b"").unwrap();
-        std::fs::write(temp.path().join("foo.d"), b"").unwrap();
-        let revlog = Revlog::open(
-            &vfs,
-            "foo.i",
-            None,
-            RevlogOpenOptions::new(
-                false,
-                RevlogDataConfig::default(),
-                RevlogDeltaConfig::default(),
-                RevlogFeatureConfig::default(),
-            ),
-        )
-        .unwrap();
+        let revlog =
+            Revlog::open(&vfs, "foo.i", None, RevlogOpenOptions::default())
+                .unwrap();
 
         let changelog = Changelog { revlog };
         assert_eq!(
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rust/hg-core/src/revlog/compression.rs	Wed Nov 20 15:53:19 2024 +0100
@@ -0,0 +1,383 @@
+//! Helpers around revlog compression
+
+use std::cell::RefCell;
+use std::collections::HashSet;
+use std::io::Read;
+
+use flate2::bufread::ZlibEncoder;
+use flate2::read::ZlibDecoder;
+
+use crate::config::Config;
+use crate::errors::HgError;
+use crate::exit_codes;
+
+use super::corrupted;
+use super::RevlogError;
+
+/// Header byte used to identify ZSTD-compressed data
+pub const ZSTD_BYTE: u8 = b'\x28';
+/// Header byte used to identify Zlib-compressed data
+pub const ZLIB_BYTE: u8 = b'x';
+
+const ZSTD_DEFAULT_LEVEL: u8 = 3;
+const ZLIB_DEFAULT_LEVEL: u8 = 6;
+/// The length of data below which we don't even try to compress it when using
+/// Zstandard.
+const MINIMUM_LENGTH_ZSTD: usize = 50;
+/// The length of data below which we don't even try to compress it when using
+/// Zlib.
+const MINIMUM_LENGTH_ZLIB: usize = 44;
+
+/// Defines the available compression engines and their options.
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+pub enum CompressionConfig {
+    Zlib {
+        /// Between 0 and 9 included
+        level: u8,
+    },
+    Zstd {
+        /// Between 0 and 22 included
+        level: u8,
+        /// Never used in practice for now
+        threads: u8,
+    },
+    /// No compression is performed
+    None,
+}
+
+impl CompressionConfig {
+    pub fn new(
+        config: &Config,
+        requirements: &HashSet<String>,
+    ) -> Result<Self, HgError> {
+        let mut new = Self::default();
+
+        let zlib_level = config.get_u32(b"storage", b"revlog.zlib.level")?;
+        let zstd_level = config.get_u32(b"storage", b"revlog.zstd.level")?;
+
+        for requirement in requirements {
+            if requirement.starts_with("revlog-compression-")
+                || requirement.starts_with("exp-compression-")
+            {
+                let split = &mut requirement.splitn(3, '-');
+                split.next();
+                split.next();
+                new = match split.next().unwrap() {
+                    "zstd" => CompressionConfig::zstd(zstd_level)?,
+                    e => {
+                        return Err(HgError::UnsupportedFeature(format!(
+                            "Unsupported compression engine '{e}'"
+                        )))
+                    }
+                };
+            }
+        }
+        if let Some(level) = zlib_level {
+            if matches!(new, CompressionConfig::Zlib { .. }) {
+                new.set_level(level as usize)?;
+            }
+        }
+        Ok(new)
+    }
+
+    /// Sets the level of the current compression engine
+    pub fn set_level(&mut self, new_level: usize) -> Result<(), HgError> {
+        match self {
+            CompressionConfig::Zlib { level } => {
+                if new_level > 9 {
+                    return Err(HgError::abort(
+                        format!(
+                            "invalid zlib compression level {}, \
+                            expected between 0 and 9 included",
+                            new_level
+                        ),
+                        exit_codes::ABORT,
+                        None,
+                    ));
+                }
+                *level = new_level as u8;
+            }
+            CompressionConfig::Zstd { level, .. } => {
+                if new_level > 22 {
+                    return Err(HgError::abort(
+                        format!(
+                            "invalid zstd compression level {}, \
+                            expected between 0 and 22 included",
+                            new_level
+                        ),
+                        exit_codes::ABORT,
+                        None,
+                    ));
+                }
+                *level = new_level as u8;
+            }
+            CompressionConfig::None => {}
+        }
+        Ok(())
+    }
+
+    /// Return a ZSTD compression config
+    pub fn zstd(
+        zstd_level: Option<u32>,
+    ) -> Result<CompressionConfig, HgError> {
+        let mut engine = CompressionConfig::Zstd {
+            level: ZSTD_DEFAULT_LEVEL,
+            threads: 0,
+        };
+        if let Some(level) = zstd_level {
+            engine.set_level(level as usize)?;
+        }
+        Ok(engine)
+    }
+}
+
+impl Default for CompressionConfig {
+    fn default() -> Self {
+        Self::Zlib {
+            level: ZLIB_DEFAULT_LEVEL,
+        }
+    }
+}
+
+/// A high-level trait to define compressors that should be able to compress
+/// and decompress arbitrary bytes.
+pub trait Compressor {
+    /// Returns a new [`Vec`] with the compressed data.
+    /// Should return `Ok(None)` if compression does not apply (e.g. too small)
+    fn compress(
+        &mut self,
+        data: &[u8],
+    ) -> Result<Option<Vec<u8>>, RevlogError>;
+    /// Returns a new [`Vec`] with the decompressed data.
+    fn decompress(&self, data: &[u8]) -> Result<Vec<u8>, RevlogError>;
+}
+
+/// A compressor that does nothing (useful in tests)
+pub struct NoneCompressor;
+
+impl Compressor for NoneCompressor {
+    fn compress(
+        &mut self,
+        _data: &[u8],
+    ) -> Result<Option<Vec<u8>>, RevlogError> {
+        Ok(None)
+    }
+
+    fn decompress(&self, data: &[u8]) -> Result<Vec<u8>, RevlogError> {
+        Ok(data.to_owned())
+    }
+}
+
+/// A compressor for Zstandard
+pub struct ZstdCompressor {
+    /// Level of compression to use
+    level: u8,
+    /// How many threads are used (not implemented yet)
+    threads: u8,
+    /// The underlying zstd compressor
+    compressor: zstd::bulk::Compressor<'static>,
+}
+
+impl ZstdCompressor {
+    pub fn new(level: u8, threads: u8) -> Self {
+        Self {
+            level,
+            threads,
+            compressor: zstd::bulk::Compressor::new(level.into())
+                .expect("invalid zstd arguments"),
+        }
+    }
+}
+
+impl Compressor for ZstdCompressor {
+    fn compress(
+        &mut self,
+        data: &[u8],
+    ) -> Result<Option<Vec<u8>>, RevlogError> {
+        if self.threads != 0 {
+            // TODO use a zstd builder + zstd cargo feature to support this
+            unimplemented!("zstd parallel compression is not implemented");
+        }
+        if data.len() < MINIMUM_LENGTH_ZSTD {
+            return Ok(None);
+        }
+        let level = self.level as i32;
+        if data.len() <= 1000000 {
+            let compressed = self.compressor.compress(data).map_err(|e| {
+                corrupted(format!("revlog compress error: {}", e))
+            })?;
+            Ok(if compressed.len() < data.len() {
+                Some(compressed)
+            } else {
+                None
+            })
+        } else {
+            Ok(Some(zstd::stream::encode_all(data, level).map_err(
+                |e| corrupted(format!("revlog compress error: {}", e)),
+            )?))
+        }
+    }
+
+    fn decompress(&self, data: &[u8]) -> Result<Vec<u8>, RevlogError> {
+        zstd::stream::decode_all(data).map_err(|e| {
+            corrupted(format!("revlog decompress error: {}", e)).into()
+        })
+    }
+}
+
+/// A compressor for Zlib
+pub struct ZlibCompressor {
+    /// Level of compression to use
+    level: flate2::Compression,
+}
+
+impl ZlibCompressor {
+    pub fn new(level: u8) -> Self {
+        Self {
+            level: flate2::Compression::new(level.into()),
+        }
+    }
+}
+
+impl Compressor for ZlibCompressor {
+    fn compress(
+        &mut self,
+        data: &[u8],
+    ) -> Result<Option<Vec<u8>>, RevlogError> {
+        assert!(!data.is_empty());
+        if data.len() < MINIMUM_LENGTH_ZLIB {
+            return Ok(None);
+        }
+        let mut buf = Vec::with_capacity(data.len());
+        ZlibEncoder::new(data, self.level)
+            .read_to_end(&mut buf)
+            .map_err(|e| corrupted(format!("revlog compress error: {}", e)))?;
+
+        Ok(if buf.len() < data.len() {
+            buf.shrink_to_fit();
+            Some(buf)
+        } else {
+            None
+        })
+    }
+
+    fn decompress(&self, data: &[u8]) -> Result<Vec<u8>, RevlogError> {
+        let mut decoder = ZlibDecoder::new(data);
+        // TODO reuse the allocation somehow?
+        let mut buf = vec![];
+        decoder.read_to_end(&mut buf).map_err(|e| {
+            corrupted(format!("revlog decompress error: {}", e))
+        })?;
+        Ok(buf)
+    }
+}
+
+thread_local! {
+  // seems fine to [unwrap] here: this can only fail due to memory allocation
+  // failing, and it's normal for that to cause a panic.
+  static ZSTD_DECODER : RefCell<zstd::bulk::Decompressor<'static>> =
+      RefCell::new(zstd::bulk::Decompressor::new().ok().unwrap());
+}
+
+/// Util to wrap the reuse of a zstd decoder while controlling its buffer size.
+fn zstd_decompress_to_buffer(
+    bytes: &[u8],
+    buf: &mut Vec<u8>,
+) -> Result<usize, std::io::Error> {
+    ZSTD_DECODER
+        .with(|decoder| decoder.borrow_mut().decompress_to_buffer(bytes, buf))
+}
+
+/// Specialized revlog decompression to use less memory for deltas while
+/// keeping performance acceptable.
+pub(super) fn uncompressed_zstd_data(
+    bytes: &[u8],
+    is_delta: bool,
+    uncompressed_len: i32,
+) -> Result<Vec<u8>, HgError> {
+    let cap = uncompressed_len.max(0) as usize;
+    if is_delta {
+        // [cap] is usually an over-estimate of the space needed because
+        // it's the length of delta-decoded data, but we're interested
+        // in the size of the delta.
+        // This means we have to [shrink_to_fit] to avoid holding on
+        // to a large chunk of memory, but it also means we must have a
+        // fallback branch, for the case when the delta is longer than
+        // the original data (surprisingly, this does happen in practice)
+        let mut buf = Vec::with_capacity(cap);
+        match zstd_decompress_to_buffer(bytes, &mut buf) {
+            Ok(_) => buf.shrink_to_fit(),
+            Err(_) => {
+                buf.clear();
+                zstd::stream::copy_decode(bytes, &mut buf)
+                    .map_err(|e| corrupted(e.to_string()))?;
+            }
+        };
+        Ok(buf)
+    } else {
+        let mut buf = Vec::with_capacity(cap);
+        let len = zstd_decompress_to_buffer(bytes, &mut buf)
+            .map_err(|e| corrupted(e.to_string()))?;
+        if len != uncompressed_len as usize {
+            Err(corrupted("uncompressed length does not match"))
+        } else {
+            Ok(buf)
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    const LARGE_TEXT: &[u8] = b"
+    Patali Dirapata, Cromda Cromda Ripalo, Pata Pata, Ko Ko Ko
+    Bokoro Dipoulito, Rondi Rondi Pepino, Pata Pata, Ko Ko Ko
+    Emana Karassoli, Loucra Loucra Nonponto, Pata Pata, Ko Ko Ko.";
+
+    #[test]
+    fn test_zlib_compressor() {
+        // Can return `Ok(None)`
+        let mut compressor = ZlibCompressor::new(1);
+        assert_eq!(compressor.compress(b"too small").unwrap(), None);
+
+        // Compression returns bytes
+        let compressed_with_1 =
+            compressor.compress(LARGE_TEXT).unwrap().unwrap();
+        assert!(compressed_with_1.len() < LARGE_TEXT.len());
+        // Round trip works
+        assert_eq!(
+            compressor.decompress(&compressed_with_1).unwrap(),
+            LARGE_TEXT
+        );
+
+        // Compression levels mean something
+        let mut compressor = ZlibCompressor::new(9);
+        // Compression returns bytes
+        let compressed = compressor.compress(LARGE_TEXT).unwrap().unwrap();
+        assert!(compressed.len() < compressed_with_1.len());
+    }
+
+    #[test]
+    fn test_zstd_compressor() {
+        // Can return `Ok(None)`
+        let mut compressor = ZstdCompressor::new(1, 0);
+        assert_eq!(compressor.compress(b"too small").unwrap(), None);
+
+        // Compression returns bytes
+        let compressed_with_1 =
+            compressor.compress(LARGE_TEXT).unwrap().unwrap();
+        assert!(compressed_with_1.len() < LARGE_TEXT.len());
+        // Round trip works
+        assert_eq!(
+            compressor.decompress(&compressed_with_1).unwrap(),
+            LARGE_TEXT
+        );
+
+        // Compression levels mean something
+        let mut compressor = ZstdCompressor::new(22, 0);
+        // Compression returns bytes
+        let compressed = compressor.compress(LARGE_TEXT).unwrap().unwrap();
+        assert!(compressed.len() < compressed_with_1.len());
+    }
+}
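The `Compressor` implementations above encode two policies: payloads under a minimum length (44 bytes for zlib, 50 for zstd) are never compressed, and a compressed blob is only kept when it is actually smaller than the input. A hedged Python sketch of the same decision using the standard zlib module:

import zlib

MINIMUM_LENGTH_ZLIB = 44  # below this, compression is never attempted
ZLIB_DEFAULT_LEVEL = 6

def maybe_compress(data, level=ZLIB_DEFAULT_LEVEL):
    """Return compressed bytes, or None when storing raw is better."""
    if len(data) < MINIMUM_LENGTH_ZLIB:
        return None
    compressed = zlib.compress(data, level)
    # Only keep the result if it actually saves space.
    return compressed if len(compressed) < len(data) else None

assert maybe_compress(b"too small") is None
blob = b"Patali Dirapata, Cromda Cromda Ripalo, Pata Pata, Ko Ko Ko " * 4
out = maybe_compress(blob)
assert out is not None and zlib.decompress(out) == blob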
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rust/hg-core/src/revlog/file_io.rs	Wed Nov 20 15:53:19 2024 +0100
@@ -0,0 +1,544 @@
+//! Helpers for revlog file reading and writing.
+
+use std::{
+    cell::RefCell,
+    io::{Read, Seek, SeekFrom, Write},
+    path::{Path, PathBuf},
+    sync::{Arc, Mutex},
+};
+
+use crate::{
+    errors::{HgError, IoResultExt},
+    vfs::{Vfs, VfsFile},
+};
+
+/// Wraps accessing arbitrary chunks of data within a file and reusing handles.
+/// This is currently useful for accessing a revlog's data file, only reading
+/// the ranges that are currently relevant, like a sort of basic and manual
+/// file-based mmap.
+///
+/// XXX should this just be replaced with `mmap` + `madvise` ranges?
+/// The upcoming `UncompressedChunkCache` will make up for most of the slowness
+/// of re-reading the same chunks, so this might not be as useful. Aside from
+/// the major benefit of having less code to take care of, using `mmap` will
+/// allow multiple processes to share the same pages, especially for the
+/// changelog and manifest, which would make a difference in server contexts.
+pub struct RandomAccessFile {
+    /// The current store VFS to pass it to [`FileHandle`]
+    vfs: Box<dyn Vfs>,
+    /// Filename of the open file, relative to the vfs root
+    pub filename: PathBuf,
+    /// The current read-only handle on the file, if any
+    pub reading_handle: RefCell<Option<FileHandle>>,
+    /// The current read-write handle on the file, if any
+    pub writing_handle: RefCell<Option<FileHandle>>,
+}
+
+impl RandomAccessFile {
+    /// Wrap a file for random access
+    pub fn new(vfs: Box<dyn Vfs>, filename: PathBuf) -> Self {
+        assert!(filename.is_relative());
+        Self {
+            vfs,
+            filename,
+            reading_handle: RefCell::new(None),
+            writing_handle: RefCell::new(None),
+        }
+    }
+
+    /// Read a chunk of bytes from the file.
+    pub fn read_chunk(
+        &self,
+        offset: usize,
+        length: usize,
+    ) -> Result<Vec<u8>, HgError> {
+        let mut handle = self.get_read_handle()?;
+        handle
+            .seek(SeekFrom::Start(offset as u64))
+            .when_reading_file(&self.filename)?;
+        handle.read_exact(length).when_reading_file(&self.filename)
+    }
+
+    /// `pub` only for hg-cpython
+    #[doc(hidden)]
+    pub fn get_read_handle(&self) -> Result<FileHandle, HgError> {
+        if let Some(handle) = &*self.writing_handle.borrow() {
+            // Use a file handle being actively used for writes, if available.
+            // There is some danger to doing this because reads will seek the
+            // file.
+            // However, [`Revlog::write_entry`] performs a `SeekFrom::End(0)`
+            // before all writes, so we should be safe.
+            return Ok(handle.clone());
+        }
+        if let Some(handle) = &*self.reading_handle.borrow() {
+            return Ok(handle.clone());
+        }
+        // The early returns above work around the borrow checker being
+        // overzealous. See https://github.com/rust-lang/rust/issues/103108
+        let new_handle = FileHandle::new(
+            dyn_clone::clone_box(&*self.vfs),
+            &self.filename,
+            false,
+            false,
+        )?;
+        *self.reading_handle.borrow_mut() = Some(new_handle.clone());
+        Ok(new_handle)
+    }
+
+    /// `pub` only for hg-cpython
+    #[doc(hidden)]
+    pub fn exit_reading_context(&self) {
+        self.reading_handle.take();
+    }
+
+    /// Returns whether this file is currently open
+    pub fn is_open(&self) -> bool {
+        self.reading_handle.borrow().is_some()
+            || self.writing_handle.borrow().is_some()
+    }
+}
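For illustration, a minimal sketch of reading one revision's compressed segment through this wrapper; `store_vfs`, `start` and `length` are assumed to come from the surrounding revlog code:

    let raf = RandomAccessFile::new(store_vfs, PathBuf::from("data/foo.txt.d"));
    let chunk = raf.read_chunk(start, length)?; // seek + read_exact under the hood
    assert_eq!(chunk.len(), length);
    // A read-only handle is now cached in `reading_handle` and is reused
    // by subsequent calls instead of re-opening the file.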
+
+/// A buffer that holds new changelog index data that needs to be written
+/// after the manifest and filelogs so that the repo is updated atomically to
+/// external processes.
+#[derive(Clone, Debug, Default)]
+pub struct DelayedBuffer {
+    /// The actual in-memory bytes storing the delayed writes
+    pub(super) buffer: Vec<u8>,
+    /// The current offset into the virtual file composed of file + buffer
+    offset: u64,
+    /// The size of the file at the time of opening
+    file_size: u64,
+}
+
+impl DelayedBuffer {
+    /// Returns the length of the full data (on-disk + buffer length).
+    pub fn len(&self) -> u64 {
+        self.buffer.len() as u64 + self.file_size
+    }
+
+    pub fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+}
+
+/// Holds an open [`VfsFile`] and the related data. This can be used for
+/// reading and writing. Writes can be delayed to a buffer before touching
+/// the disk, if relevant (in the changelog case), but reads are transparent.
+pub struct FileHandle {
+    /// The actual open file
+    pub file: VfsFile,
+    /// The VFS with which the file was opened
+    vfs: Box<dyn Vfs>,
+    /// Filename of the open file, relative to the repo root
+    filename: PathBuf,
+    /// Buffer of delayed entry writes to the changelog index. This points
+    /// back to the buffer inside the revlog this handle refers to.
+    delayed_buffer: Option<Arc<Mutex<DelayedBuffer>>>,
+}
+
+impl std::fmt::Debug for FileHandle {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("FileHandle")
+            .field("filename", &self.filename)
+            .field("delayed_buffer", &self.delayed_buffer)
+            .field("file", &self.file)
+            .finish()
+    }
+}
+
+impl Clone for FileHandle {
+    fn clone(&self) -> Self {
+        Self {
+            vfs: dyn_clone::clone_box(&*self.vfs),
+            filename: self.filename.clone(),
+            delayed_buffer: self.delayed_buffer.clone(),
+            // This can only fail if the OS doesn't have the file handle
+            // anymore, so we're not going to do anything useful anyway.
+            file: self.file.try_clone().expect("couldn't clone file handle"),
+        }
+    }
+}
+
+impl FileHandle {
+    /// Get a (read or write) file handle to `filename`. Only creates the file
+    /// if `create` is `true`.
+    pub fn new(
+        vfs: Box<dyn Vfs>,
+        filename: impl AsRef<Path>,
+        create: bool,
+        write: bool,
+    ) -> Result<Self, HgError> {
+        let file = if create {
+            vfs.create(filename.as_ref(), false)?
+        } else if write {
+            vfs.open(filename.as_ref())?
+        } else {
+            vfs.open_read(filename.as_ref())?
+        };
+        Ok(Self {
+            vfs,
+            filename: filename.as_ref().to_owned(),
+            delayed_buffer: None,
+            file,
+        })
+    }
+
+    /// Get a file handle to `filename`, but writes go to a [`DelayedBuffer`].
+    pub fn new_delayed(
+        vfs: Box<dyn Vfs>,
+        filename: impl AsRef<Path>,
+        create: bool,
+        delayed_buffer: Arc<Mutex<DelayedBuffer>>,
+    ) -> Result<Self, HgError> {
+        let mut file = if create {
+            vfs.create(filename.as_ref(), false)?
+        } else {
+            vfs.open(filename.as_ref())?
+        };
+        let size = vfs.file_size(&file)?;
+        let offset = file
+            .stream_position()
+            .when_reading_file(filename.as_ref())?;
+
+        {
+            let mut buf = delayed_buffer.lock().unwrap();
+            buf.file_size = size;
+            buf.offset = offset;
+        }
+
+        Ok(Self {
+            vfs,
+            filename: filename.as_ref().to_owned(),
+            delayed_buffer: Some(delayed_buffer),
+            file,
+        })
+    }
+
+    /// Wrap an existing [`VfsFile`]
+    pub fn from_file(
+        file: VfsFile,
+        vfs: Box<dyn Vfs>,
+        filename: impl AsRef<Path>,
+    ) -> Self {
+        Self {
+            vfs,
+            filename: filename.as_ref().to_owned(),
+            delayed_buffer: None,
+            file,
+        }
+    }
+
+    /// Wrap an existing [`VfsFile`], but writes go to a [`DelayedBuffer`].
+    pub fn from_file_delayed(
+        mut file: VfsFile,
+        vfs: Box<dyn Vfs>,
+        filename: impl AsRef<Path>,
+        delayed_buffer: Arc<Mutex<DelayedBuffer>>,
+    ) -> Result<Self, HgError> {
+        let size = vfs.file_size(&file)?;
+        let offset = file
+            .stream_position()
+            .when_reading_file(filename.as_ref())?;
+
+        {
+            let mut buf = delayed_buffer.lock().unwrap();
+            buf.file_size = size;
+            buf.offset = offset;
+        }
+
+        Ok(Self {
+            vfs,
+            filename: filename.as_ref().to_owned(),
+            delayed_buffer: Some(delayed_buffer),
+            file,
+        })
+    }
+
+    /// Move the position of the handle to `pos`,
+    /// spanning the [`DelayedBuffer`] if defined. Returns an error if an
+    /// invalid seek position is requested, or for any standard I/O error.
+    pub fn seek(&mut self, pos: SeekFrom) -> Result<u64, std::io::Error> {
+        if let Some(delay_buf) = &self.delayed_buffer {
+            let mut delay_buf = delay_buf.lock().unwrap();
+            // Virtual file offset spans real file and data
+            match pos {
+                SeekFrom::Start(offset) => delay_buf.offset = offset,
+                SeekFrom::End(offset) => {
+                    delay_buf.offset =
+                        delay_buf.len().saturating_add_signed(offset)
+                }
+                SeekFrom::Current(offset) => {
+                    delay_buf.offset =
+                        delay_buf.offset.saturating_add_signed(offset);
+                }
+            }
+            if delay_buf.offset < delay_buf.file_size {
+                self.file.seek(pos)
+            } else {
+                Ok(delay_buf.offset)
+            }
+        } else {
+            self.file.seek(pos)
+        }
+    }
+
+    /// Read exactly `length` bytes from the current position.
+    /// Errors are the same as [`std::io::Read::read_exact`].
+    pub fn read_exact(
+        &mut self,
+        length: usize,
+    ) -> Result<Vec<u8>, std::io::Error> {
+        if let Some(delay_buf) = self.delayed_buffer.as_mut() {
+            let mut delay_buf = delay_buf.lock().unwrap();
+            let mut buf = vec![0; length];
+            let offset: isize =
+                delay_buf.offset.try_into().expect("buffer too large");
+            let file_size: isize =
+                delay_buf.file_size.try_into().expect("file too large");
+            let span: isize = offset - file_size;
+            let length = length.try_into().expect("too large of a length");
+            let absolute_span: u64 =
+                span.unsigned_abs().try_into().expect("length too large");
+            if span < 0 {
+                if length <= absolute_span {
+                    // We're only in the file
+                    self.file.read_exact(&mut buf)?;
+                } else {
+                    // We're spanning file and buffer
+                    self.file
+                        .read_exact(&mut buf[..absolute_span as usize])?;
+                    delay_buf
+                        .buffer
+                        .take(length - absolute_span)
+                        .read_exact(&mut buf[absolute_span as usize..])?;
+                }
+            } else {
+                // We're only in the buffer
+                delay_buf.buffer[absolute_span as usize..]
+                    .take(length)
+                    .read_exact(&mut buf)?;
+            }
+            delay_buf.offset += length;
+            Ok(buf)
+        } else {
+            let mut buf = vec![0; length];
+            self.file.read_exact(&mut buf)?;
+            Ok(buf)
+        }
+    }
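A worked example of the span arithmetic above, under assumed values, matching the "spanning file and buffer" branch:

    let file_size: i64 = 10; // bytes already on disk
    let offset: i64 = 6;     // current virtual position
    let length: u64 = 8;     // bytes requested
    let span = offset - file_size; // -4: the read starts inside the file
    let from_file = span.unsigned_abs().min(length); // 4 bytes come from disk
    let from_buffer = length - from_file; // the remaining 4 from the buffer
    assert_eq!((from_file, from_buffer), (4, 4));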
+
+    /// Flush the in-memory changes to disk. This does *not* write the
+    /// delayed buffer, only the pending file changes.
+    pub fn flush(&mut self) -> Result<(), HgError> {
+        self.file.flush().when_writing_file(&self.filename)
+    }
+
+    /// Return the current position in the file
+    pub fn position(&mut self) -> Result<u64, HgError> {
+        self.file
+            .stream_position()
+            .when_reading_file(&self.filename)
+    }
+
+    /// Append `data` to the file, or to the [`DelayedBuffer`], if any.
+    pub fn write_all(&mut self, data: &[u8]) -> Result<(), HgError> {
+        if let Some(buf) = &mut self.delayed_buffer {
+            let mut delayed_buffer = buf.lock().expect("propagate the panic");
+            assert_eq!(delayed_buffer.offset, delayed_buffer.len());
+            delayed_buffer.buffer.extend_from_slice(data);
+            delayed_buffer.offset += data.len() as u64;
+            Ok(())
+        } else {
+            self.file
+                .write_all(data)
+                .when_writing_file(&self.filename)?;
+            Ok(())
+        }
+    }
+}
+
+/// Write handles to a given revlog (index + maybe data)
+#[derive(Debug)]
+pub struct WriteHandles {
+    /// Handle to the index file
+    pub index_handle: FileHandle,
+    /// Handle to the data file, if the revlog is non-inline
+    pub data_handle: Option<FileHandle>,
+}
+
+#[cfg(test)]
+mod tests {
+    use std::io::ErrorKind;
+
+    use crate::vfs::VfsImpl;
+
+    use super::*;
+
+    #[test]
+    fn test_random_access_file() {
+        let base = tempfile::tempdir().unwrap().into_path();
+        let filename = Path::new("a");
+        let file_path = base.join(filename);
+        let raf = RandomAccessFile::new(
+            Box::new(VfsImpl::new(base.clone(), true)),
+            filename.to_owned(),
+        );
+
+        assert!(!raf.is_open());
+        assert_eq!(&raf.filename, &filename);
+        // Should fail to read a non-existing file
+        match raf.get_read_handle().unwrap_err() {
+            HgError::IoError { error, .. } => match error.kind() {
+                std::io::ErrorKind::NotFound => {}
+                _ => panic!("should be not found"),
+            },
+            e => panic!("{}", e.to_string()),
+        }
+
+        std::fs::write(file_path, b"1234567890").unwrap();
+
+        // Should be able to open an existing file
+        let mut handle = raf.get_read_handle().unwrap();
+        assert!(raf.is_open());
+        assert_eq!(handle.read_exact(10).unwrap(), b"1234567890".to_vec());
+    }
+
+    #[test]
+    fn test_file_handle() {
+        let base = tempfile::tempdir().unwrap().into_path();
+        let filename = base.join("a");
+        // Opening a missing file without `create` should fail
+        FileHandle::new(
+            Box::new(VfsImpl::new(base.clone(), false)),
+            &filename,
+            false,
+            false,
+        )
+        .unwrap_err();
+        std::fs::write(&filename, b"1234567890").unwrap();
+
+        let mut read_handle = FileHandle::new(
+            Box::new(VfsImpl::new(base.clone(), true)),
+            &filename,
+            false,
+            false,
+        )
+        .unwrap();
+        assert_eq!(&read_handle.filename, &filename);
+        assert_eq!(read_handle.position().unwrap(), 0);
+
+        // Writing to an explicit read handle should fail
+        read_handle.write_all(b"some data").unwrap_err();
+
+        // Reading exactly n bytes should work
+        assert_eq!(read_handle.read_exact(3).unwrap(), b"123".to_vec());
+        // and the position should be remembered
+        assert_eq!(read_handle.read_exact(2).unwrap(), b"45".to_vec());
+
+        // Seeking should work
+        let position = read_handle.position().unwrap();
+        read_handle.seek(SeekFrom::Current(-2)).unwrap();
+        assert_eq!(position - 2, read_handle.position().unwrap());
+
+        // Reading more bytes than the file contains should fail
+        read_handle.read_exact(1000).unwrap_err();
+
+        // Open a write handle
+        let mut handle = FileHandle::new(
+            Box::new(VfsImpl::new(base.clone(), false)),
+            &filename,
+            false,
+            true,
+        )
+        .unwrap();
+
+        // Now writing should succeed
+        handle.write_all(b"new data").unwrap();
+        // Opening does not seek, so the write started at offset 0 and we
+        // are now past the 8 written bytes
+        assert_eq!(handle.position().unwrap(), 8);
+        // We can still read
+        assert_eq!(handle.read_exact(2).unwrap(), b"90".to_vec());
+
+        let mut read_handle = FileHandle::new(
+            Box::new(VfsImpl::new(base.clone(), true)),
+            &filename,
+            false,
+            false,
+        )
+        .unwrap();
+        read_handle.seek(SeekFrom::Start(0)).unwrap();
+        // On-disk file contents should be changed
+        assert_eq!(
+            &read_handle.read_exact(10).unwrap(),
+            &b"new data90".to_vec(),
+        );
+        // Flushing doesn't do anything unexpected
+        handle.flush().unwrap();
+
+        let delayed_buffer = Arc::new(Mutex::new(DelayedBuffer::default()));
+        let mut handle = FileHandle::new_delayed(
+            Box::new(VfsImpl::new(base.clone(), false)),
+            &filename,
+            false,
+            delayed_buffer,
+        )
+        .unwrap();
+
+        assert_eq!(
+            handle
+                .delayed_buffer
+                .as_ref()
+                .unwrap()
+                .lock()
+                .unwrap()
+                .file_size,
+            10
+        );
+        handle.seek(SeekFrom::End(0)).unwrap();
+        handle.write_all(b"should go to buffer").unwrap();
+        assert_eq!(
+            handle
+                .delayed_buffer
+                .as_ref()
+                .unwrap()
+                .lock()
+                .unwrap()
+                .len(),
+            29
+        );
+        read_handle.seek(SeekFrom::Start(0)).unwrap();
+        // On-disk file contents should be unchanged
+        assert_eq!(
+            read_handle.read_exact(10).unwrap(),
+            b"new data90".to_vec(),
+        );
+
+        assert_eq!(
+            read_handle.read_exact(1).unwrap_err().kind(),
+            ErrorKind::UnexpectedEof
+        );
+
+        handle.flush().unwrap();
+        // On-disk file contents should still be unchanged after a flush
+        assert_eq!(
+            read_handle.read_exact(1).unwrap_err().kind(),
+            ErrorKind::UnexpectedEof
+        );
+
+        // Read from the buffer only
+        handle.seek(SeekFrom::End(-1)).unwrap();
+        assert_eq!(handle.read_exact(1).unwrap(), b"r".to_vec());
+
+        // Read from an overlapping section of file and buffer
+        handle.seek(SeekFrom::Start(6)).unwrap();
+        assert_eq!(
+            handle.read_exact(20).unwrap(),
+            b"ta90should go to buf".to_vec()
+        );
+
+        // Read from file only
+        handle.seek(SeekFrom::Start(0)).unwrap();
+        assert_eq!(handle.read_exact(8).unwrap(), b"new data".to_vec());
+    }
+}
--- a/rust/hg-core/src/revlog/filelog.rs	Wed Nov 20 15:38:57 2024 +0100
+++ b/rust/hg-core/src/revlog/filelog.rs	Wed Nov 20 15:53:19 2024 +0100
@@ -11,10 +11,11 @@
 use crate::utils::SliceExt;
 use crate::Graph;
 use crate::GraphError;
-use crate::RevlogOpenOptions;
 use crate::UncheckedRevision;
 use std::path::PathBuf;
 
+use super::options::RevlogOpenOptions;
+
 /// A specialized `Revlog` to work with file data logs.
 pub struct Filelog {
     /// The generic `revlog` format.
--- a/rust/hg-core/src/revlog/index.rs	Wed Nov 20 15:38:57 2024 +0100
+++ b/rust/hg-core/src/revlog/index.rs	Wed Nov 20 15:53:19 2024 +0100
@@ -7,14 +7,14 @@
 use byteorder::{BigEndian, ByteOrder};
 use bytes_cast::{unaligned, BytesCast};
 
-use super::REVIDX_KNOWN_FLAGS;
+use super::{NodePrefix, RevlogError, RevlogIndex, REVIDX_KNOWN_FLAGS};
 use crate::errors::HgError;
-use crate::node::{NODE_BYTES_LENGTH, NULL_NODE, STORED_NODE_ID_BYTES};
-use crate::revlog::node::Node;
+use crate::revlog::node::{
+    Node, NODE_BYTES_LENGTH, NULL_NODE, STORED_NODE_ID_BYTES,
+};
 use crate::revlog::{Revision, NULL_REVISION};
 use crate::{
-    dagops, BaseRevision, FastHashMap, Graph, GraphError, RevlogError,
-    RevlogIndex, UncheckedRevision,
+    dagops, BaseRevision, FastHashMap, Graph, GraphError, UncheckedRevision,
 };
 
 pub const INDEX_ENTRY_SIZE: usize = 64;
@@ -62,22 +62,19 @@
         BigEndian::read_u16(&self.header_bytes[2..4])
     }
 
-    pub fn parse(index_bytes: &[u8]) -> Result<Option<IndexHeader>, HgError> {
-        if index_bytes.is_empty() {
-            return Ok(None);
-        }
+    pub fn parse(index_bytes: &[u8]) -> Result<IndexHeader, HgError> {
         if index_bytes.len() < 4 {
             return Err(HgError::corrupted(
                 "corrupted revlog: can't read the index format header",
             ));
         }
-        Ok(Some(IndexHeader {
+        Ok(IndexHeader {
             header_bytes: {
                 let bytes: [u8; 4] =
                     index_bytes[0..4].try_into().expect("impossible");
                 bytes
             },
-        }))
+        })
     }
 }
 
@@ -275,7 +272,7 @@
     /// be accessed via the [`Self::head_revs`] method.
     /// The last filtered revisions in this index, used to make sure
     /// we haven't changed filters when returning the cached `head_revs`.
-    head_revs: RwLock<(Vec<Revision>, HashSet<Revision>)>,
+    pub(super) head_revs: RwLock<(Vec<Revision>, HashSet<Revision>)>,
 }
 
 impl Debug for Index {
@@ -341,8 +338,11 @@
         bytes: Box<dyn Deref<Target = [u8]> + Send + Sync>,
         default_header: IndexHeader,
     ) -> Result<Self, HgError> {
-        let header =
-            IndexHeader::parse(bytes.as_ref())?.unwrap_or(default_header);
+        let header = if bytes.len() < INDEX_ENTRY_SIZE {
+            default_header
+        } else {
+            IndexHeader::parse(bytes.as_ref())?
+        };
 
         if header.format_version() != IndexHeader::REVLOGV1 {
             // A proper new version should have had a repo/store
@@ -417,6 +417,45 @@
         }
     }
 
+    /// Same as `rev_from_node`, without using a persistent nodemap
+    ///
+    /// This is used as fallback when a persistent nodemap is not present.
+    /// This happens when the persistent-nodemap experimental feature is not
+    /// enabled, or for small revlogs.
+    pub fn rev_from_node_no_persistent_nodemap(
+        &self,
+        node: NodePrefix,
+    ) -> Result<Revision, RevlogError> {
+        // Linear scan of the revlog
+        // TODO: consider building a non-persistent nodemap in memory to
+        // optimize these cases.
+        let mut found_by_prefix = None;
+        for rev in (-1..self.len() as BaseRevision).rev() {
+            let rev = Revision(rev);
+            let candidate_node = if rev == Revision(-1) {
+                NULL_NODE
+            } else {
+                let index_entry = self.get_entry(rev).ok_or_else(|| {
+                    HgError::corrupted(
+                        "revlog references a revision not in the index",
+                    )
+                })?;
+                *index_entry.hash()
+            };
+            if node == candidate_node {
+                return Ok(rev);
+            }
+            if node.is_prefix_of(&candidate_node) {
+                if found_by_prefix.is_some() {
+                    return Err(RevlogError::AmbiguousPrefix);
+                }
+                found_by_prefix = Some(rev)
+            }
+        }
+        found_by_prefix
+            .ok_or_else(|| RevlogError::InvalidRevision(format!("{:x}", node)))
+    }
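A usage sketch (the `index` value is assumed; `NodePrefix::from_hex` is the constructor used elsewhere in hg-core) of resolving a short hex prefix through this fallback:

    let prefix = NodePrefix::from_hex("d1b4").expect("valid hex prefix");
    match index.rev_from_node_no_persistent_nodemap(prefix) {
        Ok(rev) => println!("unique match: {}", rev),
        Err(RevlogError::AmbiguousPrefix) => eprintln!("several nodes match"),
        Err(e) => eprintln!("no match or corruption: {:?}", e),
    }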
+
     pub fn get_offsets(&self) -> RwLockReadGuard<Option<Vec<usize>>> {
         assert!(self.is_inline());
         {
@@ -925,8 +964,8 @@
         let mut gaps = Vec::new();
         let mut previous_end = None;
 
-        for (i, (_rev, entry)) in entries.iter().enumerate() {
-            let start = entry.c_start() as usize;
+        for (i, (rev, entry)) in entries.iter().enumerate() {
+            let start = self.start(*rev, entry);
             let length = entry.compressed_len();
 
             // Skip empty revisions to form larger holes
@@ -1004,15 +1043,13 @@
         if revs.is_empty() {
             return 0;
         }
-        let last_entry = &self.get_entry(revs[revs.len() - 1]).unwrap();
-        let end = last_entry.c_start() + last_entry.compressed_len() as u64;
+        let last_rev = revs[revs.len() - 1];
+        let last_entry = &self.get_entry(last_rev).unwrap();
+        let end = last_entry.offset() + last_entry.compressed_len() as usize;
         let first_rev = revs.iter().find(|r| r.0 != NULL_REVISION.0).unwrap();
-        let start = if first_rev.0 == 0 {
-            0
-        } else {
-            self.get_entry(*first_rev).unwrap().c_start()
-        };
-        (end - start) as usize
+        let first_entry = self.get_entry(*first_rev).unwrap();
+        let start = first_entry.offset();
+        end - start
     }
 
     /// Returns `&revs[startidx..endidx]` without empty trailing revs
@@ -1379,6 +1416,33 @@
             })
             .collect())
     }
+
+    /// Return the offset into the data corresponding to `rev` (in the index
+    /// file if inline, in the data file otherwise). `entry` must be the entry
+    /// for `rev`: the API is done this way to reduce the number of lookups
+    /// since we sometimes already have the entry, and because very few places
+    /// actually use this function.
+    #[inline(always)]
+    pub fn start(&self, rev: Revision, entry: &IndexEntry<'_>) -> usize {
+        #[cfg(debug_assertions)]
+        {
+            assert_eq!(&self.get_entry(rev).unwrap(), entry);
+        }
+        let offset = entry.offset();
+        if self.is_inline() {
+            offset + ((rev.0 as usize + 1) * INDEX_ENTRY_SIZE)
+        } else {
+            offset
+        }
+    }
+
+    pub(crate) fn make_null_entry(&self) -> IndexEntry<'_> {
+        IndexEntry {
+            bytes: b"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0 \
+            \xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff \
+            \0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0",
+        }
+    }
 }
 
 /// The kind of functionality needed by find_gca_candidates
@@ -1692,7 +1756,7 @@
     }
 }
 
-#[derive(Debug)]
+#[derive(Debug, PartialEq, Eq)]
 pub struct IndexEntry<'a> {
     bytes: &'a [u8],
 }
@@ -1708,11 +1772,6 @@
         BigEndian::read_u64(&self.bytes[0..8])
     }
 
-    /// Same result (except potentially for rev 0) as C `index_get_start()`
-    fn c_start(&self) -> u64 {
-        self.raw_offset() >> 16
-    }
-
     pub fn flags(&self) -> u16 {
         BigEndian::read_u16(&self.bytes[6..=7])
     }
@@ -1766,7 +1825,7 @@
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::node::NULL_NODE;
+    use crate::NULL_NODE;
 
     #[cfg(test)]
     #[derive(Debug, Copy, Clone)]
@@ -1901,24 +1960,21 @@
 
     pub fn is_inline(index_bytes: &[u8]) -> bool {
         IndexHeader::parse(index_bytes)
-            .expect("too short")
-            .unwrap()
+            .expect("invalid header")
             .format_flags()
             .is_inline()
     }
 
     pub fn uses_generaldelta(index_bytes: &[u8]) -> bool {
         IndexHeader::parse(index_bytes)
-            .expect("too short")
-            .unwrap()
+            .expect("invalid header")
             .format_flags()
             .uses_generaldelta()
     }
 
     pub fn get_version(index_bytes: &[u8]) -> u16 {
         IndexHeader::parse(index_bytes)
-            .expect("too short")
-            .unwrap()
+            .expect("invalid header")
             .format_version()
     }
 
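To make the inline arithmetic in `Index::start` above concrete, a worked example with assumed numbers: in an inline revlog, the data chunk for rev N is physically preceded by the N + 1 index entries before it, each `INDEX_ENTRY_SIZE` (64) bytes long.

    let rev = 2usize;          // third revision
    let recorded_offset = 120; // logical data offset stored in its entry
    let physical_start = recorded_offset + (rev + 1) * 64;
    assert_eq!(physical_start, 312); // actual position in the .i file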
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rust/hg-core/src/revlog/inner_revlog.rs	Wed Nov 20 15:53:19 2024 +0100
@@ -0,0 +1,1354 @@
+//! A layer of lower-level revlog functionality to encapsulate most of the
+//! IO work and expensive operations.
+use std::{
+    borrow::Cow,
+    cell::RefCell,
+    io::{ErrorKind, Seek, SeekFrom, Write},
+    ops::Deref,
+    path::PathBuf,
+    sync::{Arc, Mutex},
+};
+
+use schnellru::{ByMemoryUsage, LruMap};
+use sha1::{Digest, Sha1};
+
+use crate::{
+    errors::{HgError, IoResultExt},
+    exit_codes,
+    transaction::Transaction,
+    vfs::Vfs,
+};
+
+use super::{
+    compression::{
+        uncompressed_zstd_data, CompressionConfig, Compressor, NoneCompressor,
+        ZlibCompressor, ZstdCompressor, ZLIB_BYTE, ZSTD_BYTE,
+    },
+    file_io::{DelayedBuffer, FileHandle, RandomAccessFile, WriteHandles},
+    index::{Index, IndexHeader, INDEX_ENTRY_SIZE},
+    node::{NODE_BYTES_LENGTH, NULL_NODE},
+    options::{RevlogDataConfig, RevlogDeltaConfig, RevlogFeatureConfig},
+    BaseRevision, Node, Revision, RevlogEntry, RevlogError, RevlogIndex,
+    UncheckedRevision, NULL_REVISION, NULL_REVLOG_ENTRY_FLAGS,
+};
+
+/// Matches the `_InnerRevlog` class in the Python code, as an arbitrary
+/// boundary to incrementally rewrite higher-level revlog functionality in
+/// Rust.
+pub struct InnerRevlog {
+    /// When index and data are not interleaved: bytes of the revlog index.
+    /// When index and data are interleaved (inline revlog): bytes of the
+    /// revlog index and data.
+    pub index: Index,
+    /// The store vfs that is used to interact with the filesystem
+    vfs: Box<dyn Vfs>,
+    /// The index file path, relative to the vfs root
+    pub index_file: PathBuf,
+    /// The data file path, relative to the vfs root (same as `index_file`
+    /// if inline)
+    data_file: PathBuf,
+    /// Data config that applies to this revlog
+    data_config: RevlogDataConfig,
+    /// Delta config that applies to this revlog
+    delta_config: RevlogDeltaConfig,
+    /// Feature config that applies to this revlog
+    feature_config: RevlogFeatureConfig,
+    /// A view into this revlog's data file
+    segment_file: RandomAccessFile,
+    /// A cache of uncompressed chunks that have previously been restored.
+    /// Its eviction policy is defined in [`Self::new`].
+    uncompressed_chunk_cache: Option<UncompressedChunkCache>,
+    /// Used to keep track of the actual target during diverted writes
+    /// for the changelog
+    original_index_file: Option<PathBuf>,
+    /// Write handles to the index and data files
+    /// XXX why duplicate from `index` and `segment_file`?
+    writing_handles: Option<WriteHandles>,
+    /// See [`DelayedBuffer`].
+    delayed_buffer: Option<Arc<Mutex<DelayedBuffer>>>,
+    /// Whether this revlog is inline. XXX why duplicate from `index`?
+    pub inline: bool,
+    /// A cache of the last revision, which is usually accessed multiple
+    /// times.
+    pub last_revision_cache: Mutex<Option<SingleRevisionCache>>,
+}
+
+impl InnerRevlog {
+    pub fn new(
+        vfs: Box<dyn Vfs>,
+        index: Index,
+        index_file: PathBuf,
+        data_file: PathBuf,
+        data_config: RevlogDataConfig,
+        delta_config: RevlogDeltaConfig,
+        feature_config: RevlogFeatureConfig,
+    ) -> Self {
+        assert!(index_file.is_relative());
+        assert!(data_file.is_relative());
+        let segment_file = RandomAccessFile::new(
+            dyn_clone::clone_box(&*vfs),
+            if index.is_inline() {
+                index_file.to_owned()
+            } else {
+                data_file.to_owned()
+            },
+        );
+
+        let uncompressed_chunk_cache =
+            data_config.uncompressed_cache_factor.map(
+                // Arbitrary initial value
+                // TODO check if using a hasher specific to integers is useful
+                |_factor| RefCell::new(LruMap::with_memory_budget(65536)),
+            );
+
+        let inline = index.is_inline();
+        Self {
+            index,
+            vfs,
+            index_file,
+            data_file,
+            data_config,
+            delta_config,
+            feature_config,
+            segment_file,
+            uncompressed_chunk_cache,
+            original_index_file: None,
+            writing_handles: None,
+            delayed_buffer: None,
+            inline,
+            last_revision_cache: Mutex::new(None),
+        }
+    }
+
+    /// Return number of entries of the revlog index
+    pub fn len(&self) -> usize {
+        self.index.len()
+    }
+
+    /// Return `true` if this revlog has no entries
+    pub fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    /// Return whether this revlog is inline (mixed index and data)
+    pub fn is_inline(&self) -> bool {
+        self.inline
+    }
+
+    /// Clear all caches from this revlog
+    pub fn clear_cache(&mut self) {
+        assert!(!self.is_delaying());
+        if let Some(cache) = self.uncompressed_chunk_cache.as_ref() {
+            // We don't clear the allocation here because it's probably faster.
+            // We could change our minds later if this ends up being a problem
+            // with regards to memory consumption.
+            cache.borrow_mut().clear();
+        }
+    }
+
+    /// Return an entry for the null revision
+    pub fn make_null_entry(&self) -> RevlogEntry {
+        RevlogEntry {
+            revlog: self,
+            rev: NULL_REVISION,
+            uncompressed_len: 0,
+            p1: NULL_REVISION,
+            p2: NULL_REVISION,
+            flags: NULL_REVLOG_ENTRY_FLAGS,
+            hash: NULL_NODE,
+        }
+    }
+
+    /// Return the [`RevlogEntry`] for a [`Revision`] that is known to exist
+    pub fn get_entry_for_checked_rev(
+        &self,
+        rev: Revision,
+    ) -> Result<RevlogEntry, RevlogError> {
+        if rev == NULL_REVISION {
+            return Ok(self.make_null_entry());
+        }
+        let index_entry = self
+            .index
+            .get_entry(rev)
+            .ok_or_else(|| RevlogError::InvalidRevision(rev.to_string()))?;
+        let p1 =
+            self.index.check_revision(index_entry.p1()).ok_or_else(|| {
+                RevlogError::corrupted(format!(
+                    "p1 for rev {} is invalid",
+                    rev
+                ))
+            })?;
+        let p2 =
+            self.index.check_revision(index_entry.p2()).ok_or_else(|| {
+                RevlogError::corrupted(format!(
+                    "p2 for rev {} is invalid",
+                    rev
+                ))
+            })?;
+        let entry = RevlogEntry {
+            revlog: self,
+            rev,
+            uncompressed_len: index_entry.uncompressed_len(),
+            p1,
+            p2,
+            flags: index_entry.flags(),
+            hash: *index_entry.hash(),
+        };
+        Ok(entry)
+    }
+
+    /// Return the [`RevlogEntry`] for `rev`. If `rev` fails to check, this
+    /// returns a [`RevlogError`].
+    /// TODO normalize naming across the index and all revlogs
+    /// (changelog, etc.) so that `get_entry` is always on an unchecked rev and
+    /// `get_entry_for_checked_rev` is for checked rev
+    pub fn get_entry(
+        &self,
+        rev: UncheckedRevision,
+    ) -> Result<RevlogEntry, RevlogError> {
+        if rev == NULL_REVISION.into() {
+            return Ok(self.make_null_entry());
+        }
+        let rev = self.index.check_revision(rev).ok_or_else(|| {
+            RevlogError::corrupted(format!("rev {} is invalid", rev))
+        })?;
+        self.get_entry_for_checked_rev(rev)
+    }
+
+    /// Is the revlog currently delaying the visibility of written data?
+    ///
+    /// The delaying mechanism can be either in-memory or written on disk in a
+    /// side-file.
+    pub fn is_delaying(&self) -> bool {
+        self.delayed_buffer.is_some() || self.original_index_file.is_some()
+    }
+
+    /// The offset of the data chunk for this revision
+    #[inline(always)]
+    pub fn start(&self, rev: Revision) -> usize {
+        self.index.start(
+            rev,
+            &self
+                .index
+                .get_entry(rev)
+                .unwrap_or_else(|| self.index.make_null_entry()),
+        )
+    }
+
+    /// The length of the data chunk for this revision
+    /// TODO rename this method and others to more explicit names than the
+    /// existing ones that were copied over from Python
+    #[inline(always)]
+    pub fn length(&self, rev: Revision) -> usize {
+        self.index
+            .get_entry(rev)
+            .unwrap_or_else(|| self.index.make_null_entry())
+            .compressed_len() as usize
+    }
+
+    /// The end of the data chunk for this revision
+    #[inline(always)]
+    pub fn end(&self, rev: Revision) -> usize {
+        self.start(rev) + self.length(rev)
+    }
+
+    /// Return the delta parent of the given revision
+    pub fn delta_parent(&self, rev: Revision) -> Revision {
+        let base = self
+            .index
+            .get_entry(rev)
+            .unwrap()
+            .base_revision_or_base_of_delta_chain();
+        if base.0 == rev.0 {
+            NULL_REVISION
+        } else if self.delta_config.general_delta {
+            Revision(base.0)
+        } else {
+            Revision(rev.0 - 1)
+        }
+    }
+
+    /// Return whether `rev` points to a snapshot revision (i.e. does not have
+    /// a delta base).
+    pub fn is_snapshot(&self, rev: Revision) -> Result<bool, RevlogError> {
+        if !self.delta_config.sparse_revlog {
+            return Ok(self.delta_parent(rev) == NULL_REVISION);
+        }
+        self.index.is_snapshot_unchecked(rev)
+    }
+
+    /// Return the delta chain for `rev` according to this revlog's config.
+    /// See [`Index::delta_chain`] for more information.
+    pub fn delta_chain(
+        &self,
+        rev: Revision,
+        stop_rev: Option<Revision>,
+    ) -> Result<(Vec<Revision>, bool), HgError> {
+        self.index.delta_chain(
+            rev,
+            stop_rev,
+            self.delta_config.general_delta.into(),
+        )
+    }
+
+    fn compressor(&self) -> Result<Box<dyn Compressor>, HgError> {
+        // TODO cache the compressor?
+        Ok(match self.feature_config.compression_engine {
+            CompressionConfig::Zlib { level } => {
+                Box::new(ZlibCompressor::new(level))
+            }
+            CompressionConfig::Zstd { level, threads } => {
+                Box::new(ZstdCompressor::new(level, threads))
+            }
+            CompressionConfig::None => Box::new(NoneCompressor),
+        })
+    }
+
+    /// Generate a possibly-compressed representation of data.
+    /// Returns `None` if the data was not compressed.
+    pub fn compress<'data>(
+        &self,
+        data: &'data [u8],
+    ) -> Result<Option<Cow<'data, [u8]>>, RevlogError> {
+        if data.is_empty() {
+            return Ok(Some(data.into()));
+        }
+        let res = self.compressor()?.compress(data)?;
+        if let Some(compressed) = res {
+            // The revlog compressor added the header in the returned data.
+            return Ok(Some(compressed.into()));
+        }
+
+        if data[0] == b'\0' {
+            return Ok(Some(data.into()));
+        }
+        Ok(None)
+    }
+
+    /// Decompress a revlog chunk.
+    ///
+    /// The chunk is expected to begin with a header identifying the
+    /// format type so it can be routed to an appropriate decompressor.
+    pub fn decompress<'a>(
+        &'a self,
+        data: &'a [u8],
+    ) -> Result<Cow<[u8]>, RevlogError> {
+        if data.is_empty() {
+            return Ok(data.into());
+        }
+
+        // Revlogs are read much more frequently than they are written and many
+        // chunks only take microseconds to decompress, so performance is
+        // important here.
+
+        let header = data[0];
+        match header {
+            // Settings don't matter as they only affect compression
+            ZLIB_BYTE => Ok(ZlibCompressor::new(0).decompress(data)?.into()),
+            // Settings don't matter as they only affect compression
+            ZSTD_BYTE => {
+                Ok(ZstdCompressor::new(0, 0).decompress(data)?.into())
+            }
+            b'\0' => Ok(data.into()),
+            b'u' => Ok((&data[1..]).into()),
+            other => Err(HgError::UnsupportedFeature(format!(
+                "unknown compression header '{}'",
+                other
+            ))
+            .into()),
+        }
+    }
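For reference, a sketch of the header-byte convention dispatched on above, reusing the constants imported from `compression.rs` (the helper name is hypothetical):

    fn chunk_kind(data: &[u8]) -> &'static str {
        match data.first() {
            None => "empty chunk, returned as-is",
            Some(&ZLIB_BYTE) => "zlib-compressed",
            Some(&ZSTD_BYTE) => "zstd-compressed",
            Some(&b'\0') => "stored uncompressed, returned as-is",
            Some(&b'u') => "uncompressed with explicit marker, header stripped",
            Some(_) => "unknown header: an UnsupportedFeature error",
        }
    }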
+
+    /// Obtain a segment of raw data corresponding to a range of revisions.
+    ///
+    /// Requests for data may be satisfied by a cache.
+    ///
+    /// Returns a 2-tuple of (offset, data) for the requested range of
+    /// revisions. Offset is the integer offset from the beginning of the
+    /// revlog and data is a slice of the raw byte data.
+    ///
+    /// Callers will need to call `self.start(rev)` and `self.length(rev)`
+    /// to determine where each revision's data begins and ends.
+    pub fn get_segment_for_revs(
+        &self,
+        start_rev: Revision,
+        end_rev: Revision,
+    ) -> Result<(usize, Vec<u8>), HgError> {
+        let start = if start_rev == NULL_REVISION {
+            0
+        } else {
+            let start_entry = self
+                .index
+                .get_entry(start_rev)
+                .expect("null revision segment");
+            self.index.start(start_rev, &start_entry)
+        };
+        let end_entry = self
+            .index
+            .get_entry(end_rev)
+            .expect("null revision segment");
+        let end = self.index.start(end_rev, &end_entry) + self.length(end_rev);
+
+        let length = end - start;
+
+        // XXX should we use mmap instead of doing this for platforms that
+        // support madvise/populate?
+        Ok((start, self.segment_file.read_chunk(start, length)?))
+    }
+
+    /// Return the uncompressed raw data for `rev`
+    pub fn chunk_for_rev(&self, rev: Revision) -> Result<Arc<[u8]>, HgError> {
+        if let Some(cache) = self.uncompressed_chunk_cache.as_ref() {
+            if let Some(chunk) = cache.borrow_mut().get(&rev) {
+                return Ok(chunk.clone());
+            }
+        }
+        // TODO revlogv2 should check the compression mode
+        let data = self.get_segment_for_revs(rev, rev)?.1;
+        let uncompressed = self.decompress(&data).map_err(|e| {
+            HgError::abort(
+                format!("revlog decompression error: {}", e),
+                exit_codes::ABORT,
+                None,
+            )
+        })?;
+        let uncompressed: Arc<[u8]> = Arc::from(uncompressed.into_owned());
+        if let Some(cache) = self.uncompressed_chunk_cache.as_ref() {
+            cache.borrow_mut().insert(rev, uncompressed.clone());
+        }
+        Ok(uncompressed)
+    }
+
+    /// Execute `func` within a read context for the data file, meaning that
+    /// the read handle will be taken and discarded after the operation.
+    pub fn with_read<R>(
+        &self,
+        func: impl FnOnce() -> Result<R, RevlogError>,
+    ) -> Result<R, RevlogError> {
+        self.enter_reading_context()?;
+        let res = func();
+        self.exit_reading_context();
+        res.map_err(Into::into)
+    }
+
+    /// `pub` only for use in hg-cpython
+    #[doc(hidden)]
+    pub fn enter_reading_context(&self) -> Result<(), HgError> {
+        if self.is_empty() {
+            // Nothing to be read
+            return Ok(());
+        }
+        if self.delayed_buffer.is_some() && self.is_inline() {
+            return Err(HgError::abort(
+                "revlog with delayed write should not be inline",
+                exit_codes::ABORT,
+                None,
+            ));
+        }
+        self.segment_file.get_read_handle()?;
+        Ok(())
+    }
+
+    /// `pub` only for use in hg-cpython
+    #[doc(hidden)]
+    pub fn exit_reading_context(&self) {
+        self.segment_file.exit_reading_context()
+    }
+
+    /// Fill the buffer returned by `get_buffer` with the possibly un-validated
+    /// raw text for a revision. It may already be validated if it comes
+    /// from the cache.
+    pub fn raw_text<G, T>(
+        &self,
+        rev: Revision,
+        get_buffer: G,
+    ) -> Result<(), RevlogError>
+    where
+        G: FnOnce(
+            usize,
+            &mut dyn FnMut(
+                &mut dyn RevisionBuffer<Target = T>,
+            ) -> Result<(), RevlogError>,
+        ) -> Result<(), RevlogError>,
+    {
+        let entry = &self.get_entry_for_checked_rev(rev)?;
+        let raw_size = entry.uncompressed_len();
+        let mut mutex_guard = self
+            .last_revision_cache
+            .lock()
+            .expect("lock should not be held");
+        let cached_rev = if let Some((_node, rev, data)) = &*mutex_guard {
+            Some((*rev, data.deref().as_ref()))
+        } else {
+            None
+        };
+        if let Some(cache) = &self.uncompressed_chunk_cache {
+            let cache = &mut cache.borrow_mut();
+            if let Some(size) = raw_size {
+                // Dynamically update the uncompressed_chunk_cache size to the
+                // largest revision we've seen in this revlog.
+                // Do it *before* restoration in case the current revision
+                // is the largest.
+                let factor = self
+                    .data_config
+                    .uncompressed_cache_factor
+                    .expect("cache should not exist without factor");
+                let candidate_size = (size as f64 * factor) as usize;
+                let limiter_mut = cache.limiter_mut();
+                if candidate_size > limiter_mut.max_memory_usage() {
+                    std::mem::swap(
+                        limiter_mut,
+                        &mut ByMemoryUsage::new(candidate_size),
+                    );
+                }
+            }
+        }
+        entry.rawdata(cached_rev, get_buffer)?;
+        // Drop the cache to save memory; the caller is expected to update
+        // the revision cache after validating the text.
+        mutex_guard.take();
+        Ok(())
+    }
+
+    /// Only `pub` for `hg-cpython`.
+    /// Obtain decompressed raw data for the specified revisions that are
+    /// assumed to be in ascending order.
+    ///
+    /// Returns a list with decompressed data for each requested revision.
+    #[doc(hidden)]
+    pub fn chunks(
+        &self,
+        revs: Vec<Revision>,
+        target_size: Option<u64>,
+    ) -> Result<Vec<Arc<[u8]>>, RevlogError> {
+        if revs.is_empty() {
+            return Ok(vec![]);
+        }
+        let mut fetched_revs = vec![];
+        let mut chunks = Vec::with_capacity(revs.len());
+
+        match self.uncompressed_chunk_cache.as_ref() {
+            Some(cache) => {
+                let mut cache = cache.borrow_mut();
+                for rev in revs.iter() {
+                    match cache.get(rev) {
+                        Some(hit) => chunks.push((*rev, hit.to_owned())),
+                        None => fetched_revs.push(*rev),
+                    }
+                }
+            }
+            None => fetched_revs = revs,
+        }
+
+        let already_cached = chunks.len();
+
+        let sliced_chunks = if fetched_revs.is_empty() {
+            vec![]
+        } else if !self.data_config.with_sparse_read || self.is_inline() {
+            vec![fetched_revs]
+        } else {
+            self.slice_chunk(&fetched_revs, target_size)?
+        };
+
+        self.with_read(|| {
+            for revs_chunk in sliced_chunks {
+                let first_rev = revs_chunk[0];
+                // Skip trailing revisions with empty diff
+                let last_rev_idx = revs_chunk
+                    .iter()
+                    .rposition(|r| self.length(*r) != 0)
+                    .unwrap_or(revs_chunk.len() - 1);
+
+                let last_rev = revs_chunk[last_rev_idx];
+
+                let (offset, data) =
+                    self.get_segment_for_revs(first_rev, last_rev)?;
+
+                let revs_chunk = &revs_chunk[..=last_rev_idx];
+
+                for rev in revs_chunk {
+                    let chunk_start = self.start(*rev);
+                    let chunk_length = self.length(*rev);
+                    // TODO revlogv2 should check the compression mode
+                    let bytes = &data[chunk_start - offset..][..chunk_length];
+                    let chunk = if !bytes.is_empty() && bytes[0] == ZSTD_BYTE {
+                        // If we're using `zstd`, we want to try a more
+                        // specialized decompression
+                        let entry = self.index.get_entry(*rev).unwrap();
+                        let is_delta = entry
+                            .base_revision_or_base_of_delta_chain()
+                            != (*rev).into();
+                        let uncompressed = uncompressed_zstd_data(
+                            bytes,
+                            is_delta,
+                            entry.uncompressed_len(),
+                        )?;
+                        Cow::Owned(uncompressed)
+                    } else {
+                        // Otherwise just fallback to generic decompression.
+                        self.decompress(bytes)?
+                    };
+
+                    chunks.push((*rev, chunk.into()));
+                }
+            }
+            Ok(())
+        })?;
+
+        if let Some(cache) = self.uncompressed_chunk_cache.as_ref() {
+            let mut cache = cache.borrow_mut();
+            for (rev, chunk) in chunks.iter().skip(already_cached) {
+                cache.insert(*rev, chunk.clone());
+            }
+        }
+        // Use stable sort here since it's *mostly* sorted
+        chunks.sort_by(|a, b| a.0.cmp(&b.0));
+        Ok(chunks.into_iter().map(|(_r, chunk)| chunk).collect())
+    }
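A usage sketch (hypothetical `inner_revlog` value): fetching the decompressed data of a run of revisions in at most a few sliced disk reads:

    let revs = vec![Revision(0), Revision(1), Revision(2)];
    let chunks = inner_revlog.chunks(revs, None)?;
    assert_eq!(chunks.len(), 3); // one Arc<[u8]> per requested revision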
+
+    /// Slice revs to reduce the amount of unrelated data to be read from disk.
+    ///
+    /// ``revs`` is sliced into groups that should each be read in a single
+    /// pass. ``revs`` is assumed to be sorted.
+    ///
+    /// The initial chunk is sliced until the overall density
+    /// (payload/chunks-span ratio) is above
+    /// `revlog.data_config.sr_density_threshold`.
+    /// No gap smaller than `revlog.data_config.sr_min_gap_size` is skipped.
+    ///
+    /// If `target_size` is set, no chunk larger than `target_size`
+    /// will be returned.
+    /// For consistency with other slicing choices, this limit won't go lower
+    /// than `revlog.data_config.sr_min_gap_size`.
+    ///
+    /// If individual revision chunks are larger than this limit, they will
+    /// still be returned, each as its own chunk.
+    pub fn slice_chunk(
+        &self,
+        revs: &[Revision],
+        target_size: Option<u64>,
+    ) -> Result<Vec<Vec<Revision>>, RevlogError> {
+        let target_size =
+            target_size.map(|size| size.max(self.data_config.sr_min_gap_size));
+
+        let target_density = self.data_config.sr_density_threshold;
+        let min_gap_size = self.data_config.sr_min_gap_size as usize;
+        let to_density = self.index.slice_chunk_to_density(
+            revs,
+            target_density,
+            min_gap_size,
+        );
+
+        let mut sliced = vec![];
+
+        for chunk in to_density {
+            sliced.extend(
+                self.slice_chunk_to_size(&chunk, target_size)?
+                    .into_iter()
+                    .map(ToOwned::to_owned),
+            );
+        }
+
+        Ok(sliced)
+    }
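A worked example of the density criterion, with assumed numbers: three revisions whose data occupies bytes [0..200], [700..800] and [900..1000] of the data file.

    let payload = 200 + 100 + 100; // 400 bytes actually needed
    let span = 1000 - 0;           // 1000 bytes if read in one pass
    let density = payload as f64 / span as f64; // 0.4
    assert!(density < 0.5);
    // With sr_density_threshold = 0.5 and a small enough sr_min_gap_size,
    // the 500-byte gap is skipped and two groups are read:
    // [rev0] and [rev1, rev2].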
+
+    /// Slice revs to match the target size
+    ///
+    /// This is intended to be used on chunks that density slicing selected,
+    /// but that are still too large compared to the read guarantee of revlogs.
+    /// This might happen when the "minimal gap size" interrupted the slicing,
+    /// or when chains are built in a way that creates large blocks next to
+    /// each other.
+    fn slice_chunk_to_size<'a>(
+        &self,
+        revs: &'a [Revision],
+        target_size: Option<u64>,
+    ) -> Result<Vec<&'a [Revision]>, RevlogError> {
+        let mut start_data = self.start(revs[0]);
+        let end_data = self.end(revs[revs.len() - 1]);
+        let full_span = end_data - start_data;
+
+        let nothing_to_do = target_size
+            .map(|size| full_span <= size as usize)
+            .unwrap_or(true);
+
+        if nothing_to_do {
+            return Ok(vec![revs]);
+        }
+        let target_size = target_size.expect("target_size is set") as usize;
+
+        let mut start_rev_idx = 0;
+        let mut end_rev_idx = 1;
+        let mut chunks = vec![];
+
+        for (idx, rev) in revs.iter().enumerate().skip(1) {
+            let span = self.end(*rev) - start_data;
+            let is_snapshot = self.is_snapshot(*rev)?;
+            if span <= target_size && is_snapshot {
+                end_rev_idx = idx + 1;
+            } else {
+                let chunk =
+                    self.trim_chunk(revs, start_rev_idx, Some(end_rev_idx));
+                if !chunk.is_empty() {
+                    chunks.push(chunk);
+                }
+                start_rev_idx = idx;
+                start_data = self.start(*rev);
+                end_rev_idx = idx + 1;
+            }
+            if !is_snapshot {
+                break;
+            }
+        }
+
+        // For the remaining revisions, we use binary slicing to quickly
+        // converge towards valid chunks (otherwise, we might end up looking
+        // for the start/end of many revisions). This logic does not look for
+        // the perfect slicing point; it settles for the first valid one.
+        let number_of_items = revs.len();
+
+        while (end_data - start_data) > target_size {
+            end_rev_idx = number_of_items;
+            if number_of_items - start_rev_idx <= 1 {
+                // Protect against individual chunks larger than the limit
+                break;
+            }
+            let mut local_end_data = self.end(revs[end_rev_idx - 1]);
+            let mut span = local_end_data - start_data;
+            while span > target_size {
+                if end_rev_idx - start_rev_idx <= 1 {
+                    // Protect against individual chunks larger than the limit
+                    break;
+                }
+                end_rev_idx -= (end_rev_idx - start_rev_idx) / 2;
+                local_end_data = self.end(revs[end_rev_idx - 1]);
+                span = local_end_data - start_data;
+            }
+            let chunk =
+                self.trim_chunk(revs, start_rev_idx, Some(end_rev_idx));
+            if !chunk.is_empty() {
+                chunks.push(chunk);
+            }
+            start_rev_idx = end_rev_idx;
+            start_data = self.start(revs[start_rev_idx]);
+        }
+
+        let chunk = self.trim_chunk(revs, start_rev_idx, None);
+        if !chunk.is_empty() {
+            chunks.push(chunk);
+        }
+
+        Ok(chunks)
+    }
+
+    /// Returns `revs[start_rev_idx..end_rev_idx]` without empty trailing revs
+    fn trim_chunk<'a>(
+        &self,
+        revs: &'a [Revision],
+        start_rev_idx: usize,
+        end_rev_idx: Option<usize>,
+    ) -> &'a [Revision] {
+        let mut end_rev_idx = end_rev_idx.unwrap_or(revs.len());
+
+        // If we have a non-empty delta candidate, there is nothing to trim
+        if revs[end_rev_idx - 1].0 < self.len() as BaseRevision {
+            // Trim empty revs at the end, except the very first rev of a chain
+            while end_rev_idx > 1
+                && end_rev_idx > start_rev_idx
+                && self.length(revs[end_rev_idx - 1]) == 0
+            {
+                end_rev_idx -= 1
+            }
+        }
+
+        &revs[start_rev_idx..end_rev_idx]
+    }
+
+    /// Check the hash of some given data against the recorded hash.
+    pub fn check_hash(
+        &self,
+        p1: Revision,
+        p2: Revision,
+        expected: &[u8],
+        data: &[u8],
+    ) -> bool {
+        let e1 = self.index.get_entry(p1);
+        let h1 = match e1 {
+            Some(ref entry) => entry.hash(),
+            None => &NULL_NODE,
+        };
+        let e2 = self.index.get_entry(p2);
+        let h2 = match e2 {
+            Some(ref entry) => entry.hash(),
+            None => &NULL_NODE,
+        };
+
+        hash(data, h1.as_bytes(), h2.as_bytes()) == expected
+    }
+
+    /// Returns whether we are currently in a [`Self::with_write`] context
+    pub fn is_writing(&self) -> bool {
+        self.writing_handles.is_some()
+    }
+
+    /// Open the revlog files for writing
+    ///
+    /// Adding content to a revlog should be done within this context.
+    /// TODO try using `BufRead` and `BufWrite` and see if performance improves
+    pub fn with_write<R>(
+        &mut self,
+        transaction: &mut impl Transaction,
+        data_end: Option<usize>,
+        func: impl FnOnce() -> R,
+    ) -> Result<R, HgError> {
+        if self.is_writing() {
+            return Ok(func());
+        }
+        self.enter_writing_context(data_end, transaction)
+            .map_err(|e| {
+                self.exit_writing_context();
+                e
+            })?;
+        let res = func();
+        self.exit_writing_context();
+        Ok(res)
+    }
+
+    /// `pub` only for use in hg-cpython
+    #[doc(hidden)]
+    pub fn exit_writing_context(&mut self) {
+        self.writing_handles.take();
+        self.segment_file.writing_handle.take();
+        self.segment_file.reading_handle.take();
+    }
+
+    /// `pub` only for use in hg-cpython
+    #[doc(hidden)]
+    pub fn python_writing_handles(&self) -> Option<&WriteHandles> {
+        self.writing_handles.as_ref()
+    }
+
+    /// `pub` only for use in hg-cpython
+    #[doc(hidden)]
+    pub fn enter_writing_context(
+        &mut self,
+        data_end: Option<usize>,
+        transaction: &mut impl Transaction,
+    ) -> Result<(), HgError> {
+        let data_size = if self.is_empty() {
+            0
+        } else {
+            self.end(Revision((self.len() - 1) as BaseRevision))
+        };
+        let data_handle = if !self.is_inline() {
+            let data_handle = match self.vfs.open(&self.data_file) {
+                Ok(mut f) => {
+                    if let Some(end) = data_end {
+                        f.seek(SeekFrom::Start(end as u64))
+                            .when_reading_file(&self.data_file)?;
+                    } else {
+                        f.seek(SeekFrom::End(0))
+                            .when_reading_file(&self.data_file)?;
+                    }
+                    f
+                }
+                Err(e) => match e {
+                    HgError::IoError { error, context } => {
+                        if error.kind() != ErrorKind::NotFound {
+                            return Err(HgError::IoError { error, context });
+                        }
+                        self.vfs.create(&self.data_file, true)?
+                    }
+                    e => return Err(e),
+                },
+            };
+            transaction.add(&self.data_file, data_size);
+            Some(FileHandle::from_file(
+                data_handle,
+                dyn_clone::clone_box(&*self.vfs),
+                &self.data_file,
+            ))
+        } else {
+            None
+        };
+        let index_size = self.len() * INDEX_ENTRY_SIZE;
+        let index_handle = self.index_write_handle()?;
+        if self.is_inline() {
+            transaction.add(&self.index_file, data_size);
+        } else {
+            transaction.add(&self.index_file, index_size);
+        }
+        self.writing_handles = Some(WriteHandles {
+            index_handle: index_handle.clone(),
+            data_handle: data_handle.clone(),
+        });
+        *self.segment_file.reading_handle.borrow_mut() = if self.is_inline() {
+            Some(index_handle)
+        } else {
+            data_handle
+        };
+        Ok(())
+    }
+
+    /// Get a write handle to the index, positioned at the end of its data.
+    fn index_write_handle(&self) -> Result<FileHandle, HgError> {
+        let res = if self.delayed_buffer.is_none() {
+            if self.data_config.check_ambig {
+                self.vfs.open_check_ambig(&self.index_file)
+            } else {
+                self.vfs.open(&self.index_file)
+            }
+        } else {
+            self.vfs.open(&self.index_file)
+        };
+        match res {
+            Ok(mut handle) => {
+                handle
+                    .seek(SeekFrom::End(0))
+                    .when_reading_file(&self.index_file)?;
+                Ok(
+                    if let Some(delayed_buffer) = self.delayed_buffer.as_ref()
+                    {
+                        FileHandle::from_file_delayed(
+                            handle,
+                            dyn_clone::clone_box(&*self.vfs),
+                            &self.index_file,
+                            delayed_buffer.clone(),
+                        )?
+                    } else {
+                        FileHandle::from_file(
+                            handle,
+                            dyn_clone::clone_box(&*self.vfs),
+                            &self.index_file,
+                        )
+                    },
+                )
+            }
+            Err(e) => match e {
+                HgError::IoError { error, context } => {
+                    if error.kind() != ErrorKind::NotFound {
+                        return Err(HgError::IoError { error, context });
+                    };
+                    if let Some(delayed_buffer) = self.delayed_buffer.as_ref()
+                    {
+                        FileHandle::new_delayed(
+                            dyn_clone::clone_box(&*self.vfs),
+                            &self.index_file,
+                            true,
+                            delayed_buffer.clone(),
+                        )
+                    } else {
+                        FileHandle::new(
+                            dyn_clone::clone_box(&*self.vfs),
+                            &self.index_file,
+                            true,
+                            true,
+                        )
+                    }
+                }
+                e => Err(e),
+            },
+        }
+    }
+
+    /// Split the data of an inline revlog into an index and a data file
+    pub fn split_inline(
+        &mut self,
+        header: IndexHeader,
+        new_index_file_path: Option<PathBuf>,
+    ) -> Result<PathBuf, RevlogError> {
+        assert!(self.delayed_buffer.is_none());
+        let existing_handles = self.writing_handles.is_some();
+        if let Some(handles) = &mut self.writing_handles {
+            handles.index_handle.flush()?;
+            self.writing_handles.take();
+            self.segment_file.writing_handle.take();
+        }
+        let mut new_data_file_handle =
+            self.vfs.create(&self.data_file, true)?;
+        // Drop any potential data, possibly redundant with the VFS impl.
+        new_data_file_handle
+            .set_len(0)
+            .when_writing_file(&self.data_file)?;
+
+        self.with_read(|| -> Result<(), RevlogError> {
+            for r in 0..self.index.len() {
+                let rev = Revision(r as BaseRevision);
+                let rev_segment = self.get_segment_for_revs(rev, rev)?.1;
+                new_data_file_handle
+                    .write_all(&rev_segment)
+                    .when_writing_file(&self.data_file)?;
+            }
+            new_data_file_handle
+                .flush()
+                .when_writing_file(&self.data_file)?;
+            Ok(())
+        })?;
+
+        if let Some(index_path) = new_index_file_path {
+            self.index_file = index_path
+        }
+
+        let mut new_index_handle = self.vfs.create(&self.index_file, true)?;
+        let mut new_data = Vec::with_capacity(self.len() * INDEX_ENTRY_SIZE);
+        for r in 0..self.len() {
+            let rev = Revision(r as BaseRevision);
+            let entry = self.index.entry_binary(rev).unwrap_or_else(|| {
+                panic!(
+                    "entry {} should exist in {}",
+                    r,
+                    self.index_file.display()
+                )
+            });
+            if r == 0 {
+                new_data.extend(header.header_bytes);
+            }
+            new_data.extend(entry);
+        }
+        new_index_handle
+            .write_all(&new_data)
+            .when_writing_file(&self.index_file)?;
+        // Replace the index with a new one because the buffer contains inline
+        // data
+        self.index = Index::new(Box::new(new_data), header)?;
+        self.inline = false;
+
+        self.segment_file = RandomAccessFile::new(
+            dyn_clone::clone_box(&*self.vfs),
+            self.data_file.to_owned(),
+        );
+        if existing_handles {
+            // Switched from inline to conventional storage; reopen the index
+            let new_data_handle = Some(FileHandle::from_file(
+                new_data_file_handle,
+                dyn_clone::clone_box(&*self.vfs),
+                &self.data_file,
+            ));
+            self.writing_handles = Some(WriteHandles {
+                index_handle: self.index_write_handle()?,
+                data_handle: new_data_handle.clone(),
+            });
+            *self.segment_file.writing_handle.borrow_mut() = new_data_handle;
+        }
+
+        Ok(self.index_file.to_owned())
+    }
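+
+    // Editorial note: after `split_inline`, the `.i` file holds only the
+    // header and `len() * INDEX_ENTRY_SIZE` bytes of index entries, while
+    // revision data now lives in the sibling `.d` file, matching the
+    // non-inline layout.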
+
+    /// Write a new entry to this revlog.
+    /// - `entry` is the index bytes
+    /// - `header_and_data` is the compression header and the revision data
+    /// - `offset` is the position in the data file to write to
+    /// - `index_end` is the overwritten position in the index in revlog-v2,
+    ///   since the format may allow a rewrite of garbage data at the end.
+    /// - `data_end` is the overwritten position in the data-file in revlog-v2,
+    ///   since the format may allow a rewrite of garbage data at the end.
+    ///
+    /// XXX Why do we have `data_end` *and* `offset`? Same question in Python
+    pub fn write_entry(
+        &mut self,
+        mut transaction: impl Transaction,
+        entry: &[u8],
+        header_and_data: (&[u8], &[u8]),
+        mut offset: usize,
+        index_end: Option<u64>,
+        data_end: Option<u64>,
+    ) -> Result<(u64, Option<u64>), HgError> {
+        let current_revision = self.len() - 1;
+        let canonical_index_file = self.canonical_index_file();
+
+        let is_inline = self.is_inline();
+        let handles = match &mut self.writing_handles {
+            None => {
+                return Err(HgError::abort(
+                    "adding revision outside of the `with_write` context",
+                    exit_codes::ABORT,
+                    None,
+                ));
+            }
+            Some(handles) => handles,
+        };
+        let index_handle = &mut handles.index_handle;
+        let data_handle = &mut handles.data_handle;
+        if let Some(end) = index_end {
+            index_handle
+                .seek(SeekFrom::Start(end))
+                .when_reading_file(&self.index_file)?;
+        } else {
+            index_handle
+                .seek(SeekFrom::End(0))
+                .when_reading_file(&self.index_file)?;
+        }
+        if let Some(data_handle) = data_handle {
+            if let Some(end) = data_end {
+                data_handle
+                    .seek(SeekFrom::Start(end))
+                    .when_reading_file(&self.data_file)?;
+            } else {
+                data_handle
+                    .seek(SeekFrom::End(0))
+                    .when_reading_file(&self.data_file)?;
+            }
+        }
+        let (header, data) = header_and_data;
+
+        if !is_inline {
+            transaction.add(&self.data_file, offset);
+            transaction
+                .add(&canonical_index_file, current_revision * entry.len());
+            let data_handle = data_handle
+                .as_mut()
+                .expect("data handle should exist when not inline");
+            if !header.is_empty() {
+                data_handle.write_all(header)?;
+            }
+            data_handle.write_all(data)?;
+            match &mut self.delayed_buffer {
+                Some(buf) => {
+                    buf.lock()
+                        .expect("propagate the panic")
+                        .buffer
+                        .write_all(entry)
+                        .expect("write to delay buffer should succeed");
+                }
+                None => index_handle.write_all(entry)?,
+            }
+        } else if self.delayed_buffer.is_some() {
+            return Err(HgError::abort(
+                "invalid delayed write on inline revlog",
+                exit_codes::ABORT,
+                None,
+            ));
+        } else {
+            offset += current_revision * entry.len();
+            transaction.add(&canonical_index_file, offset);
+            index_handle.write_all(entry)?;
+            index_handle.write_all(header)?;
+            index_handle.write_all(data)?;
+        }
+        let data_position = match data_handle {
+            Some(h) => Some(h.position()?),
+            None => None,
+        };
+        Ok((index_handle.position()?, data_position))
+    }
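+
+    // Editorial sketch (names hypothetical): appending one revision from
+    // inside a `with_write` closure:
+    //
+    //     let (index_end, data_end) = inner.write_entry(
+    //         tr, &entry_bytes, (header, data), offset, None, None,
+    //     )?;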
+
+    /// Return the real target index file, not the temporary one, when diverting
+    pub fn canonical_index_file(&self) -> PathBuf {
+        self.original_index_file
+            .as_ref()
+            .map(ToOwned::to_owned)
+            .unwrap_or_else(|| self.index_file.to_owned())
+    }
+
+    /// Return the path to the diverted index
+    fn diverted_index(&self) -> PathBuf {
+        self.index_file.with_extension("i.a")
+    }
+
+    /// True if we're in a [`Self::with_write`] or [`Self::with_read`] context
+    pub fn is_open(&self) -> bool {
+        self.segment_file.is_open()
+    }
+
+    /// Set this revlog to delay its writes to a buffer
+    pub fn delay(&mut self) -> Result<Option<PathBuf>, HgError> {
+        assert!(!self.is_open());
+        if self.is_inline() {
+            return Err(HgError::abort(
+                "revlog with delayed write should not be inline",
+                exit_codes::ABORT,
+                None,
+            ));
+        }
+        if self.delayed_buffer.is_some() || self.original_index_file.is_some()
+        {
+            // Delay or divert already happening
+            return Ok(None);
+        }
+        if self.is_empty() {
+            self.original_index_file = Some(self.index_file.to_owned());
+            self.index_file = self.diverted_index();
+            if self.vfs.exists(&self.index_file) {
+                self.vfs.unlink(&self.index_file)?;
+            }
+            Ok(Some(self.index_file.to_owned()))
+        } else {
+            self.delayed_buffer =
+                Some(Arc::new(Mutex::new(DelayedBuffer::default())));
+            Ok(None)
+        }
+    }
+
+    /// Write the pending in-memory data, if any, to the diverted index file
+    /// (an on-disk temporary file)
+    pub fn write_pending(
+        &mut self,
+    ) -> Result<(Option<PathBuf>, bool), HgError> {
+        assert!(!self.is_open());
+        if self.is_inline() {
+            return Err(HgError::abort(
+                "revlog with delayed write should not be inline",
+                exit_codes::ABORT,
+                None,
+            ));
+        }
+        if self.original_index_file.is_some() {
+            return Ok((None, true));
+        }
+        let mut any_pending = false;
+        let pending_index_file = self.diverted_index();
+        if self.vfs.exists(&pending_index_file) {
+            self.vfs.unlink(&pending_index_file)?;
+        }
+        self.vfs.copy(&self.index_file, &pending_index_file)?;
+        if let Some(delayed_buffer) = self.delayed_buffer.take() {
+            let mut index_file_handle = self.vfs.open(&pending_index_file)?;
+            index_file_handle
+                .seek(SeekFrom::End(0))
+                .when_writing_file(&pending_index_file)?;
+            let delayed_data =
+                &delayed_buffer.lock().expect("propagate the panic").buffer;
+            index_file_handle
+                .write_all(delayed_data)
+                .when_writing_file(&pending_index_file)?;
+            any_pending = true;
+        }
+        self.original_index_file = Some(self.index_file.to_owned());
+        self.index_file = pending_index_file;
+        Ok((Some(self.index_file.to_owned()), any_pending))
+    }
+
+    /// Overwrite the canonical file with the diverted file, or write out the
+    /// delayed buffer.
+    /// Returns an error if the revlog is neither diverted nor delayed.
+    pub fn finalize_pending(&mut self) -> Result<PathBuf, HgError> {
+        assert!(!self.is_open());
+        if self.is_inline() {
+            return Err(HgError::abort(
+                "revlog with delayed write should not be inline",
+                exit_codes::ABORT,
+                None,
+            ));
+        }
+        match (
+            self.delayed_buffer.as_ref(),
+            self.original_index_file.as_ref(),
+        ) {
+            (None, None) => {
+                return Err(HgError::abort(
+                    "neither delay nor divert found on this revlog",
+                    exit_codes::ABORT,
+                    None,
+                ));
+            }
+            (Some(delay), None) => {
+                let mut index_file_handle = self.vfs.open(&self.index_file)?;
+                index_file_handle
+                    .seek(SeekFrom::End(0))
+                    .when_writing_file(&self.index_file)?;
+                index_file_handle
+                    .write_all(
+                        &delay.lock().expect("propagate the panic").buffer,
+                    )
+                    .when_writing_file(&self.index_file)?;
+                self.delayed_buffer = None;
+            }
+            (None, Some(divert)) => {
+                if self.vfs.exists(&self.index_file) {
+                    self.vfs.rename(&self.index_file, divert, true)?;
+                }
+                divert.clone_into(&mut self.index_file);
+                self.original_index_file = None;
+            }
+            (Some(_), Some(_)) => unreachable!(
+                "{} is in an inconsistent state of both delay and divert",
+                self.canonical_index_file().display(),
+            ),
+        }
+        Ok(self.canonical_index_file())
+    }
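+
+    // Lifecycle sketch (editorial, assuming a non-inline changelog): the
+    // delay/divert dance keeps concurrent readers away from a partially
+    // written index:
+    //
+    //     revlog.delay()?;             // buffer or divert index writes
+    //     // ... append revisions under `with_write` ...
+    //     revlog.write_pending()?;     // expose them via the `.i.a` file
+    //     revlog.finalize_pending()?;  // install the canonical index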
+
+    /// `pub` only for `hg-cpython`. This is made a different method than
+    /// [`Revlog::index`] in case there is a different invariant that pops up
+    /// later.
+    #[doc(hidden)]
+    pub fn shared_index(&self) -> &Index {
+        &self.index
+    }
+}
+
+/// The use of a [`RefCell`] assumes that a given revlog will only
+/// be accessed (read or write) by a single thread.
+type UncompressedChunkCache =
+    RefCell<LruMap<Revision, Arc<[u8]>, ByMemoryUsage>>;
+
+/// The node, revision and data for the last revision we've seen. Speeds up
+/// a lot of sequential operations on the revlog.
+///
+/// The data is not just bytes since it can come from Python and we want to
+/// avoid copies if possible.
+type SingleRevisionCache =
+    (Node, Revision, Box<dyn Deref<Target = [u8]> + Send>);
+
+/// A way of progressively filling a buffer with revision data, then
+/// returning that buffer. Used to abstract over Python-allocated buffers to
+/// reduce copying for performance reasons.
+pub trait RevisionBuffer {
+    /// The owned buffer type to return
+    type Target;
+    /// Copies the slice into the buffer
+    fn extend_from_slice(&mut self, slice: &[u8]);
+    /// Returns the now finished owned buffer
+    fn finish(self) -> Self::Target;
+}
+
+/// A simple vec-based buffer. This is needlessly complicated for the pure
+/// Rust case, but it's the price to pay for Python compatibility.
+#[derive(Debug)]
+pub(super) struct CoreRevisionBuffer {
+    buf: Vec<u8>,
+}
+
+impl CoreRevisionBuffer {
+    pub fn new() -> Self {
+        Self { buf: vec![] }
+    }
+
+    #[inline]
+    pub fn resize(&mut self, size: usize) {
+        // Despite its name, this only reserves capacity; the subtraction
+        // saturates so a `size` below the current capacity cannot underflow.
+        self.buf.reserve_exact(size.saturating_sub(self.buf.capacity()));
+    }
+}
+
+impl RevisionBuffer for CoreRevisionBuffer {
+    type Target = Vec<u8>;
+
+    #[inline]
+    fn extend_from_slice(&mut self, slice: &[u8]) {
+        self.buf.extend_from_slice(slice);
+    }
+
+    #[inline]
+    fn finish(self) -> Self::Target {
+        self.buf
+    }
+}
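+
+// Usage sketch (editorial): pure-Rust callers fill a `CoreRevisionBuffer`,
+// while the Python bridge can supply its own `RevisionBuffer` backed by a
+// Python-allocated object to avoid an extra copy:
+//
+//     let mut buf = CoreRevisionBuffer::new();
+//     buf.resize(1024); // pre-allocate the expected total size
+//     buf.extend_from_slice(b"some revision data");
+//     let owned: Vec<u8> = buf.finish();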
+
+/// Calculate the hash of a revision given its data and its parents.
+pub fn hash(
+    data: &[u8],
+    p1_hash: &[u8],
+    p2_hash: &[u8],
+) -> [u8; NODE_BYTES_LENGTH] {
+    let mut hasher = Sha1::new();
+    let (a, b) = (p1_hash, p2_hash);
+    if a > b {
+        hasher.update(b);
+        hasher.update(a);
+    } else {
+        hasher.update(a);
+        hasher.update(b);
+    }
+    hasher.update(data);
+    *hasher.finalize().as_ref()
+}
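+
+// Worked example (editorial): a node id is the SHA-1 of the two parent
+// hashes in sorted order followed by the revision data, which is exactly
+// what `check_hash` above recomputes:
+//
+//     let node = hash(b"data", p1_node.as_bytes(), p2_node.as_bytes());
+//     assert_eq!(node.len(), NODE_BYTES_LENGTH); // 20 bytes for SHA-1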
--- a/rust/hg-core/src/revlog/manifest.rs	Wed Nov 20 15:38:57 2024 +0100
+++ b/rust/hg-core/src/revlog/manifest.rs	Wed Nov 20 15:53:19 2024 +0100
@@ -6,9 +6,9 @@
 use crate::utils::hg_path::HgPath;
 use crate::utils::SliceExt;
 use crate::vfs::VfsImpl;
-use crate::{
-    Graph, GraphError, Revision, RevlogOpenOptions, UncheckedRevision,
-};
+use crate::{Graph, GraphError, Revision, UncheckedRevision};
+
+use super::options::RevlogOpenOptions;
 
 /// A specialized `Revlog` to work with `manifest` data format.
 pub struct Manifestlog {
--- a/rust/hg-core/src/revlog/mod.rs	Wed Nov 20 15:38:57 2024 +0100
+++ b/rust/hg-core/src/revlog/mod.rs	Wed Nov 20 15:53:19 2024 +0100
@@ -9,35 +9,35 @@
 pub mod nodemap;
 mod nodemap_docket;
 pub mod path_encode;
-pub use node::{FromHexError, Node, NodePrefix};
+use inner_revlog::CoreRevisionBuffer;
+use inner_revlog::InnerRevlog;
+use inner_revlog::RevisionBuffer;
+use memmap2::MmapOptions;
+pub use node::{FromHexError, Node, NodePrefix, NULL_NODE, NULL_NODE_ID};
+use options::RevlogOpenOptions;
 pub mod changelog;
+pub mod compression;
+pub mod file_io;
 pub mod filelog;
 pub mod index;
+pub mod inner_revlog;
 pub mod manifest;
+pub mod options;
 pub mod patch;
 
 use std::borrow::Cow;
-use std::collections::HashSet;
+use std::io::ErrorKind;
 use std::io::Read;
 use std::ops::Deref;
 use std::path::Path;
 
-use flate2::read::ZlibDecoder;
-use sha1::{Digest, Sha1};
-use std::cell::RefCell;
-use zstd;
-
-use self::node::{NODE_BYTES_LENGTH, NULL_NODE};
 use self::nodemap_docket::NodeMapDocket;
-use super::index::Index;
-use super::index::INDEX_ENTRY_SIZE;
-use super::nodemap::{NodeMap, NodeMapError};
-use crate::config::{Config, ResourceProfileValue};
 use crate::errors::HgError;
+use crate::errors::IoResultExt;
 use crate::exit_codes;
-use crate::requirements::{
-    GENERALDELTA_REQUIREMENT, NARROW_REQUIREMENT, SPARSEREVLOG_REQUIREMENT,
-};
+use crate::revlog::index::Index;
+use crate::revlog::nodemap::{NodeMap, NodeMapError};
+use crate::vfs::Vfs;
 use crate::vfs::VfsImpl;
 
 /// As noted in revlog.c, revision numbers are actually encoded in
@@ -258,461 +258,17 @@
     }
 }
 
-#[derive(Debug, Copy, Clone, PartialEq, Eq)]
-pub enum CompressionEngine {
-    Zlib {
-        /// Between 0 and 9 included
-        level: u32,
-    },
-    Zstd {
-        /// Between 0 and 22 included
-        level: u32,
-        /// Never used in practice for now
-        threads: u32,
-    },
-    /// No compression is performed
-    None,
-}
-impl CompressionEngine {
-    pub fn set_level(&mut self, new_level: usize) -> Result<(), HgError> {
-        match self {
-            CompressionEngine::Zlib { level } => {
-                if new_level > 9 {
-                    return Err(HgError::abort(
-                        format!(
-                            "invalid compression zlib compression level {}",
-                            new_level
-                        ),
-                        exit_codes::ABORT,
-                        None,
-                    ));
-                }
-                *level = new_level as u32;
-            }
-            CompressionEngine::Zstd { level, .. } => {
-                if new_level > 22 {
-                    return Err(HgError::abort(
-                        format!(
-                            "invalid compression zstd compression level {}",
-                            new_level
-                        ),
-                        exit_codes::ABORT,
-                        None,
-                    ));
-                }
-                *level = new_level as u32;
-            }
-            CompressionEngine::None => {}
-        }
-        Ok(())
-    }
-
-    pub fn zstd(
-        zstd_level: Option<u32>,
-    ) -> Result<CompressionEngine, HgError> {
-        let mut engine = CompressionEngine::Zstd {
-            level: 3,
-            threads: 0,
-        };
-        if let Some(level) = zstd_level {
-            engine.set_level(level as usize)?;
-        }
-        Ok(engine)
-    }
-}
-
-impl Default for CompressionEngine {
-    fn default() -> Self {
-        Self::Zlib { level: 6 }
-    }
-}
-
-#[derive(Debug, Clone, Copy, PartialEq)]
-/// Holds configuration values about how the revlog data is read
-pub struct RevlogDataConfig {
-    /// Should we try to open the "pending" version of the revlog
-    pub try_pending: bool,
-    /// Should we try to open the "split" version of the revlog
-    pub try_split: bool,
-    /// When True, `indexfile` should be opened with `checkambig=True` at
-    /// writing time, to avoid file stat ambiguity
-    pub check_ambig: bool,
-    /// If true, use mmap instead of reading to deal with large indexes
-    pub mmap_large_index: bool,
-    /// How much data is considered large
-    pub mmap_index_threshold: Option<u64>,
-    /// How much data to read and cache into the raw revlog data cache
-    pub chunk_cache_size: u64,
-    /// The size of the uncompressed cache compared to the largest revision
-    /// seen
-    pub uncompressed_cache_factor: Option<f64>,
-    /// The number of chunks cached
-    pub uncompressed_cache_count: Option<u64>,
-    /// Allow sparse reading of the revlog data
-    pub with_sparse_read: bool,
-    /// Minimal density of a sparse read chunk
-    pub sr_density_threshold: f64,
-    /// Minimal size of the data we skip when performing sparse reads
-    pub sr_min_gap_size: u64,
-    /// Whether deltas are encoded against arbitrary bases
-    pub general_delta: bool,
-}
-
-impl RevlogDataConfig {
-    pub fn new(
-        config: &Config,
-        requirements: &HashSet<String>,
-    ) -> Result<Self, HgError> {
-        let mut data_config = Self::default();
-        if let Some(chunk_cache_size) =
-            config.get_byte_size(b"format", b"chunkcachesize")?
-        {
-            data_config.chunk_cache_size = chunk_cache_size;
-        }
-
-        let memory_profile = config.get_resource_profile(Some("memory"));
-        if memory_profile.value >= ResourceProfileValue::Medium {
-            data_config.uncompressed_cache_count = Some(10_000);
-            data_config.uncompressed_cache_factor = Some(4.0);
-            if memory_profile.value >= ResourceProfileValue::High {
-                data_config.uncompressed_cache_factor = Some(10.0)
-            }
-        }
-
-        if let Some(mmap_index_threshold) = config
-            .get_byte_size(b"storage", b"revlog.mmap.index:size-threshold")?
-        {
-            data_config.mmap_index_threshold = Some(mmap_index_threshold);
-        }
-
-        let with_sparse_read =
-            config.get_bool(b"experimental", b"sparse-read")?;
-        if let Some(sr_density_threshold) = config
-            .get_f64(b"experimental", b"sparse-read.density-threshold")?
-        {
-            data_config.sr_density_threshold = sr_density_threshold;
-        }
-        data_config.with_sparse_read = with_sparse_read;
-        if let Some(sr_min_gap_size) = config
-            .get_byte_size(b"experimental", b"sparse-read.min-gap-size")?
-        {
-            data_config.sr_min_gap_size = sr_min_gap_size;
-        }
-
-        data_config.with_sparse_read =
-            requirements.contains(SPARSEREVLOG_REQUIREMENT);
-
-        Ok(data_config)
-    }
-}
-
-impl Default for RevlogDataConfig {
-    fn default() -> Self {
-        Self {
-            chunk_cache_size: 65536,
-            sr_density_threshold: 0.50,
-            sr_min_gap_size: 262144,
-            try_pending: Default::default(),
-            try_split: Default::default(),
-            check_ambig: Default::default(),
-            mmap_large_index: Default::default(),
-            mmap_index_threshold: Default::default(),
-            uncompressed_cache_factor: Default::default(),
-            uncompressed_cache_count: Default::default(),
-            with_sparse_read: Default::default(),
-            general_delta: Default::default(),
-        }
-    }
-}
-
-#[derive(Debug, Clone, Copy, PartialEq)]
-/// Holds configuration values about how new deltas are computed.
-///
-/// Some attributes are duplicated from [`RevlogDataConfig`] to help having
-/// each object self contained.
-pub struct RevlogDeltaConfig {
-    /// Whether deltas can be encoded against arbitrary bases
-    pub general_delta: bool,
-    /// Allow sparse writing of the revlog data
-    pub sparse_revlog: bool,
-    /// Maximum length of a delta chain
-    pub max_chain_len: Option<u64>,
-    /// Maximum distance between a delta chain's start and end
-    pub max_deltachain_span: Option<u64>,
-    /// If `upper_bound_comp` is not None, this is the expected maximal
-    /// gain from compression for the data content
-    pub upper_bound_comp: Option<f64>,
-    /// Should we try a delta against both parents
-    pub delta_both_parents: bool,
-    /// Test delta base candidate groups by chunks of this maximal size
-    pub candidate_group_chunk_size: u64,
-    /// Should we display debug information about delta computation
-    pub debug_delta: bool,
-    /// Trust incoming deltas by default
-    pub lazy_delta: bool,
-    /// Trust the base of incoming deltas by default
-    pub lazy_delta_base: bool,
-}
-impl RevlogDeltaConfig {
-    pub fn new(
-        config: &Config,
-        requirements: &HashSet<String>,
-        revlog_type: RevlogType,
-    ) -> Result<Self, HgError> {
-        let mut delta_config = Self {
-            delta_both_parents: config
-                .get_option_no_default(
-                    b"storage",
-                    b"revlog.optimize-delta-parent-choice",
-                )?
-                .unwrap_or(true),
-            candidate_group_chunk_size: config
-                .get_u64(
-                    b"storage",
-                    b"revlog.delta-parent-search.candidate-group-chunk-size",
-                )?
-                .unwrap_or_default(),
-            ..Default::default()
-        };
-
-        delta_config.debug_delta =
-            config.get_bool(b"debug", b"revlog.debug-delta")?;
-
-        delta_config.general_delta =
-            requirements.contains(GENERALDELTA_REQUIREMENT);
-
-        let lazy_delta =
-            config.get_bool(b"storage", b"revlog.reuse-external-delta")?;
-
-        if revlog_type == RevlogType::Manifestlog {
-            // upper bound of what we expect from compression
-            // (real life value seems to be 3)
-            delta_config.upper_bound_comp = Some(3.0)
-        }
-
-        let mut lazy_delta_base = false;
-        if lazy_delta {
-            lazy_delta_base = match config.get_option_no_default(
-                b"storage",
-                b"revlog.reuse-external-delta-parent",
-            )? {
-                Some(base) => base,
-                None => config.get_bool(b"format", b"generaldelta")?,
-            };
-        }
-        delta_config.lazy_delta = lazy_delta;
-        delta_config.lazy_delta_base = lazy_delta_base;
-
-        delta_config.max_deltachain_span =
-            match config.get_i64(b"experimental", b"maxdeltachainspan")? {
-                Some(span) => {
-                    if span < 0 {
-                        None
-                    } else {
-                        Some(span as u64)
-                    }
-                }
-                None => None,
-            };
-
-        delta_config.sparse_revlog =
-            requirements.contains(SPARSEREVLOG_REQUIREMENT);
-
-        delta_config.max_chain_len =
-            config.get_byte_size_no_default(b"format", b"maxchainlen")?;
-
-        Ok(delta_config)
-    }
-}
-
-impl Default for RevlogDeltaConfig {
-    fn default() -> Self {
-        Self {
-            delta_both_parents: true,
-            lazy_delta: true,
-            general_delta: Default::default(),
-            sparse_revlog: Default::default(),
-            max_chain_len: Default::default(),
-            max_deltachain_span: Default::default(),
-            upper_bound_comp: Default::default(),
-            candidate_group_chunk_size: Default::default(),
-            debug_delta: Default::default(),
-            lazy_delta_base: Default::default(),
-        }
-    }
-}
-
-#[derive(Debug, Default, Clone, Copy, PartialEq)]
-/// Holds configuration values about the available revlog features
-pub struct RevlogFeatureConfig {
-    /// The compression engine and its options
-    pub compression_engine: CompressionEngine,
-    /// Can we use censor on this revlog
-    pub censorable: bool,
-    /// Does this revlog use the "side data" feature
-    pub has_side_data: bool,
-    /// Might remove this configuration once the rank computation has no
-    /// impact
-    pub compute_rank: bool,
-    /// Parent order is supposed to be semantically irrelevant, so we
-    /// normally re-sort parents to ensure that the first parent is non-null,
-    /// if there is a non-null parent at all.
-    /// filelog abuses the parent order as a flag to mark some instances of
-    /// meta-encoded files, so allow it to disable this behavior.
-    pub canonical_parent_order: bool,
-    /// Can ellipsis commit be used
-    pub enable_ellipsis: bool,
-}
-impl RevlogFeatureConfig {
-    pub fn new(
-        config: &Config,
-        requirements: &HashSet<String>,
-    ) -> Result<Self, HgError> {
-        let mut feature_config = Self::default();
-
-        let zlib_level = config.get_u32(b"storage", b"revlog.zlib.level")?;
-        let zstd_level = config.get_u32(b"storage", b"revlog.zstd.level")?;
-
-        feature_config.compression_engine = CompressionEngine::default();
-
-        for requirement in requirements {
-            if requirement.starts_with("revlog-compression-")
-                || requirement.starts_with("exp-compression-")
-            {
-                let split = &mut requirement.splitn(3, '-');
-                split.next();
-                split.next();
-                feature_config.compression_engine = match split.next().unwrap()
-                {
-                    "zstd" => CompressionEngine::zstd(zstd_level)?,
-                    e => {
-                        return Err(HgError::UnsupportedFeature(format!(
-                            "Unsupported compression engine '{e}'"
-                        )))
-                    }
-                };
-            }
-        }
-        if let Some(level) = zlib_level {
-            if matches!(
-                feature_config.compression_engine,
-                CompressionEngine::Zlib { .. }
-            ) {
-                feature_config
-                    .compression_engine
-                    .set_level(level as usize)?;
-            }
-        }
-
-        feature_config.enable_ellipsis =
-            requirements.contains(NARROW_REQUIREMENT);
-
-        Ok(feature_config)
-    }
-}
-
-/// Read only implementation of revlog.
 pub struct Revlog {
-    /// When index and data are not interleaved: bytes of the revlog index.
-    /// When index and data are interleaved: bytes of the revlog index and
-    /// data.
-    index: Index,
-    /// When index and data are not interleaved: bytes of the revlog data
-    data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
+    inner: InnerRevlog,
     /// When present on disk: the persistent nodemap for this revlog
     nodemap: Option<nodemap::NodeTree>,
 }
 
 impl Graph for Revlog {
     fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
-        self.index.parents(rev)
-    }
-}
-
-#[derive(Debug, Copy, Clone, PartialEq)]
-pub enum RevlogVersionOptions {
-    V0,
-    V1 { general_delta: bool, inline: bool },
-    V2,
-    ChangelogV2 { compute_rank: bool },
-}
-
-/// Options to govern how a revlog should be opened, usually from the
-/// repository configuration or requirements.
-#[derive(Debug, Copy, Clone)]
-pub struct RevlogOpenOptions {
-    /// The revlog version, along with any option specific to this version
-    pub version: RevlogVersionOptions,
-    /// Whether the revlog uses a persistent nodemap.
-    pub use_nodemap: bool,
-    pub delta_config: RevlogDeltaConfig,
-    pub data_config: RevlogDataConfig,
-    pub feature_config: RevlogFeatureConfig,
-}
-
-#[cfg(test)]
-impl Default for RevlogOpenOptions {
-    fn default() -> Self {
-        Self {
-            version: RevlogVersionOptions::V1 {
-                general_delta: true,
-                inline: false,
-            },
-            use_nodemap: true,
-            data_config: Default::default(),
-            delta_config: Default::default(),
-            feature_config: Default::default(),
-        }
+        self.index().parents(rev)
     }
 }
-
-impl RevlogOpenOptions {
-    pub fn new(
-        inline: bool,
-        data_config: RevlogDataConfig,
-        delta_config: RevlogDeltaConfig,
-        feature_config: RevlogFeatureConfig,
-    ) -> Self {
-        Self {
-            version: RevlogVersionOptions::V1 {
-                general_delta: data_config.general_delta,
-                inline,
-            },
-            use_nodemap: false,
-            data_config,
-            delta_config,
-            feature_config,
-        }
-    }
-
-    pub fn index_header(&self) -> index::IndexHeader {
-        index::IndexHeader {
-            header_bytes: match self.version {
-                RevlogVersionOptions::V0 => [0, 0, 0, 0],
-                RevlogVersionOptions::V1 {
-                    general_delta,
-                    inline,
-                } => [
-                    0,
-                    if general_delta && inline {
-                        3
-                    } else if general_delta {
-                        2
-                    } else {
-                        u8::from(inline)
-                    },
-                    0,
-                    1,
-                ],
-                RevlogVersionOptions::V2 => 0xDEADu32.to_be_bytes(),
-                RevlogVersionOptions::ChangelogV2 { compute_rank: _ } => {
-                    0xD34Du32.to_be_bytes()
-                }
-            },
-        }
-    }
-}
-
 impl Revlog {
     /// Open a revlog index file.
     ///
@@ -728,6 +284,10 @@
         Self::open_gen(store_vfs, index_path, data_path, options, None)
     }
 
+    fn index(&self) -> &Index {
+        &self.inner.index
+    }
+
     fn open_gen(
         // Todo use the `Vfs` trait here once we create a function for mmap
         store_vfs: &VfsImpl,
@@ -737,37 +297,10 @@
         nodemap_for_test: Option<nodemap::NodeTree>,
     ) -> Result<Self, HgError> {
         let index_path = index_path.as_ref();
-        let index = {
-            match store_vfs.mmap_open_opt(index_path)? {
-                None => Index::new(
-                    Box::<Vec<_>>::default(),
-                    options.index_header(),
-                ),
-                Some(index_mmap) => {
-                    let index = Index::new(
-                        Box::new(index_mmap),
-                        options.index_header(),
-                    )?;
-                    Ok(index)
-                }
-            }
-        }?;
+        let index = open_index(store_vfs, index_path, options)?;
 
         let default_data_path = index_path.with_extension("d");
-
-        // type annotation required
-        // won't recognize Mmap as Deref<Target = [u8]>
-        let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
-            if index.is_inline() {
-                None
-            } else if index.is_empty() {
-                // No need to even try to open the data file then.
-                Some(Box::new(&[][..]))
-            } else {
-                let data_path = data_path.unwrap_or(&default_data_path);
-                let data_mmap = store_vfs.mmap_open(data_path)?;
-                Some(Box::new(data_mmap))
-            };
+        let data_path = data_path.unwrap_or(&default_data_path);
 
         let nodemap = if index.is_inline() || !options.use_nodemap {
             None
@@ -785,20 +318,27 @@
         let nodemap = nodemap_for_test.or(nodemap);
 
         Ok(Revlog {
-            index,
-            data_bytes,
+            inner: InnerRevlog::new(
+                Box::new(store_vfs.clone()),
+                index,
+                index_path.to_path_buf(),
+                data_path.to_path_buf(),
+                options.data_config,
+                options.delta_config,
+                options.feature_config,
+            ),
             nodemap,
         })
     }
 
     /// Return number of entries of the `Revlog`.
     pub fn len(&self) -> usize {
-        self.index.len()
+        self.index().len()
     }
 
     /// Returns `true` if the `Revlog` has zero `entries`.
     pub fn is_empty(&self) -> bool {
-        self.index.is_empty()
+        self.index().is_empty()
     }
 
     /// Returns the node ID for the given revision number, if it exists in this
@@ -807,8 +347,8 @@
         if rev == NULL_REVISION.into() {
             return Some(&NULL_NODE);
         }
-        let rev = self.index.check_revision(rev)?;
-        Some(self.index.get_entry(rev)?.hash())
+        let rev = self.index().check_revision(rev)?;
+        Some(self.index().get_entry(rev)?.hash())
     }
 
     /// Return the revision number for the given node ID, if it exists in this
@@ -819,56 +359,30 @@
     ) -> Result<Revision, RevlogError> {
         if let Some(nodemap) = &self.nodemap {
             nodemap
-                .find_bin(&self.index, node)?
+                .find_bin(self.index(), node)?
                 .ok_or(RevlogError::InvalidRevision(format!("{:x}", node)))
         } else {
-            self.rev_from_node_no_persistent_nodemap(node)
+            self.index().rev_from_node_no_persistent_nodemap(node)
         }
     }
 
-    /// Same as `rev_from_node`, without using a persistent nodemap
-    ///
-    /// This is used as fallback when a persistent nodemap is not present.
-    /// This happens when the persistent-nodemap experimental feature is not
-    /// enabled, or for small revlogs.
-    fn rev_from_node_no_persistent_nodemap(
-        &self,
-        node: NodePrefix,
-    ) -> Result<Revision, RevlogError> {
-        // Linear scan of the revlog
-        // TODO: consider building a non-persistent nodemap in memory to
-        // optimize these cases.
-        let mut found_by_prefix = None;
-        for rev in (-1..self.len() as BaseRevision).rev() {
-            let rev = Revision(rev as BaseRevision);
-            let candidate_node = if rev == Revision(-1) {
-                NULL_NODE
-            } else {
-                let index_entry =
-                    self.index.get_entry(rev).ok_or_else(|| {
-                        HgError::corrupted(
-                            "revlog references a revision not in the index",
-                        )
-                    })?;
-                *index_entry.hash()
-            };
-            if node == candidate_node {
-                return Ok(rev);
-            }
-            if node.is_prefix_of(&candidate_node) {
-                if found_by_prefix.is_some() {
-                    return Err(RevlogError::AmbiguousPrefix);
-                }
-                found_by_prefix = Some(rev)
-            }
-        }
-        found_by_prefix
-            .ok_or(RevlogError::InvalidRevision(format!("{:x}", node)))
-    }
-
     /// Returns whether the given revision exists in this revlog.
     pub fn has_rev(&self, rev: UncheckedRevision) -> bool {
-        self.index.check_revision(rev).is_some()
+        self.index().check_revision(rev).is_some()
+    }
+
+    pub fn get_entry_for_checked_rev(
+        &self,
+        rev: Revision,
+    ) -> Result<RevlogEntry, RevlogError> {
+        self.inner.get_entry_for_checked_rev(rev)
+    }
+
+    pub fn get_entry(
+        &self,
+        rev: UncheckedRevision,
+    ) -> Result<RevlogEntry, RevlogError> {
+        self.inner.get_entry(rev)
     }
 
     /// Return the full data associated to a revision.
@@ -905,174 +419,154 @@
         expected: &[u8],
         data: &[u8],
     ) -> bool {
-        let e1 = self.index.get_entry(p1);
-        let h1 = match e1 {
-            Some(ref entry) => entry.hash(),
-            None => &NULL_NODE,
-        };
-        let e2 = self.index.get_entry(p2);
-        let h2 = match e2 {
-            Some(ref entry) => entry.hash(),
-            None => &NULL_NODE,
-        };
-
-        hash(data, h1.as_bytes(), h2.as_bytes()) == expected
+        self.inner.check_hash(p1, p2, expected, data)
     }
 
     /// Build the full data of a revision out its snapshot
     /// and its deltas.
-    fn build_data_from_deltas(
-        snapshot: RevlogEntry,
-        deltas: &[RevlogEntry],
-    ) -> Result<Vec<u8>, HgError> {
-        let snapshot = snapshot.data_chunk()?;
-        let deltas = deltas
-            .iter()
-            .rev()
-            .map(RevlogEntry::data_chunk)
-            .collect::<Result<Vec<_>, _>>()?;
-        let patches: Vec<_> =
-            deltas.iter().map(|d| patch::PatchList::new(d)).collect();
-        let patch = patch::fold_patch_lists(&patches);
-        Ok(patch.apply(&snapshot))
-    }
-
-    /// Return the revlog data.
-    fn data(&self) -> &[u8] {
-        match &self.data_bytes {
-            Some(data_bytes) => data_bytes,
-            None => panic!(
-                "forgot to load the data or trying to access inline data"
-            ),
-        }
-    }
-
-    pub fn make_null_entry(&self) -> RevlogEntry {
-        RevlogEntry {
-            revlog: self,
-            rev: NULL_REVISION,
-            bytes: b"",
-            compressed_len: 0,
-            uncompressed_len: 0,
-            base_rev_or_base_of_delta_chain: None,
-            p1: NULL_REVISION,
-            p2: NULL_REVISION,
-            flags: NULL_REVLOG_ENTRY_FLAGS,
-            hash: NULL_NODE,
-        }
-    }
-
-    fn get_entry_for_checked_rev(
-        &self,
-        rev: Revision,
-    ) -> Result<RevlogEntry, RevlogError> {
-        if rev == NULL_REVISION {
-            return Ok(self.make_null_entry());
+    fn build_data_from_deltas<T>(
+        buffer: &mut dyn RevisionBuffer<Target = T>,
+        snapshot: &[u8],
+        deltas: &[impl AsRef<[u8]>],
+    ) -> Result<(), RevlogError> {
+        if deltas.is_empty() {
+            buffer.extend_from_slice(snapshot);
+            return Ok(());
         }
-        let index_entry = self
-            .index
-            .get_entry(rev)
-            .ok_or(RevlogError::InvalidRevision(rev.to_string()))?;
-        let offset = index_entry.offset();
-        let start = if self.index.is_inline() {
-            offset + ((rev.0 as usize + 1) * INDEX_ENTRY_SIZE)
-        } else {
-            offset
-        };
-        let end = start + index_entry.compressed_len() as usize;
-        let data = if self.index.is_inline() {
-            self.index.data(start, end)
-        } else {
-            &self.data()[start..end]
-        };
-        let base_rev = self
-            .index
-            .check_revision(index_entry.base_revision_or_base_of_delta_chain())
-            .ok_or_else(|| {
-                RevlogError::corrupted(format!(
-                    "base revision for rev {} is invalid",
-                    rev
-                ))
-            })?;
-        let p1 =
-            self.index.check_revision(index_entry.p1()).ok_or_else(|| {
-                RevlogError::corrupted(format!(
-                    "p1 for rev {} is invalid",
-                    rev
-                ))
-            })?;
-        let p2 =
-            self.index.check_revision(index_entry.p2()).ok_or_else(|| {
-                RevlogError::corrupted(format!(
-                    "p2 for rev {} is invalid",
-                    rev
-                ))
-            })?;
-        let entry = RevlogEntry {
-            revlog: self,
-            rev,
-            bytes: data,
-            compressed_len: index_entry.compressed_len(),
-            uncompressed_len: index_entry.uncompressed_len(),
-            base_rev_or_base_of_delta_chain: if base_rev == rev {
+        let patches: Result<Vec<_>, _> = deltas
+            .iter()
+            .map(|d| patch::PatchList::new(d.as_ref()))
+            .collect();
+        let patch = patch::fold_patch_lists(&patches?);
+        patch.apply(buffer, snapshot);
+        Ok(())
+    }
+}
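+
+// Editorial note: `build_data_from_deltas` folds every patch list into a
+// single combined patch before applying it to the base text, avoiding the
+// cost of materializing each intermediate revision along the delta chain.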
+
+type IndexData = Box<dyn Deref<Target = [u8]> + Send + Sync>;
+
+/// TODO We should check for kernel version 5.14+ at runtime, but we would
+/// either have to add the `nix` dependency to get it efficiently, or vendor
+/// the code that reads it, both of which are overkill for such a feature.
+/// If we need this dependency for more things later, we'll use it here too.
+#[cfg(target_os = "linux")]
+fn can_advise_populate_read() -> bool {
+    true
+}
+
+#[cfg(not(target_os = "linux"))]
+fn can_advise_populate_read() -> bool {
+    false
+}
+
+/// Call `madvise` on the mmap with `MADV_POPULATE_READ` in a separate thread
+/// to populate the mmap in the background for a small perf improvement.
+#[cfg(target_os = "linux")]
+fn advise_populate_read_mmap(mmap: &memmap2::Mmap) {
+    const MADV_POPULATE_READ: i32 = 22;
+
+    // This is fine because the mmap is still referenced for at least
+    // the duration of this function, and the kernel will reject any wrong
+    // address.
+    let ptr = mmap.as_ptr() as u64;
+    let len = mmap.len();
+
+    // Fire and forget. The `JoinHandle` returned by `spawn` is dropped right
+    // after the call, the thread is thus detached. We don't care about success
+    // or failure here.
+    std::thread::spawn(move || unsafe {
+        // mmap's pointer is always page-aligned on Linux. In the case of
+        // file-based mmap (which is our use-case), the length should be
+        // correct. If not, it's not a safety concern as the kernel will just
+        // ignore unmapped pages and return ENOMEM, which we will promptly
+        // ignore, because we don't care about any errors.
+        libc::madvise(ptr as *mut libc::c_void, len, MADV_POPULATE_READ);
+    });
+}
+
+#[cfg(not(target_os = "linux"))]
+fn advise_populate_read_mmap(mmap: &memmap2::Mmap) {}
+
+/// Open the revlog [`Index`] at `index_path`, through the `store_vfs` and the
+/// given `options`. This controls whether (and how) we `mmap` the index file,
+/// and returns an empty buffer if the index does not exist on disk.
+/// This is only used when doing pure-Rust work; in Python contexts it is
+/// unused at the time of writing.
+pub fn open_index(
+    store_vfs: &impl Vfs,
+    index_path: &Path,
+    options: RevlogOpenOptions,
+) -> Result<Index, HgError> {
+    let buf: IndexData = match store_vfs.open_read(index_path) {
+        Ok(mut file) => {
+            let mut buf = if let Some(threshold) =
+                options.data_config.mmap_index_threshold
+            {
+                let size = store_vfs.file_size(&file)?;
+                if size >= threshold {
+                    let mut mmap_options = MmapOptions::new();
+                    if !can_advise_populate_read() {
+                        // Fall back to populating in the main thread if
+                        // post-creation advice is not supported.
+                        // Does nothing on platforms where it's not defined.
+                        mmap_options.populate();
+                    }
+                    // Safety is "enforced" by locks and assuming other
+                    // processes are well-behaved. If any misbehaving or
+                    // malicious process does touch the index, it could lead
+                    // to corruption. This is somewhat inherent to file-based
+                    // `mmap`, though some platforms have some ways of
+                    // mitigating.
+                    // TODO linux: set the immutable flag with `chattr(1)`?
+                    let mmap = unsafe { mmap_options.map(&file) }
+                        .when_reading_file(index_path)?;
+
+                    if can_advise_populate_read() {
+                        advise_populate_read_mmap(&mmap);
+                    }
+
+                    Some(Box::new(mmap) as IndexData)
+                } else {
+                    None
+                }
+            } else {
                 None
-            } else {
-                Some(base_rev)
-            },
-            p1,
-            p2,
-            flags: index_entry.flags(),
-            hash: *index_entry.hash(),
-        };
-        Ok(entry)
-    }
+            };
 
-    /// Get an entry of the revlog.
-    pub fn get_entry(
-        &self,
-        rev: UncheckedRevision,
-    ) -> Result<RevlogEntry, RevlogError> {
-        if rev == NULL_REVISION.into() {
-            return Ok(self.make_null_entry());
+            if buf.is_none() {
+                let mut data = vec![];
+                file.read_to_end(&mut data).when_reading_file(index_path)?;
+                buf = Some(Box::new(data) as IndexData);
+            }
+            buf.unwrap()
         }
-        let rev = self.index.check_revision(rev).ok_or_else(|| {
-            RevlogError::corrupted(format!("rev {} is invalid", rev))
-        })?;
-        self.get_entry_for_checked_rev(rev)
-    }
+        Err(err) => match err {
+            HgError::IoError { error, context } => match error.kind() {
+                ErrorKind::NotFound => Box::<Vec<u8>>::default(),
+                _ => return Err(HgError::IoError { error, context }),
+            },
+            e => return Err(e),
+        },
+    };
+
+    let index = Index::new(buf, options.index_header())?;
+    Ok(index)
 }
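+
+// Usage sketch (editorial; `store_vfs` and `opts` are assumed to come from
+// the repository being opened):
+//
+//     let index = open_index(&store_vfs, Path::new("00changelog.i"), opts)?;
+//     assert_eq!(index.is_empty(), index.len() == 0);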
 
 /// The revlog entry's bytes and the necessary information to extract
 /// the entry's data.
 #[derive(Clone)]
 pub struct RevlogEntry<'revlog> {
-    revlog: &'revlog Revlog,
+    revlog: &'revlog InnerRevlog,
     rev: Revision,
-    bytes: &'revlog [u8],
-    compressed_len: u32,
     uncompressed_len: i32,
-    base_rev_or_base_of_delta_chain: Option<Revision>,
     p1: Revision,
     p2: Revision,
     flags: u16,
     hash: Node,
 }
 
-thread_local! {
-  // seems fine to [unwrap] here: this can only fail due to memory allocation
-  // failing, and it's normal for that to cause panic.
-  static ZSTD_DECODER : RefCell<zstd::bulk::Decompressor<'static>> =
-      RefCell::new(zstd::bulk::Decompressor::new().ok().unwrap());
-}
-
-fn zstd_decompress_to_buffer(
-    bytes: &[u8],
-    buf: &mut Vec<u8>,
-) -> Result<usize, std::io::Error> {
-    ZSTD_DECODER
-        .with(|decoder| decoder.borrow_mut().decompress_to_buffer(bytes, buf))
-}
-
 impl<'revlog> RevlogEntry<'revlog> {
     pub fn revision(&self) -> Revision {
         self.rev
@@ -1137,33 +631,47 @@
     }
 
     /// The data for this entry, after resolving deltas if any.
-    pub fn rawdata(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> {
-        let mut entry = self.clone();
-        let mut delta_chain = vec![];
+    /// Non-Python callers should probably call [`Self::data`] instead.
+    fn rawdata<G, T>(
+        &self,
+        stop_rev: Option<(Revision, &[u8])>,
+        with_buffer: G,
+    ) -> Result<(), RevlogError>
+    where
+        G: FnOnce(
+            usize,
+            &mut dyn FnMut(
+                &mut dyn RevisionBuffer<Target = T>,
+            ) -> Result<(), RevlogError>,
+        ) -> Result<(), RevlogError>,
+    {
+        let (delta_chain, stopped) = self
+            .revlog
+            .delta_chain(self.revision(), stop_rev.map(|(r, _)| r))?;
+        let target_size =
+            self.uncompressed_len().map(|raw_size| 4 * raw_size as u64);
 
-        // The meaning of `base_rev_or_base_of_delta_chain` depends on
-        // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
-        // `mercurial/revlogutils/constants.py` and the code in
-        // [_chaininfo] and in [index_deltachain].
-        let uses_generaldelta = self.revlog.index.uses_generaldelta();
-        while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
-            entry = if uses_generaldelta {
-                delta_chain.push(entry);
-                self.revlog.get_entry_for_checked_rev(base_rev)?
-            } else {
-                let base_rev = UncheckedRevision(entry.rev.0 - 1);
-                delta_chain.push(entry);
-                self.revlog.get_entry(base_rev)?
-            };
-        }
+        let deltas = self.revlog.chunks(delta_chain, target_size)?;
 
-        let data = if delta_chain.is_empty() {
-            entry.data_chunk()?
+        let (base_text, deltas) = if stopped {
+            (
+                stop_rev.as_ref().expect("last revision should be cached").1,
+                &deltas[..],
+            )
         } else {
-            Revlog::build_data_from_deltas(entry, &delta_chain)?.into()
+            let (buf, deltas) = deltas.split_at(1);
+            (buf[0].as_ref(), deltas)
         };
 
-        Ok(data)
+        let size = self
+            .uncompressed_len()
+            .map(|l| l as usize)
+            .unwrap_or(base_text.len());
+        with_buffer(size, &mut |buf| {
+            Revlog::build_data_from_deltas(buf, base_text, deltas)?;
+            Ok(())
+        })?;
+        Ok(())
     }
 
     fn check_data(
@@ -1193,128 +701,35 @@
     }
 
     pub fn data(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> {
-        let data = self.rawdata()?;
+        // TODO figure out if there is ever a need for `Cow` here anymore.
+        let mut data = CoreRevisionBuffer::new();
         if self.rev == NULL_REVISION {
-            return Ok(data);
+            return Ok(data.finish().into());
         }
+        self.rawdata(None, |size, f| {
+            // Pre-allocate the expected size (received from the index)
+            data.resize(size);
+            // Actually fill the buffer
+            f(&mut data)?;
+            Ok(())
+        })?;
         if self.is_censored() {
             return Err(HgError::CensoredNodeError.into());
         }
-        self.check_data(data)
-    }
-
-    /// Extract the data contained in the entry.
-    /// This may be a delta. (See `is_delta`.)
-    fn data_chunk(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
-        if self.bytes.is_empty() {
-            return Ok(Cow::Borrowed(&[]));
-        }
-        match self.bytes[0] {
-            // Revision data is the entirety of the entry, including this
-            // header.
-            b'\0' => Ok(Cow::Borrowed(self.bytes)),
-            // Raw revision data follows.
-            b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
-            // zlib (RFC 1950) data.
-            b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
-            // zstd data.
-            b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
-            // A proper new format should have had a repo/store requirement.
-            format_type => Err(corrupted(format!(
-                "unknown compression header '{}'",
-                format_type
-            ))),
-        }
-    }
-
-    fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
-        let mut decoder = ZlibDecoder::new(self.bytes);
-        if self.is_delta() {
-            let mut buf = Vec::with_capacity(self.compressed_len as usize);
-            decoder
-                .read_to_end(&mut buf)
-                .map_err(|e| corrupted(e.to_string()))?;
-            Ok(buf)
-        } else {
-            let cap = self.uncompressed_len.max(0) as usize;
-            let mut buf = vec![0; cap];
-            decoder
-                .read_exact(&mut buf)
-                .map_err(|e| corrupted(e.to_string()))?;
-            Ok(buf)
-        }
+        self.check_data(data.finish().into())
     }
-
-    fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
-        let cap = self.uncompressed_len.max(0) as usize;
-        if self.is_delta() {
-            // [cap] is usually an over-estimate of the space needed because
-            // it's the length of delta-decoded data, but we're interested
-            // in the size of the delta.
-            // This means we have to [shrink_to_fit] to avoid holding on
-            // to a large chunk of memory, but it also means we must have a
-            // fallback branch, for the case when the delta is longer than
-            // the original data (surprisingly, this does happen in practice)
-            let mut buf = Vec::with_capacity(cap);
-            match zstd_decompress_to_buffer(self.bytes, &mut buf) {
-                Ok(_) => buf.shrink_to_fit(),
-                Err(_) => {
-                    buf.clear();
-                    zstd::stream::copy_decode(self.bytes, &mut buf)
-                        .map_err(|e| corrupted(e.to_string()))?;
-                }
-            };
-            Ok(buf)
-        } else {
-            let mut buf = Vec::with_capacity(cap);
-            let len = zstd_decompress_to_buffer(self.bytes, &mut buf)
-                .map_err(|e| corrupted(e.to_string()))?;
-            if len != self.uncompressed_len as usize {
-                Err(corrupted("uncompressed length does not match"))
-            } else {
-                Ok(buf)
-            }
-        }
-    }
-
-    /// Tell if the entry is a snapshot or a delta
-    /// (influences on decompression).
-    fn is_delta(&self) -> bool {
-        self.base_rev_or_base_of_delta_chain.is_some()
-    }
-}
-
-/// Calculate the hash of a revision given its data and its parents.
-fn hash(
-    data: &[u8],
-    p1_hash: &[u8],
-    p2_hash: &[u8],
-) -> [u8; NODE_BYTES_LENGTH] {
-    let mut hasher = Sha1::new();
-    let (a, b) = (p1_hash, p2_hash);
-    if a > b {
-        hasher.update(b);
-        hasher.update(a);
-    } else {
-        hasher.update(a);
-        hasher.update(b);
-    }
-    hasher.update(data);
-    *hasher.finalize().as_ref()
 }
 
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::index::IndexEntryBuilder;
+    use crate::revlog::index::IndexEntryBuilder;
     use itertools::Itertools;
 
     #[test]
     fn test_empty() {
         let temp = tempfile::tempdir().unwrap();
-        let vfs = VfsImpl {
-            base: temp.path().to_owned(),
-        };
+        let vfs = VfsImpl::new(temp.path().to_owned(), false);
         std::fs::write(temp.path().join("foo.i"), b"").unwrap();
         std::fs::write(temp.path().join("foo.d"), b"").unwrap();
         let revlog =
@@ -1336,9 +751,7 @@
     #[test]
     fn test_inline() {
         let temp = tempfile::tempdir().unwrap();
-        let vfs = VfsImpl {
-            base: temp.path().to_owned(),
-        };
+        let vfs = VfsImpl::new(temp.path().to_owned(), false);
         let node0 = Node::from_hex("2ed2a3912a0b24502043eae84ee4b279c18b90dd")
             .unwrap();
         let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
@@ -1405,9 +818,7 @@
     #[test]
     fn test_nodemap() {
         let temp = tempfile::tempdir().unwrap();
-        let vfs = VfsImpl {
-            base: temp.path().to_owned(),
-        };
+        let vfs = VfsImpl::new(temp.path().to_owned(), false);
 
         // building a revlog with a forced Node starting with zeros
         // This is a corruption, but it does not preclude using the nodemap
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rust/hg-core/src/revlog/options.rs	Wed Nov 20 15:53:19 2024 +0100
@@ -0,0 +1,421 @@
+//! Helpers for the revlog config and opening options
+
+use std::collections::HashSet;
+
+use crate::{
+    config::{Config, ResourceProfileValue},
+    errors::HgError,
+    requirements::{
+        CHANGELOGV2_REQUIREMENT, GENERALDELTA_REQUIREMENT, NARROW_REQUIREMENT,
+        NODEMAP_REQUIREMENT, REVLOGV1_REQUIREMENT, REVLOGV2_REQUIREMENT,
+        SPARSEREVLOG_REQUIREMENT,
+    },
+};
+
+use super::{compression::CompressionConfig, RevlogType};
+
+const DEFAULT_CHUNK_CACHE_SIZE: u64 = 65536;
+const DEFAULT_SPARSE_READ_DENSITY_THRESHOLD: f64 = 0.50;
+const DEFAULT_SPARSE_READ_MIN_GAP_SIZE: u64 = 262144;
+
+/// The known revlog versions and their options
+#[derive(Debug, Copy, Clone, PartialEq)]
+pub enum RevlogVersionOptions {
+    V0,
+    V1 { general_delta: bool, inline: bool },
+    V2,
+    ChangelogV2 { compute_rank: bool },
+}
+
+/// Options to govern how a revlog should be opened, usually from the
+/// repository configuration or requirements.
+#[derive(Debug, Copy, Clone)]
+pub struct RevlogOpenOptions {
+    /// The revlog version, along with any option specific to this version
+    pub version: RevlogVersionOptions,
+    /// Whether the revlog uses a persistent nodemap.
+    pub use_nodemap: bool,
+    pub delta_config: RevlogDeltaConfig,
+    pub data_config: RevlogDataConfig,
+    pub feature_config: RevlogFeatureConfig,
+}
+
+#[cfg(test)]
+impl Default for RevlogOpenOptions {
+    fn default() -> Self {
+        Self {
+            version: RevlogVersionOptions::V1 {
+                general_delta: true,
+                inline: false,
+            },
+            use_nodemap: true,
+            data_config: Default::default(),
+            delta_config: Default::default(),
+            feature_config: Default::default(),
+        }
+    }
+}
+
+impl RevlogOpenOptions {
+    pub fn new(
+        inline: bool,
+        data_config: RevlogDataConfig,
+        delta_config: RevlogDeltaConfig,
+        feature_config: RevlogFeatureConfig,
+    ) -> Self {
+        Self {
+            version: RevlogVersionOptions::V1 {
+                general_delta: data_config.general_delta,
+                inline,
+            },
+            use_nodemap: false,
+            data_config,
+            delta_config,
+            feature_config,
+        }
+    }
+
+    pub fn index_header(&self) -> super::index::IndexHeader {
+        super::index::IndexHeader {
+            header_bytes: match self.version {
+                RevlogVersionOptions::V0 => [0, 0, 0, 0],
+                RevlogVersionOptions::V1 {
+                    general_delta,
+                    inline,
+                } => [
+                    0,
+                    if general_delta && inline {
+                        3
+                    } else if general_delta {
+                        2
+                    } else {
+                        u8::from(inline)
+                    },
+                    0,
+                    1,
+                ],
+                RevlogVersionOptions::V2 => 0xDEADu32.to_be_bytes(),
+                RevlogVersionOptions::ChangelogV2 { compute_rank: _ } => {
+                    0xD34Du32.to_be_bytes()
+                }
+            },
+        }
+    }
+}
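
As a sanity check on the header packing above: byte 1 of a v1 header is a two-bit field (bit 0 = inline, bit 1 = general delta) and the trailing bytes carry the version number. A small illustrative helper, not part of the change itself:

```rust
// Illustrative restatement of the v1 flag byte computed in `index_header`.
fn v1_flag_byte(general_delta: bool, inline: bool) -> u8 {
    (u8::from(general_delta) << 1) | u8::from(inline)
}

#[test]
fn v1_flag_byte_matches_index_header() {
    assert_eq!(v1_flag_byte(true, true), 3);
    assert_eq!(v1_flag_byte(true, false), 2);
    assert_eq!(v1_flag_byte(false, true), 1);
    assert_eq!(v1_flag_byte(false, false), 0);
}
```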
+
+/// Technically only Linux 2.5.46+ has `MAP_POPULATE`, and only 2.6.23+
+/// supports it on private mappings, but if you're running such an ancient
+/// Linux, you have other problems.
+#[cfg(target_os = "linux")]
+const fn can_populate_mmap() -> bool {
+    true
+}
+
+/// There is a way of populating mmaps for Windows, but it would need
+/// testing.
+#[cfg(not(target_os = "linux"))]
+const fn can_populate_mmap() -> bool {
+    false
+}
+
+#[derive(Debug, Clone, Copy, PartialEq)]
+/// Holds configuration values about how the revlog data is read
+pub struct RevlogDataConfig {
+    /// Should we try to open the "pending" version of the revlog
+    pub try_pending: bool,
+    /// Should we try to open the "split" version of the revlog
+    pub try_split: bool,
+    /// When True, `indexfile` should be opened with `checkambig=True` at
+    /// writing time, to avoid file stat ambiguity
+    pub check_ambig: bool,
+    /// If true, use mmap instead of reading to deal with large indexes
+    pub mmap_large_index: bool,
+    /// How much data is considered large
+    pub mmap_index_threshold: Option<u64>,
+    /// How much data to read and cache into the raw revlog data cache
+    pub chunk_cache_size: u64,
+    /// The size of the uncompressed cache compared to the largest revision
+    /// seen
+    pub uncompressed_cache_factor: Option<f64>,
+    /// The number of chunks cached
+    pub uncompressed_cache_count: Option<u64>,
+    /// Allow sparse reading of the revlog data
+    pub with_sparse_read: bool,
+    /// Minimal density of a sparse read chunk
+    pub sr_density_threshold: f64,
+    /// Minimal size of the data we skip when performing sparse reads
+    pub sr_min_gap_size: u64,
+    /// Whether deltas are encoded against arbitrary bases
+    pub general_delta: bool,
+}
+
+impl RevlogDataConfig {
+    pub fn new(
+        config: &Config,
+        requirements: &HashSet<String>,
+    ) -> Result<Self, HgError> {
+        let mut data_config = Self::default();
+        if let Some(chunk_cache_size) =
+            config.get_byte_size(b"format", b"chunkcachesize")?
+        {
+            data_config.chunk_cache_size = chunk_cache_size;
+        }
+
+        let memory_profile = config.get_resource_profile(Some("memory"));
+        if memory_profile.value >= ResourceProfileValue::Medium {
+            data_config.uncompressed_cache_count = Some(10_000);
+            data_config.uncompressed_cache_factor = Some(4.0);
+            if memory_profile.value >= ResourceProfileValue::High {
+                data_config.uncompressed_cache_factor = Some(10.0)
+            }
+        }
+
+        // Use mmap if requested, or by default if we can fully populate it
+        let mmap_index = config
+            .get_option_no_default(b"storage", b"revlog.mmap.index")?
+            .unwrap_or(can_populate_mmap());
+        if mmap_index {
+            if let Some(mmap_index_threshold) = config.get_byte_size(
+                b"storage",
+                b"revlog.mmap.index:size-threshold",
+            )? {
+                // Only mmap if above the requested size threshold
+                data_config.mmap_index_threshold = Some(mmap_index_threshold);
+            }
+        }
+
+        let with_sparse_read =
+            config.get_bool(b"experimental", b"sparse-read")?;
+        if let Some(sr_density_threshold) = config
+            .get_f64(b"experimental", b"sparse-read.density-threshold")?
+        {
+            data_config.sr_density_threshold = sr_density_threshold;
+        }
+        data_config.with_sparse_read = with_sparse_read;
+        if let Some(sr_min_gap_size) = config
+            .get_byte_size(b"experimental", b"sparse-read.min-gap-size")?
+        {
+            data_config.sr_min_gap_size = sr_min_gap_size;
+        }
+
+        // A sparse-revlog requirement implies sparse reads, on top of the
+        // experimental config read above.
+        data_config.with_sparse_read |=
+            requirements.contains(SPARSEREVLOG_REQUIREMENT);
+
+        Ok(data_config)
+    }
+}
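
The resource-profile tuning above is easy to misread, so restated: the `medium` memory profile enables an uncompressed cache of 10,000 chunks at 4x the largest revision seen, and `high` only raises the factor to 10x. A compact restatement (assuming `ResourceProfileValue` in scope, as in this file):

```rust
// Illustrative mapping from memory profile to uncompressed-cache settings,
// as implemented in `RevlogDataConfig::new` above.
fn uncompressed_cache(
    profile: ResourceProfileValue,
) -> (Option<u64>, Option<f64>) {
    if profile >= ResourceProfileValue::High {
        (Some(10_000), Some(10.0))
    } else if profile >= ResourceProfileValue::Medium {
        (Some(10_000), Some(4.0))
    } else {
        (None, None)
    }
}
```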
+
+impl Default for RevlogDataConfig {
+    fn default() -> Self {
+        Self {
+            chunk_cache_size: DEFAULT_CHUNK_CACHE_SIZE,
+            sr_density_threshold: DEFAULT_SPARSE_READ_DENSITY_THRESHOLD,
+            sr_min_gap_size: DEFAULT_SPARSE_READ_MIN_GAP_SIZE,
+            try_pending: Default::default(),
+            try_split: Default::default(),
+            check_ambig: Default::default(),
+            mmap_large_index: Default::default(),
+            mmap_index_threshold: Default::default(),
+            uncompressed_cache_factor: Default::default(),
+            uncompressed_cache_count: Default::default(),
+            with_sparse_read: Default::default(),
+            general_delta: Default::default(),
+        }
+    }
+}
+
+#[derive(Debug, Clone, Copy, PartialEq)]
+/// Holds configuration values about how new deltas are computed.
+///
+/// Some attributes are duplicated from [`RevlogDataConfig`] to help keep
+/// each object self-contained.
+pub struct RevlogDeltaConfig {
+    /// Whether deltas can be encoded against arbitrary bases
+    pub general_delta: bool,
+    /// Allow sparse writing of the revlog data
+    pub sparse_revlog: bool,
+    /// Maximum length of a delta chain
+    pub max_chain_len: Option<u64>,
+    /// Maximum distance between a delta chain's start and end
+    pub max_deltachain_span: Option<u64>,
+    /// If not `None`, the expected maximal gain from compression for the
+    /// data content
+    pub upper_bound_comp: Option<f64>,
+    /// Should we try a delta against both parents
+    pub delta_both_parents: bool,
+    /// Test delta base candidate groups by chunks of this maximal size
+    pub candidate_group_chunk_size: u64,
+    /// Should we display debug information about delta computation
+    pub debug_delta: bool,
+    /// Trust incoming deltas by default
+    pub lazy_delta: bool,
+    /// Trust the base of incoming deltas by default
+    pub lazy_delta_base: bool,
+}
+
+impl RevlogDeltaConfig {
+    pub fn new(
+        config: &Config,
+        requirements: &HashSet<String>,
+        revlog_type: RevlogType,
+    ) -> Result<Self, HgError> {
+        let mut delta_config = Self {
+            delta_both_parents: config
+                .get_option_no_default(
+                    b"storage",
+                    b"revlog.optimize-delta-parent-choice",
+                )?
+                .unwrap_or(true),
+            candidate_group_chunk_size: config
+                .get_u64(
+                    b"storage",
+                    b"revlog.delta-parent-search.candidate-group-chunk-size",
+                )?
+                .unwrap_or_default(),
+            ..Default::default()
+        };
+
+        delta_config.debug_delta =
+            config.get_bool(b"debug", b"revlog.debug-delta")?;
+
+        delta_config.general_delta =
+            requirements.contains(GENERALDELTA_REQUIREMENT);
+
+        let lazy_delta =
+            config.get_bool(b"storage", b"revlog.reuse-external-delta")?;
+
+        if revlog_type == RevlogType::Manifestlog {
+            // upper bound of what we expect from compression
+            // (real life value seems to be 3)
+            delta_config.upper_bound_comp = Some(3.0)
+        }
+
+        let mut lazy_delta_base = false;
+        if lazy_delta {
+            lazy_delta_base = match config.get_option_no_default(
+                b"storage",
+                b"revlog.reuse-external-delta-parent",
+            )? {
+                Some(base) => base,
+                None => config.get_bool(b"format", b"generaldelta")?,
+            };
+        }
+        delta_config.lazy_delta = lazy_delta;
+        delta_config.lazy_delta_base = lazy_delta_base;
+
+        delta_config.max_deltachain_span =
+            match config.get_i64(b"experimental", b"maxdeltachainspan")? {
+                Some(span) => {
+                    if span < 0 {
+                        None
+                    } else {
+                        Some(span as u64)
+                    }
+                }
+                None => None,
+            };
+
+        delta_config.sparse_revlog =
+            requirements.contains(SPARSEREVLOG_REQUIREMENT);
+
+        delta_config.max_chain_len =
+            config.get_byte_size_no_default(b"format", b"maxchainlen")?;
+
+        Ok(delta_config)
+    }
+}
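
One convention above is worth spelling out: a negative `experimental.maxdeltachainspan` means "no limit", which the code maps to `None`. The same conversion can be written more tersely:

```rust
// Equivalent to the match in `RevlogDeltaConfig::new`: negative spans
// (meaning "unlimited") become `None`, everything else becomes `Some`.
fn max_deltachain_span(config_value: Option<i64>) -> Option<u64> {
    config_value.and_then(|span| u64::try_from(span).ok())
}
```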
+
+impl Default for RevlogDeltaConfig {
+    fn default() -> Self {
+        Self {
+            delta_both_parents: true,
+            lazy_delta: true,
+            general_delta: Default::default(),
+            sparse_revlog: Default::default(),
+            max_chain_len: Default::default(),
+            max_deltachain_span: Default::default(),
+            upper_bound_comp: Default::default(),
+            candidate_group_chunk_size: Default::default(),
+            debug_delta: Default::default(),
+            lazy_delta_base: Default::default(),
+        }
+    }
+}
+
+#[derive(Debug, Default, Clone, Copy, PartialEq)]
+/// Holds configuration values about the available revlog features
+pub struct RevlogFeatureConfig {
+    /// The compression engine and its options
+    pub compression_engine: CompressionConfig,
+    /// Can we use censor on this revlog
+    pub censorable: bool,
+    /// Does this revlog use the "side data" feature
+    pub has_side_data: bool,
+    /// This configuration might be removed once the rank computation has
+    /// no impact
+    pub compute_rank: bool,
+    /// Parent order is supposed to be semantically irrelevant, so we
+    /// normally re-sort parents to ensure that the first parent is non-null,
+    /// if there is a non-null parent at all.
+    /// filelog abuses the parent order as a flag to mark some instances of
+    /// meta-encoded files, so allow it to disable this behavior.
+    pub canonical_parent_order: bool,
+    /// Can ellipsis commits be used
+    pub enable_ellipsis: bool,
+}
+
+impl RevlogFeatureConfig {
+    pub fn new(
+        config: &Config,
+        requirements: &HashSet<String>,
+    ) -> Result<Self, HgError> {
+        Ok(Self {
+            compression_engine: CompressionConfig::new(config, requirements)?,
+            enable_ellipsis: requirements.contains(NARROW_REQUIREMENT),
+            ..Default::default()
+        })
+    }
+}
+
+/// Return the default options for a revlog of `revlog_type` according to the
+/// current config and requirements.
+pub fn default_revlog_options(
+    config: &Config,
+    requirements: &HashSet<String>,
+    revlog_type: RevlogType,
+) -> Result<RevlogOpenOptions, HgError> {
+    let is_changelog = revlog_type == RevlogType::Changelog;
+    let version =
+        if is_changelog && requirements.contains(CHANGELOGV2_REQUIREMENT) {
+            let compute_rank = config
+                .get_bool(b"experimental", b"changelog-v2.compute-rank")?;
+            RevlogVersionOptions::ChangelogV2 { compute_rank }
+        } else if requirements.contains(REVLOGV2_REQUIREMENT) {
+            RevlogVersionOptions::V2
+        } else if requirements.contains(REVLOGV1_REQUIREMENT) {
+            RevlogVersionOptions::V1 {
+                general_delta: requirements.contains(GENERALDELTA_REQUIREMENT),
+                inline: !is_changelog,
+            }
+        } else {
+            RevlogVersionOptions::V0
+        };
+    Ok(RevlogOpenOptions {
+        version,
+        // We don't need to dance around the slow path like in the Python
+        // implementation since we know we have access to the fast code.
+        use_nodemap: requirements.contains(NODEMAP_REQUIREMENT),
+        delta_config: RevlogDeltaConfig::new(
+            config,
+            requirements,
+            revlog_type,
+        )?,
+        data_config: RevlogDataConfig::new(config, requirements)?,
+        feature_config: RevlogFeatureConfig::new(config, requirements)?,
+    })
+}
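
A minimal usage sketch, assuming an already-open `Repo` and the module paths introduced in this change (it mirrors the call sites updated in `update.rs` further down):

```rust
use hg::errors::HgError;
use hg::repo::Repo;
use hg::revlog::options::{default_revlog_options, RevlogOpenOptions};
use hg::revlog::RevlogType;

// Derive filelog open options from the repo's config and requirements.
fn filelog_options(repo: &Repo) -> Result<RevlogOpenOptions, HgError> {
    default_revlog_options(
        repo.config(),
        repo.requirements(),
        RevlogType::Filelog,
    )
}
```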
--- a/rust/hg-core/src/revlog/patch.rs	Wed Nov 20 15:38:57 2024 +0100
+++ b/rust/hg-core/src/revlog/patch.rs	Wed Nov 20 15:53:19 2024 +0100
@@ -1,5 +1,9 @@
 use byteorder::{BigEndian, ByteOrder};
 
+use crate::revlog::RevlogError;
+
+use super::inner_revlog::RevisionBuffer;
+
 /// A chunk of data to insert, delete or replace in a patch
 ///
 /// A chunk is:
@@ -61,14 +65,16 @@
 
 impl<'a> PatchList<'a> {
     /// Create a `PatchList` from bytes.
-    pub fn new(data: &'a [u8]) -> Self {
+    pub fn new(data: &'a [u8]) -> Result<Self, RevlogError> {
         let mut chunks = vec![];
         let mut data = data;
         while !data.is_empty() {
             let start = BigEndian::read_u32(&data[0..]);
             let end = BigEndian::read_u32(&data[4..]);
             let len = BigEndian::read_u32(&data[8..]);
-            assert!(start <= end);
+            if start > end {
+                return Err(RevlogError::corrupted("patch cannot be decoded"));
+            }
             chunks.push(Chunk {
                 start,
                 end,
@@ -76,29 +82,23 @@
             });
             data = &data[12 + (len as usize)..];
         }
-        PatchList { chunks }
-    }
-
-    /// Return the final length of data after patching
-    /// given its initial length .
-    fn size(&self, initial_size: i32) -> i32 {
-        self.chunks
-            .iter()
-            .fold(initial_size, |acc, chunk| acc + chunk.len_diff())
+        Ok(PatchList { chunks })
     }
 
     /// Apply the patch to some data.
-    pub fn apply(&self, initial: &[u8]) -> Vec<u8> {
+    pub fn apply<T>(
+        &self,
+        buffer: &mut dyn RevisionBuffer<Target = T>,
+        initial: &[u8],
+    ) {
         let mut last: usize = 0;
-        let mut vec =
-            Vec::with_capacity(self.size(initial.len() as i32) as usize);
         for Chunk { start, end, data } in self.chunks.iter() {
-            vec.extend(&initial[last..(*start as usize)]);
-            vec.extend(data.iter());
+            let slice = &initial[last..(*start as usize)];
+            buffer.extend_from_slice(slice);
+            buffer.extend_from_slice(data);
             last = *end as usize;
         }
-        vec.extend(&initial[last..]);
-        vec
+        buffer.extend_from_slice(&initial[last..]);
     }
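
To make the chunk wire format concrete: each chunk is a 12-byte big-endian header (`start`, `end`, data length) followed by the data, and `apply` now streams into a `RevisionBuffer` instead of allocating its own `Vec`. A self-contained sketch (module paths assumed):

```rust
use hg::revlog::inner_revlog::CoreRevisionBuffer;
use hg::revlog::patch::PatchList;
use hg::revlog::RevlogError;

fn patch_demo() -> Result<(), RevlogError> {
    // One chunk: replace bytes 0..1 of the original with [1, 2].
    let mut raw = Vec::new();
    raw.extend_from_slice(&0u32.to_be_bytes()); // start
    raw.extend_from_slice(&1u32.to_be_bytes()); // end
    raw.extend_from_slice(&2u32.to_be_bytes()); // data length
    raw.extend_from_slice(&[1, 2]); // replacement data

    let patch = PatchList::new(&raw)?;
    let mut buffer = CoreRevisionBuffer::new();
    patch.apply(&mut buffer, &[0u8, 0, 0]);
    assert_eq!(buffer.finish(), vec![1u8, 2, 0, 0]);
    Ok(())
}
```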
 
     /// Combine two patch lists into a single patch list.
@@ -229,6 +229,8 @@
 
 #[cfg(test)]
 mod tests {
+    use crate::revlog::inner_revlog::CoreRevisionBuffer;
+
     use super::*;
 
     struct PatchDataBuilder {
@@ -264,17 +266,18 @@
         let data = vec![0u8, 0u8, 0u8];
         let mut patch1_data = PatchDataBuilder::new();
         patch1_data.replace(0, 1, &[1, 2]);
-        let mut patch1 = PatchList::new(patch1_data.get());
+        let mut patch1 = PatchList::new(patch1_data.get()).unwrap();
 
         let mut patch2_data = PatchDataBuilder::new();
         patch2_data.replace(2, 4, &[3, 4]);
-        let mut patch2 = PatchList::new(patch2_data.get());
+        let mut patch2 = PatchList::new(patch2_data.get()).unwrap();
 
         let patch = patch1.combine(&mut patch2);
 
-        let result = patch.apply(&data);
+        let mut buffer = CoreRevisionBuffer::new();
+        patch.apply(&mut buffer, &data);
 
-        assert_eq!(result, vec![1u8, 2, 3, 4]);
+        assert_eq!(buffer.finish(), vec![1u8, 2, 3, 4]);
     }
 
     #[test]
@@ -282,17 +285,18 @@
         let data = vec![0u8, 0u8, 0u8];
         let mut patch1_data = PatchDataBuilder::new();
         patch1_data.replace(2, 3, &[3]);
-        let mut patch1 = PatchList::new(patch1_data.get());
+        let mut patch1 = PatchList::new(patch1_data.get()).unwrap();
 
         let mut patch2_data = PatchDataBuilder::new();
         patch2_data.replace(1, 2, &[1, 2]);
-        let mut patch2 = PatchList::new(patch2_data.get());
+        let mut patch2 = PatchList::new(patch2_data.get()).unwrap();
 
         let patch = patch1.combine(&mut patch2);
 
-        let result = patch.apply(&data);
+        let mut buffer = CoreRevisionBuffer::new();
+        patch.apply(&mut buffer, &data);
 
-        assert_eq!(result, vec![0u8, 1, 2, 3]);
+        assert_eq!(buffer.finish(), vec![0u8, 1, 2, 3]);
     }
 
     #[test]
@@ -300,17 +304,18 @@
         let data = vec![0u8, 0, 0];
         let mut patch1_data = PatchDataBuilder::new();
         patch1_data.replace(1, 2, &[3, 4]);
-        let mut patch1 = PatchList::new(patch1_data.get());
+        let mut patch1 = PatchList::new(patch1_data.get()).unwrap();
 
         let mut patch2_data = PatchDataBuilder::new();
         patch2_data.replace(1, 4, &[1, 2, 3]);
-        let mut patch2 = PatchList::new(patch2_data.get());
+        let mut patch2 = PatchList::new(patch2_data.get()).unwrap();
 
         let patch = patch1.combine(&mut patch2);
 
-        let result = patch.apply(&data);
+        let mut buffer = CoreRevisionBuffer::new();
+        patch.apply(&mut buffer, &data);
 
-        assert_eq!(result, vec![0u8, 1, 2, 3]);
+        assert_eq!(buffer.finish(), vec![0u8, 1, 2, 3]);
     }
 
     #[test]
@@ -318,17 +323,18 @@
         let data = vec![0u8, 0, 0];
         let mut patch1_data = PatchDataBuilder::new();
         patch1_data.replace(0, 1, &[1, 3]);
-        let mut patch1 = PatchList::new(patch1_data.get());
+        let mut patch1 = PatchList::new(patch1_data.get()).unwrap();
 
         let mut patch2_data = PatchDataBuilder::new();
         patch2_data.replace(1, 4, &[2, 3, 4]);
-        let mut patch2 = PatchList::new(patch2_data.get());
+        let mut patch2 = PatchList::new(patch2_data.get()).unwrap();
 
         let patch = patch1.combine(&mut patch2);
 
-        let result = patch.apply(&data);
+        let mut buffer = CoreRevisionBuffer::new();
+        patch.apply(&mut buffer, &data);
 
-        assert_eq!(result, vec![1u8, 2, 3, 4]);
+        assert_eq!(buffer.finish(), vec![1u8, 2, 3, 4]);
     }
 
     #[test]
@@ -336,17 +342,18 @@
         let data = vec![0u8, 0, 0];
         let mut patch1_data = PatchDataBuilder::new();
         patch1_data.replace(1, 3, &[1, 3, 4]);
-        let mut patch1 = PatchList::new(patch1_data.get());
+        let mut patch1 = PatchList::new(patch1_data.get()).unwrap();
 
         let mut patch2_data = PatchDataBuilder::new();
         patch2_data.replace(0, 2, &[1, 2]);
-        let mut patch2 = PatchList::new(patch2_data.get());
+        let mut patch2 = PatchList::new(patch2_data.get()).unwrap();
 
         let patch = patch1.combine(&mut patch2);
 
-        let result = patch.apply(&data);
+        let mut buffer = CoreRevisionBuffer::new();
+        patch.apply(&mut buffer, &data);
 
-        assert_eq!(result, vec![1u8, 2, 3, 4]);
+        assert_eq!(buffer.finish(), vec![1u8, 2, 3, 4]);
     }
 
     #[test]
@@ -354,16 +361,17 @@
         let data = vec![0u8, 0, 0];
         let mut patch1_data = PatchDataBuilder::new();
         patch1_data.replace(0, 3, &[1, 3, 3, 4]);
-        let mut patch1 = PatchList::new(patch1_data.get());
+        let mut patch1 = PatchList::new(patch1_data.get()).unwrap();
 
         let mut patch2_data = PatchDataBuilder::new();
         patch2_data.replace(1, 3, &[2, 3]);
-        let mut patch2 = PatchList::new(patch2_data.get());
+        let mut patch2 = PatchList::new(patch2_data.get()).unwrap();
 
         let patch = patch1.combine(&mut patch2);
 
-        let result = patch.apply(&data);
+        let mut buffer = CoreRevisionBuffer::new();
+        patch.apply(&mut buffer, &data);
 
-        assert_eq!(result, vec![1u8, 2, 3, 4]);
+        assert_eq!(buffer.finish(), vec![1u8, 2, 3, 4]);
     }
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rust/hg-core/src/transaction.rs	Wed Nov 20 15:53:19 2024 +0100
@@ -0,0 +1,11 @@
+use std::path::Path;
+
+/// The Mercurial transaction system is based on the append-only nature
+/// of its core files. This trait exposes the methods necessary to safely
+/// write to the different core data structures.
+pub trait Transaction {
+    /// Record the state of an append-only file before update
+    fn add(&mut self, file: impl AsRef<Path>, offset: usize);
+
+    // TODO the rest of the methods once we do more in Rust.
+}
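
A minimal sketch of an implementor (hypothetical, not part of this change): record the pre-write offsets so files can be truncated back on rollback.

```rust
use std::path::{Path, PathBuf};

// Hypothetical in-memory journal implementing `Transaction`.
struct MemTransaction {
    entries: Vec<(PathBuf, usize)>,
}

impl Transaction for MemTransaction {
    fn add(&mut self, file: impl AsRef<Path>, offset: usize) {
        // Remember how long `file` was before this write; a rollback
        // would truncate it back to `offset`.
        self.entries.push((file.as_ref().to_owned(), offset));
    }
}
```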
--- a/rust/hg-core/src/update.rs	Wed Nov 20 15:38:57 2024 +0100
+++ b/rust/hg-core/src/update.rs	Wed Nov 20 15:53:19 2024 +0100
@@ -15,13 +15,14 @@
         dirstate_map::DirstateEntryReset, on_disk::write_tracked_key,
     },
     errors::{HgError, IoResultExt},
-    exit_codes,
-    filelog::Filelog,
-    narrow,
-    node::NULL_NODE,
+    exit_codes, narrow,
     operations::{list_rev_tracked_files, ExpandedManifestEntry},
     progress::Progress,
     repo::Repo,
+    revlog::filelog::Filelog,
+    revlog::node::NULL_NODE,
+    revlog::options::{default_revlog_options, RevlogOpenOptions},
+    revlog::RevlogError,
     sparse,
     utils::{
         cap_default_rayon_threads,
@@ -30,8 +31,7 @@
         path_auditor::PathAuditor,
     },
     vfs::{is_on_nfs_mount, VfsImpl},
-    DirstateParents, RevlogError, RevlogOpenOptions, UncheckedRevision,
-    INTERRUPT_RECEIVED,
+    DirstateParents, UncheckedRevision, INTERRUPT_RECEIVED,
 };
 use crossbeam_channel::{Receiver, Sender};
 use rayon::prelude::*;
@@ -93,7 +93,11 @@
         return Ok(0);
     }
     let store_vfs = &repo.store_vfs();
-    let options = repo.default_revlog_options(crate::RevlogType::Filelog)?;
+    let options = default_revlog_options(
+        repo.config(),
+        repo.requirements(),
+        crate::revlog::RevlogType::Filelog,
+    )?;
     let (errors_sender, errors_receiver) = crossbeam_channel::unbounded();
     let (files_sender, files_receiver) = crossbeam_channel::unbounded();
     let working_directory_path = &repo.working_directory_path();
@@ -149,7 +153,7 @@
 
 fn handle_revlog_error(e: RevlogError) -> HgError {
     match e {
-        crate::RevlogError::Other(hg_error) => hg_error,
+        crate::revlog::RevlogError::Other(hg_error) => hg_error,
         e => HgError::abort(
             format!("revlog error: {}", e),
             exit_codes::ABORT,
--- a/rust/hg-core/src/vfs.rs	Wed Nov 20 15:38:57 2024 +0100
+++ b/rust/hg-core/src/vfs.rs	Wed Nov 20 15:53:19 2024 +0100
@@ -1,21 +1,75 @@
-use crate::errors::{HgError, IoErrorContext, IoResultExt};
+use crate::errors::{HgError, HgResultExt, IoErrorContext, IoResultExt};
 use crate::exit_codes;
+use crate::fncache::FnCache;
+use crate::revlog::path_encode::path_encode;
+use crate::utils::files::{get_bytes_from_path, get_path_from_bytes};
 use dyn_clone::DynClone;
+use format_bytes::format_bytes;
 use memmap2::{Mmap, MmapOptions};
-use std::fs::File;
-use std::io::{ErrorKind, Write};
-use std::os::unix::fs::MetadataExt;
+use rand::distributions::{Alphanumeric, DistString};
+use std::fs::{File, Metadata, OpenOptions};
+use std::io::{ErrorKind, Read, Seek, Write};
+use std::os::fd::AsRawFd;
+use std::os::unix::fs::{MetadataExt, PermissionsExt};
 use std::path::{Path, PathBuf};
+#[cfg(test)]
+use std::sync::atomic::AtomicUsize;
+#[cfg(test)]
+use std::sync::atomic::Ordering;
+use std::sync::OnceLock;
 
 /// Filesystem access abstraction for the contents of a given "base" directory
 #[derive(Clone)]
 pub struct VfsImpl {
     pub(crate) base: PathBuf,
+    pub readonly: bool,
+    pub mode: Option<u32>,
 }
 
 struct FileNotFound(std::io::Error, PathBuf);
 
+/// Store the umask for the whole process since it's expensive to get.
+static UMASK: OnceLock<u32> = OnceLock::new();
+
+fn get_umask() -> u32 {
+    *UMASK.get_or_init(|| unsafe {
+        // TODO is there any way of getting the umask without temporarily
+        // setting it? Doesn't this affect all threads in this tiny window?
+        let mask = libc::umask(0);
+        libc::umask(mask);
+        mask & 0o777
+    })
+}
+
+/// Return the (unix) mode with which we will create/fix files
+fn get_mode(base: impl AsRef<Path>) -> Option<u32> {
+    match base.as_ref().metadata() {
+        Ok(meta) => {
+            // files in .hg/ will be created using this mode
+            let mode = meta.mode();
+            // avoid some useless chmods
+            if (0o777 & !get_umask()) == (0o777 & mode) {
+                None
+            } else {
+                Some(mode)
+            }
+        }
+        Err(_) => None,
+    }
+}
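
A worked example of the logic above (values assumed): with a umask of `0o022`, default creation yields `0o777 & !0o022 == 0o755`. A `.hg` directory deliberately set to `0o775` (e.g. for group sharing) differs from that, so `get_mode` returns `Some(0o775)` and `create` below chmods new files to `0o775 & 0o666 == 0o664`:

```rust
// Illustrative restatement of the mode/umask interplay used by `get_mode`
// and `VfsImpl::create`.
fn effective_file_mode(dir_mode: u32, umask: u32) -> Option<u32> {
    if (0o777 & !umask) == (0o777 & dir_mode) {
        None // default permissions already match; no chmod needed
    } else {
        Some(dir_mode & 0o666) // strip execute bits for regular files
    }
}

#[test]
fn shared_dir_mode() {
    assert_eq!(effective_file_mode(0o775, 0o022), Some(0o664));
    assert_eq!(effective_file_mode(0o755, 0o022), None);
}
```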
+
 impl VfsImpl {
+    pub fn new(base: PathBuf, readonly: bool) -> Self {
+        let mode = get_mode(&base);
+        Self {
+            base,
+            readonly,
+            mode,
+        }
+    }
+
+    // XXX these methods are probably redundant with VFS trait?
+
     pub fn join(&self, relative_path: impl AsRef<Path>) -> PathBuf {
         self.base.join(relative_path)
     }
@@ -103,26 +157,6 @@
         }
     }
 
-    pub fn rename(
-        &self,
-        relative_from: impl AsRef<Path>,
-        relative_to: impl AsRef<Path>,
-    ) -> Result<(), HgError> {
-        let from = self.join(relative_from);
-        let to = self.join(relative_to);
-        std::fs::rename(&from, &to)
-            .with_context(|| IoErrorContext::RenamingFile { from, to })
-    }
-
-    pub fn remove_file(
-        &self,
-        relative_path: impl AsRef<Path>,
-    ) -> Result<(), HgError> {
-        let path = self.join(relative_path);
-        std::fs::remove_file(&path)
-            .with_context(|| IoErrorContext::RemovingFile(path))
-    }
-
     #[cfg(unix)]
     pub fn create_symlink(
         &self,
@@ -160,7 +194,7 @@
     path: impl AsRef<Path>,
 ) -> Result<Option<std::fs::Metadata>, HgError> {
     let path = path.as_ref();
-    match std::fs::metadata(path) {
+    match path.metadata() {
         Ok(meta) => Ok(Some(meta)),
         Err(error) => match error.kind() {
             // TODO: when we require a Rust version where `NotADirectory` is
@@ -174,6 +208,188 @@
     }
 }
 
+/// Abstraction over the files handled by a [`Vfs`].
+#[derive(Debug)]
+pub enum VfsFile {
+    Atomic(AtomicFile),
+
+    Normal {
+        file: File,
+        path: PathBuf,
+        /// If `Some`, check (and maybe fix) this file's timestamp ambiguity.
+        /// See [`is_filetime_ambiguous`].
+        check_ambig: Option<Metadata>,
+    },
+}
+
+impl VfsFile {
+    pub fn normal(file: File, path: PathBuf) -> Self {
+        Self::Normal {
+            file,
+            check_ambig: None,
+            path,
+        }
+    }
+    pub fn normal_check_ambig(
+        file: File,
+        path: PathBuf,
+    ) -> Result<Self, HgError> {
+        Ok(Self::Normal {
+            file,
+            check_ambig: Some(path.metadata().when_reading_file(&path)?),
+            path,
+        })
+    }
+    pub fn try_clone(&self) -> Result<VfsFile, HgError> {
+        Ok(match self {
+            VfsFile::Atomic(AtomicFile {
+                fp,
+                temp_path,
+                check_ambig,
+                target_name,
+                is_open,
+            }) => Self::Atomic(AtomicFile {
+                fp: fp.try_clone().when_reading_file(temp_path)?,
+                temp_path: temp_path.clone(),
+                check_ambig: *check_ambig,
+                target_name: target_name.clone(),
+                is_open: *is_open,
+            }),
+            VfsFile::Normal {
+                file,
+                check_ambig,
+                path,
+            } => Self::Normal {
+                file: file.try_clone().when_reading_file(path)?,
+                check_ambig: check_ambig.clone(),
+                path: path.to_owned(),
+            },
+        })
+    }
+    pub fn set_len(&self, len: u64) -> Result<(), std::io::Error> {
+        match self {
+            VfsFile::Atomic(atomic_file) => atomic_file.fp.set_len(len),
+            VfsFile::Normal { file, .. } => file.set_len(len),
+        }
+    }
+
+    pub fn metadata(&self) -> Result<std::fs::Metadata, std::io::Error> {
+        match self {
+            VfsFile::Atomic(atomic_file) => atomic_file.fp.metadata(),
+            VfsFile::Normal { file, .. } => file.metadata(),
+        }
+    }
+}
+
+impl AsRawFd for VfsFile {
+    fn as_raw_fd(&self) -> std::os::unix::prelude::RawFd {
+        match self {
+            VfsFile::Atomic(atomic_file) => atomic_file.fp.as_raw_fd(),
+            VfsFile::Normal { file, .. } => file.as_raw_fd(),
+        }
+    }
+}
+
+impl Seek for VfsFile {
+    fn seek(&mut self, pos: std::io::SeekFrom) -> std::io::Result<u64> {
+        match self {
+            VfsFile::Atomic(atomic_file) => atomic_file.seek(pos),
+            VfsFile::Normal { file, .. } => file.seek(pos),
+        }
+    }
+}
+
+impl Read for VfsFile {
+    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
+        match self {
+            VfsFile::Atomic(atomic_file) => atomic_file.fp.read(buf),
+            VfsFile::Normal { file, .. } => file.read(buf),
+        }
+    }
+}
+
+impl Write for VfsFile {
+    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
+        match self {
+            VfsFile::Atomic(atomic_file) => atomic_file.fp.write(buf),
+            VfsFile::Normal { file, .. } => file.write(buf),
+        }
+    }
+
+    fn flush(&mut self) -> std::io::Result<()> {
+        match self {
+            VfsFile::Atomic(atomic_file) => atomic_file.fp.flush(),
+            VfsFile::Normal { file, .. } => file.flush(),
+        }
+    }
+}
+
+impl Drop for VfsFile {
+    fn drop(&mut self) {
+        if let VfsFile::Normal {
+            path,
+            check_ambig: Some(old),
+            ..
+        } = self
+        {
+            avoid_timestamp_ambiguity(path, old)
+        }
+    }
+}
+
+/// Records the number of times we've fixed a timestamp ambiguity, only
+/// applicable for tests.
+#[cfg(test)]
+static TIMESTAMP_FIXES_CALLS: AtomicUsize = AtomicUsize::new(0);
+
+fn avoid_timestamp_ambiguity(path: &Path, old: &Metadata) {
+    if let Ok(new) = path.metadata() {
+        let is_ambiguous = is_filetime_ambiguous(&new, old);
+        if is_ambiguous {
+            let advanced =
+                filetime::FileTime::from_unix_time(old.mtime() + 1, 0);
+            if filetime::set_file_times(path, advanced, advanced).is_ok() {
+                #[cfg(test)]
+                {
+                    TIMESTAMP_FIXES_CALLS.fetch_add(1, Ordering::Relaxed);
+                }
+            }
+        }
+    }
+}
+
+/// Examine whether new stat is ambiguous against old one
+///
+/// "S[N]" below means stat of a file at N-th change:
+///
+/// - S[n-1].ctime  < S[n].ctime: can detect change of a file
+/// - S[n-1].ctime == S[n].ctime
+///   - S[n-1].ctime  < S[n].mtime: means natural advancing (*1)
+///   - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
+///   - S[n-1].ctime  > S[n].mtime: never occurs naturally (don't care)
+/// - S[n-1].ctime  > S[n].ctime: never occurs naturally (don't care)
+///
+/// Case (*2) above means that a file was changed twice or more at
+/// same time in sec (= S[n-1].ctime), and comparison of timestamp
+/// is ambiguous.
+///
+/// Base idea to avoid such ambiguity is "advance mtime 1 sec, if
+/// timestamp is ambiguous".
+///
+/// But advancing mtime only in case (*2) doesn't work as
+/// expected, because naturally advanced S[n].mtime in case (*1)
+/// might be equal to manually advanced S[n-1 or earlier].mtime.
+///
+/// Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
+/// treated as ambiguous regardless of mtime, to avoid overlooking
+/// by confliction between such mtime.
+///
+/// Advancing mtime "if isambig(new, old)" ensures "S[n-1].mtime !=
+/// S[n].mtime", even if size of a file isn't changed.
+fn is_filetime_ambiguous(new: &Metadata, old: &Metadata) -> bool {
+    new.ctime() == old.ctime()
+}
+
 /// Writable file object that atomically updates a file
 ///
 /// All writes will go to a temporary copy of the original file. Call
@@ -181,6 +397,7 @@
 /// the temporary copy to the original name, making the changes
 /// visible. If the object is destroyed without being closed, all your
 /// writes are discarded.
+#[derive(Debug)]
 pub struct AtomicFile {
     /// The temporary file to write to
     fp: std::fs::File,
@@ -197,6 +414,48 @@
 
 impl AtomicFile {
     pub fn new(
+        target_path: impl AsRef<Path>,
+        empty: bool,
+        check_ambig: bool,
+    ) -> Result<Self, HgError> {
+        let target_path = target_path.as_ref().to_owned();
+
+        let random_id =
+            Alphanumeric.sample_string(&mut rand::thread_rng(), 12);
+        let filename =
+            target_path.file_name().expect("target has no filename");
+        let filename = get_bytes_from_path(filename);
+        let temp_filename =
+            format_bytes!(b".{}-{}~", filename, random_id.as_bytes());
+        let temp_path =
+            target_path.with_file_name(get_path_from_bytes(&temp_filename));
+
+        if !empty {
+            std::fs::copy(&target_path, &temp_path)
+                .with_context(|| IoErrorContext::CopyingFile {
+                    from: target_path.to_owned(),
+                    to: temp_path.to_owned(),
+                })
+                // If it doesn't exist, create it on open
+                .io_not_found_as_none()?;
+        }
+        let fp = std::fs::OpenOptions::new()
+            .write(true)
+            .create(true)
+            .truncate(empty)
+            .open(&temp_path)
+            .when_writing_file(&temp_path)?;
+
+        Ok(Self {
+            fp,
+            temp_path,
+            check_ambig,
+            target_name: target_path,
+            is_open: true,
+        })
+    }
+
+    pub fn from_file(
         fp: std::fs::File,
         check_ambig: bool,
         temp_name: PathBuf,
@@ -228,31 +487,30 @@
         self.fp.flush()?;
         let target = self.target();
         if self.check_ambig {
-            if let Ok(stat) = std::fs::metadata(&target) {
+            if let Ok(stat) = target.metadata() {
                 std::fs::rename(&self.temp_path, &target)?;
-                let new_stat = std::fs::metadata(&target)?;
-                let ctime = new_stat.ctime();
-                let is_ambiguous = ctime == stat.ctime();
-                if is_ambiguous {
-                    let advanced =
-                        filetime::FileTime::from_unix_time(ctime + 1, 0);
-                    filetime::set_file_times(target, advanced, advanced)?;
-                }
+                avoid_timestamp_ambiguity(&target, &stat);
             } else {
                 std::fs::rename(&self.temp_path, target)?;
             }
         } else {
-            std::fs::rename(&self.temp_path, target).unwrap();
+            std::fs::rename(&self.temp_path, target)?;
         }
         self.is_open = false;
         Ok(())
     }
 }
 
+impl Seek for AtomicFile {
+    fn seek(&mut self, pos: std::io::SeekFrom) -> std::io::Result<u64> {
+        self.fp.seek(pos)
+    }
+}
+
 impl Drop for AtomicFile {
     fn drop(&mut self) {
         if self.is_open {
-            std::fs::remove_file(self.target()).ok();
+            std::fs::remove_file(&self.temp_path).ok();
         }
     }
 }
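
Putting the pieces together, a hedged usage sketch (module paths and `close`'s `std::io::Result` return type assumed, error contexts simplified): all writes land in the hidden temp file, and only `close` makes them visible by renaming over the target.

```rust
use std::io::Write;
use std::path::Path;

use hg::errors::{HgError, IoResultExt};
use hg::vfs::{AtomicFile, VfsFile};

fn write_atomically(target: &Path, data: &[u8]) -> Result<(), HgError> {
    // `empty = true` starts from a truncated temp file instead of copying
    // the old contents; `check_ambig = false` skips mtime disambiguation.
    let mut file = VfsFile::Atomic(AtomicFile::new(target, true, false)?);
    file.write_all(data).when_writing_file(target)?;
    if let VfsFile::Atomic(atomic) = &mut file {
        // Flushes, then renames the temp file over `target`.
        atomic.close().when_writing_file(target)?;
    }
    Ok(())
}
```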
@@ -260,58 +518,178 @@
 /// Abstracts over the VFS to allow for different implementations of the
 /// filesystem layer (like passing one from Python).
 pub trait Vfs: Sync + Send + DynClone {
-    fn open(&self, filename: &Path) -> Result<std::fs::File, HgError>;
-    fn open_read(&self, filename: &Path) -> Result<std::fs::File, HgError>;
-    fn open_check_ambig(
+    // TODO make `open` readonly and make `open_read` an `open_write`
+    /// Open a [`VfsFile::Normal`] for writing and reading the file at
+    /// `filename`, relative to this VFS's root.
+    fn open(&self, filename: &Path) -> Result<VfsFile, HgError>;
+    /// Open a [`VfsFile::Normal`] for reading the file at `filename`,
+    /// relative to this VFS's root.
+    fn open_read(&self, filename: &Path) -> Result<VfsFile, HgError>;
+    /// Open a [`VfsFile::Normal`] for reading and writing the file at
+    /// `filename`, relative to this VFS's root. This file will be checked
+    /// for an ambiguous mtime on [`drop`]. See [`is_filetime_ambiguous`].
+    fn open_check_ambig(&self, filename: &Path) -> Result<VfsFile, HgError>;
+    /// Create a [`VfsFile::Normal`] for reading and writing the file at
+    /// `filename`, relative to this VFS's root. If the file already exists,
+    /// it will be truncated to 0 bytes.
+    fn create(
         &self,
         filename: &Path,
-    ) -> Result<std::fs::File, HgError>;
-    fn create(&self, filename: &Path) -> Result<std::fs::File, HgError>;
-    /// Must truncate the new file if exist
+        check_ambig: bool,
+    ) -> Result<VfsFile, HgError>;
+    /// Create a [`VfsFile::Atomic`] for reading and writing the file at
+    /// `filename`, relative to this VFS's root. If the file already exists,
+    /// it will be truncated to 0 bytes.
     fn create_atomic(
         &self,
         filename: &Path,
         check_ambig: bool,
-    ) -> Result<AtomicFile, HgError>;
-    fn file_size(&self, file: &File) -> Result<u64, HgError>;
+    ) -> Result<VfsFile, HgError>;
+    /// Return the total file size in bytes of the open `file`. Errors are
+    /// the usual IO errors (invalid file handle, permissions, etc.)
+    fn file_size(&self, file: &VfsFile) -> Result<u64, HgError>;
+    /// Return `true` if `filename` exists relative to this VFS's root. Errors
+    /// will coerce to `false`, so this also returns `false` if there are
+    /// IO problems. This is fine because any operation that actually tries
+    /// to do anything with this path will get the same error.
     fn exists(&self, filename: &Path) -> bool;
+    /// Remove the file at `filename` relative to this VFS's root. Errors
+    /// are the usual IO errors (lacking permission, file does not exist, etc.)
     fn unlink(&self, filename: &Path) -> Result<(), HgError>;
+    /// Rename the file `from` to `to`, both relative to this VFS's root.
+    /// Errors are the usual IO errors (lacking permission, file does not
+    /// exist, etc.). If `check_ambig` is `true`, the VFS will check for an
+    /// ambiguous mtime on rename. See [`is_filetime_ambiguous`].
     fn rename(
         &self,
         from: &Path,
         to: &Path,
         check_ambig: bool,
     ) -> Result<(), HgError>;
+    /// Copy the file `from` to `to`, both relative to this VFS's root.
+    /// Errors are the usual IO errors (lacking permission, file does not
+    /// exist, etc.).
     fn copy(&self, from: &Path, to: &Path) -> Result<(), HgError>;
+    /// Returns the absolute root path of this VFS, relative to which all
+    /// operations are done.
+    fn base(&self) -> &Path;
 }
 
 /// These methods will need to be implemented once `rhg` (and other) non-Python
 /// users of `hg-core` start doing more on their own, like writing to files.
 impl Vfs for VfsImpl {
-    fn open(&self, _filename: &Path) -> Result<std::fs::File, HgError> {
-        todo!()
+    fn open(&self, filename: &Path) -> Result<VfsFile, HgError> {
+        if self.readonly {
+            return Err(HgError::abort(
+                "write access in a readonly vfs",
+                exit_codes::ABORT,
+                None,
+            ));
+        }
+        // TODO auditpath
+        let path = self.base.join(filename);
+        copy_in_place_if_hardlink(&path)?;
+
+        Ok(VfsFile::normal(
+            OpenOptions::new()
+                .create(false)
+                .create_new(false)
+                .write(true)
+                .read(true)
+                .open(&path)
+                .when_writing_file(&path)?,
+            path.to_owned(),
+        ))
     }
-    fn open_read(&self, filename: &Path) -> Result<std::fs::File, HgError> {
+
+    fn open_read(&self, filename: &Path) -> Result<VfsFile, HgError> {
+        // TODO auditpath
         let path = self.base.join(filename);
-        std::fs::File::open(&path).when_reading_file(&path)
+        Ok(VfsFile::normal(
+            std::fs::File::open(&path).when_reading_file(&path)?,
+            path,
+        ))
     }
-    fn open_check_ambig(
+
+    fn open_check_ambig(&self, filename: &Path) -> Result<VfsFile, HgError> {
+        if self.readonly {
+            return Err(HgError::abort(
+                "write access in a readonly vfs",
+                exit_codes::ABORT,
+                None,
+            ));
+        }
+
+        let path = self.base.join(filename);
+        copy_in_place_if_hardlink(&path)?;
+
+        // TODO auditpath
+        VfsFile::normal_check_ambig(
+            OpenOptions::new()
+                .write(true)
+                .read(true) // Can be used for reading to save on `open` calls
+                .create(false)
+                .open(&path)
+                .when_reading_file(&path)?,
+            path.to_owned(),
+        )
+    }
+
+    fn create(
         &self,
-        _filename: &Path,
-    ) -> Result<std::fs::File, HgError> {
-        todo!()
+        filename: &Path,
+        check_ambig: bool,
+    ) -> Result<VfsFile, HgError> {
+        if self.readonly {
+            return Err(HgError::abort(
+                "write access in a readonly vfs",
+                exit_codes::ABORT,
+                None,
+            ));
+        }
+        // TODO auditpath
+        let path = self.base.join(filename);
+        let parent = path.parent().expect("file at root");
+        std::fs::create_dir_all(parent).when_writing_file(parent)?;
+
+        let file = OpenOptions::new()
+            .create(true)
+            .truncate(true)
+            .write(true)
+            .read(true)
+            .open(&path)
+            .when_writing_file(&path)?;
+
+        if let Some(mode) = self.mode {
+            // Creating the file with the right permission (with `.mode()`)
+            // may not work since umask takes effect for file creation.
+            // So we need to fix the permission after creating the file.
+            fix_directory_permissions(&self.base, &path, mode)?;
+            let perm = std::fs::Permissions::from_mode(mode & 0o666);
+            std::fs::set_permissions(&path, perm).when_writing_file(&path)?;
+        }
+
+        Ok(VfsFile::Normal {
+            file,
+            check_ambig: if check_ambig {
+                Some(path.metadata().when_reading_file(&path)?)
+            } else {
+                None
+            },
+            path: path.to_owned(),
+        })
     }
-    fn create(&self, _filename: &Path) -> Result<std::fs::File, HgError> {
-        todo!()
-    }
+
     fn create_atomic(
         &self,
         _filename: &Path,
         _check_ambig: bool,
-    ) -> Result<AtomicFile, HgError> {
+    ) -> Result<VfsFile, HgError> {
         todo!()
     }
-    fn file_size(&self, file: &File) -> Result<u64, HgError> {
+
+    fn file_size(&self, file: &VfsFile) -> Result<u64, HgError> {
         Ok(file
             .metadata()
             .map_err(|e| {
@@ -323,25 +701,268 @@
             })?
             .size())
     }
-    fn exists(&self, _filename: &Path) -> bool {
-        todo!()
+
+    fn exists(&self, filename: &Path) -> bool {
+        self.base.join(filename).exists()
     }
-    fn unlink(&self, _filename: &Path) -> Result<(), HgError> {
-        todo!()
+
+    fn unlink(&self, filename: &Path) -> Result<(), HgError> {
+        if self.readonly {
+            return Err(HgError::abort(
+                "write access in a readonly vfs",
+                exit_codes::ABORT,
+                None,
+            ));
+        }
+        let path = self.base.join(filename);
+        std::fs::remove_file(&path)
+            .with_context(|| IoErrorContext::RemovingFile(path))
     }
+
     fn rename(
         &self,
-        _from: &Path,
-        _to: &Path,
-        _check_ambig: bool,
+        from: &Path,
+        to: &Path,
+        check_ambig: bool,
+    ) -> Result<(), HgError> {
+        if self.readonly {
+            return Err(HgError::abort(
+                "write access in a readonly vfs",
+                exit_codes::ABORT,
+                None,
+            ));
+        }
+        let from = self.base.join(from);
+        let to = self.base.join(to);
+        // Stat the (joined) source before the rename so an ambiguous mtime
+        // on the renamed file can be detected afterwards.
+        let old_stat = if check_ambig {
+            Some(
+                from.metadata()
+                    .when_reading_file(&from)
+                    .io_not_found_as_none()?,
+            )
+        } else {
+            None
+        };
+        std::fs::rename(&from, &to).with_context(|| {
+            IoErrorContext::RenamingFile {
+                from,
+                to: to.to_owned(),
+            }
+        })?;
+        if let Some(Some(old)) = old_stat {
+            avoid_timestamp_ambiguity(&to, &old);
+        }
+        Ok(())
+    }
+
+    fn copy(&self, from: &Path, to: &Path) -> Result<(), HgError> {
+        let from = self.base.join(from);
+        let to = self.base.join(to);
+        std::fs::copy(&from, &to)
+            .with_context(|| IoErrorContext::CopyingFile { from, to })
+            .map(|_| ())
+    }
+
+    fn base(&self) -> &Path {
+        &self.base
+    }
+}
+
+fn fix_directory_permissions(
+    base: &Path,
+    path: &Path,
+    mode: u32,
+) -> Result<(), HgError> {
+    let mut ancestors = path.ancestors();
+    ancestors.next(); // yields the path itself
+
+    for ancestor in ancestors {
+        if ancestor == base {
+            break;
+        }
+        let perm = std::fs::Permissions::from_mode(mode);
+        std::fs::set_permissions(ancestor, perm)
+            .when_writing_file(ancestor)?;
+    }
+    Ok(())
+}
+
+/// A VFS that understands the `fncache` store layout (file encoding), and
+/// adds new entries to the `fncache`.
+/// TODO Only works when using from Python for now.
+pub struct FnCacheVfs {
+    inner: VfsImpl,
+    fncache: Box<dyn FnCache>,
+}
+
+impl Clone for FnCacheVfs {
+    fn clone(&self) -> Self {
+        Self {
+            inner: self.inner.clone(),
+            fncache: dyn_clone::clone_box(&*self.fncache),
+        }
+    }
+}
+
+impl FnCacheVfs {
+    pub fn new(
+        base: PathBuf,
+        readonly: bool,
+        fncache: Box<dyn FnCache>,
+    ) -> Self {
+        let inner = VfsImpl::new(base, readonly);
+        Self { inner, fncache }
+    }
+
+    fn maybe_add_to_fncache(
+        &self,
+        filename: &Path,
+        encoded_path: &Path,
     ) -> Result<(), HgError> {
-        todo!()
+        let relevant_file = (filename.starts_with("data/")
+            || filename.starts_with("meta/"))
+            && is_revlog_file(filename);
+        if relevant_file {
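+            // Register lazily: if the fncache isn't loaded yet and the
+            // encoded file already exists with content, it must already be
+            // registered, so don't force a full fncache load just to
+            // re-add it.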
+            let not_load = !self.fncache.is_loaded()
+                && (self.exists(filename)
+                    && self
+                        .inner
+                        .join(encoded_path)
+                        .metadata()
+                        .when_reading_file(encoded_path)?
+                        .size()
+                        != 0);
+            if !not_load {
+                self.fncache.add(filename);
+            }
+        };
+        Ok(())
+    }
+}
+
+impl Vfs for FnCacheVfs {
+    fn open(&self, filename: &Path) -> Result<VfsFile, HgError> {
+        let encoded = path_encode(&get_bytes_from_path(filename));
+        let encoded_path = get_path_from_bytes(&encoded);
+        self.maybe_add_to_fncache(filename, encoded_path)?;
+        self.inner.open(encoded_path)
+    }
+
+    fn open_read(&self, filename: &Path) -> Result<VfsFile, HgError> {
+        let encoded = path_encode(&get_bytes_from_path(filename));
+        let filename = get_path_from_bytes(&encoded);
+        self.inner.open_read(filename)
+    }
+
+    fn open_check_ambig(&self, filename: &Path) -> Result<VfsFile, HgError> {
+        let encoded = path_encode(&get_bytes_from_path(filename));
+        let filename = get_path_from_bytes(&encoded);
+        self.inner.open_check_ambig(filename)
+    }
+
+    fn create(
+        &self,
+        filename: &Path,
+        check_ambig: bool,
+    ) -> Result<VfsFile, HgError> {
+        let encoded = path_encode(&get_bytes_from_path(filename));
+        let encoded_path = get_path_from_bytes(&encoded);
+        self.maybe_add_to_fncache(filename, encoded_path)?;
+        self.inner.create(encoded_path, check_ambig)
+    }
+
+    fn create_atomic(
+        &self,
+        filename: &Path,
+        check_ambig: bool,
+    ) -> Result<VfsFile, HgError> {
+        let encoded = path_encode(&get_bytes_from_path(filename));
+        let filename = get_path_from_bytes(&encoded);
+        self.inner.create_atomic(filename, check_ambig)
+    }
+
+    fn file_size(&self, file: &VfsFile) -> Result<u64, HgError> {
+        self.inner.file_size(file)
+    }
+
+    fn exists(&self, filename: &Path) -> bool {
+        let encoded = path_encode(&get_bytes_from_path(filename));
+        let filename = get_path_from_bytes(&encoded);
+        self.inner.exists(filename)
     }
-    fn copy(&self, _from: &Path, _to: &Path) -> Result<(), HgError> {
-        todo!()
+
+    fn unlink(&self, filename: &Path) -> Result<(), HgError> {
+        let encoded = path_encode(&get_bytes_from_path(filename));
+        let filename = get_path_from_bytes(&encoded);
+        self.inner.unlink(filename)
+    }
+
+    fn rename(
+        &self,
+        from: &Path,
+        to: &Path,
+        check_ambig: bool,
+    ) -> Result<(), HgError> {
+        let encoded = path_encode(&get_bytes_from_path(from));
+        let from = get_path_from_bytes(&encoded);
+        let encoded = path_encode(&get_bytes_from_path(to));
+        let to = get_path_from_bytes(&encoded);
+        self.inner.rename(from, to, check_ambig)
+    }
+
+    fn copy(&self, from: &Path, to: &Path) -> Result<(), HgError> {
+        let encoded = path_encode(&get_bytes_from_path(from));
+        let from = get_path_from_bytes(&encoded);
+        let encoded = path_encode(&get_bytes_from_path(to));
+        let to = get_path_from_bytes(&encoded);
+        self.inner.copy(from, to)
+    }
+    fn base(&self) -> &Path {
+        self.inner.base()
     }
 }
 
+/// Detects whether `path` is a hardlink and, if so, copies it to a temporary
+/// file and renames that copy back over `path`, turning it into its own
+/// independent file. Revlogs are usually hardlinked when doing a local
+/// clone, and we don't want to modify the original repo.
+fn copy_in_place_if_hardlink(path: &Path) -> Result<(), HgError> {
+    let metadata = path.metadata().when_writing_file(path)?;
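+    // `nlink` is the number of directory entries pointing at this inode:
+    // anything above 1 means the file is hardlinked elsewhere.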
+    if metadata.nlink() > 1 {
+        // If it's hardlinked, copy it and rename it back before changing it.
+        let tmpdir = path.parent().expect("file at root");
+        let name = Alphanumeric.sample_string(&mut rand::thread_rng(), 16);
+        let tmpfile = tmpdir.join(name);
+        std::fs::create_dir_all(tmpfile.parent().expect("file at root"))
+            .with_context(|| IoErrorContext::CopyingFile {
+                from: path.to_owned(),
+                to: tmpfile.to_owned(),
+            })?;
+        std::fs::copy(path, &tmpfile).with_context(|| {
+            IoErrorContext::CopyingFile {
+                from: path.to_owned(),
+                to: tmpfile.to_owned(),
+            }
+        })?;
+        std::fs::rename(&tmpfile, path).with_context(|| {
+            IoErrorContext::RenamingFile {
+                from: tmpfile,
+                to: path.to_owned(),
+            }
+        })?;
+    }
+    Ok(())
+}
+
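+/// Returns whether `path` looks like a revlog file, judging only by its
+/// extension. A couple of illustrative checks:
+///
+/// ```ignore
+/// assert!(is_revlog_file("data/foo.i"));
+/// assert!(!is_revlog_file("data/foo"));
+/// ```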
+pub fn is_revlog_file(path: impl AsRef<Path>) -> bool {
+    path.as_ref()
+        .extension()
+        .map(|ext| {
+            ["i", "idx", "d", "dat", "n", "nd", "sda"]
+                .contains(&ext.to_string_lossy().as_ref())
+        })
+        .unwrap_or(false)
+}
+
 pub(crate) fn is_dir(path: impl AsRef<Path>) -> Result<bool, HgError> {
     Ok(fs_metadata(path)?.map_or(false, |meta| meta.is_dir()))
 }
@@ -380,3 +1001,131 @@
 pub(crate) fn is_on_nfs_mount(_path: impl AsRef<Path>) -> bool {
     false
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_atomic_file() {
+        let dir = tempfile::tempdir().unwrap().into_path();
+        let target_path = dir.join("sometargetname");
+
+        for empty in [true, false] {
+            let file = AtomicFile::new(&target_path, empty, false).unwrap();
+            assert!(file.is_open);
+            let filename =
+                file.temp_path.file_name().unwrap().to_str().unwrap();
+            // Make sure we have a coherent temp name
+            assert_eq!(filename.len(), 29, "{}", filename);
+            assert!(filename.contains("sometargetname"));
+
+            // Make sure the temp file is created in the same folder
+            assert_eq!(target_path.parent(), file.temp_path.parent());
+        }
+
+        assert!(!target_path.exists());
+        std::fs::write(&target_path, "version 1").unwrap();
+        let mut file = AtomicFile::new(&target_path, false, false).unwrap();
+        file.write_all(b"version 2!").unwrap();
+        assert_eq!(
+            std::fs::read(&target_path).unwrap(),
+            b"version 1".to_vec()
+        );
+        let temp_path = file.temp_path.to_owned();
+        // Dropping the file should discard the temp file and leave the
+        // target path untouched.
+        drop(file);
+        assert_eq!(
+            std::fs::read(&target_path).unwrap(),
+            b"version 1".to_vec()
+        );
+        assert!(!temp_path.exists());
+
+        let mut file = AtomicFile::new(&target_path, false, false).unwrap();
+        file.write_all(b"version 2!").unwrap();
+        assert_eq!(
+            std::fs::read(&target_path).unwrap(),
+            b"version 1".to_vec()
+        );
+        file.close().unwrap();
+        assert_eq!(
+            std::fs::read(&target_path).unwrap(),
+            b"version 2!".to_vec(),
+            "{}",
+            std::fs::read_to_string(&target_path).unwrap()
+        );
+        assert!(target_path.exists());
+        assert!(!temp_path.exists());
+    }
+
+    #[test]
+    fn test_vfs_file_check_ambig() {
+        let dir = tempfile::tempdir().unwrap().into_path();
+        let file_path = dir.join("file");
+
+        fn vfs_file_write(file_path: &Path, check_ambig: bool) {
+            let file = std::fs::OpenOptions::new()
+                .write(true)
+                .open(file_path)
+                .unwrap();
+            let old_stat = if check_ambig {
+                Some(file.metadata().unwrap())
+            } else {
+                None
+            };
+
+            let mut vfs_file = VfsFile::Normal {
+                file,
+                path: file_path.to_owned(),
+                check_ambig: old_stat,
+            };
+            vfs_file.write_all(b"contents").unwrap();
+        }
+
+        std::fs::OpenOptions::new()
+            .write(true)
+            .create(true)
+            .truncate(false)
+            .open(&file_path)
+            .unwrap();
+
+        let number_of_writes = 3;
+
+        // Try multiple times, because reproduction of an ambiguity depends
+        // on "filesystem time"
+        for _ in 0..5 {
+            TIMESTAMP_FIXES_CALLS.store(0, Ordering::Relaxed);
+            vfs_file_write(&file_path, false);
+            let old_stat = file_path.metadata().unwrap();
+            if old_stat.ctime() != old_stat.mtime() {
+                // a subsequent change never causes ambiguity in this case
+                continue;
+            }
+
+            // Repeat atomic write with `check_ambig == true`, to examine
+            // whether the mtime is advanced multiple times as expected
+            for _ in 0..number_of_writes {
+                vfs_file_write(&file_path, true);
+            }
+            let new_stat = file_path.metadata().unwrap();
+            if !is_filetime_ambiguous(&new_stat, &old_stat) {
+                // timestamp ambiguity was naturally avoided during the
+                // repetition
+                continue;
+            }
+
+            assert_eq!(
+                TIMESTAMP_FIXES_CALLS.load(Ordering::Relaxed),
+                number_of_writes
+            );
+            assert_eq!(
+                old_stat.mtime() + number_of_writes as i64,
+                file_path.metadata().unwrap().mtime()
+            );
+            break;
+        }
+        // If we've arrived here without breaking, we might not have
+        // tested anything because the platform is too slow. This test will
+        // still work on fast platforms.
+    }
+}
--- a/rust/hg-cpython/Cargo.toml	Wed Nov 20 15:38:57 2024 +0100
+++ b/rust/hg-cpython/Cargo.toml	Wed Nov 20 15:53:19 2024 +0100
@@ -17,3 +17,5 @@
 env_logger = "0.9.3"
 stable_deref_trait = "1.2.0"
 vcsgraph = "0.2.0"
+logging_timer = "1.1.0"
+python3-sys = "0.7.1"
--- a/rust/hg-cpython/src/conversion.rs	Wed Nov 20 15:38:57 2024 +0100
+++ b/rust/hg-cpython/src/conversion.rs	Wed Nov 20 15:53:19 2024 +0100
@@ -9,7 +9,7 @@
 //! `hg-core` crate. From Python, this will be seen as `rustext.ancestor`
 
 use cpython::{ObjectProtocol, PyErr, PyObject, PyResult, Python};
-use hg::{Revision, RevlogIndex, UncheckedRevision};
+use hg::{revlog::RevlogIndex, Revision, UncheckedRevision};
 
 use crate::{exceptions::GraphError, PyRevision};
 
--- a/rust/hg-cpython/src/lib.rs	Wed Nov 20 15:38:57 2024 +0100
+++ b/rust/hg-cpython/src/lib.rs	Wed Nov 20 15:53:19 2024 +0100
@@ -47,6 +47,7 @@
 pub mod revlog;
 pub mod update;
 pub mod utils;
+pub mod vfs;
 
 /// Revision as exposed to/from the Python layer.
 ///
--- a/rust/hg-cpython/src/pybytes_deref.rs	Wed Nov 20 15:38:57 2024 +0100
+++ b/rust/hg-cpython/src/pybytes_deref.rs	Wed Nov 20 15:53:19 2024 +0100
@@ -1,4 +1,7 @@
-use cpython::{PyBytes, Python};
+use crate::cpython::buffer::Element;
+use cpython::{
+    buffer::PyBuffer, exc::ValueError, PyBytes, PyErr, PyResult, Python,
+};
 use stable_deref_trait::StableDeref;
 
 /// Safe abstraction over a `PyBytes` together with the `&[u8]` slice
@@ -55,3 +58,67 @@
 // but here sending one to another thread is fine since we ensure it stays
 // valid.
 unsafe impl Send for PyBytesDeref {}
+
+/// Safe abstraction over a `PyBuffer` together with the `&[u8]` slice
+/// we extract from it, freeing users of `PyBuffer` from having to manage
+/// the buffer's lifetime themselves.
+///
+/// It also enables using a (wrapped) `PyBuffer` in GIL-unaware generic code.
+pub struct PyBufferDeref {
+    #[allow(unused)]
+    keep_alive: PyBuffer,
+
+    /// Borrows the buffer inside `self.keep_alive`,
+    /// but the borrow-checker cannot express self-referential structs.
+    data: *const [u8],
+}
+
+fn get_buffer<'a>(py: Python, buf: &'a PyBuffer) -> PyResult<&'a [u8]> {
+    let len = buf.item_count();
+
+    let cbuf = buf.buf_ptr();
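+    // Reinterpreting the buffer as `&[u8]` is only sound if its items are
+    // single bytes, laid out contiguously in C order, and the buffer is
+    // read-only so nothing can mutate it behind our back.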
+    let has_correct_item_size = std::mem::size_of::<u8>() == buf.item_size();
+    let is_valid_buffer = has_correct_item_size
+        && buf.is_c_contiguous()
+        && u8::is_compatible_format(buf.format())
+        && buf.readonly();
+
+    let bytes = if is_valid_buffer {
+        unsafe { std::slice::from_raw_parts(cbuf as *const u8, len) }
+    } else {
+        return Err(PyErr::new::<ValueError, _>(
+            py,
+            "Buffer has an invalid memory representation",
+        ));
+    };
+    Ok(bytes)
+}
+
+impl PyBufferDeref {
+    pub fn new(py: Python, buf: PyBuffer) -> PyResult<Self> {
+        Ok(Self {
+            data: get_buffer(py, &buf)?,
+            keep_alive: buf,
+        })
+    }
+}
+
+impl std::ops::Deref for PyBufferDeref {
+    type Target = [u8];
+
+    fn deref(&self) -> &[u8] {
+        // Safety: the raw pointer is valid as long as the PyBuffer is still
+        // alive, and the returned slice borrows `self`.
+        unsafe { &*self.data }
+    }
+}
+
+unsafe impl StableDeref for PyBufferDeref {}
+
+#[allow(unused)]
+fn static_assert_pybuffer_is_send() {
+    #[allow(clippy::no_effect)]
+    require_send::<PyBuffer>;
+}
+
+// Safety: PyBuffer is Send. Raw pointers are not by default,
+// but here sending one to another thread is fine since we ensure it stays
+// valid.
+unsafe impl Send for PyBufferDeref {}
--- a/rust/hg-cpython/src/revlog.rs	Wed Nov 20 15:38:57 2024 +0100
+++ b/rust/hg-cpython/src/revlog.rs	Wed Nov 20 15:53:19 2024 +0100
@@ -4,9 +4,11 @@
 //
 // This software may be used and distributed according to the terms of the
 // GNU General Public License version 2 or any later version.
+#![allow(non_snake_case)]
 
 use crate::{
     conversion::{rev_pyiter_collect, rev_pyiter_collect_or_else},
+    pybytes_deref::{PyBufferDeref, PyBytesDeref},
     utils::{node_from_py_bytes, node_from_py_object},
     PyRevision,
 };
@@ -14,39 +16,54 @@
     buffer::{Element, PyBuffer},
     exc::{IndexError, ValueError},
     ObjectProtocol, PyBool, PyBytes, PyClone, PyDict, PyErr, PyInt, PyList,
-    PyModule, PyObject, PyResult, PySet, PyString, PyTuple, Python,
+    PyModule, PyObject, PyResult, PySet, PyTuple, PyType, Python,
     PythonObject, ToPyObject, UnsafePyLeaked,
 };
 use hg::{
     errors::HgError,
-    index::{
-        IndexHeader, Phase, RevisionDataParams, SnapshotsCache,
-        INDEX_ENTRY_SIZE,
+    fncache::FnCache,
+    revlog::{
+        compression::CompressionConfig,
+        index::{
+            Index, IndexHeader, Phase, RevisionDataParams, SnapshotsCache,
+            INDEX_ENTRY_SIZE,
+        },
+        inner_revlog::{InnerRevlog as CoreInnerRevlog, RevisionBuffer},
+        nodemap::{Block, NodeMap, NodeMapError, NodeTree as CoreNodeTree},
+        options::{
+            RevlogDataConfig, RevlogDeltaConfig, RevlogFeatureConfig,
+            RevlogOpenOptions,
+        },
+        Graph, NodePrefix, RevlogError, RevlogIndex, RevlogType,
     },
-    nodemap::{Block, NodeMapError, NodeTree as CoreNodeTree},
-    revlog::{nodemap::NodeMap, Graph, NodePrefix, RevlogError, RevlogIndex},
+    transaction::Transaction,
+    utils::files::{get_bytes_from_path, get_path_from_bytes},
+    vfs::FnCacheVfs,
     BaseRevision, Node, Revision, UncheckedRevision, NULL_REVISION,
 };
 use std::{
-    cell::RefCell,
+    cell::{Cell, RefCell},
     collections::{HashMap, HashSet},
+    sync::atomic::{AtomicBool, AtomicUsize, Ordering},
+    sync::OnceLock,
 };
 use vcsgraph::graph::Graph as VCSGraph;
 
 pub struct PySharedIndex {
     /// The underlying hg-core index
-    pub(crate) inner: &'static hg::index::Index,
+    pub(crate) inner: &'static Index,
 }
 
 /// Return a Struct implementing the Graph trait
 pub(crate) fn py_rust_index_to_graph(
     py: Python,
-    index: PyObject,
+    index_proxy: PyObject,
 ) -> PyResult<UnsafePyLeaked<PySharedIndex>> {
-    let midx = index.extract::<Index>(py)?;
-    let leaked = midx.index(py).leak_immutable();
+    let inner_revlog = index_proxy.getattr(py, "inner")?;
+    let inner_revlog = inner_revlog.extract::<InnerRevlog>(py)?;
+    let leaked = inner_revlog.inner(py).leak_immutable();
     // Safety: we don't leak the "faked" reference out of the `UnsafePyLeaked`
-    Ok(unsafe { leaked.map(py, |idx| PySharedIndex { inner: idx }) })
+    Ok(unsafe { leaked.map(py, |idx| PySharedIndex { inner: &idx.index }) })
 }
 
 impl Clone for PySharedIndex {
@@ -91,398 +108,6 @@
     }
 }
 
-py_class!(pub class Index |py| {
-    @shared data index: hg::index::Index;
-    data nt: RefCell<Option<CoreNodeTree>>;
-    data docket: RefCell<Option<PyObject>>;
-    // Holds a reference to the mmap'ed persistent nodemap data
-    data nodemap_mmap: RefCell<Option<PyBuffer>>;
-    // Holds a reference to the mmap'ed persistent index data
-    data index_mmap: RefCell<Option<PyBuffer>>;
-    data head_revs_py_list: RefCell<Option<PyList>>;
-    data head_node_ids_py_list: RefCell<Option<PyList>>;
-
-    def __new__(
-        _cls,
-        data: PyObject,
-        default_header: u32,
-    ) -> PyResult<Self> {
-        Self::new(py, data, default_header)
-    }
-
-    /// Compatibility layer used for Python consumers needing access to the C index
-    ///
-    /// Only use case so far is `scmutil.shortesthexnodeidprefix`,
-    /// that may need to build a custom `nodetree`, based on a specified revset.
-    /// With a Rust implementation of the nodemap, we will be able to get rid of
-    /// this, by exposing our own standalone nodemap class,
-    /// ready to accept `Index`.
-/*    def get_cindex(&self) -> PyResult<PyObject> {
-        Ok(self.cindex(py).borrow().inner().clone_ref(py))
-    }
-*/
-    // Index API involving nodemap, as defined in mercurial/pure/parsers.py
-
-    /// Return Revision if found, raises a bare `error.RevlogError`
-    /// in case of ambiguity, same as C version does
-    def get_rev(&self, node: PyBytes) -> PyResult<Option<PyRevision>> {
-        let opt = self.get_nodetree(py)?.borrow();
-        let nt = opt.as_ref().unwrap();
-        let ridx = &*self.index(py).borrow();
-        let node = node_from_py_bytes(py, &node)?;
-        let rust_rev =
-            nt.find_bin(ridx, node.into()).map_err(|e| nodemap_error(py, e))?;
-        Ok(rust_rev.map(Into::into))
-
-    }
-
-    /// same as `get_rev()` but raises a bare `error.RevlogError` if node
-    /// is not found.
-    ///
-    /// No need to repeat `node` in the exception, `mercurial/revlog.py`
-    /// will catch and rewrap with it
-    def rev(&self, node: PyBytes) -> PyResult<PyRevision> {
-        self.get_rev(py, node)?.ok_or_else(|| revlog_error(py))
-    }
-
-    /// return True if the node exist in the index
-    def has_node(&self, node: PyBytes) -> PyResult<bool> {
-        // TODO OPTIM we could avoid a needless conversion here,
-        // to do when scaffolding for pure Rust switch is removed,
-        // as `get_rev()` currently does the necessary assertions
-        self.get_rev(py, node).map(|opt| opt.is_some())
-    }
-
-    /// find length of shortest hex nodeid of a binary ID
-    def shortest(&self, node: PyBytes) -> PyResult<usize> {
-        let opt = self.get_nodetree(py)?.borrow();
-        let nt = opt.as_ref().unwrap();
-        let idx = &*self.index(py).borrow();
-        match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
-        {
-            Ok(Some(l)) => Ok(l),
-            Ok(None) => Err(revlog_error(py)),
-            Err(e) => Err(nodemap_error(py, e)),
-        }
-    }
-
-    def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> {
-        let opt = self.get_nodetree(py)?.borrow();
-        let nt = opt.as_ref().unwrap();
-        let idx = &*self.index(py).borrow();
-
-        let node_as_string = if cfg!(feature = "python3-sys") {
-            node.cast_as::<PyString>(py)?.to_string(py)?.to_string()
-        }
-        else {
-            let node = node.extract::<PyBytes>(py)?;
-            String::from_utf8_lossy(node.data(py)).to_string()
-        };
-
-        let prefix = NodePrefix::from_hex(&node_as_string)
-            .map_err(|_| PyErr::new::<ValueError, _>(
-                py, format!("Invalid node or prefix '{}'", node_as_string))
-            )?;
-
-        nt.find_bin(idx, prefix)
-            // TODO make an inner API returning the node directly
-            .map(|opt| opt.map(
-                |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes())))
-            .map_err(|e| nodemap_error(py, e))
-
-    }
-
-    /// append an index entry
-    def append(&self, tup: PyTuple) -> PyResult<PyObject> {
-        if tup.len(py) < 8 {
-            // this is better than the panic promised by tup.get_item()
-            return Err(
-                PyErr::new::<IndexError, _>(py, "tuple index out of range"))
-        }
-        let node_bytes = tup.get_item(py, 7).extract(py)?;
-        let node = node_from_py_object(py, &node_bytes)?;
-
-        let rev = self.len(py)? as BaseRevision;
-
-        // This is ok since we will just add the revision to the index
-        let rev = Revision(rev);
-        self.index(py)
-            .borrow_mut()
-            .append(py_tuple_to_revision_data_params(py, tup)?)
-            .unwrap();
-        let idx = &*self.index(py).borrow();
-        self.get_nodetree(py)?.borrow_mut().as_mut().unwrap()
-            .insert(idx, &node, rev)
-            .map_err(|e| nodemap_error(py, e))?;
-        Ok(py.None())
-    }
-
-    def __delitem__(&self, key: PyObject) -> PyResult<()> {
-        // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
-        let start = if let Ok(rev) = key.extract(py) {
-            UncheckedRevision(rev)
-        } else {
-            let start = key.getattr(py, "start")?;
-            UncheckedRevision(start.extract(py)?)
-        };
-        let start = self.index(py)
-            .borrow()
-            .check_revision(start)
-            .ok_or_else(|| {
-                nodemap_error(py, NodeMapError::RevisionNotInIndex(start))
-            })?;
-        self.index(py).borrow_mut().remove(start).unwrap();
-        let mut opt = self.get_nodetree(py)?.borrow_mut();
-        let nt = opt.as_mut().unwrap();
-        nt.invalidate_all();
-        self.fill_nodemap(py, nt)?;
-        Ok(())
-    }
-
-    //
-    // Index methods previously reforwarded to C index (tp_methods)
-    // Same ordering as in revlog.c
-    //
-
-    /// return the gca set of the given revs
-    def ancestors(&self, *args, **_kw) -> PyResult<PyObject> {
-        let rust_res = self.inner_ancestors(py, args)?;
-        Ok(rust_res)
-    }
-
-    /// return the heads of the common ancestors of the given revs
-    def commonancestorsheads(&self, *args, **_kw) -> PyResult<PyObject> {
-        let rust_res = self.inner_commonancestorsheads(py, args)?;
-        Ok(rust_res)
-    }
-
-    /// Clear the index caches and inner py_class data.
-    /// It is Python's responsibility to call `update_nodemap_data` again.
-    def clearcaches(&self) -> PyResult<PyObject> {
-        self.nt(py).borrow_mut().take();
-        self.docket(py).borrow_mut().take();
-        self.nodemap_mmap(py).borrow_mut().take();
-        self.head_revs_py_list(py).borrow_mut().take();
-        self.head_node_ids_py_list(py).borrow_mut().take();
-        self.index(py).borrow().clear_caches();
-        Ok(py.None())
-    }
-
-    /// return the raw binary string representing a revision
-    def entry_binary(&self, *args, **_kw) -> PyResult<PyObject> {
-        let rindex = self.index(py).borrow();
-        let rev = UncheckedRevision(args.get_item(py, 0).extract(py)?);
-        let rust_bytes = rindex.check_revision(rev).and_then(
-            |r| rindex.entry_binary(r))
-            .ok_or_else(|| rev_not_in_index(py, rev))?;
-        let rust_res = PyBytes::new(py, rust_bytes).into_object();
-        Ok(rust_res)
-    }
-
-    /// return a binary packed version of the header
-    def pack_header(&self, *args, **_kw) -> PyResult<PyObject> {
-        let rindex = self.index(py).borrow();
-        let packed = rindex.pack_header(args.get_item(py, 0).extract(py)?);
-        let rust_res = PyBytes::new(py, &packed).into_object();
-        Ok(rust_res)
-    }
-
-    /// compute phases
-    def computephasesmapsets(&self, *args, **_kw) -> PyResult<PyObject> {
-        let py_roots = args.get_item(py, 0).extract::<PyDict>(py)?;
-        let rust_res = self.inner_computephasesmapsets(py, py_roots)?;
-        Ok(rust_res)
-    }
-
-    /// reachableroots
-    def reachableroots2(&self, *args, **_kw) -> PyResult<PyObject> {
-        let rust_res = self.inner_reachableroots2(
-            py,
-            UncheckedRevision(args.get_item(py, 0).extract(py)?),
-            args.get_item(py, 1),
-            args.get_item(py, 2),
-            args.get_item(py, 3).extract(py)?,
-        )?;
-        Ok(rust_res)
-    }
-
-    /// get head revisions
-    def headrevs(&self, *args, **_kw) -> PyResult<PyObject> {
-        let (filtered_revs, stop_rev) = match &args.len(py) {
-             0 => Ok((py.None(), py.None())),
-             1 => Ok((args.get_item(py, 0), py.None())),
-             2 => Ok((args.get_item(py, 0), args.get_item(py, 1))),
-             _ => Err(PyErr::new::<cpython::exc::TypeError, _>(py, "too many arguments")),
-        }?;
-        self.inner_headrevs(py, &filtered_revs, &stop_rev)
-    }
-
-    /// get head nodeids
-    def head_node_ids(&self) -> PyResult<PyObject> {
-        let rust_res = self.inner_head_node_ids(py)?;
-        Ok(rust_res)
-    }
-
-    /// get diff in head revisions
-    def headrevsdiff(&self, *args, **_kw) -> PyResult<PyObject> {
-        let rust_res = self.inner_headrevsdiff(
-          py,
-          &args.get_item(py, 0),
-          &args.get_item(py, 1))?;
-        Ok(rust_res)
-    }
-
-    /// True if the object is a snapshot
-    def issnapshot(&self, *args, **_kw) -> PyResult<bool> {
-        let index = self.index(py).borrow();
-        let result = index
-            .is_snapshot(UncheckedRevision(args.get_item(py, 0).extract(py)?))
-            .map_err(|e| {
-                PyErr::new::<cpython::exc::ValueError, _>(py, e.to_string())
-            })?;
-        Ok(result)
-    }
-
-    /// Gather snapshot data in a cache dict
-    def findsnapshots(&self, *args, **_kw) -> PyResult<PyObject> {
-        let index = self.index(py).borrow();
-        let cache: PyDict = args.get_item(py, 0).extract(py)?;
-        // this methods operates by setting new values in the cache,
-        // hence we will compare results by letting the C implementation
-        // operate over a deepcopy of the cache, and finally compare both
-        // caches.
-        let c_cache = PyDict::new(py);
-        for (k, v) in cache.items(py) {
-            c_cache.set_item(py, k, PySet::new(py, v)?)?;
-        }
-
-        let start_rev = UncheckedRevision(args.get_item(py, 1).extract(py)?);
-        let end_rev = UncheckedRevision(args.get_item(py, 2).extract(py)?);
-        let mut cache_wrapper = PySnapshotsCache{ py, dict: cache };
-        index.find_snapshots(
-            start_rev,
-            end_rev,
-            &mut cache_wrapper,
-        ).map_err(|_| revlog_error(py))?;
-        Ok(py.None())
-    }
-
-    /// determine revisions with deltas to reconstruct fulltext
-    def deltachain(&self, *args, **_kw) -> PyResult<PyObject> {
-        let index = self.index(py).borrow();
-        let rev = args.get_item(py, 0).extract::<BaseRevision>(py)?.into();
-        let stop_rev =
-            args.get_item(py, 1).extract::<Option<BaseRevision>>(py)?;
-        let rev = index.check_revision(rev).ok_or_else(|| {
-            nodemap_error(py, NodeMapError::RevisionNotInIndex(rev))
-        })?;
-        let stop_rev = if let Some(stop_rev) = stop_rev {
-            let stop_rev = UncheckedRevision(stop_rev);
-            Some(index.check_revision(stop_rev).ok_or_else(|| {
-                nodemap_error(py, NodeMapError::RevisionNotInIndex(stop_rev))
-            })?)
-        } else {None};
-        let using_general_delta = args.get_item(py, 2)
-            .extract::<Option<u32>>(py)?
-            .map(|i| i != 0);
-        let (chain, stopped) = index.delta_chain(
-            rev, stop_rev, using_general_delta
-        ).map_err(|e| {
-            PyErr::new::<cpython::exc::ValueError, _>(py, e.to_string())
-        })?;
-
-        let chain: Vec<_> = chain.into_iter().map(|r| r.0).collect();
-        Ok(
-            PyTuple::new(
-                py,
-                &[
-                    chain.into_py_object(py).into_object(),
-                    stopped.into_py_object(py).into_object()
-                ]
-            ).into_object()
-        )
-
-    }
-
-    /// slice planned chunk read to reach a density threshold
-    def slicechunktodensity(&self, *args, **_kw) -> PyResult<PyObject> {
-        let rust_res = self.inner_slicechunktodensity(
-            py,
-            args.get_item(py, 0),
-            args.get_item(py, 1).extract(py)?,
-            args.get_item(py, 2).extract(py)?
-        )?;
-        Ok(rust_res)
-    }
-
-    // index_sequence_methods and index_mapping_methods.
-    //
-    // Since we call back through the high level Python API,
-    // there's no point making a distinction between index_get
-    // and index_getitem.
-    // gracinet 2023: this above is no longer true for the pure Rust impl
-
-    def __len__(&self) -> PyResult<usize> {
-        self.len(py)
-    }
-
-    def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
-        let rust_res = self.inner_getitem(py, key.clone_ref(py))?;
-        Ok(rust_res)
-    }
-
-    def __contains__(&self, item: PyObject) -> PyResult<bool> {
-        // ObjectProtocol does not seem to provide contains(), so
-        // this is an equivalent implementation of the index_contains()
-        // defined in revlog.c
-        match item.extract::<i32>(py) {
-            Ok(rev) => {
-                Ok(rev >= -1 && rev < self.len(py)? as BaseRevision)
-            }
-            Err(_) => {
-                let item_bytes: PyBytes = item.extract(py)?;
-                let rust_res = self.has_node(py, item_bytes)?;
-                Ok(rust_res)
-            }
-        }
-    }
-
-    def nodemap_data_all(&self) -> PyResult<PyBytes> {
-        self.inner_nodemap_data_all(py)
-    }
-
-    def nodemap_data_incremental(&self) -> PyResult<PyObject> {
-        self.inner_nodemap_data_incremental(py)
-    }
-    def update_nodemap_data(
-        &self,
-        docket: PyObject,
-        nm_data: PyObject
-    ) -> PyResult<PyObject> {
-        self.inner_update_nodemap_data(py, docket, nm_data)
-    }
-
-    @property
-    def entry_size(&self) -> PyResult<PyInt> {
-        let rust_res: PyInt = INDEX_ENTRY_SIZE.to_py_object(py);
-        Ok(rust_res)
-    }
-
-    @property
-    def rust_ext_compat(&self) -> PyResult<PyInt> {
-        // will be entirely removed when the Rust index yet useful to
-        // implement in Rust to detangle things when removing `self.cindex`
-        let rust_res: PyInt = 1.to_py_object(py);
-        Ok(rust_res)
-    }
-
-    @property
-    def is_rust(&self) -> PyResult<PyBool> {
-        Ok(false.to_py_object(py))
-    }
-
-});
-
 /// Take a (potentially) mmap'ed buffer, and return the underlying Python
 /// buffer along with the Rust slice into said buffer. We need to keep the
 /// Python buffer around, otherwise we'd get a dangling pointer once the buffer
@@ -513,8 +138,7 @@
     } else {
         return Err(PyErr::new::<ValueError, _>(
             py,
-            "Nodemap data buffer has an invalid memory representation"
-                .to_string(),
+            "buffer has an invalid memory representation".to_string(),
         ));
     };
 
@@ -538,7 +162,7 @@
         .extract::<PyBytes>(py)?
         .data(py)
         .try_into()
-        .unwrap();
+        .expect("nodeid should be set");
     let flags = (offset_or_flags & 0xFFFF) as u16;
     let data_offset = offset_or_flags >> 16;
     Ok(RevisionDataParams {
@@ -622,36 +246,1252 @@
     }
 }
 
-impl Index {
-    fn new(py: Python, data: PyObject, header: u32) -> PyResult<Self> {
-        // Safety: we keep the buffer around inside the class as `index_mmap`
-        let (buf, bytes) = unsafe { mmap_keeparound(py, data)? };
+// Rust does not support generic `static` items, so we cannot write a single
+// cache generic over the config type. Instead, manually generate all three
+// caches and use them in `with_filelog_cache`.
+static DELTA_CONFIG_CACHE: OnceLock<(PyObject, RevlogDeltaConfig)> =
+    OnceLock::new();
+static DATA_CONFIG_CACHE: OnceLock<(PyObject, RevlogDataConfig)> =
+    OnceLock::new();
+static FEATURE_CONFIG_CACHE: OnceLock<(PyObject, RevlogFeatureConfig)> =
+    OnceLock::new();
+
+/// Cache the first conversion from Python -> Rust config for all filelogs to
+/// save on conversion time when called in a loop.
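+///
+/// Only filelog configs are cached: other revlog types (changelog, manifest)
+/// are opened rarely enough that the conversion cost does not matter.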
+fn with_filelog_cache<T: Copy>(
+    py: Python,
+    py_config: &PyObject,
+    revlog_type: RevlogType,
+    cache: &OnceLock<(PyObject, T)>,
+    callback: impl Fn() -> PyResult<T>,
+) -> PyResult<T> {
+    let mut was_cached = false;
+    if revlog_type == RevlogType::Filelog {
+        if let Some((cached_py_config, rust_config)) = cache.get() {
+            was_cached = true;
+            // All filelogs in a given repository *most likely* have the
+            // exact same config, but it's not impossible that some extensions
+            // do some magic with configs or that this code will be used
+            // for longer-running processes. So compare the source `PyObject`
+            // in case the source changed, at the cost of some overhead.
+            // We can't use `py_config.eq(cached_py_config)` because it
+            // compares object identity, and each config is a distinct
+            // Python object, so `a is b` would always be false.
+            if py_config.compare(py, cached_py_config)?.is_eq() {
+                return Ok(*rust_config);
+            }
+        }
+    }
+    let config = callback()?;
+    // Do not call `set` on the lock needlessly if the value is already cached.
+    if !was_cached && revlog_type == RevlogType::Filelog {
+        cache.set((py_config.clone_ref(py), config)).ok();
+    }
+    Ok(config)
+}
+
+fn extract_delta_config(
+    py: Python,
+    py_config: PyObject,
+    revlog_type: RevlogType,
+) -> PyResult<RevlogDeltaConfig> {
+    let get_delta_config = || {
+        let max_deltachain_span = py_config
+            .getattr(py, "max_deltachain_span")?
+            .extract::<i64>(py)?;
+
+        let revlog_delta_config = RevlogDeltaConfig {
+            general_delta: py_config
+                .getattr(py, "general_delta")?
+                .extract(py)?,
+            sparse_revlog: py_config
+                .getattr(py, "sparse_revlog")?
+                .extract(py)?,
+            max_chain_len: py_config
+                .getattr(py, "max_chain_len")?
+                .extract(py)?,
+            max_deltachain_span: if max_deltachain_span < 0 {
+                None
+            } else {
+                Some(max_deltachain_span as u64)
+            },
+            upper_bound_comp: py_config
+                .getattr(py, "upper_bound_comp")?
+                .extract(py)?,
+            delta_both_parents: py_config
+                .getattr(py, "delta_both_parents")?
+                .extract(py)?,
+            candidate_group_chunk_size: py_config
+                .getattr(py, "candidate_group_chunk_size")?
+                .extract(py)?,
+            debug_delta: py_config.getattr(py, "debug_delta")?.extract(py)?,
+            lazy_delta: py_config.getattr(py, "lazy_delta")?.extract(py)?,
+            lazy_delta_base: py_config
+                .getattr(py, "lazy_delta_base")?
+                .extract(py)?,
+        };
+        Ok(revlog_delta_config)
+    };
+    with_filelog_cache(
+        py,
+        &py_config,
+        revlog_type,
+        &DELTA_CONFIG_CACHE,
+        get_delta_config,
+    )
+}
+
+fn extract_data_config(
+    py: Python,
+    py_config: PyObject,
+    revlog_type: RevlogType,
+) -> PyResult<RevlogDataConfig> {
+    let get_data_config = || {
+        Ok(RevlogDataConfig {
+            try_pending: py_config.getattr(py, "try_pending")?.extract(py)?,
+            try_split: py_config.getattr(py, "try_split")?.extract(py)?,
+            check_ambig: py_config.getattr(py, "check_ambig")?.extract(py)?,
+            mmap_large_index: py_config
+                .getattr(py, "mmap_large_index")?
+                .extract(py)?,
+            mmap_index_threshold: py_config
+                .getattr(py, "mmap_index_threshold")?
+                .extract(py)?,
+            chunk_cache_size: py_config
+                .getattr(py, "chunk_cache_size")?
+                .extract(py)?,
+            uncompressed_cache_factor: py_config
+                .getattr(py, "uncompressed_cache_factor")?
+                .extract(py)?,
+            uncompressed_cache_count: py_config
+                .getattr(py, "uncompressed_cache_count")?
+                .extract(py)?,
+            with_sparse_read: py_config
+                .getattr(py, "with_sparse_read")?
+                .extract(py)?,
+            sr_density_threshold: py_config
+                .getattr(py, "sr_density_threshold")?
+                .extract(py)?,
+            sr_min_gap_size: py_config
+                .getattr(py, "sr_min_gap_size")?
+                .extract(py)?,
+            general_delta: py_config
+                .getattr(py, "generaldelta")?
+                .extract(py)?,
+        })
+    };
+
+    with_filelog_cache(
+        py,
+        &py_config,
+        revlog_type,
+        &DATA_CONFIG_CACHE,
+        get_data_config,
+    )
+}
+
+fn extract_feature_config(
+    py: Python,
+    py_config: PyObject,
+    revlog_type: RevlogType,
+) -> PyResult<RevlogFeatureConfig> {
+    let get_feature_config = || {
+        let engine_bytes = &py_config
+            .getattr(py, "compression_engine")?
+            .extract::<PyBytes>(py)?;
+        let compression_engine = engine_bytes.data(py);
+        let compression_engine = match compression_engine {
+            b"zlib" => {
+                let compression_options = &py_config
+                    .getattr(py, "compression_engine_options")?
+                    .extract::<PyDict>(py)?;
+                let zlib_level = compression_options
+                    .get_item(py, PyBytes::new(py, &b"zlib.level"[..]));
+                let level = if let Some(level) = zlib_level {
+                    if level.is_none(py) {
+                        None
+                    } else {
+                        Some(level.extract(py)?)
+                    }
+                } else {
+                    None
+                };
+                let mut engine = CompressionConfig::default();
+                if let Some(level) = level {
+                    engine
+                        .set_level(level)
+                        .expect("invalid compression level from Python");
+                }
+                engine
+            }
+            b"zstd" => {
+                let compression_options = &py_config
+                    .getattr(py, "compression_engine_options")?
+                    .extract::<PyDict>(py)?;
+                let zstd_level = compression_options
+                    .get_item(py, PyBytes::new(py, &b"zstd.level"[..]));
+                let level = if let Some(level) = zstd_level {
+                    if level.is_none(py) {
+                        None
+                    } else {
+                        Some(level.extract(py)?)
+                    }
+                } else {
+                    let level = compression_options
+                        .get_item(py, PyBytes::new(py, &b"level"[..]));
+                    if let Some(level) = level {
+                        if level.is_none(py) {
+                            None
+                        } else {
+                            Some(level.extract(py)?)
+                        }
+                    } else {
+                        None
+                    }
+                };
+                CompressionConfig::zstd(level)
+                    .expect("invalid compression level from Python")
+            }
+            b"none" => CompressionConfig::None,
+            e => {
+                return Err(PyErr::new::<ValueError, _>(
+                    py,
+                    format!(
+                        "invalid compression engine {}",
+                        String::from_utf8_lossy(e)
+                    ),
+                ))
+            }
+        };
+        let revlog_feature_config = RevlogFeatureConfig {
+            compression_engine,
+            censorable: py_config.getattr(py, "censorable")?.extract(py)?,
+            has_side_data: py_config
+                .getattr(py, "has_side_data")?
+                .extract(py)?,
+            compute_rank: py_config
+                .getattr(py, "compute_rank")?
+                .extract(py)?,
+            canonical_parent_order: py_config
+                .getattr(py, "canonical_parent_order")?
+                .extract(py)?,
+            enable_ellipsis: py_config
+                .getattr(py, "enable_ellipsis")?
+                .extract(py)?,
+        };
+        Ok(revlog_feature_config)
+    };
+    with_filelog_cache(
+        py,
+        &py_config,
+        revlog_type,
+        &FEATURE_CONFIG_CACHE,
+        get_feature_config,
+    )
+}
+
+fn revlog_error_from_msg(py: Python, e: impl ToString) -> PyErr {
+    let msg = e.to_string();
 
-        Self::create_instance(
+    match py
+        .import("mercurial.error")
+        .and_then(|m| m.get(py, "RevlogError"))
+    {
+        Err(e) => e,
+        Ok(cls) => {
+            let msg = PyBytes::new(py, msg.as_bytes());
+            PyErr::from_instance(
+                py,
+                cls.call(py, (msg,), None).ok().into_py_object(py),
+            )
+        }
+    }
+}
+
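+/// Reading context manager, returned by `InnerRevlog::reading` below and
+/// entered from Python as a `with` block, keeping the revlog files open
+/// across multiple reads.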
+py_class!(pub class ReadingContextManager |py| {
+    data inner_revlog: RefCell<InnerRevlog>;
+
+    def __enter__(&self) -> PyResult<PyObject> {
+        let res = self.inner_revlog(py)
+            .borrow()
+            .inner(py)
+            .borrow()
+            .enter_reading_context()
+            .map_err(|e| revlog_error_from_msg(py, e));
+        if let Err(e) = res {
+            // `__exit__` is not called from Python if `__enter__` fails
+            self.inner_revlog(py)
+                .borrow()
+                .inner(py)
+                .borrow()
+                .exit_reading_context();
+            return Err(e)
+        }
+        Ok(py.None())
+    }
+
+    def __exit__(
+        &self,
+        ty: Option<PyType>,
+        value: PyObject,
+        traceback: PyObject
+    ) -> PyResult<PyObject> {
+        // unused arguments, keep clippy from complaining without adding
+        // a general rule
+        let _ = ty;
+        let _ = value;
+        let _ = traceback;
+
+        self.inner_revlog(py)
+            .borrow()
+            .inner(py)
+            .borrow()
+            .exit_reading_context();
+        Ok(py.None())
+    }
+});
+
+// Only used from Python *tests*
+py_class!(pub class PyFileHandle |py| {
+    data inner_file: RefCell<std::os::fd::RawFd>;
+
+    def tell(&self) -> PyResult<PyObject> {
+        let locals = PyDict::new(py);
+        locals.set_item(py, "os", py.import("os")?)?;
+        locals.set_item(py, "fd", *self.inner_file(py).borrow())?;
+        let f = py.eval("os.fdopen(fd)", None, Some(&locals))?;
+
+        // Prevent Python from closing the file after garbage collecting.
+        // This is fine since Rust is still holding on to the actual File.
+        // (and also because it's only used in tests).
+        std::mem::forget(f.clone_ref(py));
+
+        locals.set_item(py, "f", f)?;
+        let res = py.eval("f.tell()", None, Some(&locals))?;
+        Ok(res)
+    }
+});
+
+/// Wrapper around a Python transaction object, to keep `hg-core` oblivious
+/// of the fact it's being called from Python.
+pub struct PyTransaction {
+    inner: PyObject,
+}
+
+impl PyTransaction {
+    pub fn new(inner: PyObject) -> Self {
+        Self { inner }
+    }
+}
+
+impl Clone for PyTransaction {
+    fn clone(&self) -> Self {
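+        // `clone_ref` only bumps the Python refcount, but doing even that
+        // requires holding the GIL.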
+        let gil = &Python::acquire_gil();
+        let py = gil.python();
+        Self {
+            inner: self.inner.clone_ref(py),
+        }
+    }
+}
+
+impl Transaction for PyTransaction {
+    fn add(&mut self, file: impl AsRef<std::path::Path>, offset: usize) {
+        let gil = &Python::acquire_gil();
+        let py = gil.python();
+        let file = PyBytes::new(py, &get_bytes_from_path(file.as_ref()));
+        self.inner
+            .call_method(py, "add", (file, offset), None)
+            .expect("transaction add failed");
+    }
+}
+
+py_class!(pub class WritingContextManager |py| {
+    data inner_revlog: RefCell<InnerRevlog>;
+    data transaction: RefCell<PyTransaction>;
+    data data_end: Cell<Option<usize>>;
+
+    def __enter__(&self) -> PyResult<PyObject> {
+        let res = self.inner_revlog(py)
+            .borrow_mut()
+            .inner(py)
+            .borrow_mut()
+            .enter_writing_context(
+                self.data_end(py).get(),
+                &mut *self.transaction(py).borrow_mut()
+            ).map_err(|e| revlog_error_from_msg(py, e));
+        if let Err(e) = res {
+            // `__exit__` is not called from Python if `__enter__` fails
+            self.inner_revlog(py)
+                .borrow_mut()
+                .inner(py)
+                .borrow_mut()
+                .exit_writing_context();
+            return Err(e)
+        }
+        Ok(py.None())
+    }
+
+    def __exit__(
+        &self,
+        ty: Option<PyType>,
+        value: PyObject,
+        traceback: PyObject
+    ) -> PyResult<PyObject> {
+        // unused arguments, keep clippy from complaining without adding
+        // a general rule
+        let _ = ty;
+        let _ = value;
+        let _ = traceback;
+
+        self.inner_revlog(py)
+            .borrow_mut()
+            .inner(py)
+            .borrow_mut()
+            .exit_writing_context();
+        Ok(py.None())
+    }
+});
+
+struct PyFnCache {
+    fncache: PyObject,
+}
+impl PyFnCache {
+    fn new(fncache: PyObject) -> Self {
+        Self { fncache }
+    }
+}
+
+impl Clone for PyFnCache {
+    fn clone(&self) -> Self {
+        let gil = Python::acquire_gil();
+        let py = gil.python();
+        Self {
+            fncache: self.fncache.clone_ref(py),
+        }
+    }
+}
+
+/// Cache whether the fncache is loaded to avoid a Python round-trip every
+/// time. Once the fncache is loaded, it stays loaded; the only exception
+/// would be a very long-running process, which we don't really support yet.
+static FN_CACHE_IS_LOADED: AtomicBool = AtomicBool::new(false);
+
+impl FnCache for PyFnCache {
+    fn is_loaded(&self) -> bool {
+        if FN_CACHE_IS_LOADED.load(Ordering::Relaxed) {
+            return true;
+        }
+        let gil = Python::acquire_gil();
+        let py = gil.python();
+        // TODO raise in case of error?
+        let is_loaded = self
+            .fncache
+            .getattr(py, "is_loaded")
+            .ok()
+            .map(|o| {
+                o.extract::<bool>(py)
+                    .expect("is_loaded returned something other than a bool")
+            })
+            .unwrap_or(false);
+        if is_loaded {
+            FN_CACHE_IS_LOADED.store(true, Ordering::Relaxed);
+        }
+        is_loaded
+    }
+    fn add(&self, path: &std::path::Path) {
+        let gil = Python::acquire_gil();
+        let py = gil.python();
+        // TODO raise in case of error?
+        self.fncache
+            .call_method(
+                py,
+                "add",
+                (PyBytes::new(py, &get_bytes_from_path(path)),),
+                None,
+            )
+            .ok();
+    }
+}
+
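+/// Rust counterpart of the Python-level `_InnerRevlog` class: owns the
+/// core inner revlog (hence the index and its mmap), the nodetree and
+/// various caches.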
+py_class!(pub class InnerRevlog |py| {
+    @shared data inner: CoreInnerRevlog;
+    data nt: RefCell<Option<CoreNodeTree>>;
+    data docket: RefCell<Option<PyObject>>;
+    // Holds a reference to the mmap'ed persistent nodemap data
+    data nodemap_mmap: RefCell<Option<PyBuffer>>;
+    // Holds a reference to the mmap'ed persistent index data
+    data index_mmap: RefCell<PyBuffer>;
+    data head_revs_py_list: RefCell<Option<PyList>>;
+    data head_node_ids_py_list: RefCell<Option<PyList>>;
+    data revision_cache: RefCell<Option<PyObject>>;
+    data use_persistent_nodemap: bool;
+    data nodemap_queries: AtomicUsize;
+
+    def __new__(
+        _cls,
+        vfs_base: PyObject,
+        fncache: PyObject,
+        vfs_is_readonly: bool,
+        index_data: PyObject,
+        index_file: PyObject,
+        data_file: PyObject,
+        sidedata_file: PyObject,
+        inline: bool,
+        data_config: PyObject,
+        delta_config: PyObject,
+        feature_config: PyObject,
+        chunk_cache: PyObject,
+        default_compression_header: PyObject,
+        revlog_type: usize,
+        use_persistent_nodemap: bool,
+    ) -> PyResult<Self> {
+        Self::inner_new(
             py,
-            hg::index::Index::new(
-                bytes,
-                IndexHeader::parse(&header.to_be_bytes())
-                    .expect("default header is broken")
-                    .unwrap(),
-            )
-            .map_err(|e| {
-                revlog_error_with_msg(py, e.to_string().as_bytes())
-            })?,
-            RefCell::new(None),
-            RefCell::new(None),
-            RefCell::new(None),
-            RefCell::new(Some(buf)),
-            RefCell::new(None),
-            RefCell::new(None),
+            vfs_base,
+            fncache,
+            vfs_is_readonly,
+            index_data,
+            index_file,
+            data_file,
+            sidedata_file,
+            inline,
+            data_config,
+            delta_config,
+            feature_config,
+            chunk_cache,
+            default_compression_header,
+            revlog_type,
+            use_persistent_nodemap
         )
     }
 
+    def clear_cache(&self) -> PyResult<PyObject> {
+        assert!(!self.is_delaying(py)?);
+        self.revision_cache(py).borrow_mut().take();
+        self.inner(py).borrow_mut().clear_cache();
+        self.nodemap_queries(py).store(0, Ordering::Relaxed);
+        Ok(py.None())
+    }
+
+    @property def canonical_index_file(&self) -> PyResult<PyBytes> {
+        let path = self.inner(py).borrow().canonical_index_file();
+        Ok(PyBytes::new(py, &get_bytes_from_path(path)))
+    }
+
+    @property def is_delaying(&self) -> PyResult<bool> {
+        Ok(self.inner(py).borrow().is_delaying())
+    }
+
+    @property def _revisioncache(&self) -> PyResult<PyObject> {
+        let cache = &*self.revision_cache(py).borrow();
+        match cache {
+            None => Ok(py.None()),
+            Some(cache) => {
+                Ok(cache.clone_ref(py))
+            }
+        }
+
+    }
+
+    @property def _writinghandles(&self) -> PyResult<PyObject> {
+        use std::os::fd::AsRawFd;
+
+        let inner = self.inner(py).borrow();
+        let handles = inner.python_writing_handles();
+
+        match handles.as_ref() {
+            None => Ok(py.None()),
+            Some(handles) => {
+                let d_handle = if let Some(d_handle) = &handles.data_handle {
+                    let handle = RefCell::new(d_handle.file.as_raw_fd());
+                    Some(PyFileHandle::create_instance(py, handle)?)
+                } else {
+                    None
+                };
+                let handle =
+                    RefCell::new(handles.index_handle.file.as_raw_fd());
+                Ok(
+                    (
+                        PyFileHandle::create_instance(py, handle)?,
+                        d_handle,
+                        py.None(),  // Sidedata handle
+
+                    ).to_py_object(py).into_object()
+                )
+            }
+        }
+
+    }
+
+    @_revisioncache.setter def set_revision_cache(
+        &self,
+        value: Option<PyObject>
+    ) -> PyResult<()> {
+        *self.revision_cache(py).borrow_mut() = value.clone_ref(py);
+        match value {
+            None => {
+                // This means the property has been deleted, *not* that the
+                // property has been set to `None`. Whatever happens is up
+                // to the implementation. Here we just set it to `None`.
+                self
+                    .inner(py)
+                    .borrow()
+                    .last_revision_cache
+                    .lock()
+                    .expect("lock should not be held")
+                    .take();
+            },
+            Some(tuple) => {
+                if tuple.is_none(py) {
+                    self
+                        .inner(py)
+                        .borrow()
+                        .last_revision_cache
+                        .lock()
+                        .expect("lock should not be held")
+                        .take();
+                    return Ok(())
+                }
+                let node = tuple.get_item(py, 0)?.extract::<PyBytes>(py)?;
+                let node = node_from_py_bytes(py, &node)?;
+                let rev = tuple.get_item(py, 1)?.extract::<BaseRevision>(py)?;
+                // Ok because Python only sets this if the revision has been
+                // checked
+                let rev = Revision(rev);
+                let data = tuple.get_item(py, 2)?.extract::<PyBytes>(py)?;
+                let inner = self.inner(py).borrow();
+                let mut last_revision_cache = inner
+                    .last_revision_cache
+                    .lock()
+                    .expect("lock should not be held");
+                *last_revision_cache =
+                    Some((node, rev, Box::new(PyBytesDeref::new(py, data))));
+            }
+        }
+        Ok(())
+    }
+
+    @property def inline(&self) -> PyResult<bool> {
+        Ok(self.inner(py).borrow().is_inline())
+    }
+
+    @inline.setter def set_inline(
+        &self,
+        value: Option<PyObject>
+    ) -> PyResult<()> {
+        if let Some(v) = value {
+            self.inner(py).borrow_mut().inline = v.extract(py)?;
+        };
+        Ok(())
+    }
+
+    @property def index_file(&self) -> PyResult<PyBytes> {
+        Ok(
+            PyBytes::new(
+                py,
+                &get_bytes_from_path(&self.inner(py).borrow().index_file)
+            )
+        )
+    }
+
+    @index_file.setter def set_index_file(
+        &self,
+        value: Option<PyObject>
+    ) -> PyResult<()> {
+        let path = get_path_from_bytes(
+            value
+                .expect("don't delete the index path")
+                .extract::<PyBytes>(py)?
+                .data(py)
+        ).to_owned();
+        self.inner(py).borrow_mut().index_file = path;
+        Ok(())
+    }
+
+    @property def is_writing(&self) -> PyResult<bool> {
+        Ok(self.inner(py).borrow().is_writing())
+    }
+
+    @property def is_open(&self) -> PyResult<bool> {
+        Ok(self.inner(py).borrow().is_open())
+    }
+
+    def issnapshot(&self, rev: PyRevision) -> PyResult<bool> {
+        self.inner_issnapshot(py, UncheckedRevision(rev.0))
+    }
+
+    def _deltachain(&self, *args, **kw) -> PyResult<PyObject> {
+        let inner = self.inner(py).borrow();
+        let general_delta = inner.index.uses_generaldelta();
+        let args = PyTuple::new(
+            py,
+            &[
+                args.get_item(py, 0),
+                kw.and_then(|d| d.get_item(py, "stoprev")).to_py_object(py),
+                general_delta.to_py_object(py).into_object(),
+            ]
+        );
+        self._index_deltachain(py, &args, kw)
+    }
+
+    def compress(&self, data: PyObject) -> PyResult<PyTuple> {
+        let inner = self.inner(py).borrow();
+        let py_buffer = PyBuffer::get(py, &data)?;
+        let deref = PyBufferDeref::new(py, py_buffer)?;
+        let compressed = inner.compress(&deref)
+        .map_err(|e| revlog_error_from_msg(py, e))?;
+        let compressed = compressed.as_deref();
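+        // Revlog chunk convention: an empty header means `compressed`
+        // already starts with its own compression marker, while `b"u"`
+        // flags the data as stored uncompressed.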
+        let header = if compressed.is_some() {
+            PyBytes::new(py, &b""[..])
+        } else {
+            PyBytes::new(py, &b"u"[..])
+        };
+        Ok(
+            (
+                header,
+                PyBytes::new(py, compressed.unwrap_or(&deref))
+            ).to_py_object(py)
+        )
+    }
+
+    def reading(&self) -> PyResult<ReadingContextManager> {
+        ReadingContextManager::create_instance(
+            py,
+            RefCell::new(self.clone_ref(py)),
+        )
+    }
+
+    def writing(
+        &self,
+        transaction: PyObject,
+        data_end: Option<usize>,
+        sidedata_end: Option<usize>,
+    ) -> PyResult<WritingContextManager> {
+        // Silence unused argument (only relevant for changelog v2)
+        let _ = sidedata_end;
+        WritingContextManager::create_instance(
+            py,
+            RefCell::new(self.clone_ref(py)),
+            RefCell::new(PyTransaction::new(transaction)),
+            Cell::new(data_end)
+        )
+    }
+
+    def split_inline(
+        &self,
+        _tr: PyObject,
+        header: i32,
+        new_index_file_path: Option<PyObject>
+    ) -> PyResult<PyBytes> {
+        let mut inner = self.inner(py).borrow_mut();
+        let new_index_file_path = match new_index_file_path {
+            Some(path) => {
+                let path = path.extract::<PyBytes>(py)?;
+                Some(get_path_from_bytes(path.data(py)).to_owned())
+            },
+            None => None,
+        };
+        let header = IndexHeader::parse(&header.to_be_bytes());
+        let header = header.expect("invalid header bytes");
+        let path = inner
+            .split_inline(header, new_index_file_path)
+            .map_err(|e| revlog_error_from_msg(py, e))?;
+        Ok(PyBytes::new(py, &get_bytes_from_path(path)))
+    }
+
+    def get_segment_for_revs(
+        &self,
+        startrev: PyRevision,
+        endrev: PyRevision,
+    ) -> PyResult<PyTuple> {
+        let inner = self.inner(py).borrow();
+        let (offset, data) = inner
+            .get_segment_for_revs(Revision(startrev.0), Revision(endrev.0))
+            .map_err(|e| revlog_error_from_msg(py, e))?;
+        let data = PyBytes::new(py, &data);
+        Ok((offset, data).to_py_object(py))
+    }
+
+    def raw_text(
+        &self,
+        _node: PyObject,
+        rev: PyRevision
+    ) -> PyResult<PyBytes> {
+        let inner = self.inner(py).borrow();
+        let mut py_bytes = PyBytes::new(py, &[]);
+        inner
+            .raw_text(Revision(rev.0), |size, f| {
+                py_bytes = with_pybytes_buffer(py, size, f)?;
+                Ok(())
+            }).map_err(|e| revlog_error_from_msg(py, e))?;
+        Ok(py_bytes)
+    }
+
+    def _chunk(
+        &self,
+        rev: PyRevision,
+    ) -> PyResult<PyBytes> {
+        let inner = self.inner(py).borrow();
+        let chunk = inner
+            .chunk_for_rev(Revision(rev.0))
+            .map_err(|e| revlog_error_from_msg(py, e))?;
+        let chunk = PyBytes::new(py, &chunk);
+        Ok(chunk)
+    }
+
+    def write_entry(
+        &self,
+        transaction: PyObject,
+        entry: PyObject,
+        data: PyTuple,
+        _link: PyObject,
+        offset: usize,
+        _sidedata: PyObject,
+        _sidedata_offset: PyInt,
+        index_end: Option<u64>,
+        data_end: Option<u64>,
+        _sidedata_end: Option<PyInt>,
+    ) -> PyResult<PyTuple> {
+        let mut inner = self.inner(py).borrow_mut();
+        let transaction = PyTransaction::new(transaction);
+        let py_bytes = entry.extract(py)?;
+        let entry = PyBytesDeref::new(py, py_bytes);
+        let header = data.get_item(py, 0).extract::<PyBytes>(py)?;
+        let header = header.data(py);
+        let data = data.get_item(py, 1);
+        let py_bytes = data.extract(py)?;
+        let data = PyBytesDeref::new(py, py_bytes);
+        Ok(
+            inner.write_entry(
+                transaction,
+                &entry,
+                (header, &data),
+                offset,
+                index_end,
+                data_end
+            ).map_err(|e| revlog_error_from_msg(py, e))?
+             .to_py_object(py)
+        )
+    }
+
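+    // The three methods below implement the delayed-write dance used for
+    // the changelog during transactions: `delay` starts diverting writes,
+    // `write_pending` flushes them to a separate pending file (visible to
+    // hooks), and `finalize_pending` folds everything back into the final
+    // index file.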
+    def delay(&self) -> PyResult<Option<PyBytes>> {
+        let path = self.inner(py)
+            .borrow_mut()
+            .delay()
+            .map_err(|e| revlog_error_from_msg(py, e))?;
+        Ok(path.map(|p| PyBytes::new(py, &get_bytes_from_path(p))))
+    }
+
+    def write_pending(&self) -> PyResult<PyTuple> {
+        let (path, any_pending) = self.inner(py)
+            .borrow_mut()
+            .write_pending()
+            .map_err(|e| revlog_error_from_msg(py, e))?;
+        let maybe_path = match path {
+            Some(path) => {
+                PyBytes::new(py, &get_bytes_from_path(path)).into_object()
+            },
+            None => {
+                py.None()
+            }
+        };
+        Ok(
+            (
+                maybe_path,
+                any_pending
+            ).to_py_object(py)
+        )
+    }
+
+    def finalize_pending(&self) -> PyResult<PyBytes> {
+        let path = self.inner(py)
+            .borrow_mut()
+            .finalize_pending()
+            .map_err(|e| revlog_error_from_msg(py, e))?;
+        Ok(PyBytes::new(py, &get_bytes_from_path(path)))
+    }
+
+    // -- forwarded index methods --
+
+    def _index_get_rev(&self, node: PyBytes) -> PyResult<Option<PyRevision>> {
+        let node = node_from_py_bytes(py, &node)?;
+        // Filelogs have no persistent nodemaps and are often small, so use
+        // a brute-force lookup from the end backwards. Even for a very
+        // large filelog (e.g. an automation file that changes on every
+        // commit), this seems to work quite well for all measured purposes
+        // so far.
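+        // Without a persistent nodemap, only the first few (up to 4)
+        // queries use the linear scan; past that we assume lookups are
+        // frequent enough to justify using the in-memory nodetree.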
+        let mut nodemap_queries =
+            self.nodemap_queries(py).fetch_add(1, Ordering::Relaxed);
+        // Still need to add since `fetch_add` returns the old value
+        nodemap_queries += 1;
+        if !*self.use_persistent_nodemap(py) && nodemap_queries <= 4 {
+            let idx = &self.inner(py).borrow().index;
+            let res =
+                idx.rev_from_node_no_persistent_nodemap(node.into()).ok();
+            return Ok(res.map(Into::into))
+        }
+        let opt = self.get_nodetree(py)?.borrow();
+        let nt = opt.as_ref().expect("nodetree should be set");
+        let ridx = &self.inner(py).borrow().index;
+        let rust_rev =
+            nt.find_bin(ridx, node.into()).map_err(|e| nodemap_error(py, e))?;
+        Ok(rust_rev.map(Into::into))
+    }
+
+    /// Same as `_index_get_rev()`, but raises a bare `error.RevlogError`
+    /// if the node is not found.
+    ///
+    /// There is no need to repeat `node` in the exception message:
+    /// `mercurial/revlog.py` will catch it and rewrap it with the node.
+    def _index_rev(&self, node: PyBytes) -> PyResult<PyRevision> {
+        self._index_get_rev(py, node)?.ok_or_else(|| revlog_error(py))
+    }
+
+    /// return True if the node exists in the index
+    def _index_has_node(&self, node: PyBytes) -> PyResult<bool> {
+        // TODO OPTIM: we could avoid a needless conversion here, to be done
+        // when the scaffolding for the pure-Rust switch is removed, since
+        // `_index_get_rev()` currently does the necessary assertions.
+        self._index_get_rev(py, node).map(|opt| opt.is_some())
+    }
+
+    /// find length of shortest hex nodeid of a binary ID
+    def _index_shortest(&self, node: PyBytes) -> PyResult<usize> {
+        let opt = self.get_nodetree(py)?.borrow();
+        let nt = opt.as_ref().expect("nodetree should be set");
+        let idx = &self.inner(py).borrow().index;
+        match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
+        {
+            Ok(Some(l)) => Ok(l),
+            Ok(None) => Err(revlog_error(py)),
+            Err(e) => Err(nodemap_error(py, e)),
+        }
+    }
+
+    def _index_partialmatch(
+        &self,
+        node: PyObject
+    ) -> PyResult<Option<PyBytes>> {
+        let opt = self.get_nodetree(py)?.borrow();
+        let nt = opt.as_ref().expect("nodetree should be set");
+        let idx = &self.inner(py).borrow().index;
+
+        let node = node.extract::<PyBytes>(py)?;
+        let node_as_string = String::from_utf8_lossy(node.data(py));
+
+        let prefix = NodePrefix::from_hex(node_as_string.to_string())
+            .map_err(|_| PyErr::new::<ValueError, _>(
+                py, format!("Invalid node or prefix '{}'", node_as_string))
+            )?;
+
+        nt.find_bin(idx, prefix)
+            // TODO make an inner API returning the node directly
+            .map(|opt| opt.map(|rev| {
+                    PyBytes::new(
+                        py,
+                        idx.node(rev).expect("node should exist").as_bytes()
+                    )
+            }))
+            .map_err(|e| nodemap_error(py, e))
+
+    }
+
+    /// append an index entry
+    def _index_append(&self, tup: PyTuple) -> PyResult<PyObject> {
+        if tup.len(py) < 8 {
+            // this is better than the panic promised by tup.get_item()
+            return Err(
+                PyErr::new::<IndexError, _>(py, "tuple index out of range"))
+        }
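+        // The 8-tuple layout mirrors the C index entries; item 7 is the
+        // binary node ID.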
+        let node_bytes = tup.get_item(py, 7).extract(py)?;
+        let node = node_from_py_object(py, &node_bytes)?;
+
+        let rev = self.len(py)? as BaseRevision;
+
+        // This is ok since we will just add the revision to the index
+        let rev = Revision(rev);
+        self.inner(py)
+            .borrow_mut()
+            .index
+            .append(py_tuple_to_revision_data_params(py, tup)?)
+            .map_err(|e| revlog_error_from_msg(py, e))?;
+        let idx = &self.inner(py).borrow().index;
+        self.get_nodetree(py)?
+            .borrow_mut()
+            .as_mut()
+            .expect("nodetree should be set")
+            .insert(idx, &node, rev)
+            .map_err(|e| nodemap_error(py, e))?;
+        Ok(py.None())
+    }
+
+    def _index___delitem__(&self, key: PyObject) -> PyResult<PyObject> {
+        // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
+        let start = if let Ok(rev) = key.extract(py) {
+            UncheckedRevision(rev)
+        } else {
+            let start = key.getattr(py, "start")?;
+            UncheckedRevision(start.extract(py)?)
+        };
+        let mut borrow = self.inner(py).borrow_mut();
+        let start = borrow
+            .index
+            .check_revision(start)
+            .ok_or_else(|| {
+                nodemap_error(py, NodeMapError::RevisionNotInIndex(start))
+            })?;
+        borrow.index
+            .remove(start)
+            .map_err(|e| revlog_error_from_msg(py, e))?;
+        drop(borrow);
+        let mut opt = self.get_nodetree(py)?.borrow_mut();
+        let nt = opt.as_mut().expect("nodetree should be set");
+        nt.invalidate_all();
+        self.fill_nodemap(py, nt)?;
+        Ok(py.None())
+    }
+
+    /// return the gca set of the given revs
+    def _index_ancestors(&self, *args, **_kw) -> PyResult<PyObject> {
+        let rust_res = self.inner_ancestors(py, args)?;
+        Ok(rust_res)
+    }
+
+    /// return the heads of the common ancestors of the given revs
+    def _index_commonancestorsheads(
+        &self,
+        *args,
+        **_kw
+    ) -> PyResult<PyObject> {
+        let rust_res = self.inner_commonancestorsheads(py, args)?;
+        Ok(rust_res)
+    }
+
+    /// Clear the index caches and inner py_class data.
+    /// It is Python's responsibility to call `update_nodemap_data` again.
+    def _index_clearcaches(&self) -> PyResult<PyObject> {
+        self.nt(py).borrow_mut().take();
+        self.docket(py).borrow_mut().take();
+        self.nodemap_mmap(py).borrow_mut().take();
+        self.head_revs_py_list(py).borrow_mut().take();
+        self.head_node_ids_py_list(py).borrow_mut().take();
+        self.inner(py).borrow_mut().index.clear_caches();
+        Ok(py.None())
+    }
+
+    /// return the raw binary string representing a revision
+    def _index_entry_binary(&self, *args, **_kw) -> PyResult<PyObject> {
+        let rindex = &self.inner(py).borrow().index;
+        let rev = UncheckedRevision(args.get_item(py, 0).extract(py)?);
+        let rust_bytes = rindex
+            .check_revision(rev)
+            .and_then(|r| rindex.entry_binary(r))
+            .ok_or_else(|| rev_not_in_index(py, rev))?;
+        let rust_res = PyBytes::new(py, rust_bytes).into_object();
+        Ok(rust_res)
+    }
+
+    /// return a binary packed version of the header
+    def _index_pack_header(&self, *args, **_kw) -> PyResult<PyObject> {
+        let rindex = &self.inner(py).borrow().index;
+        let packed = rindex.pack_header(args.get_item(py, 0).extract(py)?);
+        let rust_res = PyBytes::new(py, &packed).into_object();
+        Ok(rust_res)
+    }
+
+    /// compute phases
+    def _index_computephasesmapsets(
+        &self,
+        *args,
+        **_kw
+    ) -> PyResult<PyObject> {
+        let py_roots = args.get_item(py, 0).extract::<PyDict>(py)?;
+        let rust_res = self.inner_computephasesmapsets(py, py_roots)?;
+        Ok(rust_res)
+    }
+
+    /// reachableroots
+    def _index_reachableroots2(&self, *args, **_kw) -> PyResult<PyObject> {
+        let rust_res = self.inner_reachableroots2(
+            py,
+            UncheckedRevision(args.get_item(py, 0).extract(py)?),
+            args.get_item(py, 1),
+            args.get_item(py, 2),
+            args.get_item(py, 3).extract(py)?,
+        )?;
+        Ok(rust_res)
+    }
+
+    /// get head revisions
+    def _index_headrevs(&self, *args, **_kw) -> PyResult<PyObject> {
+        let (filtered_revs, stop_rev) = match args.len(py) {
+            0 => Ok((py.None(), py.None())),
+            1 => Ok((args.get_item(py, 0), py.None())),
+            2 => Ok((args.get_item(py, 0), args.get_item(py, 1))),
+            _ => Err(PyErr::new::<cpython::exc::TypeError, _>(
+                py,
+                "too many arguments",
+            )),
+        }?;
+        self.inner_headrevs(py, &filtered_revs, &stop_rev)
+    }
+
+    /// get head nodeids
+    def _index_head_node_ids(&self) -> PyResult<PyObject> {
+        let rust_res = self.inner_head_node_ids(py)?;
+        Ok(rust_res)
+    }
+
+    /// get diff in head revisions
+    def _index_headrevsdiff(&self, *args, **_kw) -> PyResult<PyObject> {
+        let rust_res = self.inner_headrevsdiff(
+            py,
+            &args.get_item(py, 0),
+            &args.get_item(py, 1),
+        )?;
+        Ok(rust_res)
+    }
+
+    /// True if the object is a snapshot
+    def _index_issnapshot(&self, *args, **_kw) -> PyResult<bool> {
+        let rev = UncheckedRevision(args.get_item(py, 0).extract(py)?);
+        self.inner_issnapshot(py, rev)
+    }
+
+    /// Gather snapshot data in a cache dict
+    def _index_findsnapshots(&self, *args, **_kw) -> PyResult<PyObject> {
+        let index = &self.inner(py).borrow().index;
+        let cache: PyDict = args.get_item(py, 0).extract(py)?;
+        // this method operates by setting new values in the cache,
+        // hence we will compare results by letting the C implementation
+        // operate over a deepcopy of the cache, and finally compare both
+        // caches.
+        let c_cache = PyDict::new(py);
+        for (k, v) in cache.items(py) {
+            c_cache.set_item(py, k, PySet::new(py, v)?)?;
+        }
+
+        let start_rev = UncheckedRevision(args.get_item(py, 1).extract(py)?);
+        let end_rev = UncheckedRevision(args.get_item(py, 2).extract(py)?);
+        let mut cache_wrapper = PySnapshotsCache{ py, dict: cache };
+        index.find_snapshots(
+            start_rev,
+            end_rev,
+            &mut cache_wrapper,
+        ).map_err(|_| revlog_error(py))?;
+        Ok(py.None())
+    }
+
+    /// determine revisions with deltas to reconstruct fulltext
+    def _index_deltachain(&self, *args, **_kw) -> PyResult<PyObject> {
+        let index = &self.inner(py).borrow().index;
+        let rev = args.get_item(py, 0).extract::<BaseRevision>(py)?.into();
+        let stop_rev =
+            args.get_item(py, 1).extract::<Option<BaseRevision>>(py)?;
+        let rev = index.check_revision(rev).ok_or_else(|| {
+            nodemap_error(py, NodeMapError::RevisionNotInIndex(rev))
+        })?;
+        let stop_rev = if let Some(stop_rev) = stop_rev {
+            let stop_rev = UncheckedRevision(stop_rev);
+            Some(index.check_revision(stop_rev).ok_or_else(|| {
+                nodemap_error(py, NodeMapError::RevisionNotInIndex(stop_rev))
+            })?)
+        } else {
+            None
+        };
+        let using_general_delta = args.get_item(py, 2)
+            .extract::<Option<u32>>(py)?
+            .map(|i| i != 0);
+        let (chain, stopped) = index.delta_chain(
+            rev, stop_rev, using_general_delta
+        ).map_err(|e| {
+            PyErr::new::<cpython::exc::ValueError, _>(py, e.to_string())
+        })?;
+
+        let chain: Vec<_> = chain.into_iter().map(|r| r.0).collect();
+        Ok(
+            PyTuple::new(
+                py,
+                &[
+                    chain.into_py_object(py).into_object(),
+                    stopped.into_py_object(py).into_object()
+                ]
+            ).into_object()
+        )
+    }
+
+    /// slice planned chunk read to reach a density threshold
+    def _index_slicechunktodensity(&self, *args, **_kw) -> PyResult<PyObject> {
+        let rust_res = self.inner_slicechunktodensity(
+            py,
+            args.get_item(py, 0),
+            args.get_item(py, 1).extract(py)?,
+            args.get_item(py, 2).extract(py)?
+        )?;
+        Ok(rust_res)
+    }
+
+    def _index___len__(&self) -> PyResult<usize> {
+        self.len(py)
+    }
+
+    def _index___getitem__(&self, key: PyObject) -> PyResult<PyObject> {
+        let rust_res = self.inner_getitem(py, key.clone_ref(py))?;
+        Ok(rust_res)
+    }
+
+    def _index___contains__(&self, item: PyObject) -> PyResult<bool> {
+        // ObjectProtocol does not seem to provide contains(), so
+        // this is an equivalent implementation of the index_contains()
+        // defined in revlog.c
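+        // e.g. `-1 in index` holds (the null revision), while
+        // `len(index) in index` does not.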
+        match item.extract::<i32>(py) {
+            Ok(rev) => {
+                Ok(rev >= -1 && rev < self.len(py)? as BaseRevision)
+            }
+            Err(_) => {
+                let item_bytes: PyBytes = item.extract(py)?;
+                let rust_res = self._index_has_node(py, item_bytes)?;
+                Ok(rust_res)
+            }
+        }
+    }
+
+    def _index_nodemap_data_all(&self) -> PyResult<PyBytes> {
+        self.inner_nodemap_data_all(py)
+    }
+
+    def _index_nodemap_data_incremental(&self) -> PyResult<PyObject> {
+        self.inner_nodemap_data_incremental(py)
+    }
+
+    def _index_update_nodemap_data(
+        &self,
+        docket: PyObject,
+        nm_data: PyObject
+    ) -> PyResult<PyObject> {
+        self.inner_update_nodemap_data(py, docket, nm_data)
+    }
+
+    @property
+    def _index_entry_size(&self) -> PyResult<PyInt> {
+        let rust_res: PyInt = INDEX_ENTRY_SIZE.to_py_object(py);
+        Ok(rust_res)
+    }
+
+    @property
+    def _index_rust_ext_compat(&self) -> PyResult<PyInt> {
+        // Compatibility marker that was useful to detangle things when
+        // removing `self.cindex`; it will be entirely removed once the
+        // transition to the Rust index is complete.
+        let rust_res: PyInt = 1.to_py_object(py);
+        Ok(rust_res)
+    }
+
+    @property
+    def _index_is_rust(&self) -> PyResult<PyBool> {
+        Ok(false.to_py_object(py))
+    }
+
+});
+
+/// Forwarded index methods
+impl InnerRevlog {
     fn len(&self, py: Python) -> PyResult<usize> {
-        let rust_index_len = self.index(py).borrow().len();
+        let rust_index_len = self.inner(py).borrow().index.len();
         Ok(rust_index_len)
     }
-
     /// This is scaffolding at this point, but it could also become
     /// a way to start a persistent nodemap or perform a
     /// vacuum / repack operation
@@ -660,11 +1500,11 @@
         py: Python,
         nt: &mut CoreNodeTree,
     ) -> PyResult<PyObject> {
-        let index = self.index(py).borrow();
+        let index = &self.inner(py).borrow().index;
         for r in 0..self.len(py)? {
             let rev = Revision(r as BaseRevision);
             // in this case node() won't ever return None
-            nt.insert(&*index, index.node(rev).unwrap(), rev)
+            nt.insert(index, index.node(rev).expect("node should exist"), rev)
                 .map_err(|e| nodemap_error(py, e))?
         }
         Ok(py.None())
@@ -685,7 +1525,11 @@
 
     /// Returns the full nodemap bytes to be written as-is to disk
     fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> {
-        let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap();
+        let nodemap = self
+            .get_nodetree(py)?
+            .borrow_mut()
+            .take()
+            .expect("nodetree should exist");
         let (readonly, bytes) = nodemap.into_readonly_and_added_bytes();
 
         // If there's anything readonly, we need to build the data again from
@@ -718,7 +1562,11 @@
             None => return Ok(py.None()),
         };
 
-        let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap();
+        let node_tree = self
+            .get_nodetree(py)?
+            .borrow_mut()
+            .take()
+            .expect("nodetree should exist");
         let masked_blocks = node_tree.masked_readonly_blocks();
         let (_, data) = node_tree.into_readonly_and_added_bytes();
         let changed = masked_blocks * std::mem::size_of::<Block>();
@@ -748,7 +1596,7 @@
             .extract::<BaseRevision>(py)?
             .into();
         self.docket(py).borrow_mut().replace(docket.clone_ref(py));
-        let idx = self.index(py).borrow();
+        let idx = &self.inner(py).borrow().index;
         let data_tip = idx.check_revision(data_tip).ok_or_else(|| {
             nodemap_error(py, NodeMapError::RevisionNotInIndex(data_tip))
         })?;
@@ -757,7 +1605,7 @@
         for r in (data_tip.0 + 1)..current_tip as BaseRevision {
             let rev = Revision(r);
             // in this case node() won't ever return None
-            nt.insert(&*idx, idx.node(rev).unwrap(), rev)
+            nt.insert(idx, idx.node(rev).expect("node should exist"), rev)
                 .map_err(|e| nodemap_error(py, e))?
         }
 
@@ -767,7 +1615,7 @@
     }
 
     fn inner_getitem(&self, py: Python, key: PyObject) -> PyResult<PyObject> {
-        let idx = self.index(py).borrow();
+        let idx = &self.inner(py).borrow().index;
         Ok(match key.extract::<BaseRevision>(py) {
             Ok(key_as_int) => {
                 let entry_params = if key_as_int == NULL_REVISION.0 {
@@ -787,15 +1635,17 @@
                 revision_data_params_to_py_tuple(py, entry_params)
                     .into_object()
             }
-            _ => self.get_rev(py, key.extract::<PyBytes>(py)?)?.map_or_else(
-                || py.None(),
-                |py_rev| py_rev.into_py_object(py).into_object(),
-            ),
+            _ => self
+                ._index_get_rev(py, key.extract::<PyBytes>(py)?)?
+                .map_or_else(
+                    || py.None(),
+                    |py_rev| py_rev.into_py_object(py).into_object(),
+                ),
         })
     }
 
     fn inner_head_node_ids(&self, py: Python) -> PyResult<PyObject> {
-        let index = &*self.index(py).borrow();
+        let index = &self.inner(py).borrow().index;
 
         // We don't use the shortcut here, as it's actually slower to loop
         // through the cached `PyList` than to re-do the whole computation for
@@ -827,7 +1677,7 @@
         filtered_revs: &PyObject,
         stop_rev: &PyObject,
     ) -> PyResult<PyObject> {
-        let index = &*self.index(py).borrow();
+        let index = &self.inner(py).borrow().index;
         let stop_rev = if stop_rev.is_none(py) {
             None
         } else {
@@ -883,7 +1733,7 @@
     }
 
     fn check_revision(
-        index: &hg::index::Index,
+        index: &Index,
         rev: UncheckedRevision,
         py: Python,
     ) -> PyResult<Revision> {
@@ -900,7 +1750,7 @@
     ) -> PyResult<PyObject> {
         let begin = begin.extract::<BaseRevision>(py)?;
         let end = end.extract::<BaseRevision>(py)?;
-        let index = &*self.index(py).borrow();
+        let index = &self.inner(py).borrow().index;
         let begin =
             Self::check_revision(index, UncheckedRevision(begin - 1), py)?;
         let end = Self::check_revision(index, UncheckedRevision(end - 1), py)?;
@@ -919,7 +1769,7 @@
         new_heads: &[Revision],
         py: Python<'_>,
     ) -> PyList {
-        let index = self.index(py).borrow();
+        let index = &self.inner(py).borrow().index;
         let as_vec: Vec<PyObject> = new_heads
             .iter()
             .map(|r| {
@@ -959,7 +1809,7 @@
         py: Python,
         py_revs: &PyTuple,
     ) -> PyResult<PyObject> {
-        let index = &*self.index(py).borrow();
+        let index = &self.inner(py).borrow().index;
         let revs: Vec<_> = rev_pyiter_collect(py, py_revs.as_object(), index)?;
         let as_vec: Vec<_> = index
             .ancestors(&revs)
@@ -975,7 +1825,7 @@
         py: Python,
         py_revs: &PyTuple,
     ) -> PyResult<PyObject> {
-        let index = &*self.index(py).borrow();
+        let index = &self.inner(py).borrow().index;
         let revs: Vec<_> = rev_pyiter_collect(py, py_revs.as_object(), index)?;
         let as_vec: Vec<_> = index
             .common_ancestor_heads(&revs)
@@ -991,7 +1841,7 @@
         py: Python,
         py_roots: PyDict,
     ) -> PyResult<PyObject> {
-        let index = &*self.index(py).borrow();
+        let index = &self.inner(py).borrow().index;
         let roots: Result<HashMap<Phase, Vec<Revision>>, PyErr> = py_roots
             .items_list(py)
             .iter(py)
@@ -1038,7 +1888,7 @@
         target_density: f64,
         min_gap_size: usize,
     ) -> PyResult<PyObject> {
-        let index = &*self.index(py).borrow();
+        let index = &self.inner(py).borrow().index;
         let revs: Vec<_> = rev_pyiter_collect(py, &revs, index)?;
         let as_nested_vec =
             index.slice_chunk_to_density(&revs, target_density, min_gap_size);
@@ -1070,7 +1920,7 @@
         roots: PyObject,
         include_path: bool,
     ) -> PyResult<PyObject> {
-        let index = &*self.index(py).borrow();
+        let index = &self.inner(py).borrow().index;
         let heads = rev_pyiter_collect_or_else(py, &heads, index, |_rev| {
             PyErr::new::<IndexError, _>(py, "head out of range")
         })?;
@@ -1092,6 +1942,96 @@
             .collect();
         Ok(PyList::new(py, &as_vec).into_object())
     }
+
+    fn inner_issnapshot(
+        &self,
+        py: Python,
+        rev: UncheckedRevision,
+    ) -> PyResult<bool> {
+        let inner = &self.inner(py).borrow();
+        let index = &inner.index;
+        let rev = index
+            .check_revision(rev)
+            .ok_or_else(|| rev_not_in_index(py, rev))?;
+        let result = inner.is_snapshot(rev).map_err(|e| {
+            PyErr::new::<cpython::exc::ValueError, _>(py, e.to_string())
+        })?;
+        Ok(result)
+    }
+}
+
+impl InnerRevlog {
+    pub fn inner_new(
+        py: Python,
+        vfs_base: PyObject,
+        fncache: PyObject,
+        vfs_is_readonly: bool,
+        index_data: PyObject,
+        index_file: PyObject,
+        data_file: PyObject,
+        _sidedata_file: PyObject,
+        inline: bool,
+        data_config: PyObject,
+        delta_config: PyObject,
+        feature_config: PyObject,
+        _chunk_cache: PyObject,
+        _default_compression_header: PyObject,
+        revlog_type: usize,
+        use_persistent_nodemap: bool,
+    ) -> PyResult<Self> {
+        let index_file =
+            get_path_from_bytes(index_file.extract::<PyBytes>(py)?.data(py))
+                .to_owned();
+        let data_file =
+            get_path_from_bytes(data_file.extract::<PyBytes>(py)?.data(py))
+                .to_owned();
+        let revlog_type = RevlogType::try_from(revlog_type)
+            .map_err(|e| revlog_error_from_msg(py, e))?;
+        let data_config = extract_data_config(py, data_config, revlog_type)?;
+        let delta_config =
+            extract_delta_config(py, delta_config, revlog_type)?;
+        let feature_config =
+            extract_feature_config(py, feature_config, revlog_type)?;
+        let options = RevlogOpenOptions::new(
+            inline,
+            data_config,
+            delta_config,
+            feature_config,
+        );
+
+        // Safety: we keep the buffer around inside the class as `index_mmap`
+        let (buf, bytes) = unsafe { mmap_keeparound(py, index_data)? };
+        let index = Index::new(bytes, options.index_header())
+            .map_err(|e| revlog_error_from_msg(py, e))?;
+
+        let base = &vfs_base.extract::<PyBytes>(py)?;
+        let base = get_path_from_bytes(base.data(py)).to_owned();
+        let core = CoreInnerRevlog::new(
+            Box::new(FnCacheVfs::new(
+                base,
+                vfs_is_readonly,
+                Box::new(PyFnCache::new(fncache)),
+            )),
+            index,
+            index_file,
+            data_file,
+            data_config,
+            delta_config,
+            feature_config,
+        );
+        Self::create_instance(
+            py,
+            core,
+            RefCell::new(None),
+            RefCell::new(None),
+            RefCell::new(None),
+            RefCell::new(buf),
+            RefCell::new(None),
+            RefCell::new(None),
+            RefCell::new(None),
+            use_persistent_nodemap,
+            AtomicUsize::new(0),
+        )
+    }
 }
 
 py_class!(pub class NodeTree |py| {
@@ -1112,7 +2052,7 @@
     /// (generation-based guard), same as iterating on a `dict` that has
     /// been meanwhile mutated.
     def is_invalidated(&self) -> PyResult<bool> {
-        let leaked = self.index(py).borrow();
+        let leaked = &self.index(py).borrow();
         // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
         let result = unsafe { leaked.try_borrow(py) };
         // two cases for result to be an error:
@@ -1124,7 +2064,7 @@
     }
 
     def insert(&self, rev: PyRevision) -> PyResult<PyObject> {
-        let leaked = self.index(py).borrow();
+        let leaked = &self.index(py).borrow();
         // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
         let index = &*unsafe { leaked.try_borrow(py)? };
 
@@ -1136,7 +2076,7 @@
             return Err(rev_not_in_index(py, rev.into()))
         }
 
-        let entry = index.inner.get_entry(rev).unwrap();
+        let entry = index.inner.get_entry(rev).expect("entry should exist");
         let mut nt = self.nt(py).borrow_mut();
         nt.insert(index, entry.hash(), rev).map_err(|e| nodemap_error(py, e))?;
 
@@ -1159,7 +2099,7 @@
             )?;
 
         let nt = self.nt(py).borrow();
-        let leaked = self.index(py).borrow();
+        let leaked = &self.index(py).borrow();
         // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
         let index = &*unsafe { leaked.try_borrow(py)? };
 
@@ -1171,7 +2111,7 @@
 
     def shortest(&self, node: PyBytes) -> PyResult<usize> {
         let nt = self.nt(py).borrow();
-        let leaked = self.index(py).borrow();
+        let leaked = &self.index(py).borrow();
         // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
         let idx = &*unsafe { leaked.try_borrow(py)? };
         match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
@@ -1183,6 +2123,120 @@
     }
 });
 
+fn panic_after_error(_py: Python) -> ! {
+    unsafe {
+        python3_sys::PyErr_Print();
+    }
+    panic!("Python API call failed");
+}
+
+/// # Safety
+///
+/// Don't call this directly. Its only caller is code adapted from `PyO3`.
+unsafe fn cast_from_owned_ptr_or_panic<T>(
+    py: Python,
+    p: *mut python3_sys::PyObject,
+) -> T
+where
+    T: cpython::PythonObjectWithCheckedDowncast,
+{
+    if p.is_null() {
+        panic_after_error(py);
+    } else {
+        PyObject::from_owned_ptr(py, p).cast_into(py).unwrap()
+    }
+}
+
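+/// Build a `PyBytes` of length `len` and hand its freshly allocated buffer
+/// to `init` through the [`RevisionBuffer`] abstraction, so the revision
+/// text is written directly into Python-owned memory (a single allocation,
+/// no intermediate copy).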
+fn with_pybytes_buffer<F>(
+    py: Python,
+    len: usize,
+    init: F,
+) -> Result<PyBytes, RevlogError>
+where
+    F: FnOnce(
+        &mut dyn RevisionBuffer<Target = PyBytes>,
+    ) -> Result<(), RevlogError>,
+{
+    // Largely inspired by code in PyO3
+    // https://pyo3.rs/main/doc/pyo3/types/struct.pybytes#method.new_bound_with
+    unsafe {
+        let pyptr = python3_sys::PyBytes_FromStringAndSize(
+            std::ptr::null(),
+            len as python3_sys::Py_ssize_t,
+        );
+        let pybytes = cast_from_owned_ptr_or_panic::<PyBytes>(py, pyptr);
+        let buffer: *mut u8 = python3_sys::PyBytes_AsString(pyptr).cast();
+        debug_assert!(!buffer.is_null());
+        let mut rev_buf = PyRevisionBuffer::new(pybytes, buffer, len);
+        // Initialise the bytestring in init
+        // If init returns an Err, the buffer is deallocated by `pybytes`
+        init(&mut rev_buf).map(|_| rev_buf.finish())
+    }
+}
+
+/// Wrapper around a Python-provided buffer into which the revision contents
+/// will be written. Done for speed in order to save a large allocation + copy.
+struct PyRevisionBuffer {
+    py_bytes: PyBytes,
+    _buf: *mut u8,
+    len: usize,
+    current_buf: *mut u8,
+    current_len: usize,
+}
+
+impl PyRevisionBuffer {
+    /// # Safety
+    ///
+    /// `buf` should be the start of the allocated bytes of `bytes`, and `len`
+    /// exactly the length of said allocated bytes.
+    #[inline]
+    unsafe fn new(bytes: PyBytes, buf: *mut u8, len: usize) -> Self {
+        Self {
+            py_bytes: bytes,
+            _buf: buf,
+            len,
+            current_len: 0,
+            current_buf: buf,
+        }
+    }
+
+    /// Number of bytes written so far. This differs from the total
+    /// allocated length of the buffer until the revision has been fully
+    /// written.
+    #[inline]
+    fn current_len(&self) -> usize {
+        self.current_len
+    }
+}
+
+impl RevisionBuffer for PyRevisionBuffer {
+    type Target = PyBytes;
+
+    #[inline]
+    fn extend_from_slice(&mut self, slice: &[u8]) {
+        assert!(self.current_len + slice.len() <= self.len);
+        unsafe {
+            // We cannot use `copy_from_nonoverlapping` since it is
+            // *possible* to create a slice from the same Python memory
+            // region using [`PyBytesDeref`]; LLVM probably optimizes this
+            // into equivalent code anyway.
+            self.current_buf.copy_from(slice.as_ptr(), slice.len());
+            self.current_buf = self.current_buf.add(slice.len());
+        }
+        self.current_len += slice.len()
+    }
+
+    #[inline]
+    fn finish(self) -> Self::Target {
+        // catch uninitialized bytes before they become undefined behavior
+        assert_eq!(
+            self.current_len(),
+            self.len,
+            "not enough bytes read for revision"
+        );
+        self.py_bytes
+    }
+}
+
 fn revlog_error(py: Python) -> PyErr {
     match py
         .import("mercurial.error")
@@ -1196,21 +2250,6 @@
     }
 }
 
-fn revlog_error_with_msg(py: Python, msg: &[u8]) -> PyErr {
-    match py
-        .import("mercurial.error")
-        .and_then(|m| m.get(py, "RevlogError"))
-    {
-        Err(e) => e,
-        Ok(cls) => PyErr::from_instance(
-            py,
-            cls.call(py, (PyBytes::new(py, msg),), None)
-                .ok()
-                .into_py_object(py),
-        ),
-    }
-}
-
 fn graph_error(py: Python, _err: hg::GraphError) -> PyErr {
     // ParentOutOfRange is currently the only alternative
     // in `hg::GraphError`. The C index always raises this simple ValueError.
@@ -1250,8 +2289,8 @@
     m.add(py, "__package__", package)?;
     m.add(py, "__doc__", "RevLog - Rust implementations")?;
 
-    m.add_class::<Index>(py)?;
     m.add_class::<NodeTree>(py)?;
+    m.add_class::<InnerRevlog>(py)?;
 
     let sys = PyModule::import(py, "sys")?;
     let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rust/hg-cpython/src/vfs.rs	Wed Nov 20 15:53:19 2024 +0100
@@ -0,0 +1,303 @@
+use std::{
+    cell::Cell,
+    fs::File,
+    io::Error,
+    os::fd::{AsRawFd, FromRawFd},
+    path::{Path, PathBuf},
+};
+
+use cpython::{
+    ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyInt, PyObject,
+    PyResult, PyTuple, Python, PythonObject, ToPyObject,
+};
+use hg::{
+    errors::{HgError, IoResultExt},
+    exit_codes,
+    utils::files::{get_bytes_from_path, get_path_from_bytes},
+    vfs::{Vfs, VfsFile},
+};
+
+/// Wrapper around a Python VFS object to call back into Python from `hg-core`.
+pub struct PyVfs {
+    inner: PyObject,
+    base: PathBuf,
+}
+
+impl Clone for PyVfs {
+    fn clone(&self) -> Self {
+        let gil = &Python::acquire_gil();
+        let py = gil.python();
+        Self {
+            inner: self.inner.clone_ref(py),
+            base: self.base.clone(),
+        }
+    }
+}
+
+impl PyVfs {
+    pub fn new(
+        _py: Python,
+        py_vfs: PyObject,
+        base: PathBuf,
+    ) -> PyResult<Self> {
+        Ok(Self {
+            inner: py_vfs,
+            base,
+        })
+    }
+
+    fn inner_open(
+        &self,
+        filename: &Path,
+        create: bool,
+        check_ambig: bool,
+        atomic_temp: bool,
+        write: bool,
+    ) -> Result<(File, Option<PathBuf>), HgError> {
+        let gil = &Python::acquire_gil();
+        let py = gil.python();
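+        // Map the requested access onto a Python `open` mode: "w" for an
+        // atomic temp file, "w+" to create (truncating), "r+" to update an
+        // existing file, and "rb" for read-only access.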
+        let mode = if atomic_temp {
+            PyBytes::new(py, b"w")
+        } else if create {
+            PyBytes::new(py, b"w+")
+        } else if write {
+            PyBytes::new(py, b"r+")
+        } else {
+            PyBytes::new(py, b"rb")
+        };
+        let res = self.inner.call(
+            py,
+            (
+                PyBytes::new(py, &get_bytes_from_path(filename)),
+                mode,
+                atomic_temp,
+                check_ambig,
+            ),
+            None,
+        );
+        match res {
+            Ok(tup) => {
+                let tup = tup
+                    .extract::<PyTuple>(py)
+                    .map_err(|e| vfs_error("vfs did not return a tuple", e))?;
+                let fileno = tup.get_item(py, 0).extract(py).map_err(|e| {
+                    vfs_error("vfs did not return a valid fileno", e)
+                })?;
+                let temp_name = tup.get_item(py, 1);
+                // Safety: this must be a valid owned file descriptor that
+                // Python has just handed over to us; it only exists here now
+                let file = unsafe { File::from_raw_fd(fileno) };
+                let temp_name = if atomic_temp {
+                    Some(
+                        get_path_from_bytes(
+                            temp_name
+                                .extract::<PyBytes>(py)
+                                .map_err(|e| vfs_error("invalid tempname", e))?
+                                .data(py),
+                        )
+                        .to_owned(),
+                    )
+                } else {
+                    None
+                };
+                Ok((file, temp_name))
+            }
+            Err(mut e) => {
+                // TODO surely there is a better way of comparing
+                if e.instance(py).get_type(py).name(py) == "FileNotFoundError"
+                {
+                    return Err(HgError::IoError {
+                        error: Error::new(
+                            std::io::ErrorKind::NotFound,
+                            e.instance(py).to_string(),
+                        ),
+                        context: hg::errors::IoErrorContext::ReadingFile(
+                            filename.to_owned(),
+                        ),
+                    });
+                }
+                Err(vfs_error("failed to call opener", e))
+            }
+        }
+    }
+}
+
+fn vfs_error(reason: impl Into<String>, mut error: PyErr) -> HgError {
+    let gil = &Python::acquire_gil();
+    let py = gil.python();
+    HgError::abort(
+        format!("{}: {}", reason.into(), error.instance(py)),
+        exit_codes::ABORT,
+        None,
+    )
+}
+
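+// Minimal file-like object exposing only `fileno()`: enough for the Python
+// side (see `file_size` below, which passes it to the vfs's `fstat`) to
+// work on a raw file descriptor owned by Rust.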
+py_class!(pub class PyFile |py| {
+    data number: Cell<i32>;
+
+    def fileno(&self) -> PyResult<PyInt> {
+        Ok(self.number(py).get().to_py_object(py))
+    }
+});
+
+impl Vfs for PyVfs {
+    fn open(&self, filename: &Path) -> Result<VfsFile, HgError> {
+        self.inner_open(filename, false, false, false, true)
+            .map(|(f, _)| VfsFile::normal(f, filename.to_owned()))
+    }
+    fn open_read(&self, filename: &Path) -> Result<VfsFile, HgError> {
+        self.inner_open(filename, false, false, false, false)
+            .map(|(f, _)| VfsFile::normal(f, filename.to_owned()))
+    }
+
+    fn open_check_ambig(&self, filename: &Path) -> Result<VfsFile, HgError> {
+        self.inner_open(filename, false, true, false, true)
+            .map(|(f, _)| VfsFile::normal(f, filename.to_owned()))
+    }
+
+    fn create(
+        &self,
+        filename: &Path,
+        check_ambig: bool,
+    ) -> Result<VfsFile, HgError> {
+        self.inner_open(filename, true, check_ambig, false, true)
+            .map(|(f, _)| VfsFile::normal(f, filename.to_owned()))
+    }
+
+    fn create_atomic(
+        &self,
+        filename: &Path,
+        check_ambig: bool,
+    ) -> Result<VfsFile, HgError> {
+        self.inner_open(filename, true, false, true, true).map(
+            |(fp, temp_name)| {
+                VfsFile::Atomic(hg::vfs::AtomicFile::from_file(
+                    fp,
+                    check_ambig,
+                    temp_name.expect("temp name should exist"),
+                    filename.to_owned(),
+                ))
+            },
+        )
+    }
+
+    fn file_size(&self, file: &VfsFile) -> Result<u64, HgError> {
+        let gil = &Python::acquire_gil();
+        let py = gil.python();
+        let raw_fd = file.as_raw_fd();
+        let py_fd = PyFile::create_instance(py, Cell::new(raw_fd))
+            .expect("create_instance cannot fail");
+        let fstat = self
+            .inner
+            .call_method(py, "fstat", (py_fd,), None)
+            .map_err(|e| {
+                vfs_error(format!("failed to fstat fd '{}'", raw_fd), e)
+            })?;
+        fstat
+            .getattr(py, "st_size")
+            .map(|v| {
+                v.extract(py).map_err(|e| {
+                    vfs_error(format!("invalid size for fd '{}'", raw_fd), e)
+                })
+            })
+            .map_err(|e| {
+                vfs_error(format!("failed to get size of fd '{}'", raw_fd), e)
+            })?
+    }
+
+    fn exists(&self, filename: &Path) -> bool {
+        let gil = &Python::acquire_gil();
+        let py = gil.python();
+        self.inner
+            .call_method(
+                py,
+                "exists",
+                (PyBytes::new(py, &get_bytes_from_path(filename)),),
+                None,
+            )
+            .unwrap_or_else(|_| false.into_py_object(py).into_object())
+            .extract(py)
+            .unwrap()
+    }
+
+    fn unlink(&self, filename: &Path) -> Result<(), HgError> {
+        let gil = &Python::acquire_gil();
+        let py = gil.python();
+        if let Err(e) = self.inner.call_method(
+            py,
+            "unlink",
+            (PyBytes::new(py, &get_bytes_from_path(filename)),),
+            None,
+        ) {
+            return Err(vfs_error(
+                format!("failed to unlink '{}'", filename.display()),
+                e,
+            ));
+        }
+        Ok(())
+    }
+
+    fn rename(
+        &self,
+        from: &Path,
+        to: &Path,
+        check_ambig: bool,
+    ) -> Result<(), HgError> {
+        let gil = &Python::acquire_gil();
+        let py = gil.python();
+        let kwargs = PyDict::new(py);
+        kwargs
+            .set_item(py, "checkambig", check_ambig)
+            .map_err(|e| vfs_error("dict setitem failed", e))?;
+        if let Err(e) = self.inner.call_method(
+            py,
+            "rename",
+            (
+                PyBytes::new(py, &get_bytes_from_path(from)),
+                PyBytes::new(py, &get_bytes_from_path(to)),
+            ),
+            Some(&kwargs),
+        ) {
+            let msg = format!(
+                "failed to rename '{}' to '{}'",
+                from.display(),
+                to.display()
+            );
+            return Err(vfs_error(msg, e));
+        }
+        Ok(())
+    }
+
+    fn copy(&self, from: &Path, to: &Path) -> Result<(), HgError> {
+        let gil = &Python::acquire_gil();
+        let py = gil.python();
+        let from = self
+            .inner
+            .call_method(
+                py,
+                "join",
+                (PyBytes::new(py, &get_bytes_from_path(from)),),
+                None,
+            )
+            .unwrap();
+        let from = from.extract::<PyBytes>(py).unwrap();
+        let from = get_path_from_bytes(from.data(py));
+        let to = self
+            .inner
+            .call_method(
+                py,
+                "join",
+                (PyBytes::new(py, &get_bytes_from_path(to)),),
+                None,
+            )
+            .unwrap();
+        let to = to.extract::<PyBytes>(py).unwrap();
+        let to = get_path_from_bytes(to.data(py));
+        std::fs::copy(from, to).when_writing_file(to)?;
+        Ok(())
+    }
+
+    fn base(&self) -> &Path {
+        &self.base
+    }
+}
--- a/rust/rhg/src/commands/debugdata.rs	Wed Nov 20 15:38:57 2024 +0100
+++ b/rust/rhg/src/commands/debugdata.rs	Wed Nov 20 15:53:19 2024 +0100
@@ -2,7 +2,7 @@
 use clap::Arg;
 use clap::ArgGroup;
 use hg::operations::debug_data;
-use hg::RevlogType;
+use hg::revlog::RevlogType;
 
 pub const HELP_TEXT: &str = "
 Dump the contents of a data file revision
--- a/rust/rhg/src/commands/status.rs	Wed Nov 20 15:38:57 2024 +0100
+++ b/rust/rhg/src/commands/status.rs	Wed Nov 20 15:53:19 2024 +0100
@@ -20,20 +20,22 @@
 use hg::errors::{HgError, IoResultExt};
 use hg::filepatterns::parse_pattern_args;
 use hg::lock::LockError;
-use hg::manifest::Manifest;
 use hg::matchers::{AlwaysMatcher, IntersectionMatcher};
 use hg::repo::Repo;
+use hg::revlog::manifest::Manifest;
+use hg::revlog::options::{default_revlog_options, RevlogOpenOptions};
+use hg::revlog::RevlogType;
 use hg::utils::debug::debug_wait_for_file;
 use hg::utils::files::{
     get_bytes_from_os_str, get_bytes_from_os_string, get_path_from_bytes,
 };
 use hg::utils::hg_path::{hg_path_to_path_buf, HgPath};
+use hg::DirstateStatus;
+use hg::PatternFileWarning;
 use hg::Revision;
 use hg::StatusError;
 use hg::StatusOptions;
 use hg::{self, narrow, sparse};
-use hg::{DirstateStatus, RevlogOpenOptions};
-use hg::{PatternFileWarning, RevlogType};
 use log::info;
 use rayon::prelude::*;
 use std::borrow::Cow;
@@ -383,8 +385,11 @@
             })?;
             let working_directory_vfs = repo.working_directory_vfs();
             let store_vfs = repo.store_vfs();
-            let filelog_open_options =
-                repo.default_revlog_options(RevlogType::Filelog)?;
+            let filelog_open_options = default_revlog_options(
+                repo.config(),
+                repo.requirements(),
+                RevlogType::Filelog,
+            )?;
             let res: Vec<_> = take(&mut ds_status.unsure)
                 .into_par_iter()
                 .map(|to_check| {
@@ -785,7 +790,7 @@
     if entry_flags.map(|f| f.into()) != fs_flags {
         return Ok(UnsureOutcome::Modified);
     }
-    let filelog = hg::filelog::Filelog::open_vfs(
+    let filelog = hg::revlog::filelog::Filelog::open_vfs(
         store_vfs,
         hg_path,
         revlog_open_options,
--- a/tests/test-bundle.t	Wed Nov 20 15:38:57 2024 +0100
+++ b/tests/test-bundle.t	Wed Nov 20 15:53:19 2024 +0100
@@ -294,7 +294,7 @@
   packed.hg: size=2865, sha1=353d10311f4befa195d9a1ca4b8e26518115c702 (no-rust !)
   0000: 48 47 53 31 55 4e 00 00 00 00 00 00 00 06 00 00 |HGS1UN..........| (no-rust !)
   0010: 00 00 00 00 0a 69 00 3b 67 65 6e 65 72 61 6c 64 |.....i.;generald| (no-rust !)
-  packed.hg: size=3181, sha1=b202787710a1c109246554be589506cd2916acb7 (rust !)
+  packed.hg: size=3181, sha1=3e865df183d388222969c5b19c844dd8697c85c6 (rust !)
   0000: 48 47 53 31 55 4e 00 00 00 00 00 00 00 09 00 00 |HGS1UN..........| (rust !)
   0010: 00 00 00 00 0b 67 00 3b 67 65 6e 65 72 61 6c 64 |.....g.;generald| (rust !)
   0020: 65 6c 74 61 2c 72 65 76 6c 6f 67 2d 63 6f 6d 70 |elta,revlog-comp|
--- a/tests/test-contrib-perf.t	Wed Nov 20 15:38:57 2024 +0100
+++ b/tests/test-contrib-perf.t	Wed Nov 20 15:53:19 2024 +0100
@@ -283,8 +283,15 @@
 #if reporevlogstore
   $ hg perfrevlogrevisions .hg/store/data/a.i
 #endif
+
+#if no-rust
+Cannot test with the Rust extensions because these commands are highly
+invasive and expect a certain structure from the Python code.
+
   $ hg perfrevlogrevision -m 0
   $ hg perfrevlogchunks -c
+#endif
+
   $ hg perfrevrange
   $ hg perfrevset 'all()'
   $ hg perfstartup
--- a/tests/test-journal-exists.t	Wed Nov 20 15:38:57 2024 +0100
+++ b/tests/test-journal-exists.t	Wed Nov 20 15:53:19 2024 +0100
@@ -45,12 +45,21 @@
   $ hg bundle -qa repo.hg
   $ chmod -w foo/.hg/store/00changelog.i
 
+#if rust
+  $ hg -R foo unbundle repo.hg
+  adding changesets
+  transaction abort!
+  rollback completed
+  abort: abort: when writing $TESTTMP/repo/foo/.hg/store/00changelog.i: $EACCES$
+  [50]
+#else
   $ hg -R foo unbundle repo.hg
   adding changesets
   transaction abort!
   rollback completed
   abort: $EACCES$: '$TESTTMP/repo/foo/.hg/store/.00changelog.i-*' (glob)
   [255]
+#endif
 
   $ if test -f foo/.hg/store/journal; then echo 'journal exists :-('; fi
 #endif
--- a/tests/test-permissions.t	Wed Nov 20 15:38:57 2024 +0100
+++ b/tests/test-permissions.t	Wed Nov 20 15:53:19 2024 +0100
@@ -34,10 +34,16 @@
   $ chmod -w .hg/store/data/a.i
 
   $ echo barber > a
+#if rust
+  $ hg commit -m "2"
+  abort: abort: when writing $TESTTMP/t/.hg/store/data/a.i: $EACCES$
+  [50]
+#else
   $ hg commit -m "2"
   trouble committing a!
   abort: $EACCES$: '$TESTTMP/t/.hg/store/data/a.i'
   [255]
+#endif
 
   $ chmod -w .
 
--- a/tests/test-remotefilelog-bgprefetch.t	Wed Nov 20 15:38:57 2024 +0100
+++ b/tests/test-remotefilelog-bgprefetch.t	Wed Nov 20 15:53:19 2024 +0100
@@ -33,8 +33,8 @@
   transferred 776 bytes in * seconds (*/sec) (glob) (no-zstd !)
   3 files to transfer, 784 bytes of data (zstd no-rust !)
   transferred 784 bytes in * seconds (*/sec) (glob) (zstd no-rust !)
-  5 files to transfer, 910 bytes of data (rust !)
-  transferred 910 bytes in * seconds (*/sec) (glob) (rust !)
+  5 files to transfer, 911 bytes of data (rust !)
+  transferred 911 bytes in * seconds (*/sec) (glob) (rust !)
   searching for changes
   no changes found
 
--- a/tests/test-remotefilelog-prefetch.t	Wed Nov 20 15:38:57 2024 +0100
+++ b/tests/test-remotefilelog-prefetch.t	Wed Nov 20 15:53:19 2024 +0100
@@ -26,8 +26,8 @@
   transferred 528 bytes in * seconds (* */sec) (glob) (no-zstd !)
   3 files to transfer, 532 bytes of data (zstd no-rust !)
   transferred 532 bytes in * seconds (* */sec) (glob) (zstd no-rust !)
-  5 files to transfer, 658 bytes of data (zstd rust !)
-  transferred 658 bytes in * seconds (*/sec) (glob) (zstd rust !)
+  5 files to transfer, 659 bytes of data (zstd rust !)
+  transferred 659 bytes in * seconds (*/sec) (glob) (zstd rust !)
   searching for changes
   no changes found
   $ cd shallow
@@ -172,8 +172,8 @@
   transferred 528 bytes in * seconds * (glob) (no-zstd !)
   3 files to transfer, 532 bytes of data (zstd no-rust !)
   transferred 532 bytes in * seconds (* */sec) (glob) (zstd no-rust !)
-  5 files to transfer, 658 bytes of data (zstd rust !)
-  transferred 658 bytes in * seconds (*/sec) (glob) (zstd rust !)
+  5 files to transfer, 659 bytes of data (zstd rust !)
+  transferred 659 bytes in * seconds (*/sec) (glob) (zstd rust !)
   searching for changes
   no changes found
   updating to branch default
--- a/tests/test-repo-compengines.t	Wed Nov 20 15:38:57 2024 +0100
+++ b/tests/test-repo-compengines.t	Wed Nov 20 15:53:19 2024 +0100
@@ -194,9 +194,11 @@
   > done
 
   $ $RUNTESTDIR/f -s zstd-*/.hg/store/data/*
-  zstd-level-1/.hg/store/data/a.i: size=4114
+  zstd-level-1/.hg/store/data/a.i: size=4114 (no-rust !)
+  zstd-level-1/.hg/store/data/a.i: size=4112 (rust !)
   zstd-level-22/.hg/store/data/a.i: size=4091
-  zstd-level-default/\.hg/store/data/a\.i: size=(4094|4102) (re)
+  zstd-level-default/\.hg/store/data/a\.i: size=(4094|4102) (re) (no-rust !)
+  zstd-level-default/.hg/store/data/a.i: size=4097 (rust !)
 
 Test error cases
 
--- a/tests/test-rust-revlog.py	Wed Nov 20 15:38:57 2024 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,90 +0,0 @@
-import struct
-import unittest
-
-from mercurial.node import hex
-
-try:
-    from mercurial import rustext
-
-    rustext.__name__  # trigger immediate actual import
-except ImportError:
-    rustext = None
-else:
-    from mercurial.rustext import revlog
-
-    # this would fail already without appropriate ancestor.__package__
-    from mercurial.rustext.ancestor import LazyAncestors
-
-from mercurial.testing import revlog as revlogtesting
-
-header = struct.unpack(">I", revlogtesting.data_non_inlined[:4])[0]
-
-
-@unittest.skipIf(
-    rustext is None,
-    "rustext module revlog relies on is not available",
-)
-class RustRevlogIndexTest(revlogtesting.RevlogBasedTestBase):
-    def test_heads(self):
-        idx = self.parseindex()
-        rustidx = revlog.Index(revlogtesting.data_non_inlined, header)
-        self.assertEqual(rustidx.headrevs(), idx.headrevs())
-
-    def test_len(self):
-        idx = self.parseindex()
-        rustidx = revlog.Index(revlogtesting.data_non_inlined, header)
-        self.assertEqual(len(rustidx), len(idx))
-
-    def test_ancestors(self):
-        rustidx = revlog.Index(revlogtesting.data_non_inlined, header)
-        lazy = LazyAncestors(rustidx, [3], 0, True)
-        # we have two more references to the index:
-        # - in its inner iterator for __contains__ and __bool__
-        # - in the LazyAncestors instance itself (to spawn new iterators)
-        self.assertTrue(2 in lazy)
-        self.assertTrue(bool(lazy))
-        self.assertEqual(list(lazy), [3, 2, 1, 0])
-        # a second time to validate that we spawn new iterators
-        self.assertEqual(list(lazy), [3, 2, 1, 0])
-
-        # let's check bool for an empty one
-        self.assertFalse(LazyAncestors(rustidx, [0], 0, False))
-
-
-@unittest.skipIf(
-    rustext is None,
-    "rustext module revlog relies on is not available",
-)
-class RustRevlogNodeTreeClassTest(revlogtesting.RustRevlogBasedTestBase):
-    def test_standalone_nodetree(self):
-        idx = self.parserustindex()
-        nt = revlog.NodeTree(idx)
-        for i in range(4):
-            nt.insert(i)
-
-        bin_nodes = [entry[7] for entry in idx]
-        hex_nodes = [hex(n) for n in bin_nodes]
-
-        for i, node in enumerate(hex_nodes):
-            self.assertEqual(nt.prefix_rev_lookup(node), i)
-            self.assertEqual(nt.prefix_rev_lookup(node[:5]), i)
-
-        # all 4 revisions in idx (standard data set) have different
-        # first nybbles in their Node IDs,
-        # hence `nt.shortest()` should return 1 for them, except when
-        # the leading nybble is 0 (ambiguity with NULL_NODE)
-        for i, (bin_node, hex_node) in enumerate(zip(bin_nodes, hex_nodes)):
-            shortest = nt.shortest(bin_node)
-            expected = 2 if hex_node[0] == ord('0') else 1
-            self.assertEqual(shortest, expected)
-            self.assertEqual(nt.prefix_rev_lookup(hex_node[:shortest]), i)
-
-        # test invalidation (generation poisoning) detection
-        del idx[3]
-        self.assertTrue(nt.is_invalidated())
-
-
-if __name__ == '__main__':
-    import silenttestrunner
-
-    silenttestrunner.main(__name__)
--- a/tests/test-treemanifest.t	Wed Nov 20 15:38:57 2024 +0100
+++ b/tests/test-treemanifest.t	Wed Nov 20 15:53:19 2024 +0100
@@ -869,11 +869,13 @@
   > done
   $ hg ci -m 'have some content'
   $ f -s .hg/store/00manifest.*
-  .hg/store/00manifest.i: size=798 (no-pure !)
-  .hg/store/00manifest.i: size=784 (pure !)
+  .hg/store/00manifest.i: size=798 (no-pure no-rust !)
+  .hg/store/00manifest.i: size=800 (rust !)
+  .hg/store/00manifest.i: size=784 (pure no-rust !)
   $ f -s .hg/store/meta/dir/00manifest*
-  .hg/store/meta/dir/00manifest.i: size=556 (no-pure !)
-  .hg/store/meta/dir/00manifest.i: size=544 (pure !)
+  .hg/store/meta/dir/00manifest.i: size=556 (no-pure no-rust !)
+  .hg/store/meta/dir/00manifest.i: size=557 (rust !)
+  .hg/store/meta/dir/00manifest.i: size=544 (pure no-rust !)
   $ hg debugupgraderepo --config format.revlog-compression=none --config experimental.treemanifest=yes --run --quiet --no-backup
   upgrade will perform the following actions:
   
--- a/tests/test-verify.t	Wed Nov 20 15:38:57 2024 +0100
+++ b/tests/test-verify.t	Wed Nov 20 15:53:19 2024 +0100
@@ -321,7 +321,8 @@
   $ cat start b > .hg/store/data/a.i
 
   $ hg verify -q
-   a@1: broken revlog! (index a is corrupted)
+   a@1: broken revlog! (index a is corrupted) (no-rust !)
+   a@1: broken revlog! (abort: unexpected inline revlog length) (rust !)
   warning: orphan data file 'data/a.i'
   not checking dirstate because of previous errors
   1 warnings encountered!