revlog: add glue to use a pure-Rust VFS
authorRaphaël Gomès <rgomes@octobus.net>
Mon, 29 Jul 2024 20:39:34 +0200
changeset 52176 72bc29f01570
parent 52175 bd43465af568
child 52177 1da6995835b4
revlog: add glue to use a pure-Rust VFS This will save us a lot of calling back into Python, which is always horribly expensive. We are now faster in all benchmarked cases except for `log --patch` specifically on mozilla-try. Fixing this will happen in a later patch. ``` ### data-env-vars.name = mercurial-devel-2024-03-22-ds2-pnm # benchmark.name = hg.command.cat # bin-env-vars.hg.flavor = rust # bin-env-vars.hg.py-re2-module = default # benchmark.variants.files = all-root # benchmark.variants.output = plain # benchmark.variants.rev = tip e679697a6ca4: 1.760765 ~~~~~ 5559d7e63ec3: 1.555513 (-11.66%, -0.21) ### data-env-vars.name = mozilla-try-2024-03-26-ds2-pnm # benchmark.name = hg.command.cat # bin-env-vars.hg.flavor = rust # bin-env-vars.hg.py-re2-module = default # benchmark.variants.files = all-root # benchmark.variants.output = plain # benchmark.variants.rev = tip e679697a6ca4: 62.848869 ~~~~~ 5559d7e63ec3: 58.113051 (-7.54%, -4.74) ### data-env-vars.name = mozilla-try-2024-03-26-ds2-pnm # benchmark.name = hg.command.log # bin-env-vars.hg.flavor = rust # bin-env-vars.hg.py-re2-module = default # benchmark.variants.limit-rev = 10 # benchmark.variants.patch = yes # benchmark.variants.rev = none e679697a6ca4: 3.173532 ~~~~~ 5559d7e63ec3: 3.543591 (+11.66%, +0.37) ### data-env-vars.name = mozilla-try-2024-03-26-ds2-pnm # benchmark.name = hg.command.log # bin-env-vars.hg.flavor = rust # bin-env-vars.hg.py-re2-module = default # benchmark.variants.limit-rev = 1000 # benchmark.variants.patch = no # benchmark.variants.rev = none e679697a6ca4: 1.214698 ~~~~~ 5559d7e63ec3: 1.192478 (-1.83%, -0.02) ### data-env-vars.name = mozilla-unified-2024-03-22-ds2-pnm # benchmark.name = hg.command.cat # bin-env-vars.hg.flavor = rust # bin-env-vars.hg.py-re2-module = default # benchmark.variants.files = all-root # benchmark.variants.output = plain # benchmark.variants.rev = tip e679697a6ca4: 56.205474 ~~~~~ 5559d7e63ec3: 51.520074 (-8.34%, -4.69) ### data-env-vars.name = mozilla-unified-2024-03-22-ds2-pnm # benchmark.name = hg.command.log # bin-env-vars.hg.flavor = rust # bin-env-vars.hg.py-re2-module = default # benchmark.variants.limit-rev = 10 # benchmark.variants.patch = yes # benchmark.variants.rev = none e679697a6ca4: 2.105419 ~~~~~ 5559d7e63ec3: 2.051849 (-2.54%, -0.05) ### data-env-vars.name = mozilla-unified-2024-03-22-ds2-pnm # benchmark.name = hg.command.log # bin-env-vars.hg.flavor = rust # bin-env-vars.hg.py-re2-module = default # benchmark.variants.limit-rev = 1000 # benchmark.variants.patch = no # benchmark.variants.rev = none e679697a6ca4: 0.309960 ~~~~~ 5559d7e63ec3: 0.299035 (-3.52%, -0.01) ### data-env-vars.name = tryton-public-2024-03-22-ds2-pnm # benchmark.name = hg.command.cat # bin-env-vars.hg.flavor = rust # bin-env-vars.hg.py-re2-module = default # benchmark.variants.files = all-root # benchmark.variants.output = plain # benchmark.variants.rev = tip e679697a6ca4: 1.849832 ~~~~~ 5559d7e63ec3: 1.805076 (-2.42%, -0.04) ### data-env-vars.name = tryton-public-2024-03-22-ds2-pnm # benchmark.name = hg.command.log # bin-env-vars.hg.flavor = rust # bin-env-vars.hg.py-re2-module = default # benchmark.variants.limit-rev = 10 # benchmark.variants.patch = yes # benchmark.variants.rev = none e679697a6ca4: 0.289521 ~~~~~ 5559d7e63ec3: 0.279889 (-3.33%, -0.01) ### data-env-vars.name = tryton-public-2024-03-22-ds2-pnm # benchmark.name = hg.command.log # bin-env-vars.hg.flavor = rust # bin-env-vars.hg.py-re2-module = default # benchmark.variants.limit-rev = 1000 # benchmark.variants.patch = no # benchmark.variants.rev = none e679697a6ca4: 0.332270 ~~~~~ 5559d7e63ec3: 0.323324 (-2.69%, -0.01) ```
mercurial/revlog.py
rust/hg-cpython/src/revlog.rs
rust/hg-cpython/src/vfs.rs
tests/test-journal-exists.t
tests/test-permissions.t
--- a/mercurial/revlog.py	Mon Jul 29 20:35:44 2024 +0200
+++ b/mercurial/revlog.py	Mon Jul 29 20:39:34 2024 +0200
@@ -1793,6 +1793,17 @@
             if self._format_version != REVLOGV1:
                 use_rust_index = False
 
+        if hasattr(self.opener, "fncache"):
+            vfs = self.opener.vfs
+            if not self.opener.uses_dotencode:
+                use_rust_index = False
+            if not isinstance(vfs, vfsmod.vfs):
+                # Be cautious since we don't support other vfs
+                use_rust_index = False
+        else:
+            # Rust only supports repos with fncache
+            use_rust_index = False
+
         self._parse_index = parse_index_v1
         if self._format_version == REVLOGV0:
             self._parse_index = revlogv0.parse_index_v0
@@ -1828,8 +1839,26 @@
             default_compression_header = self._docket.default_compression_header
 
         if self.uses_rust:
+            vfs_is_readonly = False
+            fncache = None
+
+            if hasattr(self.opener, "vfs"):
+                vfs = self.opener
+                if isinstance(vfs, vfsmod.readonlyvfs):
+                    vfs_is_readonly = True
+                    vfs = vfs.vfs
+                fncache = vfs.fncache
+                vfs = vfs.vfs
+            else:
+                vfs = self.opener
+
+            vfs_base = vfs.base
+            assert fncache is not None, "Rust only supports repos with fncache"
+
             self._inner = rustrevlog.InnerRevlog(
-                opener=RustVFSWrapper(self.opener),
+                vfs_base=vfs_base,
+                fncache=fncache,
+                vfs_is_readonly=vfs_is_readonly,
                 index_data=index,
                 index_file=self._indexfile,
                 data_file=self._datafile,
--- a/rust/hg-cpython/src/revlog.rs	Mon Jul 29 20:35:44 2024 +0200
+++ b/rust/hg-cpython/src/revlog.rs	Mon Jul 29 20:39:34 2024 +0200
@@ -10,7 +10,6 @@
     conversion::{rev_pyiter_collect, rev_pyiter_collect_or_else},
     pybytes_deref::{PyBufferDeref, PyBytesDeref},
     utils::{node_from_py_bytes, node_from_py_object},
-    vfs::PyVfs,
     PyRevision,
 };
 use cpython::{
@@ -22,24 +21,29 @@
 };
 use hg::{
     errors::HgError,
+    fncache::FnCache,
     index::{Phase, RevisionDataParams, SnapshotsCache, INDEX_ENTRY_SIZE},
     nodemap::{Block, NodeMapError, NodeTree as CoreNodeTree},
-    revlog::compression::CompressionConfig,
-    revlog::inner_revlog::InnerRevlog as CoreInnerRevlog,
-    revlog::inner_revlog::RevisionBuffer,
-    revlog::options::{
-        RevlogDataConfig, RevlogDeltaConfig, RevlogFeatureConfig,
-        RevlogOpenOptions,
+    revlog::{
+        compression::CompressionConfig,
+        inner_revlog::{InnerRevlog as CoreInnerRevlog, RevisionBuffer},
+        nodemap::NodeMap,
+        options::{
+            RevlogDataConfig, RevlogDeltaConfig, RevlogFeatureConfig,
+            RevlogOpenOptions,
+        },
+        Graph, NodePrefix, RevlogError, RevlogIndex,
     },
-    revlog::{nodemap::NodeMap, Graph, NodePrefix, RevlogError, RevlogIndex},
     transaction::Transaction,
     utils::files::{get_bytes_from_path, get_path_from_bytes},
+    vfs::FnCacheVfs,
     BaseRevision, Node, Revision, RevlogType, UncheckedRevision,
     NULL_REVISION,
 };
 use std::{
     cell::{Cell, RefCell},
     collections::{HashMap, HashSet},
+    sync::atomic::{AtomicBool, Ordering},
     sync::OnceLock,
 };
 use vcsgraph::graph::Graph as VCSGraph;
@@ -647,6 +651,67 @@
     }
 });
 
+struct PyFnCache {
+    fncache: PyObject,
+}
+impl PyFnCache {
+    fn new(fncache: PyObject) -> Self {
+        Self { fncache }
+    }
+}
+
+impl Clone for PyFnCache {
+    fn clone(&self) -> Self {
+        let gil = Python::acquire_gil();
+        let py = gil.python();
+        Self {
+            fncache: self.fncache.clone_ref(py),
+        }
+    }
+}
+
+/// Cache whether the fncache is loaded to avoid Python round-trip every time.
+/// Once the fncache is loaded, it stays loaded unless we're in a very
+/// long-running process, none of which we actually support for now.
+static FN_CACHE_IS_LOADED: AtomicBool = AtomicBool::new(false);
+
+impl FnCache for PyFnCache {
+    fn is_loaded(&self) -> bool {
+        if FN_CACHE_IS_LOADED.load(Ordering::Relaxed) {
+            return true;
+        }
+        let gil = Python::acquire_gil();
+        let py = gil.python();
+        // TODO raise in case of error?
+        let is_loaded = self
+            .fncache
+            .getattr(py, "is_loaded")
+            .ok()
+            .map(|o| {
+                o.extract::<bool>(py)
+                    .expect("is_loaded returned something other than a bool")
+            })
+            .unwrap_or(false);
+        if is_loaded {
+            FN_CACHE_IS_LOADED.store(true, Ordering::Relaxed);
+        }
+        is_loaded
+    }
+    fn add(&self, path: &std::path::Path) {
+        let gil = Python::acquire_gil();
+        let py = gil.python();
+        // TODO raise in case of error?
+        self.fncache
+            .call_method(
+                py,
+                "add",
+                (PyBytes::new(py, &get_bytes_from_path(path)),),
+                None,
+            )
+            .ok();
+    }
+}
+
 py_class!(pub class InnerRevlog |py| {
     @shared data inner: CoreInnerRevlog;
     data nt: RefCell<Option<CoreNodeTree>>;
@@ -661,7 +726,9 @@
 
     def __new__(
         _cls,
-        opener: PyObject,
+        vfs_base: PyObject,
+        fncache: PyObject,
+        vfs_is_readonly: bool,
         index_data: PyObject,
         index_file: PyObject,
         data_file: PyObject,
@@ -676,7 +743,9 @@
     ) -> PyResult<Self> {
         Self::inner_new(
             py,
-            opener,
+            vfs_base,
+            fncache,
+            vfs_is_readonly,
             index_data,
             index_file,
             data_file,
@@ -1874,7 +1943,9 @@
 impl InnerRevlog {
     pub fn inner_new(
         py: Python,
-        opener: PyObject,
+        vfs_base: PyObject,
+        fncache: PyObject,
+        vfs_is_readonly: bool,
         index_data: PyObject,
         index_file: PyObject,
         data_file: PyObject,
@@ -1887,7 +1958,6 @@
         _default_compression_header: PyObject,
         revlog_type: usize,
     ) -> PyResult<Self> {
-        let vfs = Box::new(PyVfs::new(py, opener)?);
         let index_file =
             get_path_from_bytes(index_file.extract::<PyBytes>(py)?.data(py))
                 .to_owned();
@@ -1907,12 +1977,20 @@
             delta_config,
             feature_config,
         );
+
         // Safety: we keep the buffer around inside the class as `index_mmap`
         let (buf, bytes) = unsafe { mmap_keeparound(py, index_data)? };
         let index = hg::index::Index::new(bytes, options.index_header())
             .map_err(|e| revlog_error_from_msg(py, e))?;
+
+        let base = &vfs_base.extract::<PyBytes>(py)?;
+        let base = get_path_from_bytes(base.data(py)).to_owned();
         let core = CoreInnerRevlog::new(
-            vfs,
+            Box::new(FnCacheVfs::new(
+                base,
+                vfs_is_readonly,
+                Box::new(PyFnCache::new(fncache)),
+            )),
             index,
             index_file,
             data_file,
--- a/rust/hg-cpython/src/vfs.rs	Mon Jul 29 20:35:44 2024 +0200
+++ b/rust/hg-cpython/src/vfs.rs	Mon Jul 29 20:39:34 2024 +0200
@@ -20,6 +20,7 @@
 /// Wrapper around a Python VFS object to call back into Python from `hg-core`.
 pub struct PyVfs {
     inner: PyObject,
+    base: PathBuf,
 }
 
 impl Clone for PyVfs {
@@ -28,13 +29,21 @@
         let py = gil.python();
         Self {
             inner: self.inner.clone_ref(py),
+            base: self.base.clone(),
         }
     }
 }
 
 impl PyVfs {
-    pub fn new(_py: Python, py_vfs: PyObject) -> PyResult<Self> {
-        Ok(Self { inner: py_vfs })
+    pub fn new(
+        _py: Python,
+        py_vfs: PyObject,
+        base: PathBuf,
+    ) -> PyResult<Self> {
+        Ok(Self {
+            inner: py_vfs,
+            base,
+        })
     }
 
     fn inner_open(
@@ -288,7 +297,6 @@
     }
 
     fn base(&self) -> &Path {
-        // This will only be useful in a later patch
-        todo!()
+        &self.base
     }
 }
--- a/tests/test-journal-exists.t	Mon Jul 29 20:35:44 2024 +0200
+++ b/tests/test-journal-exists.t	Mon Jul 29 20:39:34 2024 +0200
@@ -50,7 +50,7 @@
   adding changesets
   transaction abort!
   rollback completed
-  abort: failed to call opener: [Errno 13] $EACCES$: b'$TESTTMP/repo/foo/.hg/store/.00changelog.i-*' (glob)
+  abort: abort: when writing $TESTTMP/repo/foo/.hg/store/00changelog.i: $EACCES$
   [50]
 #else
   $ hg -R foo unbundle repo.hg
--- a/tests/test-permissions.t	Mon Jul 29 20:35:44 2024 +0200
+++ b/tests/test-permissions.t	Mon Jul 29 20:39:34 2024 +0200
@@ -36,7 +36,7 @@
   $ echo barber > a
 #if rust
   $ hg commit -m "2"
-  abort: failed to call opener: [Errno 13] $EACCES$: b'$TESTTMP/t/.hg/store/data/a.i'
+  abort: abort: when writing $TESTTMP/t/.hg/store/data/a.i: $EACCES$
   [50]
 #else
   $ hg commit -m "2"