Mercurial > hg
changeset 52176:72bc29f01570
revlog: add glue to use a pure-Rust VFS
This will save us a lot of calling back into Python, which is always
horribly expensive.
We are now faster in all benchmarked cases except for `log --patch`
specifically on mozilla-try. Fixing this will happen in a later patch.
```
### data-env-vars.name = mercurial-devel-2024-03-22-ds2-pnm
# benchmark.name = hg.command.cat
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.files = all-root
# benchmark.variants.output = plain
# benchmark.variants.rev = tip
e679697a6ca4: 1.760765 ~~~~~
5559d7e63ec3: 1.555513 (-11.66%, -0.21)
### data-env-vars.name = mozilla-try-2024-03-26-ds2-pnm
# benchmark.name = hg.command.cat
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.files = all-root
# benchmark.variants.output = plain
# benchmark.variants.rev = tip
e679697a6ca4: 62.848869 ~~~~~
5559d7e63ec3: 58.113051 (-7.54%, -4.74)
### data-env-vars.name = mozilla-try-2024-03-26-ds2-pnm
# benchmark.name = hg.command.log
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.limit-rev = 10
# benchmark.variants.patch = yes
# benchmark.variants.rev = none
e679697a6ca4: 3.173532 ~~~~~
5559d7e63ec3: 3.543591 (+11.66%, +0.37)
### data-env-vars.name = mozilla-try-2024-03-26-ds2-pnm
# benchmark.name = hg.command.log
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.limit-rev = 1000
# benchmark.variants.patch = no
# benchmark.variants.rev = none
e679697a6ca4: 1.214698 ~~~~~
5559d7e63ec3: 1.192478 (-1.83%, -0.02)
### data-env-vars.name = mozilla-unified-2024-03-22-ds2-pnm
# benchmark.name = hg.command.cat
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.files = all-root
# benchmark.variants.output = plain
# benchmark.variants.rev = tip
e679697a6ca4: 56.205474 ~~~~~
5559d7e63ec3: 51.520074 (-8.34%, -4.69)
### data-env-vars.name = mozilla-unified-2024-03-22-ds2-pnm
# benchmark.name = hg.command.log
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.limit-rev = 10
# benchmark.variants.patch = yes
# benchmark.variants.rev = none
e679697a6ca4: 2.105419 ~~~~~
5559d7e63ec3: 2.051849 (-2.54%, -0.05)
### data-env-vars.name = mozilla-unified-2024-03-22-ds2-pnm
# benchmark.name = hg.command.log
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.limit-rev = 1000
# benchmark.variants.patch = no
# benchmark.variants.rev = none
e679697a6ca4: 0.309960 ~~~~~
5559d7e63ec3: 0.299035 (-3.52%, -0.01)
### data-env-vars.name = tryton-public-2024-03-22-ds2-pnm
# benchmark.name = hg.command.cat
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.files = all-root
# benchmark.variants.output = plain
# benchmark.variants.rev = tip
e679697a6ca4: 1.849832 ~~~~~
5559d7e63ec3: 1.805076 (-2.42%, -0.04)
### data-env-vars.name = tryton-public-2024-03-22-ds2-pnm
# benchmark.name = hg.command.log
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.limit-rev = 10
# benchmark.variants.patch = yes
# benchmark.variants.rev = none
e679697a6ca4: 0.289521 ~~~~~
5559d7e63ec3: 0.279889 (-3.33%, -0.01)
### data-env-vars.name = tryton-public-2024-03-22-ds2-pnm
# benchmark.name = hg.command.log
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.limit-rev = 1000
# benchmark.variants.patch = no
# benchmark.variants.rev = none
e679697a6ca4: 0.332270 ~~~~~
5559d7e63ec3: 0.323324 (-2.69%, -0.01)
```
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Mon, 29 Jul 2024 20:39:34 +0200 |
parents | bd43465af568 |
children | 1da6995835b4 |
files | mercurial/revlog.py rust/hg-cpython/src/revlog.rs rust/hg-cpython/src/vfs.rs tests/test-journal-exists.t tests/test-permissions.t |
diffstat | 5 files changed, 135 insertions(+), 20 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/revlog.py Mon Jul 29 20:35:44 2024 +0200 +++ b/mercurial/revlog.py Mon Jul 29 20:39:34 2024 +0200 @@ -1793,6 +1793,17 @@ if self._format_version != REVLOGV1: use_rust_index = False + if hasattr(self.opener, "fncache"): + vfs = self.opener.vfs + if not self.opener.uses_dotencode: + use_rust_index = False + if not isinstance(vfs, vfsmod.vfs): + # Be cautious since we don't support other vfs + use_rust_index = False + else: + # Rust only supports repos with fncache + use_rust_index = False + self._parse_index = parse_index_v1 if self._format_version == REVLOGV0: self._parse_index = revlogv0.parse_index_v0 @@ -1828,8 +1839,26 @@ default_compression_header = self._docket.default_compression_header if self.uses_rust: + vfs_is_readonly = False + fncache = None + + if hasattr(self.opener, "vfs"): + vfs = self.opener + if isinstance(vfs, vfsmod.readonlyvfs): + vfs_is_readonly = True + vfs = vfs.vfs + fncache = vfs.fncache + vfs = vfs.vfs + else: + vfs = self.opener + + vfs_base = vfs.base + assert fncache is not None, "Rust only supports repos with fncache" + self._inner = rustrevlog.InnerRevlog( - opener=RustVFSWrapper(self.opener), + vfs_base=vfs_base, + fncache=fncache, + vfs_is_readonly=vfs_is_readonly, index_data=index, index_file=self._indexfile, data_file=self._datafile,
--- a/rust/hg-cpython/src/revlog.rs Mon Jul 29 20:35:44 2024 +0200 +++ b/rust/hg-cpython/src/revlog.rs Mon Jul 29 20:39:34 2024 +0200 @@ -10,7 +10,6 @@ conversion::{rev_pyiter_collect, rev_pyiter_collect_or_else}, pybytes_deref::{PyBufferDeref, PyBytesDeref}, utils::{node_from_py_bytes, node_from_py_object}, - vfs::PyVfs, PyRevision, }; use cpython::{ @@ -22,24 +21,29 @@ }; use hg::{ errors::HgError, + fncache::FnCache, index::{Phase, RevisionDataParams, SnapshotsCache, INDEX_ENTRY_SIZE}, nodemap::{Block, NodeMapError, NodeTree as CoreNodeTree}, - revlog::compression::CompressionConfig, - revlog::inner_revlog::InnerRevlog as CoreInnerRevlog, - revlog::inner_revlog::RevisionBuffer, - revlog::options::{ - RevlogDataConfig, RevlogDeltaConfig, RevlogFeatureConfig, - RevlogOpenOptions, + revlog::{ + compression::CompressionConfig, + inner_revlog::{InnerRevlog as CoreInnerRevlog, RevisionBuffer}, + nodemap::NodeMap, + options::{ + RevlogDataConfig, RevlogDeltaConfig, RevlogFeatureConfig, + RevlogOpenOptions, + }, + Graph, NodePrefix, RevlogError, RevlogIndex, }, - revlog::{nodemap::NodeMap, Graph, NodePrefix, RevlogError, RevlogIndex}, transaction::Transaction, utils::files::{get_bytes_from_path, get_path_from_bytes}, + vfs::FnCacheVfs, BaseRevision, Node, Revision, RevlogType, UncheckedRevision, NULL_REVISION, }; use std::{ cell::{Cell, RefCell}, collections::{HashMap, HashSet}, + sync::atomic::{AtomicBool, Ordering}, sync::OnceLock, }; use vcsgraph::graph::Graph as VCSGraph; @@ -647,6 +651,67 @@ } }); +struct PyFnCache { + fncache: PyObject, +} +impl PyFnCache { + fn new(fncache: PyObject) -> Self { + Self { fncache } + } +} + +impl Clone for PyFnCache { + fn clone(&self) -> Self { + let gil = Python::acquire_gil(); + let py = gil.python(); + Self { + fncache: self.fncache.clone_ref(py), + } + } +} + +/// Cache whether the fncache is loaded to avoid Python round-trip every time. +/// Once the fncache is loaded, it stays loaded unless we're in a very +/// long-running process, none of which we actually support for now. +static FN_CACHE_IS_LOADED: AtomicBool = AtomicBool::new(false); + +impl FnCache for PyFnCache { + fn is_loaded(&self) -> bool { + if FN_CACHE_IS_LOADED.load(Ordering::Relaxed) { + return true; + } + let gil = Python::acquire_gil(); + let py = gil.python(); + // TODO raise in case of error? + let is_loaded = self + .fncache + .getattr(py, "is_loaded") + .ok() + .map(|o| { + o.extract::<bool>(py) + .expect("is_loaded returned something other than a bool") + }) + .unwrap_or(false); + if is_loaded { + FN_CACHE_IS_LOADED.store(true, Ordering::Relaxed); + } + is_loaded + } + fn add(&self, path: &std::path::Path) { + let gil = Python::acquire_gil(); + let py = gil.python(); + // TODO raise in case of error? + self.fncache + .call_method( + py, + "add", + (PyBytes::new(py, &get_bytes_from_path(path)),), + None, + ) + .ok(); + } +} + py_class!(pub class InnerRevlog |py| { @shared data inner: CoreInnerRevlog; data nt: RefCell<Option<CoreNodeTree>>; @@ -661,7 +726,9 @@ def __new__( _cls, - opener: PyObject, + vfs_base: PyObject, + fncache: PyObject, + vfs_is_readonly: bool, index_data: PyObject, index_file: PyObject, data_file: PyObject, @@ -676,7 +743,9 @@ ) -> PyResult<Self> { Self::inner_new( py, - opener, + vfs_base, + fncache, + vfs_is_readonly, index_data, index_file, data_file, @@ -1874,7 +1943,9 @@ impl InnerRevlog { pub fn inner_new( py: Python, - opener: PyObject, + vfs_base: PyObject, + fncache: PyObject, + vfs_is_readonly: bool, index_data: PyObject, index_file: PyObject, data_file: PyObject, @@ -1887,7 +1958,6 @@ _default_compression_header: PyObject, revlog_type: usize, ) -> PyResult<Self> { - let vfs = Box::new(PyVfs::new(py, opener)?); let index_file = get_path_from_bytes(index_file.extract::<PyBytes>(py)?.data(py)) .to_owned(); @@ -1907,12 +1977,20 @@ delta_config, feature_config, ); + // Safety: we keep the buffer around inside the class as `index_mmap` let (buf, bytes) = unsafe { mmap_keeparound(py, index_data)? }; let index = hg::index::Index::new(bytes, options.index_header()) .map_err(|e| revlog_error_from_msg(py, e))?; + + let base = &vfs_base.extract::<PyBytes>(py)?; + let base = get_path_from_bytes(base.data(py)).to_owned(); let core = CoreInnerRevlog::new( - vfs, + Box::new(FnCacheVfs::new( + base, + vfs_is_readonly, + Box::new(PyFnCache::new(fncache)), + )), index, index_file, data_file,
--- a/rust/hg-cpython/src/vfs.rs Mon Jul 29 20:35:44 2024 +0200 +++ b/rust/hg-cpython/src/vfs.rs Mon Jul 29 20:39:34 2024 +0200 @@ -20,6 +20,7 @@ /// Wrapper around a Python VFS object to call back into Python from `hg-core`. pub struct PyVfs { inner: PyObject, + base: PathBuf, } impl Clone for PyVfs { @@ -28,13 +29,21 @@ let py = gil.python(); Self { inner: self.inner.clone_ref(py), + base: self.base.clone(), } } } impl PyVfs { - pub fn new(_py: Python, py_vfs: PyObject) -> PyResult<Self> { - Ok(Self { inner: py_vfs }) + pub fn new( + _py: Python, + py_vfs: PyObject, + base: PathBuf, + ) -> PyResult<Self> { + Ok(Self { + inner: py_vfs, + base, + }) } fn inner_open( @@ -288,7 +297,6 @@ } fn base(&self) -> &Path { - // This will only be useful in a later patch - todo!() + &self.base } }
--- a/tests/test-journal-exists.t Mon Jul 29 20:35:44 2024 +0200 +++ b/tests/test-journal-exists.t Mon Jul 29 20:39:34 2024 +0200 @@ -50,7 +50,7 @@ adding changesets transaction abort! rollback completed - abort: failed to call opener: [Errno 13] $EACCES$: b'$TESTTMP/repo/foo/.hg/store/.00changelog.i-*' (glob) + abort: abort: when writing $TESTTMP/repo/foo/.hg/store/00changelog.i: $EACCES$ [50] #else $ hg -R foo unbundle repo.hg
--- a/tests/test-permissions.t Mon Jul 29 20:35:44 2024 +0200 +++ b/tests/test-permissions.t Mon Jul 29 20:39:34 2024 +0200 @@ -36,7 +36,7 @@ $ echo barber > a #if rust $ hg commit -m "2" - abort: failed to call opener: [Errno 13] $EACCES$: b'$TESTTMP/t/.hg/store/data/a.i' + abort: abort: when writing $TESTTMP/t/.hg/store/data/a.i: $EACCES$ [50] #else $ hg commit -m "2"