Mercurial > hg
changeset 52178:bcd4962e0df9
rust-revlog: don't create an in-memory nodemap for filelogs from Python
Explanations inline.
Benchmarks show that this change has a positive effect on the only repository
where this was shown to be a problem:
```
### data-env-vars.name = mozilla-try-2024-03-26-ds2-pnm
# benchmark.name = hg.command.cat
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.files = all-root
# benchmark.variants.output = plain
# benchmark.variants.rev = tip
default: 62.848869 ~~~~~
before-this-patch: 58.113051 (-7.54%, -4.74)
this-patch: 57.407533 (-8.66%, -5.44)
### data-env-vars.name = mozilla-try-2024-03-26-ds2-pnm
# benchmark.name = hg.command.log
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.limit-rev = 10
# benchmark.variants.patch = yes
# benchmark.variants.rev = none
default: 3.173532 ~~~~~
before-this-patch: 3.543591 (+11.66%, +0.37)
this-patch: 3.297235 (+3.90%, +0.12)
```
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Wed, 31 Jul 2024 13:35:54 +0200 |
parents | 1da6995835b4 |
children | c90e0f65896e |
files | mercurial/revlog.py rust/hg-cpython/src/revlog.rs |
diffstat | 2 files changed, 21 insertions(+), 2 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/revlog.py Wed Jul 31 15:02:55 2024 +0200 +++ b/mercurial/revlog.py Wed Jul 31 13:35:54 2024 +0200 @@ -1870,6 +1870,7 @@ chunk_cache=chunk_cache, default_compression_header=default_compression_header, revlog_type=self.target[0], + use_persistent_nodemap=self._nodemap_file is not None, ) self.index = RustIndexProxy(self._inner) self._register_nodemap_info(self.index)
--- a/rust/hg-cpython/src/revlog.rs Wed Jul 31 15:02:55 2024 +0200 +++ b/rust/hg-cpython/src/revlog.rs Wed Jul 31 13:35:54 2024 +0200 @@ -723,6 +723,7 @@ data head_revs_py_list: RefCell<Option<PyList>>; data head_node_ids_py_list: RefCell<Option<PyList>>; data revision_cache: RefCell<Option<PyObject>>; + data use_persistent_nodemap: bool; def __new__( _cls, @@ -740,6 +741,7 @@ chunk_cache: PyObject, default_compression_header: PyObject, revlog_type: usize, + use_persistent_nodemap: bool, ) -> PyResult<Self> { Self::inner_new( py, @@ -756,7 +758,8 @@ feature_config, chunk_cache, default_compression_header, - revlog_type + revlog_type, + use_persistent_nodemap ) } @@ -1108,10 +1111,23 @@ // -- forwarded index methods -- def _index_get_rev(&self, node: PyBytes) -> PyResult<Option<PyRevision>> { + let node = node_from_py_bytes(py, &node)?; + // Filelogs have no persistent nodemaps and are often small, use a + // brute force lookup from the end backwards. If there is a very large + // filelog (automation file that changes every commit etc.), it also + // seems to work quite well for all measured purposes so far. + // + // TODO build an in-memory nodemap if more than 4 queries to the same + // revlog are made? 
+ if !*self.use_persistent_nodemap(py) { + let idx = &self.inner(py).borrow().index; + let res = + idx.rev_from_node_no_persistent_nodemap(node.into()).ok(); + return Ok(res.map(Into::into)) + } let opt = self.get_nodetree(py)?.borrow(); let nt = opt.as_ref().expect("nodetree should be set"); let ridx = &self.inner(py).borrow().index; - let node = node_from_py_bytes(py, &node)?; let rust_rev = nt.find_bin(ridx, node.into()).map_err(|e| nodemap_error(py, e))?; Ok(rust_rev.map(Into::into)) @@ -1957,6 +1973,7 @@ _chunk_cache: PyObject, _default_compression_header: PyObject, revlog_type: usize, + use_persistent_nodemap: bool, ) -> PyResult<Self> { let index_file = get_path_from_bytes(index_file.extract::<PyBytes>(py)?.data(py)) @@ -2008,6 +2025,7 @@ RefCell::new(None), RefCell::new(None), RefCell::new(None), + use_persistent_nodemap, ) } }