rust-revlog: don't create an in-memory nodemap for filelogs from Python
Explanations inline.
Benchmarks show that this change has a positive effect on the only repo where
this was a problem:
```
### data-env-vars.name = mozilla-try-2024-03-26-ds2-pnm
# benchmark.name = hg.command.cat
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.files = all-root
# benchmark.variants.output = plain
# benchmark.variants.rev = tip
default: 62.848869 ~~~~~
before-this-patch: 58.113051 (-7.54%, -4.74)
this-patch: 57.407533 (-8.66%, -5.44)
### data-env-vars.name = mozilla-try-2024-03-26-ds2-pnm
# benchmark.name = hg.command.log
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.limit-rev = 10
# benchmark.variants.patch = yes
# benchmark.variants.rev = none
default: 3.173532 ~~~~~
before-this-patch: 3.543591 (+11.66%, +0.37)
this-patch: 3.297235 (+3.90%, +0.12)
```
--- a/mercurial/revlog.py Wed Jul 31 15:02:55 2024 +0200
+++ b/mercurial/revlog.py Wed Jul 31 13:35:54 2024 +0200
@@ -1870,6 +1870,7 @@
chunk_cache=chunk_cache,
default_compression_header=default_compression_header,
revlog_type=self.target[0],
+ use_persistent_nodemap=self._nodemap_file is not None,
)
self.index = RustIndexProxy(self._inner)
self._register_nodemap_info(self.index)
--- a/rust/hg-cpython/src/revlog.rs Wed Jul 31 15:02:55 2024 +0200
+++ b/rust/hg-cpython/src/revlog.rs Wed Jul 31 13:35:54 2024 +0200
@@ -723,6 +723,7 @@
data head_revs_py_list: RefCell<Option<PyList>>;
data head_node_ids_py_list: RefCell<Option<PyList>>;
data revision_cache: RefCell<Option<PyObject>>;
+ data use_persistent_nodemap: bool;
def __new__(
_cls,
@@ -740,6 +741,7 @@
chunk_cache: PyObject,
default_compression_header: PyObject,
revlog_type: usize,
+ use_persistent_nodemap: bool,
) -> PyResult<Self> {
Self::inner_new(
py,
@@ -756,7 +758,8 @@
feature_config,
chunk_cache,
default_compression_header,
- revlog_type
+ revlog_type,
+ use_persistent_nodemap
)
}
@@ -1108,10 +1111,23 @@
// -- forwarded index methods --
def _index_get_rev(&self, node: PyBytes) -> PyResult<Option<PyRevision>> {
+ let node = node_from_py_bytes(py, &node)?;
+ // Filelogs have no persistent nodemaps and are often small, use a
+ // brute force lookup from the end backwards. If there is a very large
+ // filelog (automation file that changes every commit etc.), it also
+ // seems to work quite well for all measured purposes so far.
+ //
+ // TODO build an in-memory nodemap if more than 4 queries to the same
+ // revlog are made?
+ if !*self.use_persistent_nodemap(py) {
+ let idx = &self.inner(py).borrow().index;
+ let res =
+ idx.rev_from_node_no_persistent_nodemap(node.into()).ok();
+ return Ok(res.map(Into::into))
+ }
let opt = self.get_nodetree(py)?.borrow();
let nt = opt.as_ref().expect("nodetree should be set");
let ridx = &self.inner(py).borrow().index;
- let node = node_from_py_bytes(py, &node)?;
let rust_rev =
nt.find_bin(ridx, node.into()).map_err(|e| nodemap_error(py, e))?;
Ok(rust_rev.map(Into::into))
@@ -1957,6 +1973,7 @@
_chunk_cache: PyObject,
_default_compression_header: PyObject,
revlog_type: usize,
+ use_persistent_nodemap: bool,
) -> PyResult<Self> {
let index_file =
get_path_from_bytes(index_file.extract::<PyBytes>(py)?.data(py))
@@ -2008,6 +2025,7 @@
RefCell::new(None),
RefCell::new(None),
RefCell::new(None),
+ use_persistent_nodemap,
)
}
}