rust-revlog: don't create an in-memory nodemap for filelogs from Python
authorRaphaël Gomès <rgomes@octobus.net>
Wed, 31 Jul 2024 13:35:54 +0200
changeset 52178 bcd4962e0df9
parent 52177 1da6995835b4
child 52179 c90e0f65896e
rust-revlog: don't create an in-memory nodemap for filelogs from Python Explanations inline. Benchmarks show that this change positively affects the only repo where this was shown to be a problem: ``` ### data-env-vars.name = mozilla-try-2024-03-26-ds2-pnm # benchmark.name = hg.command.cat # bin-env-vars.hg.flavor = rust # bin-env-vars.hg.py-re2-module = default # benchmark.variants.files = all-root # benchmark.variants.output = plain # benchmark.variants.rev = tip default: 62.848869 ~~~~~ before-this-patch: 58.113051 (-7.54%, -4.74) this-patch: 57.407533 (-8.66%, -5.44) ### data-env-vars.name = mozilla-try-2024-03-26-ds2-pnm # benchmark.name = hg.command.log # bin-env-vars.hg.flavor = rust # bin-env-vars.hg.py-re2-module = default # benchmark.variants.limit-rev = 10 # benchmark.variants.patch = yes # benchmark.variants.rev = none default: 3.173532 ~~~~~ before-this-patch: 3.543591 (+11.66%, +0.37) this-patch: 3.297235 (+3.90%, +0.12) ```
mercurial/revlog.py
rust/hg-cpython/src/revlog.rs
--- a/mercurial/revlog.py	Wed Jul 31 15:02:55 2024 +0200
+++ b/mercurial/revlog.py	Wed Jul 31 13:35:54 2024 +0200
@@ -1870,6 +1870,7 @@
                 chunk_cache=chunk_cache,
                 default_compression_header=default_compression_header,
                 revlog_type=self.target[0],
+                use_persistent_nodemap=self._nodemap_file is not None,
             )
             self.index = RustIndexProxy(self._inner)
             self._register_nodemap_info(self.index)
--- a/rust/hg-cpython/src/revlog.rs	Wed Jul 31 15:02:55 2024 +0200
+++ b/rust/hg-cpython/src/revlog.rs	Wed Jul 31 13:35:54 2024 +0200
@@ -723,6 +723,7 @@
     data head_revs_py_list: RefCell<Option<PyList>>;
     data head_node_ids_py_list: RefCell<Option<PyList>>;
     data revision_cache: RefCell<Option<PyObject>>;
+    data use_persistent_nodemap: bool;
 
     def __new__(
         _cls,
@@ -740,6 +741,7 @@
         chunk_cache: PyObject,
         default_compression_header: PyObject,
         revlog_type: usize,
+        use_persistent_nodemap: bool,
     ) -> PyResult<Self> {
         Self::inner_new(
             py,
@@ -756,7 +758,8 @@
             feature_config,
             chunk_cache,
             default_compression_header,
-            revlog_type
+            revlog_type,
+            use_persistent_nodemap
         )
     }
 
@@ -1108,10 +1111,23 @@
     // -- forwarded index methods --
 
     def _index_get_rev(&self, node: PyBytes) -> PyResult<Option<PyRevision>> {
+        let node = node_from_py_bytes(py, &node)?;
+        // Filelogs have no persistent nodemaps and are often small, use a
+        // brute force lookup from the end backwards. If there is a very large
+        // filelog (automation file that changes every commit etc.), it also
+        // seems to work quite well for all measured purposes so far.
+        //
+        // TODO build an in-memory nodemap if more than 4 queries to the same
+        // revlog are made?
+        if !*self.use_persistent_nodemap(py) {
+            let idx = &self.inner(py).borrow().index;
+            let res =
+                idx.rev_from_node_no_persistent_nodemap(node.into()).ok();
+            return Ok(res.map(Into::into))
+        }
         let opt = self.get_nodetree(py)?.borrow();
         let nt = opt.as_ref().expect("nodetree should be set");
         let ridx = &self.inner(py).borrow().index;
-        let node = node_from_py_bytes(py, &node)?;
         let rust_rev =
             nt.find_bin(ridx, node.into()).map_err(|e| nodemap_error(py, e))?;
         Ok(rust_rev.map(Into::into))
@@ -1957,6 +1973,7 @@
         _chunk_cache: PyObject,
         _default_compression_header: PyObject,
         revlog_type: usize,
+        use_persistent_nodemap: bool,
     ) -> PyResult<Self> {
         let index_file =
             get_path_from_bytes(index_file.extract::<PyBytes>(py)?.data(py))
@@ -2008,6 +2025,7 @@
             RefCell::new(None),
             RefCell::new(None),
             RefCell::new(None),
+            use_persistent_nodemap,
         )
     }
 }