rust-index: add fast-path for getting a list of all heads as nodes
author Raphaël Gomès <rgomes@octobus.net>
Tue, 05 Dec 2023 14:50:05 +0100
changeset 51262 f20c4b307a5a
parent 51261 9088c6d65ef6
child 51263 5b4995b40db0
rust-index: add fast-path for getting a list of all heads as nodes. This avoids a lot of back-and-forth between Python and Rust. We do not add a fast-path for the `filteredchangelog` case yet; if it shows up in profiling, we might add the variant with a filter.
mercurial/repoview.py
mercurial/revlog.py
rust/hg-cpython/src/revlog.rs
--- a/mercurial/repoview.py	Wed Nov 29 23:22:51 2023 -0500
+++ b/mercurial/repoview.py	Tue Dec 05 14:50:05 2023 +0100
@@ -305,6 +305,10 @@
             raise error.FilteredIndexError(rev)
         return revs
 
+    def _head_node_ids(self):
+        # no Rust fast path implemented yet, so just loop in Python
+        return [self.node(r) for r in self.headrevs()]
+
     def headrevs(self, revs=None):
         if revs is None:
             try:
--- a/mercurial/revlog.py	Wed Nov 29 23:22:51 2023 -0500
+++ b/mercurial/revlog.py	Tue Dec 05 14:50:05 2023 +0100
@@ -2362,6 +2362,12 @@
             ishead[e[5]] = ishead[e[6]] = 0  # my parent are not
         return [r for r, val in enumerate(ishead) if val]
 
+    def _head_node_ids(self):
+        try:
+            return self.index.head_node_ids()
+        except AttributeError:
+            return [self.node(r) for r in self.headrevs()]
+
     def heads(self, start=None, stop=None):
         """return the list of all nodes that have no children
 
@@ -2373,8 +2379,7 @@
         if start is None and stop is None:
             if not len(self):
                 return [self.nullid]
-            return [self.node(r) for r in self.headrevs()]
-
+            return self._head_node_ids()
         if start is None:
             start = nullrev
         else:
--- a/rust/hg-cpython/src/revlog.rs	Wed Nov 29 23:22:51 2023 -0500
+++ b/rust/hg-cpython/src/revlog.rs	Tue Dec 05 14:50:05 2023 +0100
@@ -307,6 +307,12 @@
         Ok(rust_res)
     }
 
+    /// get head nodeids
+    def head_node_ids(&self) -> PyResult<PyObject> {
+        let rust_res = self.inner_head_node_ids(py)?;
+        Ok(rust_res)
+    }
+
     /// get filtered head revisions
     def headrevsfiltered(&self, *args, **_kw) -> PyResult<PyObject> {
         let rust_res = self.inner_headrevsfiltered(py, &args.get_item(py, 0))?;
@@ -774,6 +780,32 @@
         })
     }
 
+    fn inner_head_node_ids(&self, py: Python) -> PyResult<PyObject> {
+        let index = &*self.index(py).borrow();
+
+        // We don't use the shortcut here, as it's actually slower to loop
+        // through the cached `PyList` than to re-do the whole computation for
+        // large lists, which are the performance sensitive ones anyway.
+        let head_revs = index.head_revs().map_err(|e| graph_error(py, e))?;
+        let res: Vec<_> = head_revs
+            .iter()
+            .map(|r| {
+                PyBytes::new(
+                    py,
+                    index
+                        .node(*r)
+                        .expect("rev should have been in the index")
+                        .as_bytes(),
+                )
+                .into_object()
+            })
+            .collect();
+
+        self.cache_new_heads_py_list(head_revs, py);
+
+        Ok(PyList::new(py, &res).into_object())
+    }
+
     fn inner_headrevs(&self, py: Python) -> PyResult<PyObject> {
         let index = &*self.index(py).borrow();
         if let Some(new_heads) =