head-revs: add a native implementation of the `stop_rev` parameter
authorPierre-Yves David <pierre-yves.david@octobus.net>
Fri, 27 Sep 2024 03:55:40 +0200
changeset 51979 609700e5d8df
parent 51978 5d1e6f447d2d
child 51980 3e135e79b7f7
head-revs: add a native implementation of the `stop_rev` parameter This does not add too much complexity to the native code and help with branchmap v3 performance. Note that the final conversion of the heads from native-code to Python is still too costly, especially in Rust. In addition the current caching around headrevs is too simple and fragile. However these are an unrelated problem. ### benchmark.name = hg.command.unbundle # bin-env-vars.hg.py-re2-module = default # benchmark.variants.issue6528 = disabled # benchmark.variants.resource-usage = default # benchmark.variants.reuse-external-delta-parent = yes # benchmark.variants.revs = any-1-extra-rev # benchmark.variants.source = unbundle # benchmark.variants.validate = default # benchmark.variants.verbosity = quiet ## data-env-vars.name = netbeans-2018-08-01-zstd-sparse-revlog # bin-env-vars.hg.flavor = default branch-v2: 0.233711 ~~~~~ branch-v3 before: 0.239857 (+2.63%, +0.01) branch-v3 after: 0.239558 (+2.50%, +0.01) # bin-env-vars.hg.flavor = rust branch-v2: 0.235230 ~~~~~ branch-v3 before: 0.240972 (+2.44%, +0.01) branch-v3 after: 0.239917 (+1.99%, +0.00) ## data-env-vars.name = netbeans-2018-08-01-ds2-pnm # bin-env-vars.hg.flavor = rust branch-v2: 0.255586 ~~~~~ branch-v3 before: 0.268560 (+5.08%, +0.01) branch-v3 after: 0.262261 (+2.61%, +0.01) ## data-env-vars.name = mozilla-central-2024-03-22-zstd-sparse-revlog # bin-env-vars.hg.flavor = default branch-v2: 0.339010 ~~~~~ branch-v3 before: 0.349389 (+3.06%, +0.01) branch-v3 after: 0.348247 (+2.72%, +0.01) # bin-env-vars.hg.flavor = rust branch-v2: 0.346525 ~~~~~ branch-v3 before: 0.355661 (+2.64%, +0.01) branch-v3 after: 0.350906 (+1.26%, +0.00) ## data-env-vars.name = mozilla-central-2024-03-22-ds2-pnm # bin-env-vars.hg.flavor = rust branch-v2: 0.380202 ~~~~~ branch-v3 before: 0.408851 (+7.54%, +0.03) branch-v3 after: 0.406511 (+6.92%, +0.03) ## data-env-vars.name = mozilla-unified-2024-03-22-zstd-sparse-revlog # bin-env-vars.hg.flavor = default branch-v2: 0.412165 ~~~~~ branch-v3 before: 0.427782 (+3.79%, +0.02) branch-v3 after: 0.422595 (+2.53%, +0.01) # bin-env-vars.hg.flavor = rust branch-v2: 0.412397 ~~~~~ branch-v3 before: 0.422354 (+2.41%, +0.01) branch-v3 after: 0.421079 (+2.11%, +0.01) ## data-env-vars.name = mozilla-unified-2024-03-22-ds2-pnm # bin-env-vars.hg.flavor = rust branch-v2: 0.429501 ~~~~~ branch-v3 before: 0.443197 (+3.19%, +0.01) branch-v3 after: 0.449432 (+4.64%, +0.02) ## data-env-vars.name = mozilla-try-2024-03-26-zstd-sparse-revlog # bin-env-vars.hg.flavor = default branch-v2: 3.403171 ~~~~~ branch-v3 before: 3.819477 (+12.23%, +0.42) branch-v3 after: 3.658482 (+7.50%, +0.26) # bin-env-vars.hg.flavor = rust branch-v2: 3.454876 ~~~~~ branch-v3 before: 3.590284 (+3.92%, +0.14) branch-v3 after: 3.545843 (+2.63%, +0.09) ## data-env-vars.name = mozilla-try-2024-03-26-ds2-pnm # bin-env-vars.hg.flavor = rust branch-v2: 3.465435 ~~~~~ branch-v3 before: 3.633278 (+4.84%, +0.17) branch-v3 after: 3.556074 (+2.62%, +0.09)
mercurial/cext/revlog.c
mercurial/pure/parsers.py
mercurial/repoview.py
mercurial/revlog.py
mercurial/revlogutils/revlogv0.py
rust/hg-core/src/revlog/index.rs
rust/hg-cpython/src/revlog.rs
--- a/mercurial/cext/revlog.c	Thu Sep 26 01:52:09 2024 +0200
+++ b/mercurial/cext/revlog.c	Fri Sep 27 03:55:40 2024 +0200
@@ -1237,22 +1237,36 @@
 
 static PyObject *index_headrevs(indexObject *self, PyObject *args)
 {
-	Py_ssize_t i, j, len;
+	Py_ssize_t i, j, len, stop_rev;
 	char *nothead = NULL;
 	PyObject *heads = NULL;
 	PyObject *filter = NULL;
 	PyObject *filteredrevs = Py_None;
-
-	if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
+	PyObject *stop_rev_obj = Py_None;
+
+	if (!PyArg_ParseTuple(args, "|OO", &filteredrevs, &stop_rev_obj)) {
 		return NULL;
 	}
 
-	if (self->headrevs && filteredrevs == self->filteredrevs)
+	len = index_length(self);
+	if (stop_rev_obj == Py_None) {
+		stop_rev = len;
+	} else {
+		stop_rev = PyLong_AsSsize_t(stop_rev_obj);
+		if (stop_rev == -1 && PyErr_Occurred() != NULL) {
+			return NULL;
+		}
+	}
+
+	if (self->headrevs && filteredrevs == self->filteredrevs &&
+	    stop_rev == len)
 		return list_copy(self->headrevs);
 
-	Py_DECREF(self->filteredrevs);
-	self->filteredrevs = filteredrevs;
-	Py_INCREF(filteredrevs);
+	if (stop_rev == len) {
+		Py_DECREF(self->filteredrevs);
+		self->filteredrevs = filteredrevs;
+		Py_INCREF(filteredrevs);
+	}
 
 	if (filteredrevs != Py_None) {
 		filter = PyObject_GetAttrString(filteredrevs, "__contains__");
@@ -1264,11 +1278,10 @@
 		}
 	}
 
-	len = index_length(self);
 	heads = PyList_New(0);
 	if (heads == NULL)
 		goto bail;
-	if (len == 0) {
+	if (stop_rev == 0) {
 		if (pylist_append_owned(heads, PyLong_FromLong(-1)) == -1) {
 			Py_XDECREF(nullid);
 			goto bail;
@@ -1276,13 +1289,13 @@
 		goto done;
 	}
 
-	nothead = calloc(len, 1);
+	nothead = calloc(stop_rev, 1);
 	if (nothead == NULL) {
 		PyErr_NoMemory();
 		goto bail;
 	}
 
-	for (i = len - 1; i >= 0; i--) {
+	for (i = stop_rev - 1; i >= 0; i--) {
 		int isfiltered;
 		int parents[2];
 
@@ -1304,7 +1317,7 @@
 			}
 		}
 
-		if (index_get_parents(self, i, parents, (int)len - 1) < 0)
+		if (index_get_parents(self, i, parents, (int)stop_rev - 1) < 0)
 			goto bail;
 		for (j = 0; j < 2; j++) {
 			if (parents[j] >= 0)
@@ -1312,7 +1325,7 @@
 		}
 	}
 
-	for (i = 0; i < len; i++) {
+	for (i = 0; i < stop_rev; i++) {
 		if (nothead[i])
 			continue;
 		if (pylist_append_owned(heads, PyLong_FromSsize_t(i)) == -1) {
@@ -1321,10 +1334,14 @@
 	}
 
 done:
-	self->headrevs = heads;
 	Py_XDECREF(filter);
 	free(nothead);
-	return list_copy(self->headrevs);
+	if (stop_rev == len) {
+		self->headrevs = heads;
+		return list_copy(self->headrevs);
+	} else {
+		return heads;
+	}
 bail:
 	Py_XDECREF(filter);
 	Py_XDECREF(heads);
--- a/mercurial/pure/parsers.py	Thu Sep 26 01:52:09 2024 +0200
+++ b/mercurial/pure/parsers.py	Fri Sep 27 03:55:40 2024 +0200
@@ -696,8 +696,10 @@
             p = p[revlog_constants.INDEX_HEADER.size :]
         return p
 
-    def headrevs(self, excluded_revs=None):
+    def headrevs(self, excluded_revs=None, stop_rev=None):
         count = len(self)
+        if stop_rev is not None:
+            count = min(count, stop_rev)
         if not count:
             return [nullrev]
         # we won't iter over filtered rev so nobody is a head at start
--- a/mercurial/repoview.py	Thu Sep 26 01:52:09 2024 +0200
+++ b/mercurial/repoview.py	Fri Sep 27 03:55:40 2024 +0200
@@ -312,11 +312,8 @@
 
     def headrevs(self, revs=None, stop_rev=None):
         if revs is None:
-            filtered = self.filteredrevs
-            if stop_rev is not None and stop_rev < len(self.index):
-                filtered = set(self.filteredrevs)
-                filtered.update(range(stop_rev, len(self.index)))
-            return self.index.headrevs(filtered)
+            return self.index.headrevs(self.filteredrevs, stop_rev)
+        # it is ignored from here, so better double check we passed the right argument
         assert stop_rev is None
 
         revs = self._checknofilteredinrevs(revs)
--- a/mercurial/revlog.py	Thu Sep 26 01:52:09 2024 +0200
+++ b/mercurial/revlog.py	Fri Sep 27 03:55:40 2024 +0200
@@ -2382,12 +2382,7 @@
 
     def headrevs(self, revs=None, stop_rev=None):
         if revs is None:
-            excluded = None
-            if stop_rev is not None and stop_rev < len(self.index):
-                # We should let the native code handle it, but that a
-                # simple enough first step.
-                excluded = range(stop_rev, len(self.index))
-            return self.index.headrevs(excluded)
+            return self.index.headrevs(None, stop_rev)
         assert stop_rev is None
         if rustdagop is not None and self.index.rust_ext_compat:
             return rustdagop.headrevs(self.index, revs)
--- a/mercurial/revlogutils/revlogv0.py	Thu Sep 26 01:52:09 2024 +0200
+++ b/mercurial/revlogutils/revlogv0.py	Fri Sep 27 03:55:40 2024 +0200
@@ -111,8 +111,10 @@
         )
         return INDEX_ENTRY_V0.pack(*e2)
 
-    def headrevs(self, excluded_revs=None):
+    def headrevs(self, excluded_revs=None, stop_rev=None):
         count = len(self)
+        if stop_rev is not None:
+            count = min(count, stop_rev)
         if not count:
             return [node.nullrev]
         # we won't iter over filtered rev so nobody is a head at start
--- a/rust/hg-core/src/revlog/index.rs	Thu Sep 26 01:52:09 2024 +0200
+++ b/rust/hg-core/src/revlog/index.rs	Fri Sep 27 03:55:40 2024 +0200
@@ -541,14 +541,15 @@
 
     /// Return the head revisions of this index
     pub fn head_revs(&self) -> Result<Vec<Revision>, GraphError> {
-        self.head_revs_filtered(&HashSet::new(), false)
+        self.head_revs_advanced(&HashSet::new(), None, false)
             .map(|h| h.unwrap())
     }
 
     /// Return the head revisions of this index
-    pub fn head_revs_filtered(
+    pub fn head_revs_advanced(
         &self,
         filtered_revs: &HashSet<Revision>,
+        stop_rev: Option<Revision>,
         py_shortcut: bool,
     ) -> Result<Option<Vec<Revision>>, GraphError> {
         {
@@ -560,6 +561,7 @@
             let self_filtered_revs = &guard.1;
             if !self_head_revs.is_empty()
                 && filtered_revs == self_filtered_revs
+                && stop_rev.is_none()
             {
                 if py_shortcut {
                     // Don't copy the revs since we've already cached them
@@ -571,32 +573,42 @@
             }
         }
 
-        let as_vec = if self.is_empty() {
-            vec![NULL_REVISION]
+        let (as_vec, cachable) = if self.is_empty() {
+            (vec![NULL_REVISION], true)
         } else {
-            let mut not_heads = bitvec![0; self.len()];
+            let length: usize = match stop_rev {
+                Some(r) => r.0 as usize,
+                None => self.len(),
+            };
+            let cachable = self.len() == length;
+            let mut not_heads = bitvec![0; length];
             dagops::retain_heads_fast(
                 self,
                 not_heads.as_mut_bitslice(),
                 filtered_revs,
             )?;
-            not_heads
-                .into_iter()
-                .enumerate()
-                .filter_map(|(idx, is_not_head)| {
-                    if is_not_head {
-                        None
-                    } else {
-                        Some(Revision(idx as BaseRevision))
-                    }
-                })
-                .collect()
+            (
+                not_heads
+                    .into_iter()
+                    .enumerate()
+                    .filter_map(|(idx, is_not_head)| {
+                        if is_not_head {
+                            None
+                        } else {
+                            Some(Revision(idx as BaseRevision))
+                        }
+                    })
+                    .collect(),
+                cachable,
+            )
         };
-        *self
-            .head_revs
-            .write()
-            .expect("RwLock on Index.head_revs should not be poisoned") =
-            (as_vec.to_owned(), filtered_revs.to_owned());
+        if cachable {
+            *self
+                .head_revs
+                .write()
+                .expect("RwLock on Index.head_revs should not be poisoned") =
+                (as_vec.to_owned(), filtered_revs.to_owned());
+        }
         Ok(Some(as_vec))
     }
 
@@ -604,7 +616,7 @@
     pub fn head_revs_shortcut(
         &self,
     ) -> Result<Option<Vec<Revision>>, GraphError> {
-        self.head_revs_filtered(&HashSet::new(), true)
+        self.head_revs_advanced(&HashSet::new(), None, true)
     }
 
     /// Return the heads removed and added by advancing from `begin` to `end`.
--- a/rust/hg-cpython/src/revlog.rs	Thu Sep 26 01:52:09 2024 +0200
+++ b/rust/hg-cpython/src/revlog.rs	Fri Sep 27 03:55:40 2024 +0200
@@ -27,7 +27,10 @@
     revlog::{nodemap::NodeMap, Graph, NodePrefix, RevlogError, RevlogIndex},
     BaseRevision, Node, Revision, UncheckedRevision, NULL_REVISION,
 };
-use std::{cell::RefCell, collections::HashMap};
+use std::{
+    cell::RefCell,
+    collections::{HashMap, HashSet},
+};
 use vcsgraph::graph::Graph as VCSGraph;
 
 pub struct PySharedIndex {
@@ -305,12 +308,13 @@
 
     /// get head revisions
     def headrevs(&self, *args, **_kw) -> PyResult<PyObject> {
-        let filtered_revs = match &args.len(py) {
-             0 => Ok(py.None()),
-             1 => Ok(args.get_item(py, 0)),
+        let (filtered_revs, stop_rev) = match &args.len(py) {
+             0 => Ok((py.None(), py.None())),
+             1 => Ok((args.get_item(py, 0), py.None())),
+             2 => Ok((args.get_item(py, 0), args.get_item(py, 1))),
              _ => Err(PyErr::new::<cpython::exc::TypeError, _>(py, "too many arguments")),
         }?;
-        self.inner_headrevs(py, &filtered_revs)
+        self.inner_headrevs(py, &filtered_revs, &stop_rev)
     }
 
     /// get head nodeids
@@ -821,29 +825,62 @@
         &self,
         py: Python,
         filtered_revs: &PyObject,
+        stop_rev: &PyObject,
     ) -> PyResult<PyObject> {
         let index = &*self.index(py).borrow();
-
-        let from_core = match filtered_revs.is_none(py) {
-            true => index.head_revs_shortcut(),
+        let stop_rev = match stop_rev.is_none(py) {
             false => {
+                let rev = stop_rev.extract::<i32>(py)?;
+                if 0 <= rev && rev < index.len() as BaseRevision {
+                    Some(Revision(rev))
+                } else {
+                    None
+                }
+            }
+            true => None,
+        };
+        let from_core = match (filtered_revs.is_none(py), stop_rev.is_none()) {
+            (true, true) => index.head_revs_shortcut(),
+            (true, false) => {
+                index.head_revs_advanced(&HashSet::new(), stop_rev, false)
+            }
+            _ => {
                 let filtered_revs =
                     rev_pyiter_collect(py, filtered_revs, index)?;
-                index.head_revs_filtered(&filtered_revs, true)
+                index.head_revs_advanced(
+                    &filtered_revs,
+                    stop_rev,
+                    stop_rev.is_none(),
+                )
             }
         };
 
-        if let Some(new_heads) = from_core.map_err(|e| graph_error(py, e))? {
-            self.cache_new_heads_py_list(&new_heads, py);
-        }
+        if stop_rev.is_some() {
+            // we don't cache result for now
+            let new_heads = from_core
+                .map_err(|e| graph_error(py, e))?
+                .expect("this case should not be cached yet");
 
-        Ok(self
-            .head_revs_py_list(py)
-            .borrow()
-            .as_ref()
-            .expect("head revs should be cached")
-            .clone_ref(py)
-            .into_object())
+            let as_vec: Vec<PyObject> = new_heads
+                .iter()
+                .map(|r| PyRevision::from(*r).into_py_object(py).into_object())
+                .collect();
+            Ok(PyList::new(py, &as_vec).into_object())
+        } else {
+            if let Some(new_heads) =
+                from_core.map_err(|e| graph_error(py, e))?
+            {
+                self.cache_new_heads_py_list(&new_heads, py);
+            }
+
+            Ok(self
+                .head_revs_py_list(py)
+                .borrow()
+                .as_ref()
+                .expect("head revs should be cached")
+                .clone_ref(py)
+                .into_object())
+        }
     }
 
     fn check_revision(