head-revs: add a native implementation of the `stop_rev` parameter
This does not add too much complexity to the native code and help with
branchmap v3 performance.
Note that the final conversion of the heads from native-code to Python is still
too costly, especially in Rust. In addition the current caching around headrevs
is too simple and fragile. However these are an unrelated problem.
### benchmark.name = hg.command.unbundle
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.
issue6528 = disabled
# benchmark.variants.resource-usage = default
# benchmark.variants.reuse-external-delta-parent = yes
# benchmark.variants.revs = any-1-extra-rev
# benchmark.variants.source = unbundle
# benchmark.variants.validate = default
# benchmark.variants.verbosity = quiet
## data-env-vars.name = netbeans-2018-08-01-zstd-sparse-revlog
# bin-env-vars.hg.flavor = default
branch-v2: 0.233711 ~~~~~
branch-v3 before: 0.239857 (+2.63%, +0.01)
branch-v3 after: 0.239558 (+2.50%, +0.01)
# bin-env-vars.hg.flavor = rust
branch-v2: 0.235230 ~~~~~
branch-v3 before: 0.240972 (+2.44%, +0.01)
branch-v3 after: 0.239917 (+1.99%, +0.00)
## data-env-vars.name = netbeans-2018-08-01-ds2-pnm
# bin-env-vars.hg.flavor = rust
branch-v2: 0.255586 ~~~~~
branch-v3 before: 0.268560 (+5.08%, +0.01)
branch-v3 after: 0.262261 (+2.61%, +0.01)
## data-env-vars.name = mozilla-central-2024-03-22-zstd-sparse-revlog
# bin-env-vars.hg.flavor = default
branch-v2: 0.339010 ~~~~~
branch-v3 before: 0.349389 (+3.06%, +0.01)
branch-v3 after: 0.348247 (+2.72%, +0.01)
# bin-env-vars.hg.flavor = rust
branch-v2: 0.346525 ~~~~~
branch-v3 before: 0.355661 (+2.64%, +0.01)
branch-v3 after: 0.350906 (+1.26%, +0.00)
## data-env-vars.name = mozilla-central-2024-03-22-ds2-pnm
# bin-env-vars.hg.flavor = rust
branch-v2: 0.380202 ~~~~~
branch-v3 before: 0.408851 (+7.54%, +0.03)
branch-v3 after: 0.406511 (+6.92%, +0.03)
## data-env-vars.name = mozilla-unified-2024-03-22-zstd-sparse-revlog
# bin-env-vars.hg.flavor = default
branch-v2: 0.412165 ~~~~~
branch-v3 before: 0.427782 (+3.79%, +0.02)
branch-v3 after: 0.422595 (+2.53%, +0.01)
# bin-env-vars.hg.flavor = rust
branch-v2: 0.412397 ~~~~~
branch-v3 before: 0.422354 (+2.41%, +0.01)
branch-v3 after: 0.421079 (+2.11%, +0.01)
## data-env-vars.name = mozilla-unified-2024-03-22-ds2-pnm
# bin-env-vars.hg.flavor = rust
branch-v2: 0.429501 ~~~~~
branch-v3 before: 0.443197 (+3.19%, +0.01)
branch-v3 after: 0.449432 (+4.64%, +0.02)
## data-env-vars.name = mozilla-try-2024-03-26-zstd-sparse-revlog
# bin-env-vars.hg.flavor = default
branch-v2: 3.403171 ~~~~~
branch-v3 before: 3.819477 (+12.23%, +0.42)
branch-v3 after: 3.658482 (+7.50%, +0.26)
# bin-env-vars.hg.flavor = rust
branch-v2: 3.454876 ~~~~~
branch-v3 before: 3.590284 (+3.92%, +0.14)
branch-v3 after: 3.545843 (+2.63%, +0.09)
## data-env-vars.name = mozilla-try-2024-03-26-ds2-pnm
# bin-env-vars.hg.flavor = rust
branch-v2: 3.465435 ~~~~~
branch-v3 before: 3.633278 (+4.84%, +0.17)
branch-v3 after: 3.556074 (+2.62%, +0.09)
--- a/mercurial/cext/revlog.c Thu Sep 26 01:52:09 2024 +0200
+++ b/mercurial/cext/revlog.c Fri Sep 27 03:55:40 2024 +0200
@@ -1237,22 +1237,36 @@
static PyObject *index_headrevs(indexObject *self, PyObject *args)
{
- Py_ssize_t i, j, len;
+ Py_ssize_t i, j, len, stop_rev;
char *nothead = NULL;
PyObject *heads = NULL;
PyObject *filter = NULL;
PyObject *filteredrevs = Py_None;
-
- if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
+ PyObject *stop_rev_obj = Py_None;
+
+ if (!PyArg_ParseTuple(args, "|OO", &filteredrevs, &stop_rev_obj)) {
return NULL;
}
- if (self->headrevs && filteredrevs == self->filteredrevs)
+ len = index_length(self);
+ if (stop_rev_obj == Py_None) {
+ stop_rev = len;
+ } else {
+ stop_rev = PyLong_AsSsize_t(stop_rev_obj);
+ if (stop_rev == -1 && PyErr_Occurred() != NULL) {
+ return NULL;
+ }
+ }
+
+ if (self->headrevs && filteredrevs == self->filteredrevs &&
+ stop_rev == len)
return list_copy(self->headrevs);
- Py_DECREF(self->filteredrevs);
- self->filteredrevs = filteredrevs;
- Py_INCREF(filteredrevs);
+ if (stop_rev == len) {
+ Py_DECREF(self->filteredrevs);
+ self->filteredrevs = filteredrevs;
+ Py_INCREF(filteredrevs);
+ }
if (filteredrevs != Py_None) {
filter = PyObject_GetAttrString(filteredrevs, "__contains__");
@@ -1264,11 +1278,10 @@
}
}
- len = index_length(self);
heads = PyList_New(0);
if (heads == NULL)
goto bail;
- if (len == 0) {
+ if (stop_rev == 0) {
if (pylist_append_owned(heads, PyLong_FromLong(-1)) == -1) {
Py_XDECREF(nullid);
goto bail;
@@ -1276,13 +1289,13 @@
goto done;
}
- nothead = calloc(len, 1);
+ nothead = calloc(stop_rev, 1);
if (nothead == NULL) {
PyErr_NoMemory();
goto bail;
}
- for (i = len - 1; i >= 0; i--) {
+ for (i = stop_rev - 1; i >= 0; i--) {
int isfiltered;
int parents[2];
@@ -1304,7 +1317,7 @@
}
}
- if (index_get_parents(self, i, parents, (int)len - 1) < 0)
+ if (index_get_parents(self, i, parents, (int)stop_rev - 1) < 0)
goto bail;
for (j = 0; j < 2; j++) {
if (parents[j] >= 0)
@@ -1312,7 +1325,7 @@
}
}
- for (i = 0; i < len; i++) {
+ for (i = 0; i < stop_rev; i++) {
if (nothead[i])
continue;
if (pylist_append_owned(heads, PyLong_FromSsize_t(i)) == -1) {
@@ -1321,10 +1334,14 @@
}
done:
- self->headrevs = heads;
Py_XDECREF(filter);
free(nothead);
- return list_copy(self->headrevs);
+ if (stop_rev == len) {
+ self->headrevs = heads;
+ return list_copy(self->headrevs);
+ } else {
+ return heads;
+ }
bail:
Py_XDECREF(filter);
Py_XDECREF(heads);
--- a/mercurial/pure/parsers.py Thu Sep 26 01:52:09 2024 +0200
+++ b/mercurial/pure/parsers.py Fri Sep 27 03:55:40 2024 +0200
@@ -696,8 +696,10 @@
p = p[revlog_constants.INDEX_HEADER.size :]
return p
- def headrevs(self, excluded_revs=None):
+ def headrevs(self, excluded_revs=None, stop_rev=None):
count = len(self)
+ if stop_rev is not None:
+ count = min(count, stop_rev)
if not count:
return [nullrev]
# we won't iter over filtered rev so nobody is a head at start
--- a/mercurial/repoview.py Thu Sep 26 01:52:09 2024 +0200
+++ b/mercurial/repoview.py Fri Sep 27 03:55:40 2024 +0200
@@ -312,11 +312,8 @@
def headrevs(self, revs=None, stop_rev=None):
if revs is None:
- filtered = self.filteredrevs
- if stop_rev is not None and stop_rev < len(self.index):
- filtered = set(self.filteredrevs)
- filtered.update(range(stop_rev, len(self.index)))
- return self.index.headrevs(filtered)
+ return self.index.headrevs(self.filteredrevs, stop_rev)
+ # it is ignored from here, so better double check we passed the right argument
assert stop_rev is None
revs = self._checknofilteredinrevs(revs)
--- a/mercurial/revlog.py Thu Sep 26 01:52:09 2024 +0200
+++ b/mercurial/revlog.py Fri Sep 27 03:55:40 2024 +0200
@@ -2382,12 +2382,7 @@
def headrevs(self, revs=None, stop_rev=None):
if revs is None:
- excluded = None
- if stop_rev is not None and stop_rev < len(self.index):
- # We should let the native code handle it, but that a
- # simple enough first step.
- excluded = range(stop_rev, len(self.index))
- return self.index.headrevs(excluded)
+ return self.index.headrevs(None, stop_rev)
assert stop_rev is None
if rustdagop is not None and self.index.rust_ext_compat:
return rustdagop.headrevs(self.index, revs)
--- a/mercurial/revlogutils/revlogv0.py Thu Sep 26 01:52:09 2024 +0200
+++ b/mercurial/revlogutils/revlogv0.py Fri Sep 27 03:55:40 2024 +0200
@@ -111,8 +111,10 @@
)
return INDEX_ENTRY_V0.pack(*e2)
- def headrevs(self, excluded_revs=None):
+ def headrevs(self, excluded_revs=None, stop_rev=None):
count = len(self)
+ if stop_rev is not None:
+ count = min(count, stop_rev)
if not count:
return [node.nullrev]
# we won't iter over filtered rev so nobody is a head at start
--- a/rust/hg-core/src/revlog/index.rs Thu Sep 26 01:52:09 2024 +0200
+++ b/rust/hg-core/src/revlog/index.rs Fri Sep 27 03:55:40 2024 +0200
@@ -541,14 +541,15 @@
/// Return the head revisions of this index
pub fn head_revs(&self) -> Result<Vec<Revision>, GraphError> {
- self.head_revs_filtered(&HashSet::new(), false)
+ self.head_revs_advanced(&HashSet::new(), None, false)
.map(|h| h.unwrap())
}
/// Return the head revisions of this index
- pub fn head_revs_filtered(
+ pub fn head_revs_advanced(
&self,
filtered_revs: &HashSet<Revision>,
+ stop_rev: Option<Revision>,
py_shortcut: bool,
) -> Result<Option<Vec<Revision>>, GraphError> {
{
@@ -560,6 +561,7 @@
let self_filtered_revs = &guard.1;
if !self_head_revs.is_empty()
&& filtered_revs == self_filtered_revs
+ && stop_rev.is_none()
{
if py_shortcut {
// Don't copy the revs since we've already cached them
@@ -571,32 +573,42 @@
}
}
- let as_vec = if self.is_empty() {
- vec![NULL_REVISION]
+ let (as_vec, cachable) = if self.is_empty() {
+ (vec![NULL_REVISION], true)
} else {
- let mut not_heads = bitvec![0; self.len()];
+ let length: usize = match stop_rev {
+ Some(r) => r.0 as usize,
+ None => self.len(),
+ };
+ let cachable = self.len() == length;
+ let mut not_heads = bitvec![0; length];
dagops::retain_heads_fast(
self,
not_heads.as_mut_bitslice(),
filtered_revs,
)?;
- not_heads
- .into_iter()
- .enumerate()
- .filter_map(|(idx, is_not_head)| {
- if is_not_head {
- None
- } else {
- Some(Revision(idx as BaseRevision))
- }
- })
- .collect()
+ (
+ not_heads
+ .into_iter()
+ .enumerate()
+ .filter_map(|(idx, is_not_head)| {
+ if is_not_head {
+ None
+ } else {
+ Some(Revision(idx as BaseRevision))
+ }
+ })
+ .collect(),
+ cachable,
+ )
};
- *self
- .head_revs
- .write()
- .expect("RwLock on Index.head_revs should not be poisoned") =
- (as_vec.to_owned(), filtered_revs.to_owned());
+ if cachable {
+ *self
+ .head_revs
+ .write()
+ .expect("RwLock on Index.head_revs should not be poisoned") =
+ (as_vec.to_owned(), filtered_revs.to_owned());
+ }
Ok(Some(as_vec))
}
@@ -604,7 +616,7 @@
pub fn head_revs_shortcut(
&self,
) -> Result<Option<Vec<Revision>>, GraphError> {
- self.head_revs_filtered(&HashSet::new(), true)
+ self.head_revs_advanced(&HashSet::new(), None, true)
}
/// Return the heads removed and added by advancing from `begin` to `end`.
--- a/rust/hg-cpython/src/revlog.rs Thu Sep 26 01:52:09 2024 +0200
+++ b/rust/hg-cpython/src/revlog.rs Fri Sep 27 03:55:40 2024 +0200
@@ -27,7 +27,10 @@
revlog::{nodemap::NodeMap, Graph, NodePrefix, RevlogError, RevlogIndex},
BaseRevision, Node, Revision, UncheckedRevision, NULL_REVISION,
};
-use std::{cell::RefCell, collections::HashMap};
+use std::{
+ cell::RefCell,
+ collections::{HashMap, HashSet},
+};
use vcsgraph::graph::Graph as VCSGraph;
pub struct PySharedIndex {
@@ -305,12 +308,13 @@
/// get head revisions
def headrevs(&self, *args, **_kw) -> PyResult<PyObject> {
- let filtered_revs = match &args.len(py) {
- 0 => Ok(py.None()),
- 1 => Ok(args.get_item(py, 0)),
+ let (filtered_revs, stop_rev) = match &args.len(py) {
+ 0 => Ok((py.None(), py.None())),
+ 1 => Ok((args.get_item(py, 0), py.None())),
+ 2 => Ok((args.get_item(py, 0), args.get_item(py, 1))),
_ => Err(PyErr::new::<cpython::exc::TypeError, _>(py, "too many arguments")),
}?;
- self.inner_headrevs(py, &filtered_revs)
+ self.inner_headrevs(py, &filtered_revs, &stop_rev)
}
/// get head nodeids
@@ -821,29 +825,62 @@
&self,
py: Python,
filtered_revs: &PyObject,
+ stop_rev: &PyObject,
) -> PyResult<PyObject> {
let index = &*self.index(py).borrow();
-
- let from_core = match filtered_revs.is_none(py) {
- true => index.head_revs_shortcut(),
+ let stop_rev = match stop_rev.is_none(py) {
false => {
+ let rev = stop_rev.extract::<i32>(py)?;
+ if 0 <= rev && rev < index.len() as BaseRevision {
+ Some(Revision(rev))
+ } else {
+ None
+ }
+ }
+ true => None,
+ };
+ let from_core = match (filtered_revs.is_none(py), stop_rev.is_none()) {
+ (true, true) => index.head_revs_shortcut(),
+ (true, false) => {
+ index.head_revs_advanced(&HashSet::new(), stop_rev, false)
+ }
+ _ => {
let filtered_revs =
rev_pyiter_collect(py, filtered_revs, index)?;
- index.head_revs_filtered(&filtered_revs, true)
+ index.head_revs_advanced(
+ &filtered_revs,
+ stop_rev,
+ stop_rev.is_none(),
+ )
}
};
- if let Some(new_heads) = from_core.map_err(|e| graph_error(py, e))? {
- self.cache_new_heads_py_list(&new_heads, py);
- }
+ if stop_rev.is_some() {
+ // we don't cache result for now
+ let new_heads = from_core
+ .map_err(|e| graph_error(py, e))?
+ .expect("this case should not be cached yet");
- Ok(self
- .head_revs_py_list(py)
- .borrow()
- .as_ref()
- .expect("head revs should be cached")
- .clone_ref(py)
- .into_object())
+ let as_vec: Vec<PyObject> = new_heads
+ .iter()
+ .map(|r| PyRevision::from(*r).into_py_object(py).into_object())
+ .collect();
+ Ok(PyList::new(py, &as_vec).into_object())
+ } else {
+ if let Some(new_heads) =
+ from_core.map_err(|e| graph_error(py, e))?
+ {
+ self.cache_new_heads_py_list(&new_heads, py);
+ }
+
+ Ok(self
+ .head_revs_py_list(py)
+ .borrow()
+ .as_ref()
+ .expect("head revs should be cached")
+ .clone_ref(py)
+ .into_object())
+ }
}
fn check_revision(