# HG changeset patch # User Pierre-Yves David # Date 1727402140 -7200 # Node ID 609700e5d8dfc3a7cc11bba637e99b1d435db54b # Parent 5d1e6f447d2d527bb7846a186b52dcfc12ce90a3 head-revs: add a native implementation of the `stop_rev` parameter This does not add too much complexity to the native code and help with branchmap v3 performance. Note that the final conversion of the heads from native-code to Python is still too costly, especially in Rust. In addition the current caching around headrevs is too simple and fragile. However these are an unrelated problem. ### benchmark.name = hg.command.unbundle # bin-env-vars.hg.py-re2-module = default # benchmark.variants.issue6528 = disabled # benchmark.variants.resource-usage = default # benchmark.variants.reuse-external-delta-parent = yes # benchmark.variants.revs = any-1-extra-rev # benchmark.variants.source = unbundle # benchmark.variants.validate = default # benchmark.variants.verbosity = quiet ## data-env-vars.name = netbeans-2018-08-01-zstd-sparse-revlog # bin-env-vars.hg.flavor = default branch-v2: 0.233711 ~~~~~ branch-v3 before: 0.239857 (+2.63%, +0.01) branch-v3 after: 0.239558 (+2.50%, +0.01) # bin-env-vars.hg.flavor = rust branch-v2: 0.235230 ~~~~~ branch-v3 before: 0.240972 (+2.44%, +0.01) branch-v3 after: 0.239917 (+1.99%, +0.00) ## data-env-vars.name = netbeans-2018-08-01-ds2-pnm # bin-env-vars.hg.flavor = rust branch-v2: 0.255586 ~~~~~ branch-v3 before: 0.268560 (+5.08%, +0.01) branch-v3 after: 0.262261 (+2.61%, +0.01) ## data-env-vars.name = mozilla-central-2024-03-22-zstd-sparse-revlog # bin-env-vars.hg.flavor = default branch-v2: 0.339010 ~~~~~ branch-v3 before: 0.349389 (+3.06%, +0.01) branch-v3 after: 0.348247 (+2.72%, +0.01) # bin-env-vars.hg.flavor = rust branch-v2: 0.346525 ~~~~~ branch-v3 before: 0.355661 (+2.64%, +0.01) branch-v3 after: 0.350906 (+1.26%, +0.00) ## data-env-vars.name = mozilla-central-2024-03-22-ds2-pnm # bin-env-vars.hg.flavor = rust branch-v2: 0.380202 ~~~~~ branch-v3 before: 0.408851 (+7.54%, +0.03) branch-v3 after: 0.406511 (+6.92%, +0.03) ## data-env-vars.name = mozilla-unified-2024-03-22-zstd-sparse-revlog # bin-env-vars.hg.flavor = default branch-v2: 0.412165 ~~~~~ branch-v3 before: 0.427782 (+3.79%, +0.02) branch-v3 after: 0.422595 (+2.53%, +0.01) # bin-env-vars.hg.flavor = rust branch-v2: 0.412397 ~~~~~ branch-v3 before: 0.422354 (+2.41%, +0.01) branch-v3 after: 0.421079 (+2.11%, +0.01) ## data-env-vars.name = mozilla-unified-2024-03-22-ds2-pnm # bin-env-vars.hg.flavor = rust branch-v2: 0.429501 ~~~~~ branch-v3 before: 0.443197 (+3.19%, +0.01) branch-v3 after: 0.449432 (+4.64%, +0.02) ## data-env-vars.name = mozilla-try-2024-03-26-zstd-sparse-revlog # bin-env-vars.hg.flavor = default branch-v2: 3.403171 ~~~~~ branch-v3 before: 3.819477 (+12.23%, +0.42) branch-v3 after: 3.658482 (+7.50%, +0.26) # bin-env-vars.hg.flavor = rust branch-v2: 3.454876 ~~~~~ branch-v3 before: 3.590284 (+3.92%, +0.14) branch-v3 after: 3.545843 (+2.63%, +0.09) ## data-env-vars.name = mozilla-try-2024-03-26-ds2-pnm # bin-env-vars.hg.flavor = rust branch-v2: 3.465435 ~~~~~ branch-v3 before: 3.633278 (+4.84%, +0.17) branch-v3 after: 3.556074 (+2.62%, +0.09) diff -r 5d1e6f447d2d -r 609700e5d8df mercurial/cext/revlog.c --- a/mercurial/cext/revlog.c Thu Sep 26 01:52:09 2024 +0200 +++ b/mercurial/cext/revlog.c Fri Sep 27 03:55:40 2024 +0200 @@ -1237,22 +1237,36 @@ static PyObject *index_headrevs(indexObject *self, PyObject *args) { - Py_ssize_t i, j, len; + Py_ssize_t i, j, len, stop_rev; char *nothead = NULL; PyObject *heads = NULL; PyObject *filter = NULL; PyObject *filteredrevs = Py_None; - - if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) { + PyObject *stop_rev_obj = Py_None; + + if (!PyArg_ParseTuple(args, "|OO", &filteredrevs, &stop_rev_obj)) { return NULL; } - if (self->headrevs && filteredrevs == self->filteredrevs) + len = index_length(self); + if (stop_rev_obj == Py_None) { + stop_rev = len; + } else { + stop_rev = PyLong_AsSsize_t(stop_rev_obj); + if (stop_rev == -1 && PyErr_Occurred() != NULL) { + return NULL; + } + } + + if (self->headrevs && filteredrevs == self->filteredrevs && + stop_rev == len) return list_copy(self->headrevs); - Py_DECREF(self->filteredrevs); - self->filteredrevs = filteredrevs; - Py_INCREF(filteredrevs); + if (stop_rev == len) { + Py_DECREF(self->filteredrevs); + self->filteredrevs = filteredrevs; + Py_INCREF(filteredrevs); + } if (filteredrevs != Py_None) { filter = PyObject_GetAttrString(filteredrevs, "__contains__"); @@ -1264,11 +1278,10 @@ } } - len = index_length(self); heads = PyList_New(0); if (heads == NULL) goto bail; - if (len == 0) { + if (stop_rev == 0) { if (pylist_append_owned(heads, PyLong_FromLong(-1)) == -1) { Py_XDECREF(nullid); goto bail; @@ -1276,13 +1289,13 @@ goto done; } - nothead = calloc(len, 1); + nothead = calloc(stop_rev, 1); if (nothead == NULL) { PyErr_NoMemory(); goto bail; } - for (i = len - 1; i >= 0; i--) { + for (i = stop_rev - 1; i >= 0; i--) { int isfiltered; int parents[2]; @@ -1304,7 +1317,7 @@ } } - if (index_get_parents(self, i, parents, (int)len - 1) < 0) + if (index_get_parents(self, i, parents, (int)stop_rev - 1) < 0) goto bail; for (j = 0; j < 2; j++) { if (parents[j] >= 0) @@ -1312,7 +1325,7 @@ } } - for (i = 0; i < len; i++) { + for (i = 0; i < stop_rev; i++) { if (nothead[i]) continue; if (pylist_append_owned(heads, PyLong_FromSsize_t(i)) == -1) { @@ -1321,10 +1334,14 @@ } done: - self->headrevs = heads; Py_XDECREF(filter); free(nothead); - return list_copy(self->headrevs); + if (stop_rev == len) { + self->headrevs = heads; + return list_copy(self->headrevs); + } else { + return heads; + } bail: Py_XDECREF(filter); Py_XDECREF(heads); diff -r 5d1e6f447d2d -r 609700e5d8df mercurial/pure/parsers.py --- a/mercurial/pure/parsers.py Thu Sep 26 01:52:09 2024 +0200 +++ b/mercurial/pure/parsers.py Fri Sep 27 03:55:40 2024 +0200 @@ -696,8 +696,10 @@ p = p[revlog_constants.INDEX_HEADER.size :] return p - def headrevs(self, excluded_revs=None): + def headrevs(self, excluded_revs=None, stop_rev=None): count = len(self) + if stop_rev is not None: + count = min(count, stop_rev) if not count: return [nullrev] # we won't iter over filtered rev so nobody is a head at start diff -r 5d1e6f447d2d -r 609700e5d8df mercurial/repoview.py --- a/mercurial/repoview.py Thu Sep 26 01:52:09 2024 +0200 +++ b/mercurial/repoview.py Fri Sep 27 03:55:40 2024 +0200 @@ -312,11 +312,8 @@ def headrevs(self, revs=None, stop_rev=None): if revs is None: - filtered = self.filteredrevs - if stop_rev is not None and stop_rev < len(self.index): - filtered = set(self.filteredrevs) - filtered.update(range(stop_rev, len(self.index))) - return self.index.headrevs(filtered) + return self.index.headrevs(self.filteredrevs, stop_rev) + # it is ignored from here, so better double check we passed the right argument assert stop_rev is None revs = self._checknofilteredinrevs(revs) diff -r 5d1e6f447d2d -r 609700e5d8df mercurial/revlog.py --- a/mercurial/revlog.py Thu Sep 26 01:52:09 2024 +0200 +++ b/mercurial/revlog.py Fri Sep 27 03:55:40 2024 +0200 @@ -2382,12 +2382,7 @@ def headrevs(self, revs=None, stop_rev=None): if revs is None: - excluded = None - if stop_rev is not None and stop_rev < len(self.index): - # We should let the native code handle it, but that a - # simple enough first step. - excluded = range(stop_rev, len(self.index)) - return self.index.headrevs(excluded) + return self.index.headrevs(None, stop_rev) assert stop_rev is None if rustdagop is not None and self.index.rust_ext_compat: return rustdagop.headrevs(self.index, revs) diff -r 5d1e6f447d2d -r 609700e5d8df mercurial/revlogutils/revlogv0.py --- a/mercurial/revlogutils/revlogv0.py Thu Sep 26 01:52:09 2024 +0200 +++ b/mercurial/revlogutils/revlogv0.py Fri Sep 27 03:55:40 2024 +0200 @@ -111,8 +111,10 @@ ) return INDEX_ENTRY_V0.pack(*e2) - def headrevs(self, excluded_revs=None): + def headrevs(self, excluded_revs=None, stop_rev=None): count = len(self) + if stop_rev is not None: + count = min(count, stop_rev) if not count: return [node.nullrev] # we won't iter over filtered rev so nobody is a head at start diff -r 5d1e6f447d2d -r 609700e5d8df rust/hg-core/src/revlog/index.rs --- a/rust/hg-core/src/revlog/index.rs Thu Sep 26 01:52:09 2024 +0200 +++ b/rust/hg-core/src/revlog/index.rs Fri Sep 27 03:55:40 2024 +0200 @@ -541,14 +541,15 @@ /// Return the head revisions of this index pub fn head_revs(&self) -> Result, GraphError> { - self.head_revs_filtered(&HashSet::new(), false) + self.head_revs_advanced(&HashSet::new(), None, false) .map(|h| h.unwrap()) } /// Return the head revisions of this index - pub fn head_revs_filtered( + pub fn head_revs_advanced( &self, filtered_revs: &HashSet, + stop_rev: Option, py_shortcut: bool, ) -> Result>, GraphError> { { @@ -560,6 +561,7 @@ let self_filtered_revs = &guard.1; if !self_head_revs.is_empty() && filtered_revs == self_filtered_revs + && stop_rev.is_none() { if py_shortcut { // Don't copy the revs since we've already cached them @@ -571,32 +573,42 @@ } } - let as_vec = if self.is_empty() { - vec![NULL_REVISION] + let (as_vec, cachable) = if self.is_empty() { + (vec![NULL_REVISION], true) } else { - let mut not_heads = bitvec![0; self.len()]; + let length: usize = match stop_rev { + Some(r) => r.0 as usize, + None => self.len(), + }; + let cachable = self.len() == length; + let mut not_heads = bitvec![0; length]; dagops::retain_heads_fast( self, not_heads.as_mut_bitslice(), filtered_revs, )?; - not_heads - .into_iter() - .enumerate() - .filter_map(|(idx, is_not_head)| { - if is_not_head { - None - } else { - Some(Revision(idx as BaseRevision)) - } - }) - .collect() + ( + not_heads + .into_iter() + .enumerate() + .filter_map(|(idx, is_not_head)| { + if is_not_head { + None + } else { + Some(Revision(idx as BaseRevision)) + } + }) + .collect(), + cachable, + ) }; - *self - .head_revs - .write() - .expect("RwLock on Index.head_revs should not be poisoned") = - (as_vec.to_owned(), filtered_revs.to_owned()); + if cachable { + *self + .head_revs + .write() + .expect("RwLock on Index.head_revs should not be poisoned") = + (as_vec.to_owned(), filtered_revs.to_owned()); + } Ok(Some(as_vec)) } @@ -604,7 +616,7 @@ pub fn head_revs_shortcut( &self, ) -> Result>, GraphError> { - self.head_revs_filtered(&HashSet::new(), true) + self.head_revs_advanced(&HashSet::new(), None, true) } /// Return the heads removed and added by advancing from `begin` to `end`. diff -r 5d1e6f447d2d -r 609700e5d8df rust/hg-cpython/src/revlog.rs --- a/rust/hg-cpython/src/revlog.rs Thu Sep 26 01:52:09 2024 +0200 +++ b/rust/hg-cpython/src/revlog.rs Fri Sep 27 03:55:40 2024 +0200 @@ -27,7 +27,10 @@ revlog::{nodemap::NodeMap, Graph, NodePrefix, RevlogError, RevlogIndex}, BaseRevision, Node, Revision, UncheckedRevision, NULL_REVISION, }; -use std::{cell::RefCell, collections::HashMap}; +use std::{ + cell::RefCell, + collections::{HashMap, HashSet}, +}; use vcsgraph::graph::Graph as VCSGraph; pub struct PySharedIndex { @@ -305,12 +308,13 @@ /// get head revisions def headrevs(&self, *args, **_kw) -> PyResult { - let filtered_revs = match &args.len(py) { - 0 => Ok(py.None()), - 1 => Ok(args.get_item(py, 0)), + let (filtered_revs, stop_rev) = match &args.len(py) { + 0 => Ok((py.None(), py.None())), + 1 => Ok((args.get_item(py, 0), py.None())), + 2 => Ok((args.get_item(py, 0), args.get_item(py, 1))), _ => Err(PyErr::new::(py, "too many arguments")), }?; - self.inner_headrevs(py, &filtered_revs) + self.inner_headrevs(py, &filtered_revs, &stop_rev) } /// get head nodeids @@ -821,29 +825,62 @@ &self, py: Python, filtered_revs: &PyObject, + stop_rev: &PyObject, ) -> PyResult { let index = &*self.index(py).borrow(); - - let from_core = match filtered_revs.is_none(py) { - true => index.head_revs_shortcut(), + let stop_rev = match stop_rev.is_none(py) { false => { + let rev = stop_rev.extract::(py)?; + if 0 <= rev && rev < index.len() as BaseRevision { + Some(Revision(rev)) + } else { + None + } + } + true => None, + }; + let from_core = match (filtered_revs.is_none(py), stop_rev.is_none()) { + (true, true) => index.head_revs_shortcut(), + (true, false) => { + index.head_revs_advanced(&HashSet::new(), stop_rev, false) + } + _ => { let filtered_revs = rev_pyiter_collect(py, filtered_revs, index)?; - index.head_revs_filtered(&filtered_revs, true) + index.head_revs_advanced( + &filtered_revs, + stop_rev, + stop_rev.is_none(), + ) } }; - if let Some(new_heads) = from_core.map_err(|e| graph_error(py, e))? { - self.cache_new_heads_py_list(&new_heads, py); - } + if stop_rev.is_some() { + // we don't cache result for now + let new_heads = from_core + .map_err(|e| graph_error(py, e))? + .expect("this case should not be cached yet"); - Ok(self - .head_revs_py_list(py) - .borrow() - .as_ref() - .expect("head revs should be cached") - .clone_ref(py) - .into_object()) + let as_vec: Vec = new_heads + .iter() + .map(|r| PyRevision::from(*r).into_py_object(py).into_object()) + .collect(); + Ok(PyList::new(py, &as_vec).into_object()) + } else { + if let Some(new_heads) = + from_core.map_err(|e| graph_error(py, e))? + { + self.cache_new_heads_py_list(&new_heads, py); + } + + Ok(self + .head_revs_py_list(py) + .borrow() + .as_ref() + .expect("head revs should be cached") + .clone_ref(py) + .into_object()) + } } fn check_revision(