Mercurial > hg
changeset 51979:609700e5d8df
head-revs: add a native implementation of the `stop_rev` parameter
This does not add too much complexity to the native code and help with
branchmap v3 performance.
Note that the final conversion of the heads from native-code to Python is still
too costly, especially in Rust. In addition the current caching around headrevs
is too simple and fragile. However these are an unrelated problem.
### benchmark.name = hg.command.unbundle
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.issue6528 = disabled
# benchmark.variants.resource-usage = default
# benchmark.variants.reuse-external-delta-parent = yes
# benchmark.variants.revs = any-1-extra-rev
# benchmark.variants.source = unbundle
# benchmark.variants.validate = default
# benchmark.variants.verbosity = quiet
## data-env-vars.name = netbeans-2018-08-01-zstd-sparse-revlog
# bin-env-vars.hg.flavor = default
branch-v2: 0.233711 ~~~~~
branch-v3 before: 0.239857 (+2.63%, +0.01)
branch-v3 after: 0.239558 (+2.50%, +0.01)
# bin-env-vars.hg.flavor = rust
branch-v2: 0.235230 ~~~~~
branch-v3 before: 0.240972 (+2.44%, +0.01)
branch-v3 after: 0.239917 (+1.99%, +0.00)
## data-env-vars.name = netbeans-2018-08-01-ds2-pnm
# bin-env-vars.hg.flavor = rust
branch-v2: 0.255586 ~~~~~
branch-v3 before: 0.268560 (+5.08%, +0.01)
branch-v3 after: 0.262261 (+2.61%, +0.01)
## data-env-vars.name = mozilla-central-2024-03-22-zstd-sparse-revlog
# bin-env-vars.hg.flavor = default
branch-v2: 0.339010 ~~~~~
branch-v3 before: 0.349389 (+3.06%, +0.01)
branch-v3 after: 0.348247 (+2.72%, +0.01)
# bin-env-vars.hg.flavor = rust
branch-v2: 0.346525 ~~~~~
branch-v3 before: 0.355661 (+2.64%, +0.01)
branch-v3 after: 0.350906 (+1.26%, +0.00)
## data-env-vars.name = mozilla-central-2024-03-22-ds2-pnm
# bin-env-vars.hg.flavor = rust
branch-v2: 0.380202 ~~~~~
branch-v3 before: 0.408851 (+7.54%, +0.03)
branch-v3 after: 0.406511 (+6.92%, +0.03)
## data-env-vars.name = mozilla-unified-2024-03-22-zstd-sparse-revlog
# bin-env-vars.hg.flavor = default
branch-v2: 0.412165 ~~~~~
branch-v3 before: 0.427782 (+3.79%, +0.02)
branch-v3 after: 0.422595 (+2.53%, +0.01)
# bin-env-vars.hg.flavor = rust
branch-v2: 0.412397 ~~~~~
branch-v3 before: 0.422354 (+2.41%, +0.01)
branch-v3 after: 0.421079 (+2.11%, +0.01)
## data-env-vars.name = mozilla-unified-2024-03-22-ds2-pnm
# bin-env-vars.hg.flavor = rust
branch-v2: 0.429501 ~~~~~
branch-v3 before: 0.443197 (+3.19%, +0.01)
branch-v3 after: 0.449432 (+4.64%, +0.02)
## data-env-vars.name = mozilla-try-2024-03-26-zstd-sparse-revlog
# bin-env-vars.hg.flavor = default
branch-v2: 3.403171 ~~~~~
branch-v3 before: 3.819477 (+12.23%, +0.42)
branch-v3 after: 3.658482 (+7.50%, +0.26)
# bin-env-vars.hg.flavor = rust
branch-v2: 3.454876 ~~~~~
branch-v3 before: 3.590284 (+3.92%, +0.14)
branch-v3 after: 3.545843 (+2.63%, +0.09)
## data-env-vars.name = mozilla-try-2024-03-26-ds2-pnm
# bin-env-vars.hg.flavor = rust
branch-v2: 3.465435 ~~~~~
branch-v3 before: 3.633278 (+4.84%, +0.17)
branch-v3 after: 3.556074 (+2.62%, +0.09)
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Fri, 27 Sep 2024 03:55:40 +0200 |
parents | 5d1e6f447d2d |
children | 3e135e79b7f7 |
files | mercurial/cext/revlog.c mercurial/pure/parsers.py mercurial/repoview.py mercurial/revlog.py mercurial/revlogutils/revlogv0.py rust/hg-core/src/revlog/index.rs rust/hg-cpython/src/revlog.rs |
diffstat | 7 files changed, 131 insertions(+), 69 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/cext/revlog.c Thu Sep 26 01:52:09 2024 +0200 +++ b/mercurial/cext/revlog.c Fri Sep 27 03:55:40 2024 +0200 @@ -1237,22 +1237,36 @@ static PyObject *index_headrevs(indexObject *self, PyObject *args) { - Py_ssize_t i, j, len; + Py_ssize_t i, j, len, stop_rev; char *nothead = NULL; PyObject *heads = NULL; PyObject *filter = NULL; PyObject *filteredrevs = Py_None; - - if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) { + PyObject *stop_rev_obj = Py_None; + + if (!PyArg_ParseTuple(args, "|OO", &filteredrevs, &stop_rev_obj)) { return NULL; } - if (self->headrevs && filteredrevs == self->filteredrevs) + len = index_length(self); + if (stop_rev_obj == Py_None) { + stop_rev = len; + } else { + stop_rev = PyLong_AsSsize_t(stop_rev_obj); + if (stop_rev == -1 && PyErr_Occurred() != NULL) { + return NULL; + } + } + + if (self->headrevs && filteredrevs == self->filteredrevs && + stop_rev == len) return list_copy(self->headrevs); - Py_DECREF(self->filteredrevs); - self->filteredrevs = filteredrevs; - Py_INCREF(filteredrevs); + if (stop_rev == len) { + Py_DECREF(self->filteredrevs); + self->filteredrevs = filteredrevs; + Py_INCREF(filteredrevs); + } if (filteredrevs != Py_None) { filter = PyObject_GetAttrString(filteredrevs, "__contains__"); @@ -1264,11 +1278,10 @@ } } - len = index_length(self); heads = PyList_New(0); if (heads == NULL) goto bail; - if (len == 0) { + if (stop_rev == 0) { if (pylist_append_owned(heads, PyLong_FromLong(-1)) == -1) { Py_XDECREF(nullid); goto bail; @@ -1276,13 +1289,13 @@ goto done; } - nothead = calloc(len, 1); + nothead = calloc(stop_rev, 1); if (nothead == NULL) { PyErr_NoMemory(); goto bail; } - for (i = len - 1; i >= 0; i--) { + for (i = stop_rev - 1; i >= 0; i--) { int isfiltered; int parents[2]; @@ -1304,7 +1317,7 @@ } } - if (index_get_parents(self, i, parents, (int)len - 1) < 0) + if (index_get_parents(self, i, parents, (int)stop_rev - 1) < 0) goto bail; for (j = 0; j < 2; j++) { if (parents[j] >= 0) @@ -1312,7 +1325,7 @@ } } - for (i = 0; i < len; i++) { + for (i = 0; i < stop_rev; i++) { if (nothead[i]) continue; if (pylist_append_owned(heads, PyLong_FromSsize_t(i)) == -1) { @@ -1321,10 +1334,14 @@ } done: - self->headrevs = heads; Py_XDECREF(filter); free(nothead); - return list_copy(self->headrevs); + if (stop_rev == len) { + self->headrevs = heads; + return list_copy(self->headrevs); + } else { + return heads; + } bail: Py_XDECREF(filter); Py_XDECREF(heads);
--- a/mercurial/pure/parsers.py Thu Sep 26 01:52:09 2024 +0200 +++ b/mercurial/pure/parsers.py Fri Sep 27 03:55:40 2024 +0200 @@ -696,8 +696,10 @@ p = p[revlog_constants.INDEX_HEADER.size :] return p - def headrevs(self, excluded_revs=None): + def headrevs(self, excluded_revs=None, stop_rev=None): count = len(self) + if stop_rev is not None: + count = min(count, stop_rev) if not count: return [nullrev] # we won't iter over filtered rev so nobody is a head at start
--- a/mercurial/repoview.py Thu Sep 26 01:52:09 2024 +0200 +++ b/mercurial/repoview.py Fri Sep 27 03:55:40 2024 +0200 @@ -312,11 +312,8 @@ def headrevs(self, revs=None, stop_rev=None): if revs is None: - filtered = self.filteredrevs - if stop_rev is not None and stop_rev < len(self.index): - filtered = set(self.filteredrevs) - filtered.update(range(stop_rev, len(self.index))) - return self.index.headrevs(filtered) + return self.index.headrevs(self.filteredrevs, stop_rev) + # it is ignored from here, so better double check we passed the right argument assert stop_rev is None revs = self._checknofilteredinrevs(revs)
--- a/mercurial/revlog.py Thu Sep 26 01:52:09 2024 +0200 +++ b/mercurial/revlog.py Fri Sep 27 03:55:40 2024 +0200 @@ -2382,12 +2382,7 @@ def headrevs(self, revs=None, stop_rev=None): if revs is None: - excluded = None - if stop_rev is not None and stop_rev < len(self.index): - # We should let the native code handle it, but that a - # simple enough first step. - excluded = range(stop_rev, len(self.index)) - return self.index.headrevs(excluded) + return self.index.headrevs(None, stop_rev) assert stop_rev is None if rustdagop is not None and self.index.rust_ext_compat: return rustdagop.headrevs(self.index, revs)
--- a/mercurial/revlogutils/revlogv0.py Thu Sep 26 01:52:09 2024 +0200 +++ b/mercurial/revlogutils/revlogv0.py Fri Sep 27 03:55:40 2024 +0200 @@ -111,8 +111,10 @@ ) return INDEX_ENTRY_V0.pack(*e2) - def headrevs(self, excluded_revs=None): + def headrevs(self, excluded_revs=None, stop_rev=None): count = len(self) + if stop_rev is not None: + count = min(count, stop_rev) if not count: return [node.nullrev] # we won't iter over filtered rev so nobody is a head at start
--- a/rust/hg-core/src/revlog/index.rs Thu Sep 26 01:52:09 2024 +0200 +++ b/rust/hg-core/src/revlog/index.rs Fri Sep 27 03:55:40 2024 +0200 @@ -541,14 +541,15 @@ /// Return the head revisions of this index pub fn head_revs(&self) -> Result<Vec<Revision>, GraphError> { - self.head_revs_filtered(&HashSet::new(), false) + self.head_revs_advanced(&HashSet::new(), None, false) .map(|h| h.unwrap()) } /// Return the head revisions of this index - pub fn head_revs_filtered( + pub fn head_revs_advanced( &self, filtered_revs: &HashSet<Revision>, + stop_rev: Option<Revision>, py_shortcut: bool, ) -> Result<Option<Vec<Revision>>, GraphError> { { @@ -560,6 +561,7 @@ let self_filtered_revs = &guard.1; if !self_head_revs.is_empty() && filtered_revs == self_filtered_revs + && stop_rev.is_none() { if py_shortcut { // Don't copy the revs since we've already cached them @@ -571,32 +573,42 @@ } } - let as_vec = if self.is_empty() { - vec![NULL_REVISION] + let (as_vec, cachable) = if self.is_empty() { + (vec![NULL_REVISION], true) } else { - let mut not_heads = bitvec![0; self.len()]; + let length: usize = match stop_rev { + Some(r) => r.0 as usize, + None => self.len(), + }; + let cachable = self.len() == length; + let mut not_heads = bitvec![0; length]; dagops::retain_heads_fast( self, not_heads.as_mut_bitslice(), filtered_revs, )?; - not_heads - .into_iter() - .enumerate() - .filter_map(|(idx, is_not_head)| { - if is_not_head { - None - } else { - Some(Revision(idx as BaseRevision)) - } - }) - .collect() + ( + not_heads + .into_iter() + .enumerate() + .filter_map(|(idx, is_not_head)| { + if is_not_head { + None + } else { + Some(Revision(idx as BaseRevision)) + } + }) + .collect(), + cachable, + ) }; - *self - .head_revs - .write() - .expect("RwLock on Index.head_revs should not be poisoned") = - (as_vec.to_owned(), filtered_revs.to_owned()); + if cachable { + *self + .head_revs + .write() + .expect("RwLock on Index.head_revs should not be poisoned") = + (as_vec.to_owned(), filtered_revs.to_owned()); + } Ok(Some(as_vec)) } @@ -604,7 +616,7 @@ pub fn head_revs_shortcut( &self, ) -> Result<Option<Vec<Revision>>, GraphError> { - self.head_revs_filtered(&HashSet::new(), true) + self.head_revs_advanced(&HashSet::new(), None, true) } /// Return the heads removed and added by advancing from `begin` to `end`.
--- a/rust/hg-cpython/src/revlog.rs Thu Sep 26 01:52:09 2024 +0200 +++ b/rust/hg-cpython/src/revlog.rs Fri Sep 27 03:55:40 2024 +0200 @@ -27,7 +27,10 @@ revlog::{nodemap::NodeMap, Graph, NodePrefix, RevlogError, RevlogIndex}, BaseRevision, Node, Revision, UncheckedRevision, NULL_REVISION, }; -use std::{cell::RefCell, collections::HashMap}; +use std::{ + cell::RefCell, + collections::{HashMap, HashSet}, +}; use vcsgraph::graph::Graph as VCSGraph; pub struct PySharedIndex { @@ -305,12 +308,13 @@ /// get head revisions def headrevs(&self, *args, **_kw) -> PyResult<PyObject> { - let filtered_revs = match &args.len(py) { - 0 => Ok(py.None()), - 1 => Ok(args.get_item(py, 0)), + let (filtered_revs, stop_rev) = match &args.len(py) { + 0 => Ok((py.None(), py.None())), + 1 => Ok((args.get_item(py, 0), py.None())), + 2 => Ok((args.get_item(py, 0), args.get_item(py, 1))), _ => Err(PyErr::new::<cpython::exc::TypeError, _>(py, "too many arguments")), }?; - self.inner_headrevs(py, &filtered_revs) + self.inner_headrevs(py, &filtered_revs, &stop_rev) } /// get head nodeids @@ -821,29 +825,62 @@ &self, py: Python, filtered_revs: &PyObject, + stop_rev: &PyObject, ) -> PyResult<PyObject> { let index = &*self.index(py).borrow(); - - let from_core = match filtered_revs.is_none(py) { - true => index.head_revs_shortcut(), + let stop_rev = match stop_rev.is_none(py) { false => { + let rev = stop_rev.extract::<i32>(py)?; + if 0 <= rev && rev < index.len() as BaseRevision { + Some(Revision(rev)) + } else { + None + } + } + true => None, + }; + let from_core = match (filtered_revs.is_none(py), stop_rev.is_none()) { + (true, true) => index.head_revs_shortcut(), + (true, false) => { + index.head_revs_advanced(&HashSet::new(), stop_rev, false) + } + _ => { let filtered_revs = rev_pyiter_collect(py, filtered_revs, index)?; - index.head_revs_filtered(&filtered_revs, true) + index.head_revs_advanced( + &filtered_revs, + stop_rev, + stop_rev.is_none(), + ) } }; - if let Some(new_heads) = from_core.map_err(|e| graph_error(py, e))? { - self.cache_new_heads_py_list(&new_heads, py); - } + if stop_rev.is_some() { + // we don't cache result for now + let new_heads = from_core + .map_err(|e| graph_error(py, e))? + .expect("this case should not be cached yet"); - Ok(self - .head_revs_py_list(py) - .borrow() - .as_ref() - .expect("head revs should be cached") - .clone_ref(py) - .into_object()) + let as_vec: Vec<PyObject> = new_heads + .iter() + .map(|r| PyRevision::from(*r).into_py_object(py).into_object()) + .collect(); + Ok(PyList::new(py, &as_vec).into_object()) + } else { + if let Some(new_heads) = + from_core.map_err(|e| graph_error(py, e))? + { + self.cache_new_heads_py_list(&new_heads, py); + } + + Ok(self + .head_revs_py_list(py) + .borrow() + .as_ref() + .expect("head revs should be cached") + .clone_ref(py) + .into_object()) + } } fn check_revision(