revlog: add a Rust implementation of `headrevsdiff`
Python implementation of `headrevsdiff` can be very slow in the worst
case compared with the `heads` computation it replaces, since the
latter is done in Rust.
Even the average case of this Python implementation is still
noticeable in the profiles.
This patch makes the computation much much faster by doing it in Rust.
--- a/rust/hg-core/src/revlog/index.rs Thu Dec 21 20:30:03 2023 +0000
+++ b/rust/hg-core/src/revlog/index.rs Mon Feb 12 20:01:27 2024 +0000
@@ -556,6 +556,57 @@
self.head_revs_filtered(&HashSet::new(), true)
}
+ /// Return the heads removed and added by advancing from `begin` to `end`.
+ /// In revset language, we compute:
+ /// - `heads(:begin)-heads(:end)`
+ /// - `heads(:end)-heads(:begin)`
+ pub fn head_revs_diff(
+ &self,
+ begin: Revision,
+ end: Revision,
+ ) -> Result<(Vec<Revision>, Vec<Revision>), GraphError> {
+ let mut heads_added = vec![];
+ let mut heads_removed = vec![];
+
+ let mut acc = HashSet::new();
+ let Revision(begin) = begin;
+ let Revision(end) = end;
+ let mut i = end;
+
+ while i > begin {
+ // acc invariant:
+ // `j` is in the set iff `j <= i` and it has children
+ // among `i+1..end` (inclusive)
+ if !acc.remove(&i) {
+ heads_added.push(Revision(i));
+ }
+ for Revision(parent) in self.parents(Revision(i))? {
+ acc.insert(parent);
+ }
+ i -= 1;
+ }
+
+ // At this point `acc` contains old revisions that gained new children.
+ // We need to check if they had any children before. If not, those
+ // revisions are the removed heads.
+ while !acc.is_empty() {
+ // acc invariant:
+ // `j` is in the set iff `j <= i` and it has children
+ // among `begin+1..end`, but not among `i+1..begin` (inclusive)
+
+ assert!(i >= -1); // yes, `-1` can also be a head if the repo is empty
+ if acc.remove(&i) {
+ heads_removed.push(Revision(i));
+ }
+ for Revision(parent) in self.parents(Revision(i))? {
+ acc.remove(&parent);
+ }
+ i -= 1;
+ }
+
+ Ok((heads_removed, heads_added))
+ }
+
/// Return the head revisions of this index
pub fn head_revs_filtered(
&self,
--- a/rust/hg-cpython/src/revlog.rs Thu Dec 21 20:30:03 2023 +0000
+++ b/rust/hg-cpython/src/revlog.rs Mon Feb 12 20:01:27 2024 +0000
@@ -315,6 +315,15 @@
Ok(rust_res)
}
+ /// get diff in head revisions
+ def headrevsdiff(&self, *args, **_kw) -> PyResult<PyObject> {
+ let rust_res = self.inner_headrevsdiff(
+ py,
+ &args.get_item(py, 0),
+ &args.get_item(py, 1))?;
+ Ok(rust_res)
+ }
+
/// get filtered head revisions
def headrevsfiltered(&self, *args, **_kw) -> PyResult<PyObject> {
let rust_res = self.inner_headrevsfiltered(py, &args.get_item(py, 0))?;
@@ -827,6 +836,38 @@
.into_object())
}
+ fn check_revision(
+ index: &hg::index::Index,
+ rev: UncheckedRevision,
+ py: Python,
+ ) -> PyResult<Revision> {
+ index
+ .check_revision(rev)
+ .ok_or_else(|| rev_not_in_index(py, rev))
+ }
+
+ fn inner_headrevsdiff(
+ &self,
+ py: Python,
+ begin: &PyObject,
+ end: &PyObject,
+ ) -> PyResult<PyObject> {
+ let begin = begin.extract::<BaseRevision>(py)?;
+ let end = end.extract::<BaseRevision>(py)?;
+ let index = &mut *self.index(py).borrow_mut();
+ let begin =
+ Self::check_revision(index, UncheckedRevision(begin - 1), py)?;
+ let end = Self::check_revision(index, UncheckedRevision(end - 1), py)?;
+ let (removed, added) = index
+ .head_revs_diff(begin, end)
+ .map_err(|e| graph_error(py, e))?;
+ let removed: Vec<_> =
+ removed.into_iter().map(PyRevision::from).collect();
+ let added: Vec<_> = added.into_iter().map(PyRevision::from).collect();
+ let res = (removed, added).to_py_object(py).into_object();
+ Ok(res)
+ }
+
fn inner_headrevsfiltered(
&self,
py: Python,