Mercurial > hg
changeset 42176:3bdb21bbf791
rust-dagops: range of revisions
This is a Rust implementation for what reachableroots2() does if
includepath is True.
The algorithmic details and performance notes are included in the
documentation comment.
Our main use case for now is a Rust counterpart of the partialdiscovery
object, so we don't really need bindings yet.
Differential Revision: https://phab.mercurial-scm.org/D6229
author | Georges Racinet <georges.racinet@octobus.net> |
---|---|
date | Tue, 19 Feb 2019 23:41:57 +0100 |
parents | 84bd6ae2d1f6 |
children | be0733552984 |
files | rust/hg-core/src/dagops.rs |
diffstat | 1 files changed, 92 insertions(+), 1 deletions(-) [+] |
line wrap: on
line diff
--- a/rust/hg-core/src/dagops.rs	Wed Apr 17 10:49:11 2019 -0700
+++ b/rust/hg-core/src/dagops.rs	Tue Feb 19 23:41:57 2019 +0100
@@ -13,7 +13,8 @@
 //! - Similarly *relative roots* of a collection of `Revision`, we mean
 //!   those whose parents, if any, don't belong to the collection.
 use super::{Graph, GraphError, Revision, NULL_REVISION};
-use std::collections::HashSet;
+use crate::ancestors::AncestorsIterator;
+use std::collections::{BTreeSet, HashSet};
 
 fn remove_parents(
     graph: &impl Graph,
@@ -80,6 +81,71 @@
     Ok(())
 }
 
+/// Compute the topological range between two collections of revisions
+///
+/// This is equivalent to the revset `<roots>::<heads>`.
+///
+/// Currently, the given `Graph` has to implement `Clone`, which means
+/// actually cloning just a reference-counted Python pointer if
+/// it's passed over through `rust-cpython`. This is due to the internal
+/// use of `AncestorsIterator`
+///
+/// # Algorithmic details
+///
+/// This is a two-pass swipe inspired from what `reachableroots2` from
+/// `mercurial.cext.parsers` does to obtain the same results.
+///
+/// - first, we climb up the DAG from `heads` in topological order, keeping
+///   them in the vector `heads_ancestors` vector, and adding any element of
+///   `roots` we find among them to the resulting range.
+/// - Then, we iterate on that recorded vector so that a revision is always
+///   emitted after its parents and add all revisions whose parents are already
+///   in the range to the results.
+///
+/// # Performance notes
+///
+/// The main difference with the C implementation is that
+/// the latter uses a flat array with bit flags, instead of complex structures
+/// like `HashSet`, making it faster in most scenarios. In theory, it's
+/// possible that the present implementation could be more memory efficient
+/// for very large repositories with many branches.
+pub fn range(
+    graph: &(impl Graph + Clone),
+    roots: impl IntoIterator<Item = Revision>,
+    heads: impl IntoIterator<Item = Revision>,
+) -> Result<BTreeSet<Revision>, GraphError> {
+    let mut range = BTreeSet::new();
+    let roots: HashSet<Revision> = roots.into_iter().collect();
+    let min_root: Revision = match roots.iter().cloned().min() {
+        None => {
+            return Ok(range);
+        }
+        Some(r) => r,
+    };
+
+    // Internally, AncestorsIterator currently maintains a `HashSet`
+    // of all seen revision, which is also what we record, albeit in an ordered
+    // way. There's room for improvement on this duplication.
+    let ait = AncestorsIterator::new(graph.clone(), heads, min_root, true)?;
+    let mut heads_ancestors: Vec<Revision> = Vec::new();
+    for revres in ait {
+        let rev = revres?;
+        if roots.contains(&rev) {
+            range.insert(rev);
+        }
+        heads_ancestors.push(rev);
+    }
+
+    for rev in heads_ancestors.into_iter().rev() {
+        for parent in graph.parents(rev)?.iter() {
+            if *parent != NULL_REVISION && range.contains(parent) {
+                range.insert(rev);
+            }
+        }
+    }
+    Ok(range)
+}
+
 #[cfg(test)]
 mod tests {
 
@@ -137,4 +203,29 @@
         Ok(())
     }
 
+    /// Apply `range()` and convert the result into a Vec for easier comparison
+    fn range_vec(
+        graph: impl Graph + Clone,
+        roots: &[Revision],
+        heads: &[Revision],
+    ) -> Result<Vec<Revision>, GraphError> {
+        range(&graph, roots.iter().cloned(), heads.iter().cloned())
+            .map(|bs| bs.into_iter().collect())
+    }
+
+    #[test]
+    fn test_range() -> Result<(), GraphError> {
+        assert_eq!(range_vec(SampleGraph, &[0], &[4])?, vec![0, 1, 2, 4]);
+        assert_eq!(range_vec(SampleGraph, &[0], &[8])?, vec![]);
+        assert_eq!(
+            range_vec(SampleGraph, &[5, 6], &[10, 11, 13])?,
+            vec![5, 10]
+        );
+        assert_eq!(
+            range_vec(SampleGraph, &[5, 6], &[10, 12])?,
+            vec![5, 6, 9, 10, 12]
+        );
+        Ok(())
+    }
+
 }