changeset 42176:3bdb21bbf791

rust-dagops: range of revisions This is a Rust implementation for what reachableroots2() does if includepath is True. The algorithmic details and performance notes are included in the documentation comment. Our main use case for now is a Rust counterpart of the partialdiscovery object, so we don't really need bindings yet. Differential Revision: https://phab.mercurial-scm.org/D6229
author Georges Racinet <georges.racinet@octobus.net>
date Tue, 19 Feb 2019 23:41:57 +0100
parents 84bd6ae2d1f6
children be0733552984
files rust/hg-core/src/dagops.rs
diffstat 1 files changed, 92 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/rust/hg-core/src/dagops.rs	Wed Apr 17 10:49:11 2019 -0700
+++ b/rust/hg-core/src/dagops.rs	Tue Feb 19 23:41:57 2019 +0100
@@ -13,7 +13,8 @@
 //! - Similarly *relative roots* of a collection of `Revision`, we mean
 //!   those whose parents, if any, don't belong to the collection.
 use super::{Graph, GraphError, Revision, NULL_REVISION};
-use std::collections::HashSet;
+use crate::ancestors::AncestorsIterator;
+use std::collections::{BTreeSet, HashSet};
 
 fn remove_parents(
     graph: &impl Graph,
@@ -80,6 +81,71 @@
     Ok(())
 }
 
+/// Compute the topological range between two collections of revisions
+///
+/// This is equivalent to the revset `<roots>::<heads>`.
+///
+/// Currently, the given `Graph` has to implement `Clone`, which means
+/// actually cloning just a reference-counted Python pointer if
+/// it's passed over through `rust-cpython`. This is due to the internal
+/// use of `AncestorsIterator`
+///
+/// # Algorithmic details
+///
+/// This is a two-pass swipe inspired from what `reachableroots2` from
+/// `mercurial.cext.parsers` does to obtain the same results.
+///
+/// - first, we climb up the DAG from `heads` in topological order, keeping
+///   them in the vector `heads_ancestors` vector, and adding any element of
+///   `roots` we find among them to the resulting range.
+/// - Then, we iterate on that recorded vector so that a revision is always
+///   emitted after its parents and add all revisions whose parents are already
+///   in the range to the results.
+///
+/// # Performance notes
+///
+/// The main difference with the C implementation is that
+/// the latter uses a flat array with bit flags, instead of complex structures
+/// like `HashSet`, making it faster in most scenarios. In theory, it's
+/// possible that the present implementation could be more memory efficient
+/// for very large repositories with many branches.
+pub fn range(
+    graph: &(impl Graph + Clone),
+    roots: impl IntoIterator<Item = Revision>,
+    heads: impl IntoIterator<Item = Revision>,
+) -> Result<BTreeSet<Revision>, GraphError> {
+    let mut range = BTreeSet::new();
+    let roots: HashSet<Revision> = roots.into_iter().collect();
+    let min_root: Revision = match roots.iter().cloned().min() {
+        None => {
+            return Ok(range);
+        }
+        Some(r) => r,
+    };
+
+    // Internally, AncestorsIterator currently maintains a `HashSet`
+    // of all seen revision, which is also what we record, albeit in an ordered
+    // way. There's room for improvement on this duplication.
+    let ait = AncestorsIterator::new(graph.clone(), heads, min_root, true)?;
+    let mut heads_ancestors: Vec<Revision> = Vec::new();
+    for revres in ait {
+        let rev = revres?;
+        if roots.contains(&rev) {
+            range.insert(rev);
+        }
+        heads_ancestors.push(rev);
+    }
+
+    for rev in heads_ancestors.into_iter().rev() {
+        for parent in graph.parents(rev)?.iter() {
+            if *parent != NULL_REVISION && range.contains(parent) {
+                range.insert(rev);
+            }
+        }
+    }
+    Ok(range)
+}
+
 #[cfg(test)]
 mod tests {
 
@@ -137,4 +203,29 @@
         Ok(())
     }
 
+    /// Apply `range()` and convert the result into a Vec for easier comparison
+    fn range_vec(
+        graph: impl Graph + Clone,
+        roots: &[Revision],
+        heads: &[Revision],
+    ) -> Result<Vec<Revision>, GraphError> {
+        range(&graph, roots.iter().cloned(), heads.iter().cloned())
+            .map(|bs| bs.into_iter().collect())
+    }
+
+    #[test]
+    fn test_range() -> Result<(), GraphError> {
+        assert_eq!(range_vec(SampleGraph, &[0], &[4])?, vec![0, 1, 2, 4]);
+        assert_eq!(range_vec(SampleGraph, &[0], &[8])?, vec![]);
+        assert_eq!(
+            range_vec(SampleGraph, &[5, 6], &[10, 11, 13])?,
+            vec![5, 10]
+        );
+        assert_eq!(
+            range_vec(SampleGraph, &[5, 6], &[10, 12])?,
+            vec![5, 6, 9, 10, 12]
+        );
+        Ok(())
+    }
+
 }