rust: translation of missingancestors
authorGeorges Racinet <gracinet@anybox.fr>
Fri, 30 Nov 2018 00:46:55 +0100
changeset 40972 d097dd0afc19
parent 40971 abd7b75e80bc
child 40973 43974cd44967
rust: translation of missingancestors This is as direct as possible a translation of the ancestor.missingancestors Python class in pure Rust. The goal for this changeset is to make it easy to compare with the Python version. We also add to Python tests the cases that helped us develop and debug this implementation. Some possible optimizations are marked along the way as TODO comments Differential Revision: https://phab.mercurial-scm.org/D5416
rust/hg-core/src/ancestors.rs
rust/hg-core/src/lib.rs
tests/test-ancestor.py
tests/test-ancestor.py.out
--- a/rust/hg-core/src/ancestors.rs	Fri Dec 14 18:15:19 2018 +0100
+++ b/rust/hg-core/src/ancestors.rs	Fri Nov 30 00:46:55 2018 +0100
@@ -8,6 +8,7 @@
 //! Rust versions of generic DAG ancestors algorithms for Mercurial
 
 use super::{Graph, GraphError, Revision, NULL_REVISION};
+use std::cmp::max;
 use std::collections::{BinaryHeap, HashSet};
 
 /// Iterator over the ancestors of a given list of revisions
@@ -24,6 +25,11 @@
     stoprev: Revision,
 }
 
+pub struct MissingAncestors<G: Graph> {
+    graph: G,
+    bases: HashSet<Revision>,
+}
+
 impl<G: Graph> AncestorsIterator<G> {
     /// Constructor.
     ///
@@ -131,10 +137,187 @@
     }
 }
 
+impl<G: Graph> MissingAncestors<G> {
+    pub fn new(graph: G, bases: impl IntoIterator<Item = Revision>) -> Self {
+        let mut bases: HashSet<Revision> = bases.into_iter().collect();
+        if bases.is_empty() {
+            bases.insert(NULL_REVISION);
+        }
+        MissingAncestors { graph, bases }
+    }
+
+    pub fn has_bases(&self) -> bool {
+        self.bases.iter().any(|&b| b != NULL_REVISION)
+    }
+
+    /// Return a reference to current bases.
+    ///
+    /// This is useful in unit tests, but also setdiscovery.py does
+    /// read the bases attribute of a ancestor.missingancestors instance.
+    pub fn get_bases<'a>(&'a self) -> &'a HashSet<Revision> {
+        &self.bases
+    }
+
+    pub fn add_bases(
+        &mut self,
+        new_bases: impl IntoIterator<Item = Revision>,
+    ) {
+        self.bases.extend(new_bases);
+    }
+
+    /// Remove all ancestors of self.bases from the revs set (in place)
+    pub fn remove_ancestors_from(
+        &mut self,
+        revs: &mut HashSet<Revision>,
+    ) -> Result<(), GraphError> {
+        revs.retain(|r| !self.bases.contains(r));
+        // the null revision is always an ancestor
+        revs.remove(&NULL_REVISION);
+        if revs.is_empty() {
+            return Ok(());
+        }
+        // anything in revs > start is definitely not an ancestor of bases
+        // revs <= start need to be investigated
+        // TODO optim: if a missingancestors is to be used several times,
+        // we shouldn't need to iterate each time on bases
+        let start = match self.bases.iter().cloned().max() {
+            Some(m) => m,
+            None => {
+                // bases is empty (shouldn't happen, but let's be safe)
+                return Ok(());
+            }
+        };
+        // whatever happens, we'll keep at least keepcount of them
+        // knowing this gives us a earlier stop condition than
+        // going all the way to the root
+        let keepcount = revs.iter().filter(|r| **r > start).count();
+
+        let mut curr = start;
+        while curr != NULL_REVISION && revs.len() > keepcount {
+            if self.bases.contains(&curr) {
+                revs.remove(&curr);
+                self.add_parents(curr)?;
+            }
+            curr -= 1;
+        }
+        Ok(())
+    }
+
+    /// Add rev's parents to self.bases
+    #[inline]
+    fn add_parents(&mut self, rev: Revision) -> Result<(), GraphError> {
+        // No need to bother the set with inserting NULL_REVISION over and
+        // over
+        for p in self.graph.parents(rev)?.iter().cloned() {
+            if p != NULL_REVISION {
+                self.bases.insert(p);
+            }
+        }
+        Ok(())
+    }
+
+    /// Return all the ancestors of revs that are not ancestors of self.bases
+    ///
+    /// This may include elements from revs.
+    ///
+    /// Equivalent to the revset (::revs - ::self.bases). Revs are returned in
+    /// revision number order, which is a topological order.
+    pub fn missing_ancestors(
+        &mut self,
+        revs: impl IntoIterator<Item = Revision>,
+    ) -> Result<Vec<Revision>, GraphError> {
+        // just for convenience and comparison with Python version
+        let bases_visit = &mut self.bases;
+        let mut revs: HashSet<Revision> = revs
+            .into_iter()
+            .filter(|r| !bases_visit.contains(r))
+            .collect();
+        let revs_visit = &mut revs;
+        let mut both_visit: HashSet<Revision> =
+            revs_visit.intersection(&bases_visit).cloned().collect();
+        if revs_visit.is_empty() {
+            return Ok(Vec::new());
+        }
+
+        let max_bases =
+            bases_visit.iter().cloned().max().unwrap_or(NULL_REVISION);
+        let max_revs =
+            revs_visit.iter().cloned().max().unwrap_or(NULL_REVISION);
+        let start = max(max_bases, max_revs);
+
+        // TODO heuristics for with_capacity()?
+        let mut missing: Vec<Revision> = Vec::new();
+        for curr in (0..=start).map(|i| start - i) {
+            if revs_visit.is_empty() {
+                break;
+            }
+            if both_visit.contains(&curr) {
+                // curr's parents might have made it into revs_visit through
+                // another path
+                // TODO optim: Rust's HashSet.remove returns a boolean telling
+                // if it happened. This will spare us one set lookup
+                both_visit.remove(&curr);
+                for p in self.graph.parents(curr)?.iter().cloned() {
+                    if p == NULL_REVISION {
+                        continue;
+                    }
+                    revs_visit.remove(&p);
+                    bases_visit.insert(p);
+                    both_visit.insert(p);
+                }
+                continue;
+            }
+            // in Rust, one can't just use mutable variables assignation
+            // to be more straightforward. Instead of Python's
+            // thisvisit and othervisit, we'll differentiate with a boolean
+            let this_visit_is_revs = {
+                if revs_visit.remove(&curr) {
+                    missing.push(curr);
+                    true
+                } else if bases_visit.contains(&curr) {
+                    false
+                } else {
+                    // not an ancestor of revs or bases: ignore
+                    continue;
+                }
+            };
+
+            for p in self.graph.parents(curr)?.iter().cloned() {
+                if p == NULL_REVISION {
+                    continue;
+                }
+                let in_other_visit = if this_visit_is_revs {
+                    bases_visit.contains(&p)
+                } else {
+                    revs_visit.contains(&p)
+                };
+                if in_other_visit || both_visit.contains(&p) {
+                    // p is implicitely in this_visit.
+                    // This means p is or should be in bothvisit
+                    // TODO optim: hence if bothvisit, we look up twice
+                    revs_visit.remove(&p);
+                    bases_visit.insert(p);
+                    both_visit.insert(p);
+                } else {
+                    // visit later
+                    if this_visit_is_revs {
+                        revs_visit.insert(p);
+                    } else {
+                        bases_visit.insert(p);
+                    }
+                }
+            }
+        }
+        missing.reverse();
+        Ok(missing)
+    }
+}
+
 #[cfg(test)]
 mod tests {
 
     use super::*;
+    use std::iter::FromIterator;
 
     #[derive(Clone, Debug)]
     struct Stub;
@@ -252,4 +435,211 @@
             AncestorsIterator::new(Corrupted, vec![1], 0, false).unwrap();
         assert_eq!(iter.next(), Some(Err(GraphError::ParentOutOfRange(0))));
     }
+
+    #[test]
+    /// Test constructor, add/get bases
+    fn test_missing_bases() {
+        let mut missing_ancestors =
+            MissingAncestors::new(Stub, [5, 3, 1, 3].iter().cloned());
+        let mut as_vec: Vec<Revision> =
+            missing_ancestors.get_bases().iter().cloned().collect();
+        as_vec.sort();
+        assert_eq!(as_vec, [1, 3, 5]);
+
+        missing_ancestors.add_bases([3, 7, 8].iter().cloned());
+        as_vec = missing_ancestors.get_bases().iter().cloned().collect();
+        as_vec.sort();
+        assert_eq!(as_vec, [1, 3, 5, 7, 8]);
+    }
+
+    fn assert_missing_remove(
+        bases: &[Revision],
+        revs: &[Revision],
+        expected: &[Revision],
+    ) {
+        let mut missing_ancestors =
+            MissingAncestors::new(Stub, bases.iter().cloned());
+        let mut revset: HashSet<Revision> = revs.iter().cloned().collect();
+        missing_ancestors
+            .remove_ancestors_from(&mut revset)
+            .unwrap();
+        let mut as_vec: Vec<Revision> = revset.into_iter().collect();
+        as_vec.sort();
+        assert_eq!(as_vec.as_slice(), expected);
+    }
+
+    #[test]
+    fn test_missing_remove() {
+        assert_missing_remove(
+            &[1, 2, 3, 4, 7],
+            Vec::from_iter(1..10).as_slice(),
+            &[5, 6, 8, 9],
+        );
+        assert_missing_remove(&[10], &[11, 12, 13, 14], &[11, 12, 13, 14]);
+        assert_missing_remove(&[7], &[1, 2, 3, 4, 5], &[3, 5]);
+    }
+
+    fn assert_missing_ancestors(
+        bases: &[Revision],
+        revs: &[Revision],
+        expected: &[Revision],
+    ) {
+        let mut missing_ancestors =
+            MissingAncestors::new(Stub, bases.iter().cloned());
+        let missing = missing_ancestors
+            .missing_ancestors(revs.iter().cloned())
+            .unwrap();
+        assert_eq!(missing.as_slice(), expected);
+    }
+
+    #[test]
+    fn test_missing_ancestors() {
+        // examples taken from test-ancestors.py by having it run
+        // on the same graph (both naive and fast Python algs)
+        assert_missing_ancestors(&[10], &[11], &[3, 7, 11]);
+        assert_missing_ancestors(&[11], &[10], &[5, 10]);
+        assert_missing_ancestors(&[7], &[9, 11], &[3, 6, 9, 11]);
+    }
+
+    // A Graph represented by a vector whose indices are revisions
+    // and values are parents of the revisions
+    type VecGraph = Vec<[Revision; 2]>;
+
+    impl Graph for VecGraph {
+        fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
+            Ok(self[rev as usize])
+        }
+    }
+
+    /// An interesting case found by a random generator similar to
+    /// the one in test-ancestor.py. An early version of Rust MissingAncestors
+    /// failed this, yet none of the integration tests of the whole suite
+    /// catched it.
+    #[test]
+    fn test_remove_ancestors_from_case1() {
+        let graph: VecGraph = vec![
+            [NULL_REVISION, NULL_REVISION],
+            [0, NULL_REVISION],
+            [1, 0],
+            [2, 1],
+            [3, NULL_REVISION],
+            [4, NULL_REVISION],
+            [5, 1],
+            [2, NULL_REVISION],
+            [7, NULL_REVISION],
+            [8, NULL_REVISION],
+            [9, NULL_REVISION],
+            [10, 1],
+            [3, NULL_REVISION],
+            [12, NULL_REVISION],
+            [13, NULL_REVISION],
+            [14, NULL_REVISION],
+            [4, NULL_REVISION],
+            [16, NULL_REVISION],
+            [17, NULL_REVISION],
+            [18, NULL_REVISION],
+            [19, 11],
+            [20, NULL_REVISION],
+            [21, NULL_REVISION],
+            [22, NULL_REVISION],
+            [23, NULL_REVISION],
+            [2, NULL_REVISION],
+            [3, NULL_REVISION],
+            [26, 24],
+            [27, NULL_REVISION],
+            [28, NULL_REVISION],
+            [12, NULL_REVISION],
+            [1, NULL_REVISION],
+            [1, 9],
+            [32, NULL_REVISION],
+            [33, NULL_REVISION],
+            [34, 31],
+            [35, NULL_REVISION],
+            [36, 26],
+            [37, NULL_REVISION],
+            [38, NULL_REVISION],
+            [39, NULL_REVISION],
+            [40, NULL_REVISION],
+            [41, NULL_REVISION],
+            [42, 26],
+            [0, NULL_REVISION],
+            [44, NULL_REVISION],
+            [45, 4],
+            [40, NULL_REVISION],
+            [47, NULL_REVISION],
+            [36, 0],
+            [49, NULL_REVISION],
+            [NULL_REVISION, NULL_REVISION],
+            [51, NULL_REVISION],
+            [52, NULL_REVISION],
+            [53, NULL_REVISION],
+            [14, NULL_REVISION],
+            [55, NULL_REVISION],
+            [15, NULL_REVISION],
+            [23, NULL_REVISION],
+            [58, NULL_REVISION],
+            [59, NULL_REVISION],
+            [2, NULL_REVISION],
+            [61, 59],
+            [62, NULL_REVISION],
+            [63, NULL_REVISION],
+            [NULL_REVISION, NULL_REVISION],
+            [65, NULL_REVISION],
+            [66, NULL_REVISION],
+            [67, NULL_REVISION],
+            [68, NULL_REVISION],
+            [37, 28],
+            [69, 25],
+            [71, NULL_REVISION],
+            [72, NULL_REVISION],
+            [50, 2],
+            [74, NULL_REVISION],
+            [12, NULL_REVISION],
+            [18, NULL_REVISION],
+            [77, NULL_REVISION],
+            [78, NULL_REVISION],
+            [79, NULL_REVISION],
+            [43, 33],
+            [81, NULL_REVISION],
+            [82, NULL_REVISION],
+            [83, NULL_REVISION],
+            [84, 45],
+            [85, NULL_REVISION],
+            [86, NULL_REVISION],
+            [NULL_REVISION, NULL_REVISION],
+            [88, NULL_REVISION],
+            [NULL_REVISION, NULL_REVISION],
+            [76, 83],
+            [44, NULL_REVISION],
+            [92, NULL_REVISION],
+            [93, NULL_REVISION],
+            [9, NULL_REVISION],
+            [95, 67],
+            [96, NULL_REVISION],
+            [97, NULL_REVISION],
+            [NULL_REVISION, NULL_REVISION],
+        ];
+        let problem_rev = 28 as Revision;
+        let problem_base = 70 as Revision;
+        // making the problem obvious: problem_rev is a parent of problem_base
+        assert_eq!(graph.parents(problem_base).unwrap()[1], problem_rev);
+
+        let mut missing_ancestors: MissingAncestors<VecGraph> =
+            MissingAncestors::new(
+                graph,
+                [60, 26, 70, 3, 96, 19, 98, 49, 97, 47, 1, 6]
+                    .iter()
+                    .cloned(),
+            );
+        assert!(missing_ancestors.bases.contains(&problem_base));
+
+        let mut revs: HashSet<Revision> =
+            [4, 12, 41, 28, 68, 38, 1, 30, 56, 44]
+                .iter()
+                .cloned()
+                .collect();
+        missing_ancestors.remove_ancestors_from(&mut revs).unwrap();
+        assert!(!revs.contains(&problem_rev));
+    }
+
 }
--- a/rust/hg-core/src/lib.rs	Fri Dec 14 18:15:19 2018 +0100
+++ b/rust/hg-core/src/lib.rs	Fri Nov 30 00:46:55 2018 +0100
@@ -3,7 +3,7 @@
 // This software may be used and distributed according to the terms of the
 // GNU General Public License version 2 or any later version.
 mod ancestors;
-pub use ancestors::AncestorsIterator;
+pub use ancestors::{AncestorsIterator, MissingAncestors};
 
 /// Mercurial revision numbers
 ///
@@ -15,6 +15,9 @@
 
 /// The simplest expression of what we need of Mercurial DAGs.
 pub trait Graph {
+    /// Return the two parents of the given `Revision`.
+    ///
+    /// Each of the parents can be independently `NULL_REVISION`
     fn parents(&self, Revision) -> Result<[Revision; 2], GraphError>;
 }
 
--- a/tests/test-ancestor.py	Fri Dec 14 18:15:19 2018 +0100
+++ b/tests/test-ancestor.py	Fri Nov 30 00:46:55 2018 +0100
@@ -182,6 +182,64 @@
          5: [4, -1], 6: [4, -1], 7: [4, -1], 8: [-1, -1], 9: [6, 7],
          10: [5, -1], 11: [3, 7], 12: [9, -1], 13: [8, -1]}
 
+def test_missingancestors_explicit():
+    """A few explicit cases, easier to check for catching errors in refactors.
+
+    The bigger graph at the end has been produced by the random generator
+    above, and we have some evidence that the other tests don't cover it.
+    """
+    for i, (bases, revs) in enumerate((({1, 2, 3, 4, 7}, set(xrange(10))),
+                                       ({10}, set({11, 12, 13, 14})),
+                                       ({7}, set({1, 2, 3, 4, 5})),
+                                       )):
+        print("%% removeancestorsfrom(), example %d" % (i + 1))
+        missanc = ancestor.incrementalmissingancestors(graph.get, bases)
+        missanc.removeancestorsfrom(revs)
+        print("remaining (sorted): %s" % sorted(list(revs)))
+
+    for i, (bases, revs) in enumerate((({10}, {11}),
+                                       ({11}, {10}),
+                                       ({7}, {9, 11}),
+                                       )):
+        print("%% missingancestors(), example %d" % (i + 1))
+        missanc = ancestor.incrementalmissingancestors(graph.get, bases)
+        print("return %s" % missanc.missingancestors(revs))
+
+    print("% removeancestorsfrom(), bigger graph")
+    vecgraph = [
+        [-1, -1], [0, -1], [1, 0], [2, 1], [3, -1], [4, -1], [5, 1],
+        [2, -1], [7, -1], [8, -1], [9, -1], [10, 1], [3, -1], [12, -1],
+        [13, -1], [14, -1], [4, -1], [16, -1], [17, -1], [18, -1],
+        [19, 11], [20, -1], [21, -1], [22, -1], [23, -1], [2, -1],
+        [3, -1], [26, 24], [27, -1], [28, -1], [12, -1], [1, -1], [1, 9],
+        [32, -1], [33, -1], [34, 31], [35, -1], [36, 26], [37, -1],
+        [38, -1], [39, -1], [40, -1], [41, -1], [42, 26], [0, -1],
+        [44, -1], [45, 4], [40, -1], [47, -1], [36, 0], [49, -1],
+        [-1, -1], [51, -1], [52, -1], [53, -1], [14, -1],
+        [55, -1], [15, -1], [23, -1], [58, -1], [59, -1], [2, -1],
+        [61, 59], [62, -1], [63, -1], [-1, -1], [65, -1],
+        [66, -1], [67, -1], [68, -1], [37, 28], [69, 25],
+        [71, -1], [72, -1], [50, 2], [74, -1], [12, -1],
+        [18, -1], [77, -1], [78, -1], [79, -1], [43, 33],
+        [81, -1], [82, -1], [83, -1], [84, 45], [85, -1],
+        [86, -1], [-1, -1], [88, -1], [-1, -1], [76, 83], [44, -1],
+        [92, -1], [93, -1], [9, -1], [95, 67], [96, -1], [97, -1],
+        [-1, -1]]
+    problem_rev = 28
+    problem_base = 70
+    # problem_rev is a parent of problem_base, but a faulty implementation
+    # could forget to remove it.
+    bases = {60, 26, 70, 3, 96, 19, 98, 49, 97, 47, 1, 6}
+    if problem_rev not in vecgraph[problem_base] or problem_base not in bases:
+        print("Conditions have changed")
+    missanc = ancestor.incrementalmissingancestors(vecgraph.__getitem__, bases)
+    revs = {4, 12, 41, 28, 68, 38, 1, 30, 56, 44}
+    missanc.removeancestorsfrom(revs)
+    if 28 in revs:
+        print("Failed!")
+    else:
+        print("Ok")
+
 def genlazyancestors(revs, stoprev=0, inclusive=False):
     print(("%% lazy ancestor set for %s, stoprev = %s, inclusive = %s" %
            (revs, stoprev, inclusive)))
@@ -276,6 +334,7 @@
             seed = long(time.time() * 1000)
 
     rng = random.Random(seed)
+    test_missingancestors_explicit()
     test_missingancestors(seed, rng)
     test_lazyancestors()
     test_gca()
--- a/tests/test-ancestor.py.out	Fri Dec 14 18:15:19 2018 +0100
+++ b/tests/test-ancestor.py.out	Fri Nov 30 00:46:55 2018 +0100
@@ -1,3 +1,17 @@
+% removeancestorsfrom(), example 1
+remaining (sorted): [5, 6, 8, 9]
+% removeancestorsfrom(), example 2
+remaining (sorted): [11, 12, 13, 14]
+% removeancestorsfrom(), example 3
+remaining (sorted): [3, 5]
+% missingancestors(), example 1
+return [3, 7, 11]
+% missingancestors(), example 2
+return [5, 10]
+% missingancestors(), example 3
+return [3, 6, 9, 11]
+% removeancestorsfrom(), bigger graph
+Ok
 % lazy ancestor set for [], stoprev = 0, inclusive = False
 membership: []
 iteration:  []