rust/hg-core/src/ancestors.rs
changeset 40959 d097dd0afc19
parent 40933 18513d6ef7d4
child 41054 ef54bd33b476
equal deleted inserted replaced
40958:abd7b75e80bc 40959:d097dd0afc19
     6 // GNU General Public License version 2 or any later version.
     6 // GNU General Public License version 2 or any later version.
     7 
     7 
     8 //! Rust versions of generic DAG ancestors algorithms for Mercurial
     8 //! Rust versions of generic DAG ancestors algorithms for Mercurial
     9 
     9 
    10 use super::{Graph, GraphError, Revision, NULL_REVISION};
    10 use super::{Graph, GraphError, Revision, NULL_REVISION};
       
    11 use std::cmp::max;
    11 use std::collections::{BinaryHeap, HashSet};
    12 use std::collections::{BinaryHeap, HashSet};
    12 
    13 
    13 /// Iterator over the ancestors of a given list of revisions
    14 /// Iterator over the ancestors of a given list of revisions
    14 /// This is a generic type, defined and implemented for any Graph, so that
    15 /// This is a generic type, defined and implemented for any Graph, so that
    15 /// it's easy to
    16 /// it's easy to
    20 pub struct AncestorsIterator<G: Graph> {
    21 pub struct AncestorsIterator<G: Graph> {
    21     graph: G,
    22     graph: G,
    22     visit: BinaryHeap<Revision>,
    23     visit: BinaryHeap<Revision>,
    23     seen: HashSet<Revision>,
    24     seen: HashSet<Revision>,
    24     stoprev: Revision,
    25     stoprev: Revision,
       
    26 }
       
    27 
       
    28 pub struct MissingAncestors<G: Graph> {
       
    29     graph: G,
       
    30     bases: HashSet<Revision>,
    25 }
    31 }
    26 
    32 
    27 impl<G: Graph> AncestorsIterator<G> {
    33 impl<G: Graph> AncestorsIterator<G> {
    28     /// Constructor.
    34     /// Constructor.
    29     ///
    35     ///
   129         self.conditionally_push_rev(p2);
   135         self.conditionally_push_rev(p2);
   130         Some(Ok(current))
   136         Some(Ok(current))
   131     }
   137     }
   132 }
   138 }
   133 
   139 
       
   140 impl<G: Graph> MissingAncestors<G> {
       
   141     pub fn new(graph: G, bases: impl IntoIterator<Item = Revision>) -> Self {
       
   142         let mut bases: HashSet<Revision> = bases.into_iter().collect();
       
   143         if bases.is_empty() {
       
   144             bases.insert(NULL_REVISION);
       
   145         }
       
   146         MissingAncestors { graph, bases }
       
   147     }
       
   148 
       
   149     pub fn has_bases(&self) -> bool {
       
   150         self.bases.iter().any(|&b| b != NULL_REVISION)
       
   151     }
       
   152 
       
   153     /// Return a reference to current bases.
       
   154     ///
       
   155     /// This is useful in unit tests, but also setdiscovery.py does
       
   156     /// read the bases attribute of a ancestor.missingancestors instance.
       
   157     pub fn get_bases<'a>(&'a self) -> &'a HashSet<Revision> {
       
   158         &self.bases
       
   159     }
       
   160 
       
   161     pub fn add_bases(
       
   162         &mut self,
       
   163         new_bases: impl IntoIterator<Item = Revision>,
       
   164     ) {
       
   165         self.bases.extend(new_bases);
       
   166     }
       
   167 
       
   168     /// Remove all ancestors of self.bases from the revs set (in place)
       
   169     pub fn remove_ancestors_from(
       
   170         &mut self,
       
   171         revs: &mut HashSet<Revision>,
       
   172     ) -> Result<(), GraphError> {
       
   173         revs.retain(|r| !self.bases.contains(r));
       
   174         // the null revision is always an ancestor
       
   175         revs.remove(&NULL_REVISION);
       
   176         if revs.is_empty() {
       
   177             return Ok(());
       
   178         }
       
   179         // anything in revs > start is definitely not an ancestor of bases
       
   180         // revs <= start need to be investigated
       
   181         // TODO optim: if a missingancestors is to be used several times,
       
   182         // we shouldn't need to iterate each time on bases
       
   183         let start = match self.bases.iter().cloned().max() {
       
   184             Some(m) => m,
       
   185             None => {
       
   186                 // bases is empty (shouldn't happen, but let's be safe)
       
   187                 return Ok(());
       
   188             }
       
   189         };
       
   190         // whatever happens, we'll keep at least keepcount of them
       
   191         // knowing this gives us a earlier stop condition than
       
   192         // going all the way to the root
       
   193         let keepcount = revs.iter().filter(|r| **r > start).count();
       
   194 
       
   195         let mut curr = start;
       
   196         while curr != NULL_REVISION && revs.len() > keepcount {
       
   197             if self.bases.contains(&curr) {
       
   198                 revs.remove(&curr);
       
   199                 self.add_parents(curr)?;
       
   200             }
       
   201             curr -= 1;
       
   202         }
       
   203         Ok(())
       
   204     }
       
   205 
       
   206     /// Add rev's parents to self.bases
       
   207     #[inline]
       
   208     fn add_parents(&mut self, rev: Revision) -> Result<(), GraphError> {
       
   209         // No need to bother the set with inserting NULL_REVISION over and
       
   210         // over
       
   211         for p in self.graph.parents(rev)?.iter().cloned() {
       
   212             if p != NULL_REVISION {
       
   213                 self.bases.insert(p);
       
   214             }
       
   215         }
       
   216         Ok(())
       
   217     }
       
   218 
       
   219     /// Return all the ancestors of revs that are not ancestors of self.bases
       
   220     ///
       
   221     /// This may include elements from revs.
       
   222     ///
       
   223     /// Equivalent to the revset (::revs - ::self.bases). Revs are returned in
       
   224     /// revision number order, which is a topological order.
       
   225     pub fn missing_ancestors(
       
   226         &mut self,
       
   227         revs: impl IntoIterator<Item = Revision>,
       
   228     ) -> Result<Vec<Revision>, GraphError> {
       
   229         // just for convenience and comparison with Python version
       
   230         let bases_visit = &mut self.bases;
       
   231         let mut revs: HashSet<Revision> = revs
       
   232             .into_iter()
       
   233             .filter(|r| !bases_visit.contains(r))
       
   234             .collect();
       
   235         let revs_visit = &mut revs;
       
   236         let mut both_visit: HashSet<Revision> =
       
   237             revs_visit.intersection(&bases_visit).cloned().collect();
       
   238         if revs_visit.is_empty() {
       
   239             return Ok(Vec::new());
       
   240         }
       
   241 
       
   242         let max_bases =
       
   243             bases_visit.iter().cloned().max().unwrap_or(NULL_REVISION);
       
   244         let max_revs =
       
   245             revs_visit.iter().cloned().max().unwrap_or(NULL_REVISION);
       
   246         let start = max(max_bases, max_revs);
       
   247 
       
   248         // TODO heuristics for with_capacity()?
       
   249         let mut missing: Vec<Revision> = Vec::new();
       
   250         for curr in (0..=start).map(|i| start - i) {
       
   251             if revs_visit.is_empty() {
       
   252                 break;
       
   253             }
       
   254             if both_visit.contains(&curr) {
       
   255                 // curr's parents might have made it into revs_visit through
       
   256                 // another path
       
   257                 // TODO optim: Rust's HashSet.remove returns a boolean telling
       
   258                 // if it happened. This will spare us one set lookup
       
   259                 both_visit.remove(&curr);
       
   260                 for p in self.graph.parents(curr)?.iter().cloned() {
       
   261                     if p == NULL_REVISION {
       
   262                         continue;
       
   263                     }
       
   264                     revs_visit.remove(&p);
       
   265                     bases_visit.insert(p);
       
   266                     both_visit.insert(p);
       
   267                 }
       
   268                 continue;
       
   269             }
       
   270             // in Rust, one can't just use mutable variables assignation
       
   271             // to be more straightforward. Instead of Python's
       
   272             // thisvisit and othervisit, we'll differentiate with a boolean
       
   273             let this_visit_is_revs = {
       
   274                 if revs_visit.remove(&curr) {
       
   275                     missing.push(curr);
       
   276                     true
       
   277                 } else if bases_visit.contains(&curr) {
       
   278                     false
       
   279                 } else {
       
   280                     // not an ancestor of revs or bases: ignore
       
   281                     continue;
       
   282                 }
       
   283             };
       
   284 
       
   285             for p in self.graph.parents(curr)?.iter().cloned() {
       
   286                 if p == NULL_REVISION {
       
   287                     continue;
       
   288                 }
       
   289                 let in_other_visit = if this_visit_is_revs {
       
   290                     bases_visit.contains(&p)
       
   291                 } else {
       
   292                     revs_visit.contains(&p)
       
   293                 };
       
   294                 if in_other_visit || both_visit.contains(&p) {
       
   295                     // p is implicitely in this_visit.
       
   296                     // This means p is or should be in bothvisit
       
   297                     // TODO optim: hence if bothvisit, we look up twice
       
   298                     revs_visit.remove(&p);
       
   299                     bases_visit.insert(p);
       
   300                     both_visit.insert(p);
       
   301                 } else {
       
   302                     // visit later
       
   303                     if this_visit_is_revs {
       
   304                         revs_visit.insert(p);
       
   305                     } else {
       
   306                         bases_visit.insert(p);
       
   307                     }
       
   308                 }
       
   309             }
       
   310         }
       
   311         missing.reverse();
       
   312         Ok(missing)
       
   313     }
       
   314 }
       
   315 
   134 #[cfg(test)]
   316 #[cfg(test)]
   135 mod tests {
   317 mod tests {
   136 
   318 
   137     use super::*;
   319     use super::*;
       
   320     use std::iter::FromIterator;
   138 
   321 
   139     #[derive(Clone, Debug)]
   322     #[derive(Clone, Debug)]
   140     struct Stub;
   323     struct Stub;
   141 
   324 
   142     /// This is the same as the dict from test-ancestors.py
   325     /// This is the same as the dict from test-ancestors.py
   250         // inclusive=false looks up initrev's parents right away
   433         // inclusive=false looks up initrev's parents right away
   251         let mut iter =
   434         let mut iter =
   252             AncestorsIterator::new(Corrupted, vec![1], 0, false).unwrap();
   435             AncestorsIterator::new(Corrupted, vec![1], 0, false).unwrap();
   253         assert_eq!(iter.next(), Some(Err(GraphError::ParentOutOfRange(0))));
   436         assert_eq!(iter.next(), Some(Err(GraphError::ParentOutOfRange(0))));
   254     }
   437     }
       
   438 
       
   439     #[test]
       
   440     /// Test constructor, add/get bases
       
   441     fn test_missing_bases() {
       
   442         let mut missing_ancestors =
       
   443             MissingAncestors::new(Stub, [5, 3, 1, 3].iter().cloned());
       
   444         let mut as_vec: Vec<Revision> =
       
   445             missing_ancestors.get_bases().iter().cloned().collect();
       
   446         as_vec.sort();
       
   447         assert_eq!(as_vec, [1, 3, 5]);
       
   448 
       
   449         missing_ancestors.add_bases([3, 7, 8].iter().cloned());
       
   450         as_vec = missing_ancestors.get_bases().iter().cloned().collect();
       
   451         as_vec.sort();
       
   452         assert_eq!(as_vec, [1, 3, 5, 7, 8]);
       
   453     }
       
   454 
       
   455     fn assert_missing_remove(
       
   456         bases: &[Revision],
       
   457         revs: &[Revision],
       
   458         expected: &[Revision],
       
   459     ) {
       
   460         let mut missing_ancestors =
       
   461             MissingAncestors::new(Stub, bases.iter().cloned());
       
   462         let mut revset: HashSet<Revision> = revs.iter().cloned().collect();
       
   463         missing_ancestors
       
   464             .remove_ancestors_from(&mut revset)
       
   465             .unwrap();
       
   466         let mut as_vec: Vec<Revision> = revset.into_iter().collect();
       
   467         as_vec.sort();
       
   468         assert_eq!(as_vec.as_slice(), expected);
       
   469     }
       
   470 
       
   471     #[test]
       
   472     fn test_missing_remove() {
       
   473         assert_missing_remove(
       
   474             &[1, 2, 3, 4, 7],
       
   475             Vec::from_iter(1..10).as_slice(),
       
   476             &[5, 6, 8, 9],
       
   477         );
       
   478         assert_missing_remove(&[10], &[11, 12, 13, 14], &[11, 12, 13, 14]);
       
   479         assert_missing_remove(&[7], &[1, 2, 3, 4, 5], &[3, 5]);
       
   480     }
       
   481 
       
   482     fn assert_missing_ancestors(
       
   483         bases: &[Revision],
       
   484         revs: &[Revision],
       
   485         expected: &[Revision],
       
   486     ) {
       
   487         let mut missing_ancestors =
       
   488             MissingAncestors::new(Stub, bases.iter().cloned());
       
   489         let missing = missing_ancestors
       
   490             .missing_ancestors(revs.iter().cloned())
       
   491             .unwrap();
       
   492         assert_eq!(missing.as_slice(), expected);
       
   493     }
       
   494 
       
   495     #[test]
       
   496     fn test_missing_ancestors() {
       
   497         // examples taken from test-ancestors.py by having it run
       
   498         // on the same graph (both naive and fast Python algs)
       
   499         assert_missing_ancestors(&[10], &[11], &[3, 7, 11]);
       
   500         assert_missing_ancestors(&[11], &[10], &[5, 10]);
       
   501         assert_missing_ancestors(&[7], &[9, 11], &[3, 6, 9, 11]);
       
   502     }
       
   503 
       
   504     // A Graph represented by a vector whose indices are revisions
       
   505     // and values are parents of the revisions
       
   506     type VecGraph = Vec<[Revision; 2]>;
       
   507 
       
   508     impl Graph for VecGraph {
       
   509         fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
       
   510             Ok(self[rev as usize])
       
   511         }
       
   512     }
       
   513 
       
   514     /// An interesting case found by a random generator similar to
       
   515     /// the one in test-ancestor.py. An early version of Rust MissingAncestors
       
   516     /// failed this, yet none of the integration tests of the whole suite
       
   517     /// catched it.
       
   518     #[test]
       
   519     fn test_remove_ancestors_from_case1() {
       
   520         let graph: VecGraph = vec![
       
   521             [NULL_REVISION, NULL_REVISION],
       
   522             [0, NULL_REVISION],
       
   523             [1, 0],
       
   524             [2, 1],
       
   525             [3, NULL_REVISION],
       
   526             [4, NULL_REVISION],
       
   527             [5, 1],
       
   528             [2, NULL_REVISION],
       
   529             [7, NULL_REVISION],
       
   530             [8, NULL_REVISION],
       
   531             [9, NULL_REVISION],
       
   532             [10, 1],
       
   533             [3, NULL_REVISION],
       
   534             [12, NULL_REVISION],
       
   535             [13, NULL_REVISION],
       
   536             [14, NULL_REVISION],
       
   537             [4, NULL_REVISION],
       
   538             [16, NULL_REVISION],
       
   539             [17, NULL_REVISION],
       
   540             [18, NULL_REVISION],
       
   541             [19, 11],
       
   542             [20, NULL_REVISION],
       
   543             [21, NULL_REVISION],
       
   544             [22, NULL_REVISION],
       
   545             [23, NULL_REVISION],
       
   546             [2, NULL_REVISION],
       
   547             [3, NULL_REVISION],
       
   548             [26, 24],
       
   549             [27, NULL_REVISION],
       
   550             [28, NULL_REVISION],
       
   551             [12, NULL_REVISION],
       
   552             [1, NULL_REVISION],
       
   553             [1, 9],
       
   554             [32, NULL_REVISION],
       
   555             [33, NULL_REVISION],
       
   556             [34, 31],
       
   557             [35, NULL_REVISION],
       
   558             [36, 26],
       
   559             [37, NULL_REVISION],
       
   560             [38, NULL_REVISION],
       
   561             [39, NULL_REVISION],
       
   562             [40, NULL_REVISION],
       
   563             [41, NULL_REVISION],
       
   564             [42, 26],
       
   565             [0, NULL_REVISION],
       
   566             [44, NULL_REVISION],
       
   567             [45, 4],
       
   568             [40, NULL_REVISION],
       
   569             [47, NULL_REVISION],
       
   570             [36, 0],
       
   571             [49, NULL_REVISION],
       
   572             [NULL_REVISION, NULL_REVISION],
       
   573             [51, NULL_REVISION],
       
   574             [52, NULL_REVISION],
       
   575             [53, NULL_REVISION],
       
   576             [14, NULL_REVISION],
       
   577             [55, NULL_REVISION],
       
   578             [15, NULL_REVISION],
       
   579             [23, NULL_REVISION],
       
   580             [58, NULL_REVISION],
       
   581             [59, NULL_REVISION],
       
   582             [2, NULL_REVISION],
       
   583             [61, 59],
       
   584             [62, NULL_REVISION],
       
   585             [63, NULL_REVISION],
       
   586             [NULL_REVISION, NULL_REVISION],
       
   587             [65, NULL_REVISION],
       
   588             [66, NULL_REVISION],
       
   589             [67, NULL_REVISION],
       
   590             [68, NULL_REVISION],
       
   591             [37, 28],
       
   592             [69, 25],
       
   593             [71, NULL_REVISION],
       
   594             [72, NULL_REVISION],
       
   595             [50, 2],
       
   596             [74, NULL_REVISION],
       
   597             [12, NULL_REVISION],
       
   598             [18, NULL_REVISION],
       
   599             [77, NULL_REVISION],
       
   600             [78, NULL_REVISION],
       
   601             [79, NULL_REVISION],
       
   602             [43, 33],
       
   603             [81, NULL_REVISION],
       
   604             [82, NULL_REVISION],
       
   605             [83, NULL_REVISION],
       
   606             [84, 45],
       
   607             [85, NULL_REVISION],
       
   608             [86, NULL_REVISION],
       
   609             [NULL_REVISION, NULL_REVISION],
       
   610             [88, NULL_REVISION],
       
   611             [NULL_REVISION, NULL_REVISION],
       
   612             [76, 83],
       
   613             [44, NULL_REVISION],
       
   614             [92, NULL_REVISION],
       
   615             [93, NULL_REVISION],
       
   616             [9, NULL_REVISION],
       
   617             [95, 67],
       
   618             [96, NULL_REVISION],
       
   619             [97, NULL_REVISION],
       
   620             [NULL_REVISION, NULL_REVISION],
       
   621         ];
       
   622         let problem_rev = 28 as Revision;
       
   623         let problem_base = 70 as Revision;
       
   624         // making the problem obvious: problem_rev is a parent of problem_base
       
   625         assert_eq!(graph.parents(problem_base).unwrap()[1], problem_rev);
       
   626 
       
   627         let mut missing_ancestors: MissingAncestors<VecGraph> =
       
   628             MissingAncestors::new(
       
   629                 graph,
       
   630                 [60, 26, 70, 3, 96, 19, 98, 49, 97, 47, 1, 6]
       
   631                     .iter()
       
   632                     .cloned(),
       
   633             );
       
   634         assert!(missing_ancestors.bases.contains(&problem_base));
       
   635 
       
   636         let mut revs: HashSet<Revision> =
       
   637             [4, 12, 41, 28, 68, 38, 1, 30, 56, 44]
       
   638                 .iter()
       
   639                 .cloned()
       
   640                 .collect();
       
   641         missing_ancestors.remove_ancestors_from(&mut revs).unwrap();
       
   642         assert!(!revs.contains(&problem_rev));
       
   643     }
       
   644 
   255 }
   645 }