Mercurial > hg
changeset 41717:9060af281be7
rust: itering less on MissingAncestors.bases for max()
Instead of iterating on the whole `self.bases` each time to find
its max, we keep the latter in a separate member attribute and
keep it up to date in `add_bases()`
On a perfdiscovery done on PyPy, with repos prepared with
`contrib/discovery-helper.sh 50 100`, this gives a slight
improvement (around 0.5% on wall time, but 10% on CPU)
before:
! wall 0.172801 comb 0.180000 user 0.180000 sys 0.000000 (median of 541)
after:
! wall 0.171798 comb 0.160000 user 0.160000 sys 0.000000 (median of 551)
(perf command run time upped because of bigger variability during this test).
Differential Revision: https://phab.mercurial-scm.org/D5945
author | Georges Racinet <georges.racinet@octobus.net> |
---|---|
date | Mon, 04 Feb 2019 19:46:57 +0100 |
parents | 977432970080 |
children | a87ca1d7e61d |
files | rust/hg-core/src/ancestors.rs rust/hg-core/src/dagops.rs rust/hg-core/src/lib.rs |
diffstat | 3 files changed, 56 insertions(+), 25 deletions(-) [+] |
line wrap: on
line diff
--- a/rust/hg-core/src/ancestors.rs Tue Feb 05 10:28:32 2019 +0100 +++ b/rust/hg-core/src/ancestors.rs Mon Feb 04 19:46:57 2019 +0100 @@ -38,6 +38,7 @@ pub struct MissingAncestors<G: Graph> { graph: G, bases: HashSet<Revision>, + max_base: Revision, } impl<G: Graph> AncestorsIterator<G> { @@ -209,7 +210,13 @@ impl<G: Graph> MissingAncestors<G> { pub fn new(graph: G, bases: impl IntoIterator<Item = Revision>) -> Self { - MissingAncestors { graph: graph, bases: bases.into_iter().collect() } + let mut created = MissingAncestors { + graph: graph, + bases: HashSet::new(), + max_base: NULL_REVISION, + }; + created.add_bases(bases); + created } pub fn has_bases(&self) -> bool { @@ -232,17 +239,33 @@ } /// Consumes the object and returns the relative heads of its bases. - pub fn into_bases_heads(mut self) -> Result<HashSet<Revision>, GraphError> { + pub fn into_bases_heads( + mut self, + ) -> Result<HashSet<Revision>, GraphError> { dagops::retain_heads(&self.graph, &mut self.bases)?; Ok(self.bases) } + /// Add some revisions to `self.bases` + /// + /// Takes care of keeping `self.max_base` up to date. pub fn add_bases( &mut self, new_bases: impl IntoIterator<Item = Revision>, ) { - self.bases - .extend(new_bases.into_iter().filter(|&rev| rev != NULL_REVISION)); + let mut max_base = self.max_base; + self.bases.extend( + new_bases + .into_iter() + .filter(|&rev| rev != NULL_REVISION) + .map(|r| { + if r > max_base { + max_base = r; + } + r + }), + ); + self.max_base = max_base; } /// Remove all ancestors of self.bases from the revs set (in place) @@ -261,20 +284,16 @@ } // anything in revs > start is definitely not an ancestor of bases // revs <= start need to be investigated - // TODO optim: if a missingancestors is to be used several times, - // we shouldn't need to iterate each time on bases - let start = match self.bases.iter().cloned().max() { - Some(m) => m, - None => { // self.bases is empty - return Ok(()); - } - }; + if self.max_base == NULL_REVISION { + return Ok(()); + } + // whatever happens, we'll keep at least keepcount of them // knowing this gives us a earlier stop condition than // going all the way to the root - let keepcount = revs.iter().filter(|r| **r > start).count(); + let keepcount = revs.iter().filter(|r| **r > self.max_base).count(); - let mut curr = start; + let mut curr = self.max_base; while curr != NULL_REVISION && revs.len() > keepcount { if self.bases.contains(&curr) { revs.remove(&curr); @@ -285,12 +304,17 @@ Ok(()) } - /// Add rev's parents to self.bases + /// Add the parents of `rev` to `self.bases` + /// + /// This has no effect on `self.max_base` #[inline] fn add_parents(&mut self, rev: Revision) -> Result<(), GraphError> { - // No need to bother the set with inserting NULL_REVISION over and - // over + if rev == NULL_REVISION { + return Ok(()); + } for p in self.graph.parents(rev)?.iter().cloned() { + // No need to bother the set with inserting NULL_REVISION over and + // over if p != NULL_REVISION { self.bases.insert(p); } @@ -320,12 +344,8 @@ if revs_visit.is_empty() { return Ok(Vec::new()); } - - let max_bases = - bases_visit.iter().cloned().max().unwrap_or(NULL_REVISION); - let max_revs = - revs_visit.iter().cloned().max().unwrap_or(NULL_REVISION); - let start = max(max_bases, max_revs); + let max_revs = revs_visit.iter().cloned().max().unwrap(); + let start = max(self.max_base, max_revs); // TODO heuristics for with_capacity()? let mut missing: Vec<Revision> = Vec::new(); @@ -571,11 +591,13 @@ missing_ancestors.get_bases().iter().cloned().collect(); as_vec.sort(); assert_eq!(as_vec, [1, 3, 5]); + assert_eq!(missing_ancestors.max_base, 5); missing_ancestors.add_bases([3, 7, 8].iter().cloned()); as_vec = missing_ancestors.get_bases().iter().cloned().collect(); as_vec.sort(); assert_eq!(as_vec, [1, 3, 5, 7, 8]); + assert_eq!(missing_ancestors.max_base, 8); as_vec = missing_ancestors.bases_heads()?.iter().cloned().collect(); as_vec.sort();
--- a/rust/hg-core/src/dagops.rs Tue Feb 05 10:28:32 2019 +0100 +++ b/rust/hg-core/src/dagops.rs Mon Feb 04 19:46:57 2019 +0100 @@ -46,7 +46,9 @@ let mut heads: HashSet<Revision> = iter_revs.clone().cloned().collect(); heads.remove(&NULL_REVISION); for rev in iter_revs { - remove_parents(graph, *rev, &mut heads)?; + if *rev != NULL_REVISION { + remove_parents(graph, *rev, &mut heads)?; + } } Ok(heads) } @@ -71,7 +73,9 @@ // mutating let as_vec: Vec<Revision> = revs.iter().cloned().collect(); for rev in as_vec { - remove_parents(graph, rev, revs)?; + if rev != NULL_REVISION { + remove_parents(graph, rev, revs)?; + } } Ok(()) }
--- a/rust/hg-core/src/lib.rs Tue Feb 05 10:28:32 2019 +0100 +++ b/rust/hg-core/src/lib.rs Mon Feb 04 19:46:57 2019 +0100 @@ -13,6 +13,11 @@ /// 4 bytes, and are liberally converted to ints, whence the i32 pub type Revision = i32; + +/// Marker expressing the absence of a parent +/// +/// Independently of the actual representation, `NULL_REVISION` is guaranteed +/// to be smaller that all existing revisions. pub const NULL_REVISION: Revision = -1; /// Same as `mercurial.node.wdirrev`