rust: itering less on MissingAncestors.bases for max()
Instead of iterating on the whole `self.bases` each time to find
its max, we keep the latter in a separate member attribute and
keep it up to date in `add_bases()`
On a perfdiscovery done on PyPy, with repos prepared with
`contrib/discovery-helper.sh 50 100`, this gives a slight
improvement (around 0.5% on wall time, but 10% on CPU)
before:
! wall 0.172801 comb 0.180000 user 0.180000 sys 0.000000 (median of 541)
after:
! wall 0.171798 comb 0.160000 user 0.160000 sys 0.000000 (median of 551)
(perf command run time upped because of bigger variability during this test).
Differential Revision: https://phab.mercurial-scm.org/D5945
--- a/rust/hg-core/src/ancestors.rs Tue Feb 05 10:28:32 2019 +0100
+++ b/rust/hg-core/src/ancestors.rs Mon Feb 04 19:46:57 2019 +0100
@@ -38,6 +38,7 @@
pub struct MissingAncestors<G: Graph> {
graph: G,
bases: HashSet<Revision>,
+ max_base: Revision,
}
impl<G: Graph> AncestorsIterator<G> {
@@ -209,7 +210,13 @@
impl<G: Graph> MissingAncestors<G> {
pub fn new(graph: G, bases: impl IntoIterator<Item = Revision>) -> Self {
- MissingAncestors { graph: graph, bases: bases.into_iter().collect() }
+ let mut created = MissingAncestors {
+ graph: graph,
+ bases: HashSet::new(),
+ max_base: NULL_REVISION,
+ };
+ created.add_bases(bases);
+ created
}
pub fn has_bases(&self) -> bool {
@@ -232,17 +239,33 @@
}
/// Consumes the object and returns the relative heads of its bases.
- pub fn into_bases_heads(mut self) -> Result<HashSet<Revision>, GraphError> {
+ pub fn into_bases_heads(
+ mut self,
+ ) -> Result<HashSet<Revision>, GraphError> {
dagops::retain_heads(&self.graph, &mut self.bases)?;
Ok(self.bases)
}
+ /// Add some revisions to `self.bases`
+ ///
+ /// Takes care of keeping `self.max_base` up to date.
pub fn add_bases(
&mut self,
new_bases: impl IntoIterator<Item = Revision>,
) {
- self.bases
- .extend(new_bases.into_iter().filter(|&rev| rev != NULL_REVISION));
+ let mut max_base = self.max_base;
+ self.bases.extend(
+ new_bases
+ .into_iter()
+ .filter(|&rev| rev != NULL_REVISION)
+ .map(|r| {
+ if r > max_base {
+ max_base = r;
+ }
+ r
+ }),
+ );
+ self.max_base = max_base;
}
/// Remove all ancestors of self.bases from the revs set (in place)
@@ -261,20 +284,16 @@
}
// anything in revs > start is definitely not an ancestor of bases
// revs <= start need to be investigated
- // TODO optim: if a missingancestors is to be used several times,
- // we shouldn't need to iterate each time on bases
- let start = match self.bases.iter().cloned().max() {
- Some(m) => m,
- None => { // self.bases is empty
- return Ok(());
- }
- };
+ if self.max_base == NULL_REVISION {
+ return Ok(());
+ }
+
// whatever happens, we'll keep at least keepcount of them
// knowing this gives us a earlier stop condition than
// going all the way to the root
- let keepcount = revs.iter().filter(|r| **r > start).count();
+ let keepcount = revs.iter().filter(|r| **r > self.max_base).count();
- let mut curr = start;
+ let mut curr = self.max_base;
while curr != NULL_REVISION && revs.len() > keepcount {
if self.bases.contains(&curr) {
revs.remove(&curr);
@@ -285,12 +304,17 @@
Ok(())
}
- /// Add rev's parents to self.bases
+ /// Add the parents of `rev` to `self.bases`
+ ///
+ /// This has no effect on `self.max_base`
#[inline]
fn add_parents(&mut self, rev: Revision) -> Result<(), GraphError> {
- // No need to bother the set with inserting NULL_REVISION over and
- // over
+ if rev == NULL_REVISION {
+ return Ok(());
+ }
for p in self.graph.parents(rev)?.iter().cloned() {
+ // No need to bother the set with inserting NULL_REVISION over and
+ // over
if p != NULL_REVISION {
self.bases.insert(p);
}
@@ -320,12 +344,8 @@
if revs_visit.is_empty() {
return Ok(Vec::new());
}
-
- let max_bases =
- bases_visit.iter().cloned().max().unwrap_or(NULL_REVISION);
- let max_revs =
- revs_visit.iter().cloned().max().unwrap_or(NULL_REVISION);
- let start = max(max_bases, max_revs);
+ let max_revs = revs_visit.iter().cloned().max().unwrap();
+ let start = max(self.max_base, max_revs);
// TODO heuristics for with_capacity()?
let mut missing: Vec<Revision> = Vec::new();
@@ -571,11 +591,13 @@
missing_ancestors.get_bases().iter().cloned().collect();
as_vec.sort();
assert_eq!(as_vec, [1, 3, 5]);
+ assert_eq!(missing_ancestors.max_base, 5);
missing_ancestors.add_bases([3, 7, 8].iter().cloned());
as_vec = missing_ancestors.get_bases().iter().cloned().collect();
as_vec.sort();
assert_eq!(as_vec, [1, 3, 5, 7, 8]);
+ assert_eq!(missing_ancestors.max_base, 8);
as_vec = missing_ancestors.bases_heads()?.iter().cloned().collect();
as_vec.sort();
--- a/rust/hg-core/src/dagops.rs Tue Feb 05 10:28:32 2019 +0100
+++ b/rust/hg-core/src/dagops.rs Mon Feb 04 19:46:57 2019 +0100
@@ -46,7 +46,9 @@
let mut heads: HashSet<Revision> = iter_revs.clone().cloned().collect();
heads.remove(&NULL_REVISION);
for rev in iter_revs {
- remove_parents(graph, *rev, &mut heads)?;
+ if *rev != NULL_REVISION {
+ remove_parents(graph, *rev, &mut heads)?;
+ }
}
Ok(heads)
}
@@ -71,7 +73,9 @@
// mutating
let as_vec: Vec<Revision> = revs.iter().cloned().collect();
for rev in as_vec {
- remove_parents(graph, rev, revs)?;
+ if rev != NULL_REVISION {
+ remove_parents(graph, rev, revs)?;
+ }
}
Ok(())
}
--- a/rust/hg-core/src/lib.rs Tue Feb 05 10:28:32 2019 +0100
+++ b/rust/hg-core/src/lib.rs Mon Feb 04 19:46:57 2019 +0100
@@ -13,6 +13,11 @@
/// 4 bytes, and are liberally converted to ints, whence the i32
pub type Revision = i32;
+
+/// Marker expressing the absence of a parent
+///
+/// Independently of the actual representation, `NULL_REVISION` is guaranteed
+/// to be smaller that all existing revisions.
pub const NULL_REVISION: Revision = -1;
/// Same as `mercurial.node.wdirrev`