rust-index: implement faster retain heads using a vec instead of a hashset
This is the same optimization that the C index does, we're only catching up
now because this showed up as slow in benchmarking.
--- a/rust/hg-core/src/dagops.rs Thu Dec 14 11:52:05 2023 +0100
+++ b/rust/hg-core/src/dagops.rs Wed Nov 29 10:04:41 2023 -0500
@@ -13,7 +13,7 @@
//! - Similarly *relative roots* of a collection of `Revision`, we mean those
//! whose parents, if any, don't belong to the collection.
use super::{Graph, GraphError, Revision, NULL_REVISION};
-use crate::ancestors::AncestorsIterator;
+use crate::{ancestors::AncestorsIterator, BaseRevision};
use std::collections::{BTreeSet, HashSet};
fn remove_parents<S: std::hash::BuildHasher>(
@@ -81,6 +81,32 @@
Ok(())
}
+/// Optimized version of `retain_heads` that expects an zeroed array of the size
+/// of the graph, to act as a faster but less space-efficient `HashSet`.
+///
+/// # Panics
+///
+/// Can panic if `not_heads` is shorten than the length of graph.
+pub fn retain_heads_fast(
+ graph: &impl Graph,
+ not_heads: &mut [bool],
+ filtered_revs: &HashSet<Revision>,
+) -> Result<(), GraphError> {
+ for idx in (0..not_heads.len()).rev() {
+ let rev = Revision(idx as BaseRevision);
+ if !not_heads[idx] && filtered_revs.contains(&rev) {
+ not_heads[idx] = true;
+ continue;
+ }
+ for parent in graph.parents(rev)?.iter() {
+ if *parent != NULL_REVISION {
+ not_heads[parent.0 as usize] = true;
+ }
+ }
+ }
+ Ok(())
+}
+
/// Roots of `revs`, passed as a `HashSet`
///
/// They are returned in arbitrary order
--- a/rust/hg-core/src/revlog/index.rs Thu Dec 14 11:52:05 2023 +0100
+++ b/rust/hg-core/src/revlog/index.rs Wed Nov 29 10:04:41 2023 -0500
@@ -1,4 +1,3 @@
-use std::collections::hash_map::RandomState;
use std::collections::{HashMap, HashSet};
use std::fmt::Debug;
use std::ops::Deref;
@@ -565,32 +564,24 @@
return Ok(self_head_revs.to_owned());
}
}
- let mut revs: HashSet<Revision, RandomState> =
- if filtered_revs.is_empty() {
- (0..self.len())
- .into_iter()
- .map(|i| Revision(i as BaseRevision))
- .collect()
- } else {
- (0..self.len())
- .into_iter()
- .filter_map(|i| {
- let r = Revision(i as BaseRevision);
- if filtered_revs.contains(&r) {
- None
- } else {
- Some(r)
- }
- })
- .collect()
- };
- dagops::retain_heads(self, &mut revs)?;
- if self.is_empty() {
- revs.insert(NULL_REVISION);
- }
- let mut as_vec: Vec<Revision> =
- revs.into_iter().map(Into::into).collect();
- as_vec.sort_unstable();
+
+ let as_vec = if self.is_empty() {
+ vec![NULL_REVISION]
+ } else {
+ let mut not_heads = vec![false; self.len()];
+ dagops::retain_heads_fast(self, &mut not_heads, filtered_revs)?;
+ not_heads
+ .into_iter()
+ .enumerate()
+ .filter_map(|(idx, is_not_head)| {
+ if is_not_head {
+ None
+ } else {
+ Some(Revision(idx as BaseRevision))
+ }
+ })
+ .collect()
+ };
*self
.head_revs
.write()