# HG changeset patch # User Raphaël Gomès # Date 1701270281 18000 # Node ID ed6683d4cb294fcf69cb9d005be47537a1e3775c # Parent 8b89f7cc953a6ae56a0af3a470e1aa016bcb20a1 rust-index: implement faster retain heads using a vec instead of a hashset This is the same optimization that the C index does, we're only catching up now because this showed up as slow in benchmarking. diff -r 8b89f7cc953a -r ed6683d4cb29 rust/hg-core/src/dagops.rs --- a/rust/hg-core/src/dagops.rs Thu Dec 14 11:52:05 2023 +0100 +++ b/rust/hg-core/src/dagops.rs Wed Nov 29 10:04:41 2023 -0500 @@ -13,7 +13,7 @@ //! - Similarly *relative roots* of a collection of `Revision`, we mean those //! whose parents, if any, don't belong to the collection. use super::{Graph, GraphError, Revision, NULL_REVISION}; -use crate::ancestors::AncestorsIterator; +use crate::{ancestors::AncestorsIterator, BaseRevision}; use std::collections::{BTreeSet, HashSet}; fn remove_parents( @@ -81,6 +81,32 @@ Ok(()) } +/// Optimized version of `retain_heads` that expects an zeroed array of the size +/// of the graph, to act as a faster but less space-efficient `HashSet`. +/// +/// # Panics +/// +/// Can panic if `not_heads` is shorten than the length of graph. +pub fn retain_heads_fast( + graph: &impl Graph, + not_heads: &mut [bool], + filtered_revs: &HashSet, +) -> Result<(), GraphError> { + for idx in (0..not_heads.len()).rev() { + let rev = Revision(idx as BaseRevision); + if !not_heads[idx] && filtered_revs.contains(&rev) { + not_heads[idx] = true; + continue; + } + for parent in graph.parents(rev)?.iter() { + if *parent != NULL_REVISION { + not_heads[parent.0 as usize] = true; + } + } + } + Ok(()) +} + /// Roots of `revs`, passed as a `HashSet` /// /// They are returned in arbitrary order diff -r 8b89f7cc953a -r ed6683d4cb29 rust/hg-core/src/revlog/index.rs --- a/rust/hg-core/src/revlog/index.rs Thu Dec 14 11:52:05 2023 +0100 +++ b/rust/hg-core/src/revlog/index.rs Wed Nov 29 10:04:41 2023 -0500 @@ -1,4 +1,3 @@ -use std::collections::hash_map::RandomState; use std::collections::{HashMap, HashSet}; use std::fmt::Debug; use std::ops::Deref; @@ -565,32 +564,24 @@ return Ok(self_head_revs.to_owned()); } } - let mut revs: HashSet = - if filtered_revs.is_empty() { - (0..self.len()) - .into_iter() - .map(|i| Revision(i as BaseRevision)) - .collect() - } else { - (0..self.len()) - .into_iter() - .filter_map(|i| { - let r = Revision(i as BaseRevision); - if filtered_revs.contains(&r) { - None - } else { - Some(r) - } - }) - .collect() - }; - dagops::retain_heads(self, &mut revs)?; - if self.is_empty() { - revs.insert(NULL_REVISION); - } - let mut as_vec: Vec = - revs.into_iter().map(Into::into).collect(); - as_vec.sort_unstable(); + + let as_vec = if self.is_empty() { + vec![NULL_REVISION] + } else { + let mut not_heads = vec![false; self.len()]; + dagops::retain_heads_fast(self, &mut not_heads, filtered_revs)?; + not_heads + .into_iter() + .enumerate() + .filter_map(|(idx, is_not_head)| { + if is_not_head { + None + } else { + Some(Revision(idx as BaseRevision)) + } + }) + .collect() + }; *self .head_revs .write()