Mercurial > hg
changeset 51259:ed6683d4cb29
rust-index: implement faster retain heads using a vec instead of a hashset
This is the same optimization that the C index does, we're only catching up
now because this showed up as slow in benchmarking.
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Wed, 29 Nov 2023 10:04:41 -0500 |
parents | 8b89f7cc953a |
children | c4f1a790bda8 |
files | rust/hg-core/src/dagops.rs rust/hg-core/src/revlog/index.rs |
diffstat | 2 files changed, 45 insertions(+), 28 deletions(-) [+] |
line wrap: on
line diff
--- a/rust/hg-core/src/dagops.rs Thu Dec 14 11:52:05 2023 +0100 +++ b/rust/hg-core/src/dagops.rs Wed Nov 29 10:04:41 2023 -0500 @@ -13,7 +13,7 @@ //! - Similarly *relative roots* of a collection of `Revision`, we mean those //! whose parents, if any, don't belong to the collection. use super::{Graph, GraphError, Revision, NULL_REVISION}; -use crate::ancestors::AncestorsIterator; +use crate::{ancestors::AncestorsIterator, BaseRevision}; use std::collections::{BTreeSet, HashSet}; fn remove_parents<S: std::hash::BuildHasher>( @@ -81,6 +81,32 @@ Ok(()) } +/// Optimized version of `retain_heads` that expects an zeroed array of the size +/// of the graph, to act as a faster but less space-efficient `HashSet`. +/// +/// # Panics +/// +/// Can panic if `not_heads` is shorten than the length of graph. +pub fn retain_heads_fast( + graph: &impl Graph, + not_heads: &mut [bool], + filtered_revs: &HashSet<Revision>, +) -> Result<(), GraphError> { + for idx in (0..not_heads.len()).rev() { + let rev = Revision(idx as BaseRevision); + if !not_heads[idx] && filtered_revs.contains(&rev) { + not_heads[idx] = true; + continue; + } + for parent in graph.parents(rev)?.iter() { + if *parent != NULL_REVISION { + not_heads[parent.0 as usize] = true; + } + } + } + Ok(()) +} + /// Roots of `revs`, passed as a `HashSet` /// /// They are returned in arbitrary order
--- a/rust/hg-core/src/revlog/index.rs Thu Dec 14 11:52:05 2023 +0100 +++ b/rust/hg-core/src/revlog/index.rs Wed Nov 29 10:04:41 2023 -0500 @@ -1,4 +1,3 @@ -use std::collections::hash_map::RandomState; use std::collections::{HashMap, HashSet}; use std::fmt::Debug; use std::ops::Deref; @@ -565,32 +564,24 @@ return Ok(self_head_revs.to_owned()); } } - let mut revs: HashSet<Revision, RandomState> = - if filtered_revs.is_empty() { - (0..self.len()) - .into_iter() - .map(|i| Revision(i as BaseRevision)) - .collect() - } else { - (0..self.len()) - .into_iter() - .filter_map(|i| { - let r = Revision(i as BaseRevision); - if filtered_revs.contains(&r) { - None - } else { - Some(r) - } - }) - .collect() - }; - dagops::retain_heads(self, &mut revs)?; - if self.is_empty() { - revs.insert(NULL_REVISION); - } - let mut as_vec: Vec<Revision> = - revs.into_iter().map(Into::into).collect(); - as_vec.sort_unstable(); + + let as_vec = if self.is_empty() { + vec![NULL_REVISION] + } else { + let mut not_heads = vec![false; self.len()]; + dagops::retain_heads_fast(self, &mut not_heads, filtered_revs)?; + not_heads + .into_iter() + .enumerate() + .filter_map(|(idx, is_not_head)| { + if is_not_head { + None + } else { + Some(Revision(idx as BaseRevision)) + } + }) + .collect() + }; *self .head_revs .write()