Mercurial > hg-stable
changeset 51281:c4f1a790bda8
rust-index: use a `BitVec` instead of plain `Vec` for heads computation
The `Vec` method uses one byte per revision, this uses 1 per 8 revisions,
which improves our memory footprint. For large graphs (10+ millions), this
can make a measurable difference server-side.
I have seen no measurable impact on execution speed.
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Wed, 29 Nov 2023 15:58:24 -0500 |
parents | ed6683d4cb29 |
children | 9088c6d65ef6 |
files | rust/Cargo.lock rust/hg-core/Cargo.toml rust/hg-core/src/dagops.rs rust/hg-core/src/revlog/index.rs |
diffstat | 4 files changed, 55 insertions(+), 7 deletions(-) [+] |
line wrap: on
line diff
--- a/rust/Cargo.lock Wed Nov 29 10:04:41 2023 -0500 +++ b/rust/Cargo.lock Wed Nov 29 15:58:24 2023 -0500 @@ -70,6 +70,18 @@ ] [[package]] +name = "bitvec" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" +dependencies = [ + "funty", + "radium", + "tap", + "wyz", +] + +[[package]] name = "block-buffer" version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -443,6 +455,12 @@ ] [[package]] +name = "funty" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" + +[[package]] name = "generic-array" version = "0.14.6" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -516,6 +534,7 @@ version = "0.1.0" dependencies = [ "bitflags", + "bitvec", "byteorder", "bytes-cast", "clap", @@ -915,6 +934,12 @@ ] [[package]] +name = "radium" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" + +[[package]] name = "rand" version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -1226,6 +1251,12 @@ ] [[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + +[[package]] name = "tempfile" version = "3.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -1489,6 +1520,15 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] +name = "wyz" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" +dependencies = [ + "tap", +] + +[[package]] name = "yansi" version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index"
--- a/rust/hg-core/Cargo.toml Wed Nov 29 10:04:41 2023 -0500 +++ b/rust/hg-core/Cargo.toml Wed Nov 29 15:58:24 2023 -0500 @@ -39,6 +39,7 @@ zstd = "0.12" format-bytes = "0.3.0" once_cell = "1.16.0" +bitvec = "1.0.1" # We don't use the `miniz-oxide` backend to not change rhg benchmarks and until # we have a clearer view of which backend is the fastest.
--- a/rust/hg-core/src/dagops.rs Wed Nov 29 10:04:41 2023 -0500 +++ b/rust/hg-core/src/dagops.rs Wed Nov 29 15:58:24 2023 -0500 @@ -12,6 +12,8 @@ //! mean those revisions that have no children among the collection. //! - Similarly *relative roots* of a collection of `Revision`, we mean those //! whose parents, if any, don't belong to the collection. +use bitvec::slice::BitSlice; + use super::{Graph, GraphError, Revision, NULL_REVISION}; use crate::{ancestors::AncestorsIterator, BaseRevision}; use std::collections::{BTreeSet, HashSet}; @@ -81,26 +83,26 @@ Ok(()) } -/// Optimized version of `retain_heads` that expects an zeroed array of the size -/// of the graph, to act as a faster but less space-efficient `HashSet`. +/// Optimized version of `retain_heads` that expects an zeroed bitvec of the +/// size of the graph, to act as a faster but less space-efficient `HashSet`. /// /// # Panics /// /// Can panic if `not_heads` is shorten than the length of graph. pub fn retain_heads_fast( graph: &impl Graph, - not_heads: &mut [bool], + not_heads: &mut BitSlice, filtered_revs: &HashSet<Revision>, ) -> Result<(), GraphError> { for idx in (0..not_heads.len()).rev() { let rev = Revision(idx as BaseRevision); if !not_heads[idx] && filtered_revs.contains(&rev) { - not_heads[idx] = true; + not_heads.get_mut(idx).unwrap().commit(true); continue; } for parent in graph.parents(rev)?.iter() { if *parent != NULL_REVISION { - not_heads[parent.0 as usize] = true; + not_heads.get_mut(parent.0 as usize).unwrap().commit(true); } } }
--- a/rust/hg-core/src/revlog/index.rs Wed Nov 29 10:04:41 2023 -0500 +++ b/rust/hg-core/src/revlog/index.rs Wed Nov 29 15:58:24 2023 -0500 @@ -3,6 +3,7 @@ use std::ops::Deref; use std::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard}; +use bitvec::prelude::*; use byteorder::{BigEndian, ByteOrder}; use bytes_cast::{unaligned, BytesCast}; @@ -568,8 +569,12 @@ let as_vec = if self.is_empty() { vec![NULL_REVISION] } else { - let mut not_heads = vec![false; self.len()]; - dagops::retain_heads_fast(self, &mut not_heads, filtered_revs)?; + let mut not_heads = bitvec![0; self.len()]; + dagops::retain_heads_fast( + self, + not_heads.as_mut_bitslice(), + filtered_revs, + )?; not_heads .into_iter() .enumerate()