rust-index: use a `BitVec` instead of plain `Vec` for heads computation
The `Vec` method uses one byte per revision, this uses 1 per 8 revisions,
which improves our memory footprint. For large graphs (10+ millions), this
can make a measurable difference server-side.
I have seen no measurable impact on execution speed.
--- a/rust/Cargo.lock Wed Nov 29 10:04:41 2023 -0500
+++ b/rust/Cargo.lock Wed Nov 29 15:58:24 2023 -0500
@@ -70,6 +70,18 @@
]
[[package]]
+name = "bitvec"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c"
+dependencies = [
+ "funty",
+ "radium",
+ "tap",
+ "wyz",
+]
+
+[[package]]
name = "block-buffer"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -443,6 +455,12 @@
]
[[package]]
+name = "funty"
+version = "2.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c"
+
+[[package]]
name = "generic-array"
version = "0.14.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -516,6 +534,7 @@
version = "0.1.0"
dependencies = [
"bitflags",
+ "bitvec",
"byteorder",
"bytes-cast",
"clap",
@@ -915,6 +934,12 @@
]
[[package]]
+name = "radium"
+version = "0.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09"
+
+[[package]]
name = "rand"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -1226,6 +1251,12 @@
]
[[package]]
+name = "tap"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"
+
+[[package]]
name = "tempfile"
version = "3.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -1489,6 +1520,15 @@
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
+name = "wyz"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed"
+dependencies = [
+ "tap",
+]
+
+[[package]]
name = "yansi"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
--- a/rust/hg-core/Cargo.toml Wed Nov 29 10:04:41 2023 -0500
+++ b/rust/hg-core/Cargo.toml Wed Nov 29 15:58:24 2023 -0500
@@ -39,6 +39,7 @@
zstd = "0.12"
format-bytes = "0.3.0"
once_cell = "1.16.0"
+bitvec = "1.0.1"
# We don't use the `miniz-oxide` backend to not change rhg benchmarks and until
# we have a clearer view of which backend is the fastest.
--- a/rust/hg-core/src/dagops.rs Wed Nov 29 10:04:41 2023 -0500
+++ b/rust/hg-core/src/dagops.rs Wed Nov 29 15:58:24 2023 -0500
@@ -12,6 +12,8 @@
//! mean those revisions that have no children among the collection.
//! - Similarly *relative roots* of a collection of `Revision`, we mean those
//! whose parents, if any, don't belong to the collection.
+use bitvec::slice::BitSlice;
+
use super::{Graph, GraphError, Revision, NULL_REVISION};
use crate::{ancestors::AncestorsIterator, BaseRevision};
use std::collections::{BTreeSet, HashSet};
@@ -81,26 +83,26 @@
Ok(())
}
-/// Optimized version of `retain_heads` that expects an zeroed array of the size
-/// of the graph, to act as a faster but less space-efficient `HashSet`.
+/// Optimized version of `retain_heads` that expects an zeroed bitvec of the
+/// size of the graph, to act as a faster but less space-efficient `HashSet`.
///
/// # Panics
///
/// Can panic if `not_heads` is shorten than the length of graph.
pub fn retain_heads_fast(
graph: &impl Graph,
- not_heads: &mut [bool],
+ not_heads: &mut BitSlice,
filtered_revs: &HashSet<Revision>,
) -> Result<(), GraphError> {
for idx in (0..not_heads.len()).rev() {
let rev = Revision(idx as BaseRevision);
if !not_heads[idx] && filtered_revs.contains(&rev) {
- not_heads[idx] = true;
+ not_heads.get_mut(idx).unwrap().commit(true);
continue;
}
for parent in graph.parents(rev)?.iter() {
if *parent != NULL_REVISION {
- not_heads[parent.0 as usize] = true;
+ not_heads.get_mut(parent.0 as usize).unwrap().commit(true);
}
}
}
--- a/rust/hg-core/src/revlog/index.rs Wed Nov 29 10:04:41 2023 -0500
+++ b/rust/hg-core/src/revlog/index.rs Wed Nov 29 15:58:24 2023 -0500
@@ -3,6 +3,7 @@
use std::ops::Deref;
use std::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard};
+use bitvec::prelude::*;
use byteorder::{BigEndian, ByteOrder};
use bytes_cast::{unaligned, BytesCast};
@@ -568,8 +569,12 @@
let as_vec = if self.is_empty() {
vec![NULL_REVISION]
} else {
- let mut not_heads = vec![false; self.len()];
- dagops::retain_heads_fast(self, &mut not_heads, filtered_revs)?;
+ let mut not_heads = bitvec![0; self.len()];
+ dagops::retain_heads_fast(
+ self,
+ not_heads.as_mut_bitslice(),
+ filtered_revs,
+ )?;
not_heads
.into_iter()
.enumerate()