changeset 51260:c4f1a790bda8

rust-index: use a `BitVec` instead of plain `Vec` for heads computation The `Vec` method uses one byte per revision, this uses 1 per 8 revisions, which improves our memory footprint. For large graphs (10+ millions), this can make a measurable difference server-side. I have seen no measurable impact on execution speed.
author Raphaël Gomès <rgomes@octobus.net>
date Wed, 29 Nov 2023 15:58:24 -0500
parents ed6683d4cb29
children 9088c6d65ef6
files rust/Cargo.lock rust/hg-core/Cargo.toml rust/hg-core/src/dagops.rs rust/hg-core/src/revlog/index.rs
diffstat 4 files changed, 55 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
--- a/rust/Cargo.lock	Wed Nov 29 10:04:41 2023 -0500
+++ b/rust/Cargo.lock	Wed Nov 29 15:58:24 2023 -0500
@@ -70,6 +70,18 @@
 ]
 
 [[package]]
+name = "bitvec"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c"
+dependencies = [
+ "funty",
+ "radium",
+ "tap",
+ "wyz",
+]
+
+[[package]]
 name = "block-buffer"
 version = "0.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -443,6 +455,12 @@
 ]
 
 [[package]]
+name = "funty"
+version = "2.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c"
+
+[[package]]
 name = "generic-array"
 version = "0.14.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -516,6 +534,7 @@
 version = "0.1.0"
 dependencies = [
  "bitflags",
+ "bitvec",
  "byteorder",
  "bytes-cast",
  "clap",
@@ -915,6 +934,12 @@
 ]
 
 [[package]]
+name = "radium"
+version = "0.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09"
+
+[[package]]
 name = "rand"
 version = "0.7.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -1226,6 +1251,12 @@
 ]
 
 [[package]]
+name = "tap"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"
+
+[[package]]
 name = "tempfile"
 version = "3.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -1489,6 +1520,15 @@
 checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
 
 [[package]]
+name = "wyz"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed"
+dependencies = [
+ "tap",
+]
+
+[[package]]
 name = "yansi"
 version = "0.5.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
--- a/rust/hg-core/Cargo.toml	Wed Nov 29 10:04:41 2023 -0500
+++ b/rust/hg-core/Cargo.toml	Wed Nov 29 15:58:24 2023 -0500
@@ -39,6 +39,7 @@
 zstd = "0.12"
 format-bytes = "0.3.0"
 once_cell = "1.16.0"
+bitvec = "1.0.1"
 
 # We don't use the `miniz-oxide` backend to not change rhg benchmarks and until
 # we have a clearer view of which backend is the fastest.
--- a/rust/hg-core/src/dagops.rs	Wed Nov 29 10:04:41 2023 -0500
+++ b/rust/hg-core/src/dagops.rs	Wed Nov 29 15:58:24 2023 -0500
@@ -12,6 +12,8 @@
 //!   mean those revisions that have no children among the collection.
 //! - Similarly *relative roots* of a collection of `Revision`, we mean those
 //!   whose parents, if any, don't belong to the collection.
+use bitvec::slice::BitSlice;
+
 use super::{Graph, GraphError, Revision, NULL_REVISION};
 use crate::{ancestors::AncestorsIterator, BaseRevision};
 use std::collections::{BTreeSet, HashSet};
@@ -81,26 +83,26 @@
     Ok(())
 }
 
-/// Optimized version of `retain_heads` that expects an zeroed array of the size
-/// of the graph, to act as a faster but less space-efficient `HashSet`.
+/// Optimized version of `retain_heads` that expects an zeroed bitvec of the
+/// size of the graph, to act as a faster but less space-efficient `HashSet`.
 ///
 /// # Panics
 ///
 /// Can panic if `not_heads` is shorten than the length of graph.
 pub fn retain_heads_fast(
     graph: &impl Graph,
-    not_heads: &mut [bool],
+    not_heads: &mut BitSlice,
     filtered_revs: &HashSet<Revision>,
 ) -> Result<(), GraphError> {
     for idx in (0..not_heads.len()).rev() {
         let rev = Revision(idx as BaseRevision);
         if !not_heads[idx] && filtered_revs.contains(&rev) {
-            not_heads[idx] = true;
+            not_heads.get_mut(idx).unwrap().commit(true);
             continue;
         }
         for parent in graph.parents(rev)?.iter() {
             if *parent != NULL_REVISION {
-                not_heads[parent.0 as usize] = true;
+                not_heads.get_mut(parent.0 as usize).unwrap().commit(true);
             }
         }
     }
--- a/rust/hg-core/src/revlog/index.rs	Wed Nov 29 10:04:41 2023 -0500
+++ b/rust/hg-core/src/revlog/index.rs	Wed Nov 29 15:58:24 2023 -0500
@@ -3,6 +3,7 @@
 use std::ops::Deref;
 use std::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard};
 
+use bitvec::prelude::*;
 use byteorder::{BigEndian, ByteOrder};
 use bytes_cast::{unaligned, BytesCast};
 
@@ -568,8 +569,12 @@
         let as_vec = if self.is_empty() {
             vec![NULL_REVISION]
         } else {
-            let mut not_heads = vec![false; self.len()];
-            dagops::retain_heads_fast(self, &mut not_heads, filtered_revs)?;
+            let mut not_heads = bitvec![0; self.len()];
+            dagops::retain_heads_fast(
+                self,
+                not_heads.as_mut_bitslice(),
+                filtered_revs,
+            )?;
             not_heads
                 .into_iter()
                 .enumerate()