diff rust/hg-core/src/revlog/index.rs @ 51229:1b23aaf5eb7b

rust-index: optimize find_gca_candidates() on fewer than 8 revisions. This is expected to be by far the most common case, given that, e.g., merging involves using it on two revisions. Using a `u8` instead of a `u64` as backing storage for the bitset divides the amount of RAM needed by 8. To state the obvious, on a repository with 10 million changesets, this saves 70MB (10 million × 7 bytes per changeset). It may also be slightly faster, because the smaller buffer is easier to allocate and provides better cache locality. An exhaustive listing of the traits implemented by both `u8` and `u64` might avoid the added duplication, but that can be done later and would need a replacement for the `MAX` consts.
author Georges Racinet <georges.racinet@octobus.net>
date Fri, 20 Oct 2023 09:12:22 +0200
parents 61a6ef876efd
children ca81cd96000a
line wrap: on
line diff
--- a/rust/hg-core/src/revlog/index.rs	Fri Oct 20 08:54:49 2023 +0200
+++ b/rust/hg-core/src/revlog/index.rs	Fri Oct 20 09:12:22 2023 +0200
@@ -1053,7 +1053,9 @@
 
         let revisions: Vec<Revision> = as_set.into_iter().collect();
 
-        if revisions.len() <= 63 {
+        if revisions.len() < 8 {
+            self.find_gca_candidates::<u8>(&revisions)
+        } else if revisions.len() < 64 {
             self.find_gca_candidates::<u64>(&revisions)
         } else {
             self.find_gca_candidates::<NonStaticPoisonableBitSet>(&revisions)
@@ -1314,6 +1316,7 @@
 }
 
 const U64_POISON: u64 = 1 << 63;
+const U8_POISON: u8 = 1 << 7; // top bit of the byte (0x80), used as the poison flag of the `u8` bit set
 
 impl PoisonableBitSet for u64 {
     fn vec_of_empty(_sets_size: usize, vec_len: usize) -> Vec<Self> {
@@ -1361,6 +1364,52 @@
     }
 }
 
+impl PoisonableBitSet for u8 { // one-byte bit set: bits 0..=6 hold members, bit 7 (`U8_POISON`) is the poison flag
+    fn vec_of_empty(_sets_size: usize, vec_len: usize) -> Vec<Self> { // `_sets_size` is ignored: the width is fixed by the type
+        vec![0; vec_len] // `vec_len` sets, each with no bit set
+    }
+
+    fn size(&self) -> usize { // constant; presumably the storage size in bytes -- confirm against the trait docs
+        1
+    }
+
+    fn capacity(&self) -> usize { // number of usable member bits
+        7 // 8 bits in a `u8`, minus bit 7 which `U8_POISON` occupies
+    }
+
+    fn add(&mut self, n: usize) { // sets bit `n`; `n == 7` would set the poison bit and `n >= 8` overflows the shift
+        (*self) |= 1 << n;
+    }
+
+    fn discard(&mut self, n: usize) { // clears bit `n`, leaving every other bit untouched
+        (*self) &= u8::MAX - (1 << n); // `u8::MAX - (1 << n)` is the mask with all bits set except bit `n`
+    }
+
+    fn union(&mut self, other: &Self) { // in-place bitwise OR with `other`
+        if *self != *other { // equal values would make the OR a no-op, so the write is skipped
+            (*self) |= *other;
+        }
+    }
+
+    fn is_full_range(&self, n: usize) -> bool { // true when exactly bits 0..n are set, i.e. the value is 2^n - 1
+        *self + 1 == (1 << n) // NOTE(review): `*self + 1` overflows if `*self` is `u8::MAX` (panic with overflow checks on)
+    }
+
+    fn is_empty(&self) -> bool { // no bit set at all, poison flag included
+        *self == 0
+    }
+
+    fn poison(&mut self) { // overwrites the whole byte: member bits are dropped, only the poison flag remains
+        *self = U8_POISON;
+    }
+
+    fn is_poisoned(&self) -> bool {
+        // Comparing for equality with `U8_POISON` would be tempting, but it would not
+        // survive operations applied after poisoning (even if those should be bogus).
+        *self >= U8_POISON // true whenever bit 7 is set, whatever the lower bits are
+    }
+}
+
 /// A poisonable bit set whose capacity is not known at compile time but
 /// is constant after initial construction
 ///