rust-index: optimize find_gca_candidates() on less than 8 revisions
This is expected to be by far the most common case, given that, e.g.,
merging involves using it on two revisions.
Using a `u8` as the backing storage for the bitset divides the amount of
RAM needed by 8. To state the obvious, on a repository with 10 million
changesets, this saves 70MB. It may also be slightly faster, because a
`u8` is easier to allocate and provides better cache locality.
It is possible that a blanket implementation over the traits shared by
`u8` and `u64` would avoid the added duplication, but that can be done
later and would need a replacement for the `MAX` consts.
--- a/rust/hg-core/src/revlog/index.rs Fri Oct 20 08:54:49 2023 +0200
+++ b/rust/hg-core/src/revlog/index.rs Fri Oct 20 09:12:22 2023 +0200
@@ -1053,7 +1053,9 @@
let revisions: Vec<Revision> = as_set.into_iter().collect();
- if revisions.len() <= 63 {
+ if revisions.len() < 8 {
+ self.find_gca_candidates::<u8>(&revisions)
+ } else if revisions.len() < 64 {
self.find_gca_candidates::<u64>(&revisions)
} else {
self.find_gca_candidates::<NonStaticPoisonableBitSet>(&revisions)
@@ -1314,6 +1316,7 @@
}
const U64_POISON: u64 = 1 << 63;
+const U8_POISON: u8 = 1 << 7;
impl PoisonableBitSet for u64 {
fn vec_of_empty(_sets_size: usize, vec_len: usize) -> Vec<Self> {
@@ -1361,6 +1364,52 @@
}
}
+impl PoisonableBitSet for u8 {
+ fn vec_of_empty(_sets_size: usize, vec_len: usize) -> Vec<Self> {
+ vec![0; vec_len]
+ }
+
+ fn size(&self) -> usize {
+ 1
+ }
+
+ fn capacity(&self) -> usize {
+ 7
+ }
+
+ fn add(&mut self, n: usize) {
+ (*self) |= 1 << n;
+ }
+
+ fn discard(&mut self, n: usize) {
+ (*self) &= u8::MAX - (1 << n);
+ }
+
+ fn union(&mut self, other: &Self) {
+ if *self != *other {
+ (*self) |= *other;
+ }
+ }
+
+ fn is_full_range(&self, n: usize) -> bool {
+ *self + 1 == (1 << n)
+ }
+
+ fn is_empty(&self) -> bool {
+ *self == 0
+ }
+
+ fn poison(&mut self) {
+ *self = U8_POISON;
+ }
+
+ fn is_poisoned(&self) -> bool {
+ // equality comparison would be tempting but would not resist
+ // operations after poisoning (even if these should be bogus).
+ *self >= U8_POISON
+ }
+}
+
/// A poisonable bit set whose capacity is not known at compile time but
/// is constant after initial construction
///
--- a/rust/hg-cpython/src/revlog.rs Fri Oct 20 08:54:49 2023 +0200
+++ b/rust/hg-cpython/src/revlog.rs Fri Oct 20 09:12:22 2023 +0200
@@ -20,7 +20,10 @@
};
use hg::{
errors::HgError,
- index::{IndexHeader, Phase, RevisionDataParams, SnapshotsCache},
+ index::{
+ IndexHeader, Phase, RevisionDataParams, SnapshotsCache,
+ INDEX_ENTRY_SIZE,
+ },
nodemap::{Block, NodeMapError, NodeTree},
revlog::{nodemap::NodeMap, NodePrefix, RevlogError, RevlogIndex},
BaseRevision, Revision, UncheckedRevision, NULL_REVISION,
@@ -483,12 +486,26 @@
@property
def entry_size(&self) -> PyResult<PyInt> {
- self.cindex(py).borrow().inner().getattr(py, "entry_size")?.extract::<PyInt>(py)
+ let rust_res: PyInt = INDEX_ENTRY_SIZE.to_py_object(py);
+
+ let c_res = self.cindex(py).borrow().inner()
+ .getattr(py, "entry_size")?;
+ assert_py_eq(py, "entry_size", rust_res.as_object(), &c_res)?;
+
+ Ok(rust_res)
}
@property
def rust_ext_compat(&self) -> PyResult<PyInt> {
- self.cindex(py).borrow().inner().getattr(py, "rust_ext_compat")?.extract::<PyInt>(py)
+ // will be entirely removed once the Rust index replaces the C index;
+ // still useful to implement in Rust to detangle things when removing `self.cindex`
+ let rust_res: PyInt = 1.to_py_object(py);
+
+ let c_res = self.cindex(py).borrow().inner()
+ .getattr(py, "rust_ext_compat")?;
+ assert_py_eq(py, "rust_ext_compat", rust_res.as_object(), &c_res)?;
+
+ Ok(rust_res)
}
});
@@ -671,7 +688,7 @@
let rust_index_len = self.index(py).borrow().len();
let cindex_len = self.cindex(py).borrow().inner().len(py)?;
assert_eq!(rust_index_len, cindex_len);
- Ok(cindex_len)
+ Ok(rust_index_len)
}
/// This is scaffolding at this point, but it could also become