# HG changeset patch # User Raphaël Gomès # Date 1698658488 -3600 # Node ID 4e6620b7fbbb1d38a747cdf1fcc4f3793b96f3ad # Parent f95f70cf2ee21383aa09c943e0da706e07c9d01c rust-index: support cache clearing I'm not 100% sure how useful it is outside of perf, but it's still worth implementing. diff -r f95f70cf2ee2 -r 4e6620b7fbbb rust/hg-core/src/revlog/index.rs --- a/rust/hg-core/src/revlog/index.rs Thu Jun 29 11:37:19 2023 +0200 +++ b/rust/hg-core/src/revlog/index.rs Mon Oct 30 10:34:48 2023 +0100 @@ -1,5 +1,6 @@ use std::fmt::Debug; use std::ops::Deref; +use std::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard}; use byteorder::{BigEndian, ByteOrder}; use bytes_cast::{unaligned, BytesCast}; @@ -225,8 +226,9 @@ bytes: IndexData, /// Offsets of starts of index blocks. /// Only needed when the index is interleaved with data. - offsets: Option<Vec<usize>>, + offsets: RwLock<Option<Vec<usize>>>, uses_generaldelta: bool, + is_inline: bool, } impl Debug for Index { @@ -294,8 +296,9 @@ if offset == bytes.len() { Ok(Self { bytes: IndexData::new(bytes), - offsets: Some(offsets), + offsets: RwLock::new(Some(offsets)), uses_generaldelta, + is_inline: true, }) } else { Err(HgError::corrupted("unexpected inline revlog length")) @@ -303,8 +306,9 @@ } else { Ok(Self { bytes: IndexData::new(bytes), - offsets: None, + offsets: RwLock::new(None), uses_generaldelta, + is_inline: false, }) } } @@ -315,7 +319,7 @@ /// Value of the inline flag. pub fn is_inline(&self) -> bool { - self.offsets.is_some() + self.is_inline } /// Return a slice of bytes if `revlog` is inline. Panic if not. @@ -328,13 +332,35 @@ /// Return number of entries of the revlog index. pub fn len(&self) -> usize { - if let Some(offsets) = &self.offsets { + if let Some(offsets) = &*self.get_offsets() { offsets.len() } else { self.bytes.len() / INDEX_ENTRY_SIZE } } + pub fn get_offsets(&self) -> RwLockReadGuard<Option<Vec<usize>>> { + if self.is_inline() { + { + // Wrap in a block to drop the read guard + // TODO perf? 
+ let mut offsets = self.offsets.write().unwrap(); + if offsets.is_none() { + offsets.replace(inline_scan(&self.bytes.bytes).1); + } + } + } + self.offsets.read().unwrap() + } + + pub fn get_offsets_mut(&mut self) -> RwLockWriteGuard<Option<Vec<usize>>> { + let mut offsets = self.offsets.write().unwrap(); + if self.is_inline() && offsets.is_none() { + offsets.replace(inline_scan(&self.bytes.bytes).1); + } + offsets + } + /// Returns `true` if the `Index` has zero `entries`. pub fn is_empty(&self) -> bool { self.len() == 0 @@ -346,8 +372,8 @@ if rev == NULL_REVISION { return None; } - Some(if let Some(offsets) = &self.offsets { - self.get_entry_inline(rev, offsets) + Some(if let Some(offsets) = &*self.get_offsets() { + self.get_entry_inline(rev, offsets.as_ref()) } else { self.get_entry_separated(rev) }) @@ -393,7 +419,7 @@ ) -> Result<(), RevlogError> { revision_data.validate()?; let new_offset = self.bytes.len(); - if let Some(offsets) = self.offsets.as_mut() { + if let Some(offsets) = &mut *self.get_offsets_mut() { offsets.push(new_offset) } self.bytes.added.extend(revision_data.into_v1().as_bytes()); @@ -401,12 +427,37 @@ } pub fn remove(&mut self, rev: Revision) -> Result<(), RevlogError> { - self.bytes.remove(rev, self.offsets.as_deref())?; - if let Some(offsets) = self.offsets.as_mut() { + let offsets = self.get_offsets().clone(); + self.bytes.remove(rev, offsets.as_deref())?; + if let Some(offsets) = &mut *self.get_offsets_mut() { offsets.truncate(rev.0 as usize) } Ok(()) } + + pub fn clear_caches(&mut self) { + // We need to get the 'inline' value from Python at init and use this + // instead of offsets to determine whether we're inline since we might + // clear caches. This implies re-populating the offsets on-demand. 
+ self.offsets = RwLock::new(None); + } +} + +fn inline_scan(bytes: &[u8]) -> (usize, Vec<usize>) { + let mut offset: usize = 0; + let mut offsets = Vec::new(); + + while offset + INDEX_ENTRY_SIZE <= bytes.len() { + offsets.push(offset); + let end = offset + INDEX_ENTRY_SIZE; + let entry = IndexEntry { + bytes: &bytes[offset..end], + offset_override: None, + }; + + offset += INDEX_ENTRY_SIZE + entry.compressed_len() as usize; + } + (offset, offsets) } impl super::RevlogIndex for Index { diff -r f95f70cf2ee2 -r 4e6620b7fbbb rust/hg-cpython/src/revlog.rs --- a/rust/hg-cpython/src/revlog.rs Thu Jun 29 11:37:19 2023 +0200 +++ b/rust/hg-cpython/src/revlog.rs Mon Oct 30 10:34:48 2023 +0100 @@ -205,6 +205,7 @@ self.nt(py).borrow_mut().take(); self.docket(py).borrow_mut().take(); self.nodemap_mmap(py).borrow_mut().take(); + self.index(py).borrow_mut().clear_caches(); self.call_cindex(py, "clearcaches", args, kw) }