changeset 51197:4e6620b7fbbb

rust-index: support cache clearing. I'm not 100% sure how useful it is outside of perf, but it's still worth implementing.
author Raphaël Gomès <rgomes@octobus.net>
date Mon, 30 Oct 2023 10:34:48 +0100
parents f95f70cf2ee2
children 51cc12158f97
files rust/hg-core/src/revlog/index.rs rust/hg-cpython/src/revlog.rs
diffstat 2 files changed, 62 insertions(+), 10 deletions(-) [+]
line wrap: on
line diff
--- a/rust/hg-core/src/revlog/index.rs	Thu Jun 29 11:37:19 2023 +0200
+++ b/rust/hg-core/src/revlog/index.rs	Mon Oct 30 10:34:48 2023 +0100
@@ -1,5 +1,6 @@
 use std::fmt::Debug;
 use std::ops::Deref;
+use std::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard};
 
 use byteorder::{BigEndian, ByteOrder};
 use bytes_cast::{unaligned, BytesCast};
@@ -225,8 +226,9 @@
     bytes: IndexData,
     /// Offsets of starts of index blocks.
     /// Only needed when the index is interleaved with data.
-    offsets: Option<Vec<usize>>,
+    offsets: RwLock<Option<Vec<usize>>>,
     uses_generaldelta: bool,
+    is_inline: bool,
 }
 
 impl Debug for Index {
@@ -294,8 +296,9 @@
             if offset == bytes.len() {
                 Ok(Self {
                     bytes: IndexData::new(bytes),
-                    offsets: Some(offsets),
+                    offsets: RwLock::new(Some(offsets)),
                     uses_generaldelta,
+                    is_inline: true,
                 })
             } else {
                 Err(HgError::corrupted("unexpected inline revlog length"))
@@ -303,8 +306,9 @@
         } else {
             Ok(Self {
                 bytes: IndexData::new(bytes),
-                offsets: None,
+                offsets: RwLock::new(None),
                 uses_generaldelta,
+                is_inline: false,
             })
         }
     }
@@ -315,7 +319,7 @@
 
     /// Value of the inline flag.
     pub fn is_inline(&self) -> bool {
-        self.offsets.is_some()
+        self.is_inline
     }
 
     /// Return a slice of bytes if `revlog` is inline. Panic if not.
@@ -328,13 +332,35 @@
 
     /// Return number of entries of the revlog index.
     pub fn len(&self) -> usize {
-        if let Some(offsets) = &self.offsets {
+        if let Some(offsets) = &*self.get_offsets() {
             offsets.len()
         } else {
             self.bytes.len() / INDEX_ENTRY_SIZE
         }
     }
 
+    pub fn get_offsets(&self) -> RwLockReadGuard<Option<Vec<usize>>> {
+        if self.is_inline() {
+            {
+                // Wrap in a block to drop the write guard
+                // TODO perf?
+                let mut offsets = self.offsets.write().unwrap();
+                if offsets.is_none() {
+                    offsets.replace(inline_scan(&self.bytes.bytes).1);
+                }
+            }
+        }
+        self.offsets.read().unwrap()
+    }
+
+    pub fn get_offsets_mut(&mut self) -> RwLockWriteGuard<Option<Vec<usize>>> {
+        let mut offsets = self.offsets.write().unwrap();
+        if self.is_inline() && offsets.is_none() {
+            offsets.replace(inline_scan(&self.bytes.bytes).1);
+        }
+        offsets
+    }
+
     /// Returns `true` if the `Index` has zero `entries`.
     pub fn is_empty(&self) -> bool {
         self.len() == 0
@@ -346,8 +372,8 @@
         if rev == NULL_REVISION {
             return None;
         }
-        Some(if let Some(offsets) = &self.offsets {
-            self.get_entry_inline(rev, offsets)
+        Some(if let Some(offsets) = &*self.get_offsets() {
+            self.get_entry_inline(rev, offsets.as_ref())
         } else {
             self.get_entry_separated(rev)
         })
@@ -393,7 +419,7 @@
     ) -> Result<(), RevlogError> {
         revision_data.validate()?;
         let new_offset = self.bytes.len();
-        if let Some(offsets) = self.offsets.as_mut() {
+        if let Some(offsets) = &mut *self.get_offsets_mut() {
             offsets.push(new_offset)
         }
         self.bytes.added.extend(revision_data.into_v1().as_bytes());
@@ -401,12 +427,37 @@
     }
 
     pub fn remove(&mut self, rev: Revision) -> Result<(), RevlogError> {
-        self.bytes.remove(rev, self.offsets.as_deref())?;
-        if let Some(offsets) = self.offsets.as_mut() {
+        let offsets = self.get_offsets().clone();
+        self.bytes.remove(rev, offsets.as_deref())?;
+        if let Some(offsets) = &mut *self.get_offsets_mut() {
             offsets.truncate(rev.0 as usize)
         }
         Ok(())
     }
+
+    pub fn clear_caches(&mut self) {
+        // We need to get the 'inline' value from Python at init and use this
+        // instead of offsets to determine whether we're inline since we might
+        // clear caches. This implies re-populating the offsets on-demand.
+        self.offsets = RwLock::new(None);
+    }
+}
+
+fn inline_scan(bytes: &[u8]) -> (usize, Vec<usize>) {
+    let mut offset: usize = 0;
+    let mut offsets = Vec::new();
+
+    while offset + INDEX_ENTRY_SIZE <= bytes.len() {
+        offsets.push(offset);
+        let end = offset + INDEX_ENTRY_SIZE;
+        let entry = IndexEntry {
+            bytes: &bytes[offset..end],
+            offset_override: None,
+        };
+
+        offset += INDEX_ENTRY_SIZE + entry.compressed_len() as usize;
+    }
+    (offset, offsets)
 }
 
 impl super::RevlogIndex for Index {
--- a/rust/hg-cpython/src/revlog.rs	Thu Jun 29 11:37:19 2023 +0200
+++ b/rust/hg-cpython/src/revlog.rs	Mon Oct 30 10:34:48 2023 +0100
@@ -205,6 +205,7 @@
         self.nt(py).borrow_mut().take();
         self.docket(py).borrow_mut().take();
         self.nodemap_mmap(py).borrow_mut().take();
+        self.index(py).borrow_mut().clear_caches();
         self.call_cindex(py, "clearcaches", args, kw)
     }