Mercurial: changeset 52164:e01e84e5e426

--- a/rust/Cargo.lock	Thu Oct 10 10:38:35 2024 +0200
+++ b/rust/Cargo.lock	Thu Oct 10 10:34:51 2024 +0200
@@ -15,6 +15,7 @@
 checksum = "bf6ccdb167abbf410dcb915cabd428929d7f6a04980b54a11f26a39f1c7f7107"
 dependencies = [
  "cfg-if",
+ "getrandom 0.2.8",
  "once_cell",
  "version_check",
 ]
@@ -493,14 +494,14 @@
 
 [[package]]
 name = "filetime"
-version = "0.2.25"
+version = "0.2.23"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "35c0522e981e68cbfa8c3f978441a5f34b30b96e146b33cd3359176b50fe8586"
+checksum = "1ee447700ac8aa0b2f2bd7bc4462ad686ba06baa6727ac149a2d6277f0d240fd"
 dependencies = [
  "cfg-if",
  "libc",
- "libredox",
- "windows-sys 0.59.0",
+ "redox_syscall 0.4.1",
+ "windows-sys 0.52.0",
 ]
 
 [[package]]
@@ -580,9 +581,9 @@
 
 [[package]]
 name = "hashbrown"
-version = "0.13.1"
+version = "0.13.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "33ff8ae62cd3a9102e5637afc8452c55acf3844001bd5374e0b0bd7b6616c038"
+checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e"
 dependencies = [
  "ahash",
  "rayon",
@@ -625,7 +626,7 @@
  "filetime",
  "flate2",
  "format-bytes",
- "hashbrown 0.13.1",
+ "hashbrown 0.13.2",
  "home",
  "im-rc",
  "indicatif",
@@ -643,6 +644,7 @@
  "rayon",
  "regex",
  "same-file",
+ "schnellru",
  "self_cell",
  "serde",
  "sha-1 0.10.0",
@@ -806,7 +808,6 @@
 dependencies = [
  "bitflags 2.6.0",
  "libc",
- "redox_syscall 0.5.3",
 ]
 
 [[package]]
@@ -1191,11 +1192,11 @@
 
 [[package]]
 name = "redox_syscall"
-version = "0.5.3"
+version = "0.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2a908a6e00f1fdd0dfd9c0eb08ce85126f6d8bbda50017e74bc4a4b7d4a926a4"
+checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa"
 dependencies = [
- "bitflags 2.6.0",
+ "bitflags 1.3.2",
 ]
 
 [[package]]
@@ -1283,6 +1284,17 @@
 ]
 
 [[package]]
+name = "schnellru"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c9a8ef13a93c54d20580de1e5c413e624e53121d42fc7e2c11d10ef7f8b02367"
+dependencies = [
+ "ahash",
+ "cfg-if",
+ "hashbrown 0.13.2",
+]
+
+[[package]]
 name = "scopeguard"
 version = "1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -1709,15 +1721,6 @@
 ]
 
 [[package]]
-name = "windows-sys"
-version = "0.59.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
-dependencies = [
- "windows-targets 0.52.6",
-]
-
-[[package]]
 name = "windows-targets"
 version = "0.48.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"

--- a/rust/hg-core/Cargo.toml	Thu Oct 10 10:38:35 2024 +0200
+++ b/rust/hg-core/Cargo.toml	Thu Oct 10 10:34:51 2024 +0200
@@ -42,6 +42,7 @@
 once_cell = "1.16.0"
 bitvec = "1.0.1"
 chrono = "0.4.34"
+schnellru = "0.2.1"
 dyn-clone = "1.0.16"
 filetime = "0.2.23"
 uuid = { version = "1.10", features = ["v4"] }

--- a/rust/hg-core/src/lib.rs	Thu Oct 10 10:38:35 2024 +0200
+++ b/rust/hg-core/src/lib.rs	Thu Oct 10 10:34:51 2024 +0200
@@ -37,6 +37,7 @@
 pub mod operations;
 pub mod progress;
 pub mod revset;
+pub mod transaction;
 pub mod update;
 pub mod utils;
 pub mod vfs;

--- a/rust/hg-core/src/revlog/changelog.rs	Thu Oct 10 10:38:35 2024 +0200
+++ b/rust/hg-core/src/revlog/changelog.rs	Thu Oct 10 10:34:51 2024 +0200
@@ -86,7 +86,7 @@
     }
 
     pub fn get_index(&self) -> &Index {
-        &self.revlog.index
+        self.revlog.index()
     }
 }
 
@@ -569,7 +569,6 @@
             base: temp.path().to_owned(),
         };
         std::fs::write(temp.path().join("foo.i"), b"").unwrap();
-        std::fs::write(temp.path().join("foo.d"), b"").unwrap();
         let revlog =
             Revlog::open(&vfs, "foo.i", None, RevlogOpenOptions::default())
                 .unwrap();

--- a/rust/hg-core/src/revlog/index.rs	Thu Oct 10 10:38:35 2024 +0200
+++ b/rust/hg-core/src/revlog/index.rs	Thu Oct 10 10:34:51 2024 +0200
@@ -272,7 +272,7 @@
     /// be accessed via the [`Self::head_revs`] method.
     /// The last filtered revisions in this index, used to make sure
     /// we haven't changed filters when returning the cached `head_revs`.
-    head_revs: RwLock<(Vec<Revision>, HashSet<Revision>)>,
+    pub(super) head_revs: RwLock<(Vec<Revision>, HashSet<Revision>)>,
 }
 
 impl Debug for Index {
@@ -1397,6 +1397,14 @@
             offset
         }
     }
+
+    pub(crate) fn make_null_entry(&self) -> IndexEntry<'_> {
+        IndexEntry {
+            bytes: b"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0 \
+            \xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff \
+            \0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0",
+        }
+    }
 }
 
 /// The kind of functionality needed by find_gca_candidates

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rust/hg-core/src/revlog/inner_revlog.rs	Thu Oct 10 10:34:51 2024 +0200
@@ -0,0 +1,1353 @@
+//! A layer of lower-level revlog functionality to encapsulate most of the
+//! IO work and expensive operations.
+use std::{
+    borrow::Cow,
+    cell::RefCell,
+    io::{ErrorKind, Seek, SeekFrom, Write},
+    ops::Deref,
+    path::PathBuf,
+    sync::{Arc, Mutex},
+};
+
+use schnellru::{ByMemoryUsage, LruMap};
+use sha1::{Digest, Sha1};
+
+use crate::{
+    errors::{HgError, IoResultExt},
+    exit_codes,
+    transaction::Transaction,
+    vfs::Vfs,
+};
+
+use super::{
+    compression::{
+        uncompressed_zstd_data, CompressionConfig, Compressor, NoneCompressor,
+        ZlibCompressor, ZstdCompressor, ZLIB_BYTE, ZSTD_BYTE,
+    },
+    file_io::{DelayedBuffer, FileHandle, RandomAccessFile, WriteHandles},
+    index::{Index, IndexHeader, INDEX_ENTRY_SIZE},
+    node::{NODE_BYTES_LENGTH, NULL_NODE},
+    options::{RevlogDataConfig, RevlogDeltaConfig, RevlogFeatureConfig},
+    BaseRevision, Node, Revision, RevlogEntry, RevlogError, RevlogIndex,
+    UncheckedRevision, NULL_REVISION, NULL_REVLOG_ENTRY_FLAGS,
+};
+
+/// Matches the `_InnerRevlog` class in the Python code, as an arbitrary
+/// boundary to incrementally rewrite higher-level revlog functionality in
+/// Rust.
+pub struct InnerRevlog {
+    /// When index and data are not interleaved: bytes of the revlog index.
+    /// When index and data are interleaved (inline revlog): bytes of the
+    /// revlog index and data.
+    pub index: Index,
+    /// The store vfs that is used to interact with the filesystem
+    vfs: Box<dyn Vfs>,
+    /// The index file path, relative to the vfs root
+    pub index_file: PathBuf,
+    /// The data file path, relative to the vfs root (same as `index_file`
+    /// if inline)
+    data_file: PathBuf,
+    /// Data config that applies to this revlog
+    data_config: RevlogDataConfig,
+    /// Delta config that applies to this revlog
+    delta_config: RevlogDeltaConfig,
+    /// Feature config that applies to this revlog
+    feature_config: RevlogFeatureConfig,
+    /// A view into this revlog's data file
+    segment_file: RandomAccessFile,
+    /// A cache of uncompressed chunks that have previously been restored.
+    /// Its eviction policy is defined in [`Self::new`].
+    uncompressed_chunk_cache: Option<UncompressedChunkCache>,
+    /// Used to keep track of the actual target during diverted writes
+    /// for the changelog
+    original_index_file: Option<PathBuf>,
+    /// Write handles to the index and data files
+    /// XXX why duplicate from `index` and `segment_file`?
+    writing_handles: Option<WriteHandles>,
+    /// See [`DelayedBuffer`].
+    delayed_buffer: Option<Arc<Mutex<DelayedBuffer>>>,
+    /// Whether this revlog is inline. XXX why duplicate from `index`?
+    pub inline: bool,
+    /// A cache of the last revision, which is usually accessed multiple
+    /// times.
+    pub last_revision_cache: Mutex<Option<SingleRevisionCache>>,
+}
+
+impl InnerRevlog {
+    pub fn new(
+        vfs: Box<dyn Vfs>,
+        index: Index,
+        index_file: PathBuf,
+        data_file: PathBuf,
+        data_config: RevlogDataConfig,
+        delta_config: RevlogDeltaConfig,
+        feature_config: RevlogFeatureConfig,
+    ) -> Self {
+        assert!(index_file.is_relative());
+        assert!(data_file.is_relative());
+        let segment_file = RandomAccessFile::new(
+            dyn_clone::clone_box(&*vfs),
+            if index.is_inline() {
+                index_file.to_owned()
+            } else {
+                data_file.to_owned()
+            },
+        );
+
+        let uncompressed_chunk_cache =
+            data_config.uncompressed_cache_factor.map(
+                // Arbitrary initial value
+                // TODO check if using a hasher specific to integers is useful
+                |_factor| RefCell::new(LruMap::with_memory_budget(65536)),
+            );
+
+        let inline = index.is_inline();
+        Self {
+            index,
+            vfs,
+            index_file,
+            data_file,
+            data_config,
+            delta_config,
+            feature_config,
+            segment_file,
+            uncompressed_chunk_cache,
+            original_index_file: None,
+            writing_handles: None,
+            delayed_buffer: None,
+            inline,
+            last_revision_cache: Mutex::new(None),
+        }
+    }
+
+    /// Return number of entries of the revlog index
+    pub fn len(&self) -> usize {
+        self.index.len()
+    }
+
+    /// Return `true` if this revlog has no entries
+    pub fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    /// Return whether this revlog is inline (mixed index and data)
+    pub fn is_inline(&self) -> bool {
+        self.inline
+    }
+
+    /// Clear all caches from this revlog
+    pub fn clear_cache(&mut self) {
+        assert!(!self.is_delaying());
+        if let Some(cache) = self.uncompressed_chunk_cache.as_ref() {
+            // We don't clear the allocation here because it's probably faster.
+            // We could change our minds later if this ends up being a problem
+            // with regards to memory consumption.
+            cache.borrow_mut().clear();
+        }
+    }
+
+    /// Return an entry for the null revision
+    pub fn make_null_entry(&self) -> RevlogEntry {
+        RevlogEntry {
+            revlog: self,
+            rev: NULL_REVISION,
+            uncompressed_len: 0,
+            p1: NULL_REVISION,
+            p2: NULL_REVISION,
+            flags: NULL_REVLOG_ENTRY_FLAGS,
+            hash: NULL_NODE,
+        }
+    }
+
+    /// Return the [`RevlogEntry`] for a [`Revision`] that is known to exist
+    pub fn get_entry_for_checked_rev(
+        &self,
+        rev: Revision,
+    ) -> Result<RevlogEntry, RevlogError> {
+        if rev == NULL_REVISION {
+            return Ok(self.make_null_entry());
+        }
+        let index_entry = self
+            .index
+            .get_entry(rev)
+            .ok_or_else(|| RevlogError::InvalidRevision(rev.to_string()))?;
+        let p1 =
+            self.index.check_revision(index_entry.p1()).ok_or_else(|| {
+                RevlogError::corrupted(format!(
+                    "p1 for rev {} is invalid",
+                    rev
+                ))
+            })?;
+        let p2 =
+            self.index.check_revision(index_entry.p2()).ok_or_else(|| {
+                RevlogError::corrupted(format!(
+                    "p2 for rev {} is invalid",
+                    rev
+                ))
+            })?;
+        let entry = RevlogEntry {
+            revlog: self,
+            rev,
+            uncompressed_len: index_entry.uncompressed_len(),
+            p1,
+            p2,
+            flags: index_entry.flags(),
+            hash: *index_entry.hash(),
+        };
+        Ok(entry)
+    }
+
+    /// Return the [`RevlogEntry`] for `rev`. If `rev` fails to check, this
+    /// returns a [`RevlogError`].
+    /// TODO normalize naming across the index and all revlogs
+    /// (changelog, etc.) so that `get_entry` is always on an unchecked rev and
+    /// `get_entry_for_checked_rev` is for checked rev
+    pub fn get_entry(
+        &self,
+        rev: UncheckedRevision,
+    ) -> Result<RevlogEntry, RevlogError> {
+        if rev == NULL_REVISION.into() {
+            return Ok(self.make_null_entry());
+        }
+        let rev = self.index.check_revision(rev).ok_or_else(|| {
+            RevlogError::corrupted(format!("rev {} is invalid", rev))
+        })?;
+        self.get_entry_for_checked_rev(rev)
+    }
+
+    /// Is the revlog currently delaying the visibility of written data?
+    ///
+    /// The delaying mechanism can be either in-memory or written on disk in a
+    /// side-file.
+    pub fn is_delaying(&self) -> bool {
+        self.delayed_buffer.is_some() || self.original_index_file.is_some()
+    }
+
+    /// The offset of the data chunk for this revision
+    #[inline(always)]
+    pub fn start(&self, rev: Revision) -> usize {
+        self.index.start(
+            rev,
+            &self
+                .index
+                .get_entry(rev)
+                .unwrap_or_else(|| self.index.make_null_entry()),
+        )
+    }
+
+    /// The length of the data chunk for this revision
+    /// TODO rename this method and others to more explicit names than the
+    /// existing ones that were copied over from Python
+    #[inline(always)]
+    pub fn length(&self, rev: Revision) -> usize {
+        self.index
+            .get_entry(rev)
+            .unwrap_or_else(|| self.index.make_null_entry())
+            .compressed_len() as usize
+    }
+
+    /// The end of the data chunk for this revision
+    #[inline(always)]
+    pub fn end(&self, rev: Revision) -> usize {
+        self.start(rev) + self.length(rev)
+    }
+
+    /// Return the delta parent of the given revision
+    pub fn delta_parent(&self, rev: Revision) -> Revision {
+        let base = self
+            .index
+            .get_entry(rev)
+            .unwrap()
+            .base_revision_or_base_of_delta_chain();
+        if base.0 == rev.0 {
+            NULL_REVISION
+        } else if self.delta_config.general_delta {
+            Revision(base.0)
+        } else {
+            Revision(rev.0 - 1)
+        }
+    }
+
+    /// Return whether `rev` points to a snapshot revision (i.e. does not have
+    /// a delta base).
+    pub fn is_snapshot(&self, rev: Revision) -> Result<bool, RevlogError> {
+        if !self.delta_config.sparse_revlog {
+            return Ok(self.delta_parent(rev) == NULL_REVISION);
+        }
+        self.index.is_snapshot_unchecked(rev)
+    }
+
+    /// Return the delta chain for `rev` according to this revlog's config.
+    /// See [`Index::delta_chain`] for more information.
+    pub fn delta_chain(
+        &self,
+        rev: Revision,
+        stop_rev: Option<Revision>,
+    ) -> Result<(Vec<Revision>, bool), HgError> {
+        self.index.delta_chain(
+            rev,
+            stop_rev,
+            self.delta_config.general_delta.into(),
+        )
+    }
+
+    fn compressor(&self) -> Result<Box<dyn Compressor>, HgError> {
+        // TODO cache the compressor?
+        Ok(match self.feature_config.compression_engine {
+            CompressionConfig::Zlib { level } => {
+                Box::new(ZlibCompressor::new(level))
+            }
+            CompressionConfig::Zstd { level, threads } => {
+                Box::new(ZstdCompressor::new(level, threads))
+            }
+            CompressionConfig::None => Box::new(NoneCompressor),
+        })
+    }
+
+    /// Generate a possibly-compressed representation of data.
+    /// Returns `None` if the data was not compressed.
+    pub fn compress<'data>(
+        &self,
+        data: &'data [u8],
+    ) -> Result<Option<Cow<'data, [u8]>>, RevlogError> {
+        if data.is_empty() {
+            return Ok(Some(data.into()));
+        }
+        let res = self.compressor()?.compress(data)?;
+        if let Some(compressed) = res {
+            // The revlog compressor added the header in the returned data.
+            return Ok(Some(compressed.into()));
+        }
+
+        if data[0] == b'\0' {
+            return Ok(Some(data.into()));
+        }
+        Ok(None)
+    }
+
+    /// Decompress a revlog chunk.
+    ///
+    /// The chunk is expected to begin with a header identifying the
+    /// format type so it can be routed to an appropriate decompressor.
+    pub fn decompress<'a>(
+        &'a self,
+        data: &'a [u8],
+    ) -> Result<Cow<[u8]>, RevlogError> {
+        if data.is_empty() {
+            return Ok(data.into());
+        }
+
+        // Revlogs are read much more frequently than they are written and many
+        // chunks only take microseconds to decompress, so performance is
+        // important here.
+
+        let header = data[0];
+        match header {
+            // Settings don't matter as they only affect compression
+            ZLIB_BYTE => Ok(ZlibCompressor::new(0).decompress(data)?.into()),
+            // Settings don't matter as they only affect compression
+            ZSTD_BYTE => {
+                Ok(ZstdCompressor::new(0, 0).decompress(data)?.into())
+            }
+            b'\0' => Ok(data.into()),
+            b'u' => Ok((&data[1..]).into()),
+            other => Err(HgError::UnsupportedFeature(format!(
+                "unknown compression header '{}'",
+                other
+            ))
+            .into()),
+        }
+    }
+
+    /// Obtain a segment of raw data corresponding to a range of revisions.
+    ///
+    /// Requests for data may be satisfied by a cache.
+    ///
+    /// Returns a 2-tuple of (offset, data) for the requested range of
+    /// revisions. Offset is the integer offset from the beginning of the
+    /// revlog and data is a slice of the raw byte data.
+    ///
+    /// Callers will need to call `self.start(rev)` and `self.length(rev)`
+    /// to determine where each revision's data begins and ends.
+    pub fn get_segment_for_revs(
+        &self,
+        start_rev: Revision,
+        end_rev: Revision,
+    ) -> Result<(usize, Vec<u8>), HgError> {
+        let start = if start_rev == NULL_REVISION {
+            0
+        } else {
+            let start_entry = self
+                .index
+                .get_entry(start_rev)
+                .expect("null revision segment");
+            self.index.start(start_rev, &start_entry)
+        };
+        let end_entry = self
+            .index
+            .get_entry(end_rev)
+            .expect("null revision segment");
+        let end = self.index.start(end_rev, &end_entry) + self.length(end_rev);
+
+        let length = end - start;
+
+        // XXX should we use mmap instead of doing this for platforms that
+        // support madvise/populate?
+        Ok((start, self.segment_file.read_chunk(start, length)?))
+    }
+
+    /// Return the uncompressed raw data for `rev`
+    pub fn chunk_for_rev(&self, rev: Revision) -> Result<Arc<[u8]>, HgError> {
+        if let Some(cache) = self.uncompressed_chunk_cache.as_ref() {
+            if let Some(chunk) = cache.borrow_mut().get(&rev) {
+                return Ok(chunk.clone());
+            }
+        }
+        // TODO revlogv2 should check the compression mode
+        let data = self.get_segment_for_revs(rev, rev)?.1;
+        let uncompressed = self.decompress(&data).map_err(|e| {
+            HgError::abort(
+                format!("revlog decompression error: {}", e),
+                exit_codes::ABORT,
+                None,
+            )
+        })?;
+        let uncompressed: Arc<[u8]> = Arc::from(uncompressed.into_owned());
+        if let Some(cache) = self.uncompressed_chunk_cache.as_ref() {
+            cache.borrow_mut().insert(rev, uncompressed.clone());
+        }
+        Ok(uncompressed)
+    }
+
+    /// Execute `func` within a read context for the data file, meaning that
+    /// the read handle will be taken and discarded after the operation.
+    pub fn with_read<R>(
+        &self,
+        func: impl FnOnce() -> Result<R, RevlogError>,
+    ) -> Result<R, RevlogError> {
+        self.enter_reading_context()?;
+        let res = func();
+        self.exit_reading_context();
+        res.map_err(Into::into)
+    }
+
+    /// `pub` only for use in hg-cpython
+    #[doc(hidden)]
+    pub fn enter_reading_context(&self) -> Result<(), HgError> {
+        if self.is_empty() {
+            // Nothing to be read
+            return Ok(());
+        }
+        if self.delayed_buffer.is_some() && self.is_inline() {
+            return Err(HgError::abort(
+                "revlog with delayed write should not be inline",
+                exit_codes::ABORT,
+                None,
+            ));
+        }
+        self.segment_file.get_read_handle()?;
+        Ok(())
+    }
+
+    /// `pub` only for use in hg-cpython
+    #[doc(hidden)]
+    pub fn exit_reading_context(&self) {
+        self.segment_file.exit_reading_context()
+    }
+
+    /// Fill the buffer returned by `get_buffer` with the possibly un-validated
+    /// raw text for a revision. It can be already validated if it comes
+    /// from the cache.
+    pub fn raw_text<G, T>(
+        &self,
+        rev: Revision,
+        get_buffer: G,
+    ) -> Result<(), RevlogError>
+    where
+        G: FnOnce(
+            usize,
+            &mut dyn FnMut(
+                &mut dyn RevisionBuffer<Target = T>,
+            ) -> Result<(), RevlogError>,
+        ) -> Result<(), RevlogError>,
+    {
+        let entry = &self.get_entry_for_checked_rev(rev)?;
+        let raw_size = entry.uncompressed_len();
+        let mut mutex_guard = self
+            .last_revision_cache
+            .lock()
+            .expect("lock should not be held");
+        let cached_rev = if let Some((_node, rev, data)) = &*mutex_guard {
+            Some((*rev, data.deref().as_ref()))
+        } else {
+            None
+        };
+        if let Some(cache) = &self.uncompressed_chunk_cache {
+            let cache = &mut cache.borrow_mut();
+            if let Some(size) = raw_size {
+                // Dynamically update the uncompressed_chunk_cache size to the
+                // largest revision we've seen in this revlog.
+                // Do it *before* restoration in case the current revision
+                // is the largest.
+                let factor = self
+                    .data_config
+                    .uncompressed_cache_factor
+                    .expect("cache should not exist without factor");
+                let candidate_size = (size as f64 * factor) as usize;
+                let limiter_mut = cache.limiter_mut();
+                if candidate_size > limiter_mut.max_memory_usage() {
+                    std::mem::swap(
+                        limiter_mut,
+                        &mut ByMemoryUsage::new(candidate_size),
+                    );
+                }
+            }
+        }
+        entry.rawdata(cached_rev, get_buffer)?;
+        // drop cache to save memory, the caller is expected to update
+        // the revision cache after validating the text
+        mutex_guard.take();
+        Ok(())
+    }
+
+    /// Only `pub` for `hg-cpython`.
+    /// Obtain decompressed raw data for the specified revisions that are
+    /// assumed to be in ascending order.
+    ///
+    /// Returns a list with decompressed data for each requested revision.
+    #[doc(hidden)]
+    pub fn chunks(
+        &self,
+        revs: Vec<Revision>,
+        target_size: Option<u64>,
+    ) -> Result<Vec<Arc<[u8]>>, RevlogError> {
+        if revs.is_empty() {
+            return Ok(vec![]);
+        }
+        let mut fetched_revs = vec![];
+        let mut chunks = Vec::with_capacity(revs.len());
+
+        match self.uncompressed_chunk_cache.as_ref() {
+            Some(cache) => {
+                let mut cache = cache.borrow_mut();
+                for rev in revs.iter() {
+                    match cache.get(rev) {
+                        Some(hit) => chunks.push((*rev, hit.to_owned())),
+                        None => fetched_revs.push(*rev),
+                    }
+                }
+            }
+            None => fetched_revs = revs,
+        }
+
+        let already_cached = chunks.len();
+
+        let sliced_chunks = if fetched_revs.is_empty() {
+            vec![]
+        } else if !self.data_config.with_sparse_read || self.is_inline() {
+            vec![fetched_revs]
+        } else {
+            self.slice_chunk(&fetched_revs, target_size)?
+        };
+
+        self.with_read(|| {
+            for revs_chunk in sliced_chunks {
+                let first_rev = revs_chunk[0];
+                // Skip trailing revisions with empty diff
+                let last_rev_idx = revs_chunk
+                    .iter()
+                    .rposition(|r| self.length(*r) != 0)
+                    .unwrap_or(revs_chunk.len() - 1);
+
+                let last_rev = revs_chunk[last_rev_idx];
+
+                let (offset, data) =
+                    self.get_segment_for_revs(first_rev, last_rev)?;
+
+                let revs_chunk = &revs_chunk[..=last_rev_idx];
+
+                for rev in revs_chunk {
+                    let chunk_start = self.start(*rev);
+                    let chunk_length = self.length(*rev);
+                    // TODO revlogv2 should check the compression mode
+                    let bytes = &data[chunk_start - offset..][..chunk_length];
+                    let chunk = if !bytes.is_empty() && bytes[0] == ZSTD_BYTE {
+                        // If we're using `zstd`, we want to try a more
+                        // specialized decompression
+                        let entry = self.index.get_entry(*rev).unwrap();
+                        let is_delta = entry
+                            .base_revision_or_base_of_delta_chain()
+                            != (*rev).into();
+                        let uncompressed = uncompressed_zstd_data(
+                            bytes,
+                            is_delta,
+                            entry.uncompressed_len(),
+                        )?;
+                        Cow::Owned(uncompressed)
+                    } else {
+                        // Otherwise just fallback to generic decompression.
+                        self.decompress(bytes)?
+                    };
+
+                    chunks.push((*rev, chunk.into()));
+                }
+            }
+            Ok(())
+        })?;
+
+        if let Some(cache) = self.uncompressed_chunk_cache.as_ref() {
+            let mut cache = cache.borrow_mut();
+            for (rev, chunk) in chunks.iter().skip(already_cached) {
+                cache.insert(*rev, chunk.clone());
+            }
+        }
+        // Use stable sort here since it's *mostly* sorted
+        chunks.sort_by(|a, b| a.0.cmp(&b.0));
+        Ok(chunks.into_iter().map(|(_r, chunk)| chunk).collect())
+    }
+
+    /// Slice revs to reduce the amount of unrelated data to be read from disk.
+    ///
+    /// ``revs`` is sliced into groups that should be read in one time.
+    /// Assume that revs are sorted.
+    ///
+    /// The initial chunk is sliced until the overall density
+    /// (payload/chunks-span ratio) is above
+    /// `revlog.data_config.sr_density_threshold`.
+    /// No gap smaller than `revlog.data_config.sr_min_gap_size` is skipped.
+    ///
+    /// If `target_size` is set, no chunk larger than `target_size`
+    /// will be returned.
+    /// For consistency with other slicing choices, this limit won't go lower
+    /// than `revlog.data_config.sr_min_gap_size`.
+    ///
+    /// If individual revision chunks are larger than this limit, they will
+    /// still be raised individually.
+    pub fn slice_chunk(
+        &self,
+        revs: &[Revision],
+        target_size: Option<u64>,
+    ) -> Result<Vec<Vec<Revision>>, RevlogError> {
+        let target_size =
+            target_size.map(|size| size.max(self.data_config.sr_min_gap_size));
+
+        let target_density = self.data_config.sr_density_threshold;
+        let min_gap_size = self.data_config.sr_min_gap_size as usize;
+        let to_density = self.index.slice_chunk_to_density(
+            revs,
+            target_density,
+            min_gap_size,
+        );
+
+        let mut sliced = vec![];
+
+        for chunk in to_density {
+            sliced.extend(
+                self.slice_chunk_to_size(&chunk, target_size)?
+                    .into_iter()
+                    .map(ToOwned::to_owned),
+            );
+        }
+
+        Ok(sliced)
+    }
+
+    /// Slice revs to match the target size
+    ///
+    /// This is intended to be used on chunks that density slicing selected,
+    /// but that are still too large compared to the read guarantee of revlogs.
+    /// This might happen when the "minimal gap size" interrupted the slicing
+    /// or when chains are built in a way that create large blocks next to
+    /// each other.
+    fn slice_chunk_to_size<'a>(
+        &self,
+        revs: &'a [Revision],
+        target_size: Option<u64>,
+    ) -> Result<Vec<&'a [Revision]>, RevlogError> {
+        let mut start_data = self.start(revs[0]);
+        let end_data = self.end(revs[revs.len() - 1]);
+        let full_span = end_data - start_data;
+
+        let nothing_to_do = target_size
+            .map(|size| full_span <= size as usize)
+            .unwrap_or(true);
+
+        if nothing_to_do {
+            return Ok(vec![revs]);
+        }
+        let target_size = target_size.expect("target_size is set") as usize;
+
+        let mut start_rev_idx = 0;
+        let mut end_rev_idx = 1;
+        let mut chunks = vec![];
+
+        for (idx, rev) in revs.iter().enumerate().skip(1) {
+            let span = self.end(*rev) - start_data;
+            let is_snapshot = self.is_snapshot(*rev)?;
+            if span <= target_size && is_snapshot {
+                end_rev_idx = idx + 1;
+            } else {
+                let chunk =
+                    self.trim_chunk(revs, start_rev_idx, Some(end_rev_idx));
+                if !chunk.is_empty() {
+                    chunks.push(chunk);
+                }
+                start_rev_idx = idx;
+                start_data = self.start(*rev);
+                end_rev_idx = idx + 1;
+            }
+            if !is_snapshot {
+                break;
+            }
+        }
+
+        // For the others, we use binary slicing to quickly converge towards
+        // valid chunks (otherwise, we might end up looking for the start/end
+        // of many revisions). This logic is not looking for the perfect
+        // slicing point, it quickly converges towards valid chunks.
+        let number_of_items = revs.len();
+
+        while (end_data - start_data) > target_size {
+            end_rev_idx = number_of_items;
+            if number_of_items - start_rev_idx <= 1 {
+                // Protect against individual chunks larger than the limit
+                break;
+            }
+            let mut local_end_data = self.end(revs[end_rev_idx - 1]);
+            let mut span = local_end_data - start_data;
+            while span > target_size {
+                if end_rev_idx - start_rev_idx <= 1 {
+                    // Protect against individual chunks larger than the limit
+                    break;
+                }
+                end_rev_idx -= (end_rev_idx - start_rev_idx) / 2;
+                local_end_data = self.end(revs[end_rev_idx - 1]);
+                span = local_end_data - start_data;
+            }
+            let chunk =
+                self.trim_chunk(revs, start_rev_idx, Some(end_rev_idx));
+            if !chunk.is_empty() {
+                chunks.push(chunk);
+            }
+            start_rev_idx = end_rev_idx;
+            start_data = self.start(revs[start_rev_idx]);
+        }
+
+        let chunk = self.trim_chunk(revs, start_rev_idx, None);
+        if !chunk.is_empty() {
+            chunks.push(chunk);
+        }
+
+        Ok(chunks)
+    }
+
+    /// Returns `revs[startidx..endidx]` without empty trailing revs
+    fn trim_chunk<'a>(
+        &self,
+        revs: &'a [Revision],
+        start_rev_idx: usize,
+        end_rev_idx: Option<usize>,
+    ) -> &'a [Revision] {
+        let mut end_rev_idx = end_rev_idx.unwrap_or(revs.len());
+
+        // If we have a non-empty delta candidate, there is nothing to trim
+        if revs[end_rev_idx - 1].0 < self.len() as BaseRevision {
+            // Trim empty revs at the end, except the very first rev of a chain
+            while end_rev_idx > 1
+                && end_rev_idx > start_rev_idx
+                && self.length(revs[end_rev_idx - 1]) == 0
+            {
+                end_rev_idx -= 1
+            }
+        }
+
+        &revs[start_rev_idx..end_rev_idx]
+    }
+
+    /// Check the hash of some given data against the recorded hash.
+    pub fn check_hash(
+        &self,
+        p1: Revision,
+        p2: Revision,
+        expected: &[u8],
+        data: &[u8],
+    ) -> bool {
+        let e1 = self.index.get_entry(p1);
+        let h1 = match e1 {
+            Some(ref entry) => entry.hash(),
+            None => &NULL_NODE,
+        };
+        let e2 = self.index.get_entry(p2);
+        let h2 = match e2 {
+            Some(ref entry) => entry.hash(),
+            None => &NULL_NODE,
+        };
+
+        hash(data, h1.as_bytes(), h2.as_bytes()) == expected
+    }
+
+    /// Returns whether we are currently in a [`Self::with_write`] context
+    pub fn is_writing(&self) -> bool {
+        self.writing_handles.is_some()
+    }
+
+    /// Open the revlog files for writing
+    ///
+    /// Adding content to a revlog should be done within this context.
+    /// TODO try using `BufRead` and `BufWrite` and see if performance improves
+    pub fn with_write<R>(
+        &mut self,
+        transaction: &mut impl Transaction,
+        data_end: Option<usize>,
+        func: impl FnOnce() -> R,
+    ) -> Result<R, HgError> {
+        if self.is_writing() {
+            return Ok(func());
+        }
+        self.enter_writing_context(data_end, transaction)
+            .map_err(|e| {
+                self.exit_writing_context();
+                e
+            })?;
+        let res = func();
+        self.exit_writing_context();
+        Ok(res)
+    }
+
+    /// `pub` only for use in hg-cpython
+    #[doc(hidden)]
+    pub fn exit_writing_context(&mut self) {
+        self.writing_handles.take();
+        self.segment_file.writing_handle.take();
+        self.segment_file.reading_handle.take();
+    }
+
+    /// `pub` only for use in hg-cpython
+    #[doc(hidden)]
+    pub fn python_writing_handles(&self) -> Option<&WriteHandles> {
+        self.writing_handles.as_ref()
+    }
+
+    /// `pub` only for use in hg-cpython
+    #[doc(hidden)]
+    pub fn enter_writing_context(
+        &mut self,
+        data_end: Option<usize>,
+        transaction: &mut impl Transaction,
+    ) -> Result<(), HgError> {
+        let data_size = if self.is_empty() {
+            0
+        } else {
+            self.end(Revision((self.len() - 1) as BaseRevision))
+        };
+        let data_handle = if !self.is_inline() {
+            let data_handle = match self.vfs.open(&self.data_file) {
+                Ok(mut f) => {
+                    if let Some(end) = data_end {
+                        f.seek(SeekFrom::Start(end as u64))
+                            .when_reading_file(&self.data_file)?;
+                    } else {
+                        f.seek(SeekFrom::End(0))
+                            .when_reading_file(&self.data_file)?;
+                    }
+                    f
+                }
+                Err(e) => match e {
+                    HgError::IoError { error, context } => {
+                        if error.kind() != ErrorKind::NotFound {
+                            return Err(HgError::IoError { error, context });
+                        }
+                        self.vfs.create(&self.data_file)?
+                    }
+                    e => return Err(e),
+                },
+            };
+            transaction.add(&self.data_file, data_size);
+            Some(FileHandle::from_file(
+                data_handle,
+                dyn_clone::clone_box(&*self.vfs),
+                &self.data_file,
+            ))
+        } else {
+            None
+        };
+        let index_size = self.len() * INDEX_ENTRY_SIZE;
+        let index_handle = self.index_write_handle()?;
+        if self.is_inline() {
+            transaction.add(&self.index_file, data_size);
+        } else {
+            transaction.add(&self.index_file, index_size);
+        }
+        self.writing_handles = Some(WriteHandles {
+            index_handle: index_handle.clone(),
+            data_handle: data_handle.clone(),
+        });
+        *self.segment_file.reading_handle.borrow_mut() = if self.is_inline() {
+            Some(index_handle)
+        } else {
+            data_handle
+        };
+        Ok(())
+    }
+
+    /// Get a write handle to the index, sought to the end of its data.
+    fn index_write_handle(&self) -> Result<FileHandle, HgError> {
+        let res = if self.delayed_buffer.is_none() {
+            if self.data_config.check_ambig {
+                self.vfs.open_check_ambig(&self.index_file)
+            } else {
+                self.vfs.open(&self.index_file)
+            }
+        } else {
+            self.vfs.open(&self.index_file)
+        };
+        match res {
+            Ok(mut handle) => {
+                handle
+                    .seek(SeekFrom::End(0))
+                    .when_reading_file(&self.index_file)?;
+                Ok(
+                    if let Some(delayed_buffer) = self.delayed_buffer.as_ref()
+                    {
+                        FileHandle::from_file_delayed(
+                            handle,
+                            dyn_clone::clone_box(&*self.vfs),
+                            &self.index_file,
+                            delayed_buffer.clone(),
+                        )?
+                    } else {
+                        FileHandle::from_file(
+                            handle,
+                            dyn_clone::clone_box(&*self.vfs),
+                            &self.index_file,
+                        )
+                    },
+                )
+            }
+            Err(e) => match e {
+                HgError::IoError { error, context } => {
+                    if error.kind() != ErrorKind::NotFound {
+                        return Err(HgError::IoError { error, context });
+                    };
+                    if let Some(delayed_buffer) = self.delayed_buffer.as_ref()
+                    {
+                        FileHandle::new_delayed(
+                            dyn_clone::clone_box(&*self.vfs),
+                            &self.index_file,
+                            true,
+                            delayed_buffer.clone(),
+                        )
+                    } else {
+                        FileHandle::new(
+                            dyn_clone::clone_box(&*self.vfs),
+                            &self.index_file,
+                            true,
+                            true,
+                        )
+                    }
+                }
+                e => Err(e),
+            },
+        }
+    }
+
+    /// Split the data of an inline revlog into an index and a data file
+    pub fn split_inline(
+        &mut self,
+        header: IndexHeader,
+        new_index_file_path: Option<PathBuf>,
+    ) -> Result<PathBuf, RevlogError> {
+        assert!(self.delayed_buffer.is_none());
+        let existing_handles = self.writing_handles.is_some();
+        if let Some(handles) = &mut self.writing_handles {
+            handles.index_handle.flush()?;
+            self.writing_handles.take();
+            self.segment_file.writing_handle.take();
+        }
+        let mut new_data_file_handle = self.vfs.create(&self.data_file)?;
+        // Drop any potential data, possibly redundant with the VFS impl.
+        new_data_file_handle
+            .set_len(0)
+            .when_writing_file(&self.data_file)?;
+
+        self.with_read(|| -> Result<(), RevlogError> {
+            for r in 0..self.index.len() {
+                let rev = Revision(r as BaseRevision);
+                let rev_segment = self.get_segment_for_revs(rev, rev)?.1;
+                new_data_file_handle
+                    .write_all(&rev_segment)
+                    .when_writing_file(&self.data_file)?;
+            }
+            new_data_file_handle
+                .flush()
+                .when_writing_file(&self.data_file)?;
+            Ok(())
+        })?;
+
+        if let Some(index_path) = new_index_file_path {
+            self.index_file = index_path
+        }
+
+        let mut new_index_handle = self.vfs.create(&self.index_file)?;
+        let mut new_data = Vec::with_capacity(self.len() * INDEX_ENTRY_SIZE);
+        for r in 0..self.len() {
+            let rev = Revision(r as BaseRevision);
+            let entry = self.index.entry_binary(rev).unwrap_or_else(|| {
+                panic!(
+                    "entry {} should exist in {}",
+                    r,
+                    self.index_file.display()
+                )
+            });
+            if r == 0 {
+                new_data.extend(header.header_bytes);
+            }
+            new_data.extend(entry);
+        }
+        new_index_handle
+            .write_all(&new_data)
+            .when_writing_file(&self.index_file)?;
+        // Replace the index with a new one because the buffer contains inline
+        // data
+        self.index = Index::new(Box::new(new_data), header)?;
+        self.inline = false;
+
+        self.segment_file = RandomAccessFile::new(
+            dyn_clone::clone_box(&*self.vfs),
+            self.data_file.to_owned(),
+        );
+        if existing_handles {
+            // Switched from inline to conventional, reopen the index
+            let new_data_handle = Some(FileHandle::from_file(
+                new_data_file_handle,
+                dyn_clone::clone_box(&*self.vfs),
+                &self.data_file,
+            ));
+            self.writing_handles = Some(WriteHandles {
+                index_handle: self.index_write_handle()?,
+                data_handle: new_data_handle.clone(),
+            });
+            *self.segment_file.writing_handle.borrow_mut() = new_data_handle;
+        }
+
+        Ok(self.index_file.to_owned())
+    }
+
+    /// Write a new entry to this revlog.
+    /// - `entry` is the index bytes
+    /// - `header_and_data` is the compression header and the revision data
+    /// - `offset` is the position in the data file to write to
+    /// - `index_end` is the overwritten position in the index in revlog-v2,
+    ///   since the format may allow a rewrite of garbage data at the end.
+    /// - `data_end` is the overwritten position in the data-file in revlog-v2,
+    ///   since the format may allow a rewrite of garbage data at the end.
+    ///
+    /// XXX Why do we have `data_end` *and* `offset`? Same question in Python
+    pub fn write_entry(
+        &mut self,
+        mut transaction: impl Transaction,
+        entry: &[u8],
+        header_and_data: (&[u8], &[u8]),
+        mut offset: usize,
+        index_end: Option<u64>,
+        data_end: Option<u64>,
+    ) -> Result<(u64, Option<u64>), HgError> {
+        let current_revision = self.len() - 1;
+        let canonical_index_file = self.canonical_index_file();
+
+        let is_inline = self.is_inline();
+        let handles = match &mut self.writing_handles {
+            None => {
+                return Err(HgError::abort(
+                    "adding revision outside of the `with_write` context",
+                    exit_codes::ABORT,
+                    None,
+                ));
+            }
+            Some(handles) => handles,
+        };
+        let index_handle = &mut handles.index_handle;
+        let data_handle = &mut handles.data_handle;
+        if let Some(end) = index_end {
+            index_handle
+                .seek(SeekFrom::Start(end))
+                .when_reading_file(&self.index_file)?;
+        } else {
+            index_handle
+                .seek(SeekFrom::End(0))
+                .when_reading_file(&self.index_file)?;
+        }
+        if let Some(data_handle) = data_handle {
+            if let Some(end) = data_end {
+                data_handle
+                    .seek(SeekFrom::Start(end))
+                    .when_reading_file(&self.data_file)?;
+            } else {
+                data_handle
+                    .seek(SeekFrom::End(0))
+                    .when_reading_file(&self.data_file)?;
+            }
+        }
+        let (header, data) = header_and_data;
+
+        if !is_inline {
+            transaction.add(&self.data_file, offset);
+            transaction
+                .add(&canonical_index_file, current_revision * entry.len());
+            let data_handle = data_handle
+                .as_mut()
+                .expect("data handle should exist when not inline");
+            if !header.is_empty() {
+                data_handle.write_all(header)?;
+            }
+            data_handle.write_all(data)?;
+            match &mut self.delayed_buffer {
+                Some(buf) => {
+                    buf.lock()
+                        .expect("propagate the panic")
+                        .buffer
+                        .write_all(entry)
+                        .expect("write to delay buffer should succeed");
+                }
+                None => index_handle.write_all(entry)?,
+            }
+        } else if self.delayed_buffer.is_some() {
+            return Err(HgError::abort(
+                "invalid delayed write on inline revlog",
+                exit_codes::ABORT,
+                None,
+            ));
+        } else {
+            offset += current_revision * entry.len();
+            transaction.add(&canonical_index_file, offset);
+            index_handle.write_all(entry)?;
+            index_handle.write_all(header)?;
+            index_handle.write_all(data)?;
+        }
+        let data_position = match data_handle {
+            Some(h) => Some(h.position()?),
+            None => None,
+        };
+        Ok((index_handle.position()?, data_position))
+    }
+
+    /// Return the real target index file and not the temporary when diverting
+    pub fn canonical_index_file(&self) -> PathBuf {
+        self.original_index_file
+            .as_ref()
+            .map(ToOwned::to_owned)
+            .unwrap_or_else(|| self.index_file.to_owned())
+    }
+
+    /// Return the path to the diverted index
+    fn diverted_index(&self) -> PathBuf {
+        self.index_file.with_extension("i.a")
+    }
+
+    /// True if we're in a [`Self::with_write`] or [`Self::with_read`] context
+    pub fn is_open(&self) -> bool {
+        self.segment_file.is_open()
+    }
+
+    /// Set this revlog to delay its writes to a buffer
+    pub fn delay(&mut self) -> Result<Option<PathBuf>, HgError> {
+        assert!(!self.is_open());
+        if self.is_inline() {
+            return Err(HgError::abort(
+                "revlog with delayed write should not be inline",
+                exit_codes::ABORT,
+                None,
+            ));
+        }
+        if self.delayed_buffer.is_some() || self.original_index_file.is_some()
+        {
+            // Delay or divert already happening
+            return Ok(None);
+        }
+        if self.is_empty() {
+            self.original_index_file = Some(self.index_file.to_owned());
+            self.index_file = self.diverted_index();
+            if self.vfs.exists(&self.index_file) {
+                self.vfs.unlink(&self.index_file)?;
+            }
+            Ok(Some(self.index_file.to_owned()))
+        } else {
+            self.delayed_buffer =
+                Some(Arc::new(Mutex::new(DelayedBuffer::default())));
+            Ok(None)
+        }
+    }
+
+    /// Write the pending data (in memory) if any to the diverted index file
+    /// (on disk temporary file)
+    pub fn write_pending(
+        &mut self,
+    ) -> Result<(Option<PathBuf>, bool), HgError> {
+        assert!(!self.is_open());
+        if self.is_inline() {
+            return Err(HgError::abort(
+                "revlog with delayed write should not be inline",
+                exit_codes::ABORT,
+                None,
+            ));
+        }
+        if self.original_index_file.is_some() {
+            return Ok((None, true));
+        }
+        let mut any_pending = false;
+        let pending_index_file = self.diverted_index();
+        if self.vfs.exists(&pending_index_file) {
+            self.vfs.unlink(&pending_index_file)?;
+        }
+        self.vfs.copy(&self.index_file, &pending_index_file)?;
+        if let Some(delayed_buffer) = self.delayed_buffer.take() {
+            let mut index_file_handle = self.vfs.open(&pending_index_file)?;
+            index_file_handle
+                .seek(SeekFrom::End(0))
+                .when_writing_file(&pending_index_file)?;
+            let delayed_data =
+                &delayed_buffer.lock().expect("propagate the panic").buffer;
+            index_file_handle
+                .write_all(delayed_data)
+                .when_writing_file(&pending_index_file)?;
+            any_pending = true;
+        }
+        self.original_index_file = Some(self.index_file.to_owned());
+        self.index_file = pending_index_file;
+        Ok((Some(self.index_file.to_owned()), any_pending))
+    }
+
+    /// Overwrite the canonical file with the diverted file, or write out the
+    /// delayed buffer.
+    /// Returns an error if the revlog is neither diverted nor delayed.
+    pub fn finalize_pending(&mut self) -> Result<PathBuf, HgError> {
+        assert!(!self.is_open());
+        if self.is_inline() {
+            return Err(HgError::abort(
+                "revlog with delayed write should not be inline",
+                exit_codes::ABORT,
+                None,
+            ));
+        }
+        match (
+            self.delayed_buffer.as_ref(),
+            self.original_index_file.as_ref(),
+        ) {
+            (None, None) => {
+                return Err(HgError::abort(
+                    "neither delay nor divert found on this revlog",
+                    exit_codes::ABORT,
+                    None,
+                ));
+            }
+            (Some(delay), None) => {
+                let mut index_file_handle = self.vfs.open(&self.index_file)?;
+                index_file_handle
+                    .seek(SeekFrom::End(0))
+                    .when_writing_file(&self.index_file)?;
+                index_file_handle
+                    .write_all(
+                        &delay.lock().expect("propagate the panic").buffer,
+                    )
+                    .when_writing_file(&self.index_file)?;
+                self.delayed_buffer = None;
+            }
+            (None, Some(divert)) => {
+                if self.vfs.exists(&self.index_file) {
+                    self.vfs.rename(&self.index_file, divert, true)?;
+                }
+                divert.clone_into(&mut self.index_file);
+                self.original_index_file = None;
+            }
+            (Some(_), Some(_)) => unreachable!(
+                "{} is in an inconsistent state of both delay and divert",
+                self.canonical_index_file().display(),
+            ),
+        }
+        Ok(self.canonical_index_file())
+    }
+
+    /// `pub` only for `hg-cpython`. This is made a different method than
+    /// [`Revlog::index`] in case there is a different invariant that pops up
+    /// later.
+    #[doc(hidden)]
+    pub fn shared_index(&self) -> &Index {
+        &self.index
+    }
+}
+
+/// The use of a [`Refcell`] assumes that a given revlog will only
+/// be accessed (read or write) by a single thread.
+type UncompressedChunkCache =
+    RefCell<LruMap<Revision, Arc<[u8]>, ByMemoryUsage>>;
+
+/// The node, revision and data for the last revision we've seen. Speeds up
+/// a lot of sequential operations of the revlog.
+///
+/// The data is not just bytes since it can come from Python and we want to
+/// avoid copies if possible.
+type SingleRevisionCache =
+    (Node, Revision, Box<dyn Deref<Target = [u8]> + Send>);
+
+/// A way of progressively filling a buffer with revision data, then return
+/// that buffer. Used to abstract away Python-allocated code to reduce copying
+/// for performance reasons.
+pub trait RevisionBuffer {
+    /// The owned buffer type to return
+    type Target;
+    /// Copies the slice into the buffer
+    fn extend_from_slice(&mut self, slice: &[u8]);
+    /// Returns the now finished owned buffer
+    fn finish(self) -> Self::Target;
+}
+
+/// A simple vec-based buffer. This is uselessly complicated for the pure Rust
+/// case, but it's the price to pay for Python compatibility.
+#[derive(Debug)]
+pub(super) struct CoreRevisionBuffer {
+    buf: Vec<u8>,
+}
+
+impl CoreRevisionBuffer {
+    pub fn new() -> Self {
+        Self { buf: vec![] }
+    }
+
+    #[inline]
+    pub fn resize(&mut self, size: usize) {
+        self.buf.reserve_exact(size - self.buf.capacity());
+    }
+}
+
+impl RevisionBuffer for CoreRevisionBuffer {
+    type Target = Vec<u8>;
+
+    #[inline]
+    fn extend_from_slice(&mut self, slice: &[u8]) {
+        self.buf.extend_from_slice(slice);
+    }
+
+    #[inline]
+    fn finish(self) -> Self::Target {
+        self.buf
+    }
+}
+
+/// Calculate the hash of a revision given its data and its parents.
+pub fn hash(
+    data: &[u8],
+    p1_hash: &[u8],
+    p2_hash: &[u8],
+) -> [u8; NODE_BYTES_LENGTH] {
+    let mut hasher = Sha1::new();
+    let (a, b) = (p1_hash, p2_hash);
+    if a > b {
+        hasher.update(b);
+        hasher.update(a);
+    } else {
+        hasher.update(a);
+        hasher.update(b);
+    }
+    hasher.update(data);
+    *hasher.finalize().as_ref()
+}

--- a/rust/hg-core/src/revlog/mod.rs	Thu Oct 10 10:38:35 2024 +0200
+++ b/rust/hg-core/src/revlog/mod.rs	Thu Oct 10 10:34:51 2024 +0200
@@ -9,7 +9,10 @@
 pub mod nodemap;
 mod nodemap_docket;
 pub mod path_encode;
-use compression::uncompressed_zstd_data;
+use inner_revlog::CoreRevisionBuffer;
+use inner_revlog::InnerRevlog;
+use inner_revlog::RevisionBuffer;
+use memmap2::MmapOptions;
 pub use node::{FromHexError, Node, NodePrefix};
 use options::RevlogOpenOptions;
 pub mod changelog;
@@ -17,25 +20,25 @@
 pub mod file_io;
 pub mod filelog;
 pub mod index;
+pub mod inner_revlog;
 pub mod manifest;
 pub mod options;
 pub mod patch;
 
 use std::borrow::Cow;
+use std::io::ErrorKind;
 use std::io::Read;
 use std::ops::Deref;
 use std::path::Path;
 
-use flate2::read::ZlibDecoder;
-use sha1::{Digest, Sha1};
-
-use self::node::{NODE_BYTES_LENGTH, NULL_NODE};
+use self::node::NULL_NODE;
 use self::nodemap_docket::NodeMapDocket;
 use super::index::Index;
-use super::index::INDEX_ENTRY_SIZE;
 use super::nodemap::{NodeMap, NodeMapError};
 use crate::errors::HgError;
+use crate::errors::IoResultExt;
 use crate::exit_codes;
+use crate::vfs::Vfs;
 use crate::vfs::VfsImpl;
 
 /// As noted in revlog.c, revision numbers are actually encoded in
@@ -256,24 +259,17 @@
     }
 }
 
-/// Read only implementation of revlog.
 pub struct Revlog {
-    /// When index and data are not interleaved: bytes of the revlog index.
-    /// When index and data are interleaved: bytes of the revlog index and
-    /// data.
-    index: Index,
-    /// When index and data are not interleaved: bytes of the revlog data
-    data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
+    inner: InnerRevlog,
     /// When present on disk: the persistent nodemap for this revlog
     nodemap: Option<nodemap::NodeTree>,
 }
 
 impl Graph for Revlog {
     fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
-        self.index.parents(rev)
+        self.index().parents(rev)
     }
 }
-
 impl Revlog {
     /// Open a revlog index file.
     ///
@@ -289,6 +285,10 @@
         Self::open_gen(store_vfs, index_path, data_path, options, None)
     }
 
+    fn index(&self) -> &Index {
+        &self.inner.index
+    }
+
     fn open_gen(
         // Todo use the `Vfs` trait here once we create a function for mmap
         store_vfs: &VfsImpl,
@@ -298,37 +298,10 @@
         nodemap_for_test: Option<nodemap::NodeTree>,
     ) -> Result<Self, HgError> {
         let index_path = index_path.as_ref();
-        let index = {
-            match store_vfs.mmap_open_opt(index_path)? {
-                None => Index::new(
-                    Box::<Vec<_>>::default(),
-                    options.index_header(),
-                ),
-                Some(index_mmap) => {
-                    let index = Index::new(
-                        Box::new(index_mmap),
-                        options.index_header(),
-                    )?;
-                    Ok(index)
-                }
-            }
-        }?;
+        let index = open_index(store_vfs, index_path, options)?;
 
         let default_data_path = index_path.with_extension("d");
-
-        // type annotation required
-        // won't recognize Mmap as Deref<Target = [u8]>
-        let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
-            if index.is_inline() {
-                None
-            } else if index.is_empty() {
-                // No need to even try to open the data file then.
-                Some(Box::new(&[][..]))
-            } else {
-                let data_path = data_path.unwrap_or(&default_data_path);
-                let data_mmap = store_vfs.mmap_open(data_path)?;
-                Some(Box::new(data_mmap))
-            };
+        let data_path = data_path.unwrap_or(&default_data_path);
 
         let nodemap = if index.is_inline() || !options.use_nodemap {
             None
@@ -346,20 +319,27 @@
         let nodemap = nodemap_for_test.or(nodemap);
 
         Ok(Revlog {
-            index,
-            data_bytes,
+            inner: InnerRevlog::new(
+                Box::new(store_vfs.clone()),
+                index,
+                index_path.to_path_buf(),
+                data_path.to_path_buf(),
+                options.data_config,
+                options.delta_config,
+                options.feature_config,
+            ),
             nodemap,
         })
     }
 
     /// Return number of entries of the `Revlog`.
     pub fn len(&self) -> usize {
-        self.index.len()
+        self.index().len()
     }
 
     /// Returns `true` if the `Revlog` has zero `entries`.
     pub fn is_empty(&self) -> bool {
-        self.index.is_empty()
+        self.index().is_empty()
     }
 
     /// Returns the node ID for the given revision number, if it exists in this
@@ -368,8 +348,8 @@
         if rev == NULL_REVISION.into() {
             return Some(&NULL_NODE);
         }
-        let rev = self.index.check_revision(rev)?;
-        Some(self.index.get_entry(rev)?.hash())
+        let rev = self.index().check_revision(rev)?;
+        Some(self.index().get_entry(rev)?.hash())
     }
 
     /// Return the revision number for the given node ID, if it exists in this
@@ -380,7 +360,7 @@
     ) -> Result<Revision, RevlogError> {
         if let Some(nodemap) = &self.nodemap {
             nodemap
-                .find_bin(&self.index, node)?
+                .find_bin(self.index(), node)?
                 .ok_or(RevlogError::InvalidRevision(format!("{:x}", node)))
         } else {
             self.rev_from_node_no_persistent_nodemap(node)
@@ -406,7 +386,7 @@
                 NULL_NODE
             } else {
                 let index_entry =
-                    self.index.get_entry(rev).ok_or_else(|| {
+                    self.index().get_entry(rev).ok_or_else(|| {
                         HgError::corrupted(
                             "revlog references a revision not in the index",
                         )
@@ -429,7 +409,21 @@
 
     /// Returns whether the given revision exists in this revlog.
     pub fn has_rev(&self, rev: UncheckedRevision) -> bool {
-        self.index.check_revision(rev).is_some()
+        self.index().check_revision(rev).is_some()
+    }
+
+    pub fn get_entry_for_checked_rev(
+        &self,
+        rev: Revision,
+    ) -> Result<RevlogEntry, RevlogError> {
+        self.inner.get_entry_for_checked_rev(rev)
+    }
+
+    pub fn get_entry(
+        &self,
+        rev: UncheckedRevision,
+    ) -> Result<RevlogEntry, RevlogError> {
+        self.inner.get_entry(rev)
     }
 
     /// Return the full data associated to a revision.
@@ -466,153 +460,93 @@
         expected: &[u8],
         data: &[u8],
     ) -> bool {
-        let e1 = self.index.get_entry(p1);
-        let h1 = match e1 {
-            Some(ref entry) => entry.hash(),
-            None => &NULL_NODE,
-        };
-        let e2 = self.index.get_entry(p2);
-        let h2 = match e2 {
-            Some(ref entry) => entry.hash(),
-            None => &NULL_NODE,
-        };
-
-        hash(data, h1.as_bytes(), h2.as_bytes()) == expected
+        self.inner.check_hash(p1, p2, expected, data)
     }
 
     /// Build the full data of a revision out its snapshot
     /// and its deltas.
-    fn build_data_from_deltas(
-        snapshot: RevlogEntry,
-        deltas: &[RevlogEntry],
-    ) -> Result<Vec<u8>, HgError> {
-        let snapshot = snapshot.data_chunk()?;
-        let deltas = deltas
-            .iter()
-            .rev()
-            .map(RevlogEntry::data_chunk)
-            .collect::<Result<Vec<_>, _>>()?;
-        let patches: Vec<_> =
-            deltas.iter().map(|d| patch::PatchList::new(d)).collect();
-        let patch = patch::fold_patch_lists(&patches);
-        Ok(patch.apply(&snapshot))
-    }
-
-    /// Return the revlog data.
-    fn data(&self) -> &[u8] {
-        match &self.data_bytes {
-            Some(data_bytes) => data_bytes,
-            None => panic!(
-                "forgot to load the data or trying to access inline data"
-            ),
-        }
-    }
-
-    pub fn make_null_entry(&self) -> RevlogEntry {
-        RevlogEntry {
-            revlog: self,
-            rev: NULL_REVISION,
-            bytes: b"",
-            compressed_len: 0,
-            uncompressed_len: 0,
-            base_rev_or_base_of_delta_chain: None,
-            p1: NULL_REVISION,
-            p2: NULL_REVISION,
-            flags: NULL_REVLOG_ENTRY_FLAGS,
-            hash: NULL_NODE,
-        }
-    }
-
-    fn get_entry_for_checked_rev(
-        &self,
-        rev: Revision,
-    ) -> Result<RevlogEntry, RevlogError> {
-        if rev == NULL_REVISION {
-            return Ok(self.make_null_entry());
+    fn build_data_from_deltas<T>(
+        buffer: &mut dyn RevisionBuffer<Target = T>,
+        snapshot: &[u8],
+        deltas: &[impl AsRef<[u8]>],
+    ) -> Result<(), RevlogError> {
+        if deltas.is_empty() {
+            buffer.extend_from_slice(snapshot);
+            return Ok(());
         }
-        let index_entry = self
-            .index
-            .get_entry(rev)
-            .ok_or(RevlogError::InvalidRevision(rev.to_string()))?;
-        let offset = index_entry.offset();
-        let start = if self.index.is_inline() {
-            offset + ((rev.0 as usize + 1) * INDEX_ENTRY_SIZE)
-        } else {
-            offset
-        };
-        let end = start + index_entry.compressed_len() as usize;
-        let data = if self.index.is_inline() {
-            self.index.data(start, end)
-        } else {
-            &self.data()[start..end]
-        };
-        let base_rev = self
-            .index
-            .check_revision(index_entry.base_revision_or_base_of_delta_chain())
-            .ok_or_else(|| {
-                RevlogError::corrupted(format!(
-                    "base revision for rev {} is invalid",
-                    rev
-                ))
-            })?;
-        let p1 =
-            self.index.check_revision(index_entry.p1()).ok_or_else(|| {
-                RevlogError::corrupted(format!(
-                    "p1 for rev {} is invalid",
-                    rev
-                ))
-            })?;
-        let p2 =
-            self.index.check_revision(index_entry.p2()).ok_or_else(|| {
-                RevlogError::corrupted(format!(
-                    "p2 for rev {} is invalid",
-                    rev
-                ))
-            })?;
-        let entry = RevlogEntry {
-            revlog: self,
-            rev,
-            bytes: data,
-            compressed_len: index_entry.compressed_len(),
-            uncompressed_len: index_entry.uncompressed_len(),
-            base_rev_or_base_of_delta_chain: if base_rev == rev {
+        let patches: Result<Vec<_>, _> = deltas
+            .iter()
+            .map(|d| patch::PatchList::new(d.as_ref()))
+            .collect();
+        let patch = patch::fold_patch_lists(&patches?);
+        patch.apply(buffer, snapshot);
+        Ok(())
+    }
+}
+
+type IndexData = Box<dyn Deref<Target = [u8]> + Send + Sync>;
+
+/// Open the revlog [`Index`] at `index_path`, through the `store_vfs` and the
+/// given `options`. This controls whether (and how) we `mmap` the index file,
+/// and returns an empty buffer if the index does not exist on disk.
+/// This is only used when doing pure-Rust work, in Python contexts this is
+/// unused at the time of writing.
+pub fn open_index(
+    store_vfs: &impl Vfs,
+    index_path: &Path,
+    options: RevlogOpenOptions,
+) -> Result<Index, HgError> {
+    let buf: IndexData = match store_vfs.open_read(index_path) {
+        Ok(mut file) => {
+            let mut buf = if let Some(threshold) =
+                options.data_config.mmap_index_threshold
+            {
+                let size = store_vfs.file_size(&file)?;
+                if size >= threshold {
+                    // Safety is "enforced" by locks and assuming other
+                    // processes are well-behaved. If any misbehaving or
+                    // malicious process does touch the index, it could lead
+                    // to corruption. This is somewhat inherent to file-based
+                    // `mmap`, though some platforms have some ways of
+                    // mitigating.
+                    // TODO linux: set the immutable flag with `chattr(1)`?
+                    let mmap = unsafe { MmapOptions::new().map(&file) }
+                        .when_reading_file(index_path)?;
+                    Some(Box::new(mmap) as IndexData)
+                } else {
+                    None
+                }
+            } else {
                 None
-            } else {
-                Some(base_rev)
-            },
-            p1,
-            p2,
-            flags: index_entry.flags(),
-            hash: *index_entry.hash(),
-        };
-        Ok(entry)
-    }
+            };
 
-    /// Get an entry of the revlog.
-    pub fn get_entry(
-        &self,
-        rev: UncheckedRevision,
-    ) -> Result<RevlogEntry, RevlogError> {
-        if rev == NULL_REVISION.into() {
-            return Ok(self.make_null_entry());
+            if buf.is_none() {
+                let mut data = vec![];
+                file.read_to_end(&mut data).when_reading_file(index_path)?;
+                buf = Some(Box::new(data) as IndexData);
+            }
+            buf.unwrap()
         }
-        let rev = self.index.check_revision(rev).ok_or_else(|| {
-            RevlogError::corrupted(format!("rev {} is invalid", rev))
-        })?;
-        self.get_entry_for_checked_rev(rev)
-    }
+        Err(err) => match err {
+            HgError::IoError { error, context } => match error.kind() {
+                ErrorKind::NotFound => Box::<Vec<u8>>::default(),
+                _ => return Err(HgError::IoError { error, context }),
+            },
+            e => return Err(e),
+        },
+    };
+
+    let index = Index::new(buf, options.index_header())?;
+    Ok(index)
 }
 
 /// The revlog entry's bytes and the necessary informations to extract
 /// the entry's data.
 #[derive(Clone)]
 pub struct RevlogEntry<'revlog> {
-    revlog: &'revlog Revlog,
+    revlog: &'revlog InnerRevlog,
     rev: Revision,
-    bytes: &'revlog [u8],
-    compressed_len: u32,
     uncompressed_len: i32,
-    base_rev_or_base_of_delta_chain: Option<Revision>,
     p1: Revision,
     p2: Revision,
     flags: u16,
@@ -683,33 +617,47 @@
     }
 
     /// The data for this entry, after resolving deltas if any.
-    pub fn rawdata(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> {
-        let mut entry = self.clone();
-        let mut delta_chain = vec![];
+    /// Non-Python callers should probably call [`Self::data`] instead.
+    fn rawdata<G, T>(
+        &self,
+        stop_rev: Option<(Revision, &[u8])>,
+        with_buffer: G,
+    ) -> Result<(), RevlogError>
+    where
+        G: FnOnce(
+            usize,
+            &mut dyn FnMut(
+                &mut dyn RevisionBuffer<Target = T>,
+            ) -> Result<(), RevlogError>,
+        ) -> Result<(), RevlogError>,
+    {
+        let (delta_chain, stopped) = self
+            .revlog
+            .delta_chain(self.revision(), stop_rev.map(|(r, _)| r))?;
+        let target_size =
+            self.uncompressed_len().map(|raw_size| 4 * raw_size as u64);
 
-        // The meaning of `base_rev_or_base_of_delta_chain` depends on
-        // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
-        // `mercurial/revlogutils/constants.py` and the code in
-        // [_chaininfo] and in [index_deltachain].
-        let uses_generaldelta = self.revlog.index.uses_generaldelta();
-        while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
-            entry = if uses_generaldelta {
-                delta_chain.push(entry);
-                self.revlog.get_entry_for_checked_rev(base_rev)?
-            } else {
-                let base_rev = UncheckedRevision(entry.rev.0 - 1);
-                delta_chain.push(entry);
-                self.revlog.get_entry(base_rev)?
-            };
-        }
+        let deltas = self.revlog.chunks(delta_chain, target_size)?;
 
-        let data = if delta_chain.is_empty() {
-            entry.data_chunk()?
+        let (base_text, deltas) = if stopped {
+            (
+                stop_rev.as_ref().expect("last revision should be cached").1,
+                &deltas[..],
+            )
         } else {
-            Revlog::build_data_from_deltas(entry, &delta_chain)?.into()
+            let (buf, deltas) = deltas.split_at(1);
+            (buf[0].as_ref(), deltas)
         };
 
-        Ok(data)
+        let size = self
+            .uncompressed_len()
+            .map(|l| l as usize)
+            .unwrap_or(base_text.len());
+        with_buffer(size, &mut |buf| {
+            Revlog::build_data_from_deltas(buf, base_text, deltas)?;
+            Ok(())
+        })?;
+        Ok(())
     }
 
     fn check_data(
@@ -739,86 +687,23 @@
     }
 
     pub fn data(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> {
-        let data = self.rawdata()?;
+        // TODO figure out if there is ever a need for `Cow` here anymore.
+        let mut data = CoreRevisionBuffer::new();
         if self.rev == NULL_REVISION {
-            return Ok(data);
+            return Ok(data.finish().into());
         }
+        self.rawdata(None, |size, f| {
+            // Pre-allocate the expected size (received from the index)
+            data.resize(size);
+            // Actually fill the buffer
+            f(&mut data)?;
+            Ok(())
+        })?;
         if self.is_censored() {
             return Err(HgError::CensoredNodeError.into());
         }
-        self.check_data(data)
-    }
-
-    /// Extract the data contained in the entry.
-    /// This may be a delta. (See `is_delta`.)
-    fn data_chunk(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
-        if self.bytes.is_empty() {
-            return Ok(Cow::Borrowed(&[]));
-        }
-        match self.bytes[0] {
-            // Revision data is the entirety of the entry, including this
-            // header.
-            b'\0' => Ok(Cow::Borrowed(self.bytes)),
-            // Raw revision data follows.
-            b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
-            // zlib (RFC 1950) data.
-            b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
-            // zstd data.
-            b'\x28' => Ok(Cow::Owned(uncompressed_zstd_data(
-                self.bytes,
-                self.is_delta(),
-                self.uncompressed_len.max(0),
-            )?)),
-            // A proper new format should have had a repo/store requirement.
-            format_type => Err(corrupted(format!(
-                "unknown compression header '{}'",
-                format_type
-            ))),
-        }
+        self.check_data(data.finish().into())
     }
-
-    fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
-        let mut decoder = ZlibDecoder::new(self.bytes);
-        if self.is_delta() {
-            let mut buf = Vec::with_capacity(self.compressed_len as usize);
-            decoder
-                .read_to_end(&mut buf)
-                .map_err(|e| corrupted(e.to_string()))?;
-            Ok(buf)
-        } else {
-            let cap = self.uncompressed_len.max(0) as usize;
-            let mut buf = vec![0; cap];
-            decoder
-                .read_exact(&mut buf)
-                .map_err(|e| corrupted(e.to_string()))?;
-            Ok(buf)
-        }
-    }
-
-    /// Tell if the entry is a snapshot or a delta
-    /// (influences on decompression).
-    fn is_delta(&self) -> bool {
-        self.base_rev_or_base_of_delta_chain.is_some()
-    }
-}
-
-/// Calculate the hash of a revision given its data and its parents.
-fn hash(
-    data: &[u8],
-    p1_hash: &[u8],
-    p2_hash: &[u8],
-) -> [u8; NODE_BYTES_LENGTH] {
-    let mut hasher = Sha1::new();
-    let (a, b) = (p1_hash, p2_hash);
-    if a > b {
-        hasher.update(b);
-        hasher.update(a);
-    } else {
-        hasher.update(a);
-        hasher.update(b);
-    }
-    hasher.update(data);
-    *hasher.finalize().as_ref()
 }
 
 #[cfg(test)]

--- a/rust/hg-core/src/revlog/patch.rs	Thu Oct 10 10:38:35 2024 +0200
+++ b/rust/hg-core/src/revlog/patch.rs	Thu Oct 10 10:34:51 2024 +0200
@@ -1,5 +1,9 @@
 use byteorder::{BigEndian, ByteOrder};
 
+use crate::RevlogError;
+
+use super::inner_revlog::RevisionBuffer;
+
 /// A chunk of data to insert, delete or replace in a patch
 ///
 /// A chunk is:
@@ -61,14 +65,16 @@
 
 impl<'a> PatchList<'a> {
     /// Create a `PatchList` from bytes.
-    pub fn new(data: &'a [u8]) -> Self {
+    pub fn new(data: &'a [u8]) -> Result<Self, RevlogError> {
         let mut chunks = vec![];
         let mut data = data;
         while !data.is_empty() {
             let start = BigEndian::read_u32(&data[0..]);
             let end = BigEndian::read_u32(&data[4..]);
             let len = BigEndian::read_u32(&data[8..]);
-            assert!(start <= end);
+            if start > end {
+                return Err(RevlogError::corrupted("patch cannot be decoded"));
+            }
             chunks.push(Chunk {
                 start,
                 end,
@@ -76,29 +82,23 @@
             });
             data = &data[12 + (len as usize)..];
         }
-        PatchList { chunks }
-    }
-
-    /// Return the final length of data after patching
-    /// given its initial length .
-    fn size(&self, initial_size: i32) -> i32 {
-        self.chunks
-            .iter()
-            .fold(initial_size, |acc, chunk| acc + chunk.len_diff())
+        Ok(PatchList { chunks })
     }
 
     /// Apply the patch to some data.
-    pub fn apply(&self, initial: &[u8]) -> Vec<u8> {
+    pub fn apply<T>(
+        &self,
+        buffer: &mut dyn RevisionBuffer<Target = T>,
+        initial: &[u8],
+    ) {
         let mut last: usize = 0;
-        let mut vec =
-            Vec::with_capacity(self.size(initial.len() as i32) as usize);
         for Chunk { start, end, data } in self.chunks.iter() {
-            vec.extend(&initial[last..(*start as usize)]);
-            vec.extend(data.iter());
+            let slice = &initial[last..(*start as usize)];
+            buffer.extend_from_slice(slice);
+            buffer.extend_from_slice(data);
             last = *end as usize;
         }
-        vec.extend(&initial[last..]);
-        vec
+        buffer.extend_from_slice(&initial[last..]);
     }
 
     /// Combine two patch lists into a single patch list.
@@ -229,6 +229,8 @@
 
 #[cfg(test)]
 mod tests {
+    use crate::inner_revlog::CoreRevisionBuffer;
+
     use super::*;
 
     struct PatchDataBuilder {
@@ -264,17 +266,18 @@
         let data = vec![0u8, 0u8, 0u8];
         let mut patch1_data = PatchDataBuilder::new();
         patch1_data.replace(0, 1, &[1, 2]);
-        let mut patch1 = PatchList::new(patch1_data.get());
+        let mut patch1 = PatchList::new(patch1_data.get()).unwrap();
 
         let mut patch2_data = PatchDataBuilder::new();
         patch2_data.replace(2, 4, &[3, 4]);
-        let mut patch2 = PatchList::new(patch2_data.get());
+        let mut patch2 = PatchList::new(patch2_data.get()).unwrap();
 
         let patch = patch1.combine(&mut patch2);
 
-        let result = patch.apply(&data);
+        let mut buffer = CoreRevisionBuffer::new();
+        patch.apply(&mut buffer, &data);
 
-        assert_eq!(result, vec![1u8, 2, 3, 4]);
+        assert_eq!(buffer.finish(), vec![1u8, 2, 3, 4]);
     }
 
     #[test]
@@ -282,17 +285,18 @@
         let data = vec![0u8, 0u8, 0u8];
         let mut patch1_data = PatchDataBuilder::new();
         patch1_data.replace(2, 3, &[3]);
-        let mut patch1 = PatchList::new(patch1_data.get());
+        let mut patch1 = PatchList::new(patch1_data.get()).unwrap();
 
         let mut patch2_data = PatchDataBuilder::new();
         patch2_data.replace(1, 2, &[1, 2]);
-        let mut patch2 = PatchList::new(patch2_data.get());
+        let mut patch2 = PatchList::new(patch2_data.get()).unwrap();
 
         let patch = patch1.combine(&mut patch2);
 
-        let result = patch.apply(&data);
+        let mut buffer = CoreRevisionBuffer::new();
+        patch.apply(&mut buffer, &data);
 
-        assert_eq!(result, vec![0u8, 1, 2, 3]);
+        assert_eq!(buffer.finish(), vec![0u8, 1, 2, 3]);
     }
 
     #[test]
@@ -300,17 +304,18 @@
         let data = vec![0u8, 0, 0];
         let mut patch1_data = PatchDataBuilder::new();
         patch1_data.replace(1, 2, &[3, 4]);
-        let mut patch1 = PatchList::new(patch1_data.get());
+        let mut patch1 = PatchList::new(patch1_data.get()).unwrap();
 
         let mut patch2_data = PatchDataBuilder::new();
         patch2_data.replace(1, 4, &[1, 2, 3]);
-        let mut patch2 = PatchList::new(patch2_data.get());
+        let mut patch2 = PatchList::new(patch2_data.get()).unwrap();
 
         let patch = patch1.combine(&mut patch2);
 
-        let result = patch.apply(&data);
+        let mut buffer = CoreRevisionBuffer::new();
+        patch.apply(&mut buffer, &data);
 
-        assert_eq!(result, vec![0u8, 1, 2, 3]);
+        assert_eq!(buffer.finish(), vec![0u8, 1, 2, 3]);
     }
 
     #[test]
@@ -318,17 +323,18 @@
         let data = vec![0u8, 0, 0];
         let mut patch1_data = PatchDataBuilder::new();
         patch1_data.replace(0, 1, &[1, 3]);
-        let mut patch1 = PatchList::new(patch1_data.get());
+        let mut patch1 = PatchList::new(patch1_data.get()).unwrap();
 
         let mut patch2_data = PatchDataBuilder::new();
         patch2_data.replace(1, 4, &[2, 3, 4]);
-        let mut patch2 = PatchList::new(patch2_data.get());
+        let mut patch2 = PatchList::new(patch2_data.get()).unwrap();
 
         let patch = patch1.combine(&mut patch2);
 
-        let result = patch.apply(&data);
+        let mut buffer = CoreRevisionBuffer::new();
+        patch.apply(&mut buffer, &data);
 
-        assert_eq!(result, vec![1u8, 2, 3, 4]);
+        assert_eq!(buffer.finish(), vec![1u8, 2, 3, 4]);
     }
 
     #[test]
@@ -336,17 +342,18 @@
         let data = vec![0u8, 0, 0];
         let mut patch1_data = PatchDataBuilder::new();
         patch1_data.replace(1, 3, &[1, 3, 4]);
-        let mut patch1 = PatchList::new(patch1_data.get());
+        let mut patch1 = PatchList::new(patch1_data.get()).unwrap();
 
         let mut patch2_data = PatchDataBuilder::new();
         patch2_data.replace(0, 2, &[1, 2]);
-        let mut patch2 = PatchList::new(patch2_data.get());
+        let mut patch2 = PatchList::new(patch2_data.get()).unwrap();
 
         let patch = patch1.combine(&mut patch2);
 
-        let result = patch.apply(&data);
+        let mut buffer = CoreRevisionBuffer::new();
+        patch.apply(&mut buffer, &data);
 
-        assert_eq!(result, vec![1u8, 2, 3, 4]);
+        assert_eq!(buffer.finish(), vec![1u8, 2, 3, 4]);
     }
 
     #[test]
@@ -354,16 +361,17 @@
         let data = vec![0u8, 0, 0];
         let mut patch1_data = PatchDataBuilder::new();
         patch1_data.replace(0, 3, &[1, 3, 3, 4]);
-        let mut patch1 = PatchList::new(patch1_data.get());
+        let mut patch1 = PatchList::new(patch1_data.get()).unwrap();
 
         let mut patch2_data = PatchDataBuilder::new();
         patch2_data.replace(1, 3, &[2, 3]);
-        let mut patch2 = PatchList::new(patch2_data.get());
+        let mut patch2 = PatchList::new(patch2_data.get()).unwrap();
 
         let patch = patch1.combine(&mut patch2);
 
-        let result = patch.apply(&data);
+        let mut buffer = CoreRevisionBuffer::new();
+        patch.apply(&mut buffer, &data);
 
-        assert_eq!(result, vec![1u8, 2, 3, 4]);
+        assert_eq!(buffer.finish(), vec![1u8, 2, 3, 4]);
     }
 }

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rust/hg-core/src/transaction.rs	Thu Oct 10 10:34:51 2024 +0200
@@ -0,0 +1,11 @@
+use std::path::Path;
+
+/// The Mercurial transaction system is based on the append-only nature
+/// of its core files. This exposes the necessary methods to safely write to
+/// the different core datastructures.
+pub trait Transaction {
+    /// Record the state of an append-only file before update
+    fn add(&mut self, file: impl AsRef<Path>, offset: usize);
+
+    // TODO the rest of the methods once we do more in Rust.
+}

author	Raphaël Gomès <rgomes@octobus.net>
	Thu, 10 Oct 2024 10:34:51 +0200
changeset 52164	e01e84e5e426
parent 52163	44823c5011fe
child 52165	6f94dc3cf8cf

rust/Cargo.lock		file \| annotate \| diff \| comparison \| revisions
rust/hg-core/Cargo.toml		file \| annotate \| diff \| comparison \| revisions
rust/hg-core/src/lib.rs		file \| annotate \| diff \| comparison \| revisions
rust/hg-core/src/revlog/changelog.rs		file \| annotate \| diff \| comparison \| revisions
rust/hg-core/src/revlog/index.rs		file \| annotate \| diff \| comparison \| revisions
rust/hg-core/src/revlog/inner_revlog.rs		file \| annotate \| diff \| comparison \| revisions
rust/hg-core/src/revlog/mod.rs		file \| annotate \| diff \| comparison \| revisions
rust/hg-core/src/revlog/patch.rs		file \| annotate \| diff \| comparison \| revisions
rust/hg-core/src/transaction.rs		file \| annotate \| diff \| comparison \| revisions