changeset 52160:039b7caeb4d9

rust-revlog: introduce an `options` module

This helps group all the relevant revlog options code and makes the `mod.rs` more readable.
author Raphaël Gomès <rgomes@octobus.net>
date Wed, 25 Sep 2024 18:24:15 +0200
parents 426696af24d3
children 46c68c0fe137
files rust/hg-core/src/operations/debugdata.rs rust/hg-core/src/repo.rs rust/hg-core/src/revlog/changelog.rs rust/hg-core/src/revlog/filelog.rs rust/hg-core/src/revlog/manifest.rs rust/hg-core/src/revlog/mod.rs rust/hg-core/src/revlog/options.rs rust/hg-core/src/update.rs rust/rhg/src/commands/status.rs
diffstat 9 files changed, 447 insertions(+), 421 deletions(-) [+]
line wrap: on
line diff
--- a/rust/hg-core/src/operations/debugdata.rs	Wed Sep 25 18:10:03 2024 +0200
+++ b/rust/hg-core/src/operations/debugdata.rs	Wed Sep 25 18:24:15 2024 +0200
@@ -7,6 +7,7 @@
 
 use crate::errors::HgError;
 use crate::repo::Repo;
+use crate::revlog::options::default_revlog_options;
 use crate::revlog::Revlog;
 use crate::{exit_codes, RevlogError, RevlogType};
 
@@ -31,7 +32,11 @@
         &repo.store_vfs(),
         index_file,
         None,
-        repo.default_revlog_options(RevlogType::Changelog)?,
+        default_revlog_options(
+            repo.config(),
+            repo.requirements(),
+            RevlogType::Changelog,
+        )?,
     )?;
     let rev =
         crate::revset::resolve_rev_number_or_hex_prefix(revset, &revlog)?;
--- a/rust/hg-core/src/repo.rs	Wed Sep 25 18:10:03 2024 +0200
+++ b/rust/hg-core/src/repo.rs	Wed Sep 25 18:24:15 2024 +0200
@@ -10,11 +10,8 @@
 use crate::errors::{HgError, IoResultExt};
 use crate::lock::{try_with_lock_no_wait, LockError};
 use crate::manifest::{Manifest, Manifestlog};
-use crate::requirements::{
-    CHANGELOGV2_REQUIREMENT, DIRSTATE_TRACKED_HINT_V1,
-    GENERALDELTA_REQUIREMENT, NODEMAP_REQUIREMENT, REVLOGV1_REQUIREMENT,
-    REVLOGV2_REQUIREMENT,
-};
+use crate::options::default_revlog_options;
+use crate::requirements::DIRSTATE_TRACKED_HINT_V1;
 use crate::revlog::filelog::Filelog;
 use crate::revlog::RevlogError;
 use crate::utils::debug::debug_wait_for_file_or_print;
@@ -22,11 +19,10 @@
 use crate::utils::hg_path::HgPath;
 use crate::utils::SliceExt;
 use crate::vfs::{is_dir, is_file, VfsImpl};
+use crate::DirstateError;
 use crate::{
-    exit_codes, requirements, NodePrefix, RevlogDataConfig, RevlogDeltaConfig,
-    RevlogFeatureConfig, RevlogType, RevlogVersionOptions, UncheckedRevision,
+    exit_codes, requirements, NodePrefix, RevlogType, UncheckedRevision,
 };
-use crate::{DirstateError, RevlogOpenOptions};
 use std::cell::{Ref, RefCell, RefMut};
 use std::collections::HashSet;
 use std::io::Seek;
@@ -577,7 +573,11 @@
     fn new_changelog(&self) -> Result<Changelog, HgError> {
         Changelog::open(
             &self.store_vfs(),
-            self.default_revlog_options(RevlogType::Changelog)?,
+            default_revlog_options(
+                self.config(),
+                self.requirements(),
+                RevlogType::Changelog,
+            )?,
         )
     }
 
@@ -592,7 +592,11 @@
     fn new_manifestlog(&self) -> Result<Manifestlog, HgError> {
         Manifestlog::open(
             &self.store_vfs(),
-            self.default_revlog_options(RevlogType::Manifestlog)?,
+            default_revlog_options(
+                self.config(),
+                self.requirements(),
+                RevlogType::Manifestlog,
+            )?,
         )
     }
 
@@ -642,7 +646,11 @@
         Filelog::open(
             self,
             path,
-            self.default_revlog_options(RevlogType::Filelog)?,
+            default_revlog_options(
+                self.config(),
+                self.requirements(),
+                RevlogType::Filelog,
+            )?,
         )
     }
     /// Write to disk any updates that were made through `dirstate_map_mut`.
@@ -792,50 +800,6 @@
         Ok(())
     }
 
-    pub fn default_revlog_options(
-        &self,
-        revlog_type: RevlogType,
-    ) -> Result<RevlogOpenOptions, HgError> {
-        let requirements = self.requirements();
-        let is_changelog = revlog_type == RevlogType::Changelog;
-        let version = if is_changelog
-            && requirements.contains(CHANGELOGV2_REQUIREMENT)
-        {
-            let compute_rank = self
-                .config()
-                .get_bool(b"experimental", b"changelog-v2.compute-rank")?;
-            RevlogVersionOptions::ChangelogV2 { compute_rank }
-        } else if requirements.contains(REVLOGV2_REQUIREMENT) {
-            RevlogVersionOptions::V2
-        } else if requirements.contains(REVLOGV1_REQUIREMENT) {
-            RevlogVersionOptions::V1 {
-                general_delta: requirements.contains(GENERALDELTA_REQUIREMENT),
-                inline: !is_changelog,
-            }
-        } else {
-            RevlogVersionOptions::V0
-        };
-        Ok(RevlogOpenOptions {
-            version,
-            // We don't need to dance around the slow path like in the Python
-            // implementation since we know we have access to the fast code.
-            use_nodemap: requirements.contains(NODEMAP_REQUIREMENT),
-            delta_config: RevlogDeltaConfig::new(
-                self.config(),
-                self.requirements(),
-                revlog_type,
-            )?,
-            data_config: RevlogDataConfig::new(
-                self.config(),
-                self.requirements(),
-            )?,
-            feature_config: RevlogFeatureConfig::new(
-                self.config(),
-                requirements,
-            )?,
-        })
-    }
-
     pub fn node(&self, rev: UncheckedRevision) -> Option<crate::Node> {
         self.changelog()
             .ok()
--- a/rust/hg-core/src/revlog/changelog.rs	Wed Sep 25 18:10:03 2024 +0200
+++ b/rust/hg-core/src/revlog/changelog.rs	Wed Sep 25 18:24:15 2024 +0200
@@ -14,7 +14,9 @@
 use crate::revlog::{Revlog, RevlogEntry, RevlogError};
 use crate::utils::hg_path::HgPath;
 use crate::vfs::VfsImpl;
-use crate::{Graph, GraphError, RevlogOpenOptions, UncheckedRevision};
+use crate::{Graph, GraphError, UncheckedRevision};
+
+use super::options::RevlogOpenOptions;
 
 /// A specialized `Revlog` to work with changelog data format.
 pub struct Changelog {
@@ -504,10 +506,7 @@
 mod tests {
     use super::*;
     use crate::vfs::VfsImpl;
-    use crate::{
-        RevlogDataConfig, RevlogDeltaConfig, RevlogFeatureConfig,
-        NULL_REVISION,
-    };
+    use crate::NULL_REVISION;
     use pretty_assertions::assert_eq;
 
     #[test]
@@ -571,18 +570,9 @@
         };
         std::fs::write(temp.path().join("foo.i"), b"").unwrap();
         std::fs::write(temp.path().join("foo.d"), b"").unwrap();
-        let revlog = Revlog::open(
-            &vfs,
-            "foo.i",
-            None,
-            RevlogOpenOptions::new(
-                false,
-                RevlogDataConfig::default(),
-                RevlogDeltaConfig::default(),
-                RevlogFeatureConfig::default(),
-            ),
-        )
-        .unwrap();
+        let revlog =
+            Revlog::open(&vfs, "foo.i", None, RevlogOpenOptions::default())
+                .unwrap();
 
         let changelog = Changelog { revlog };
         assert_eq!(
--- a/rust/hg-core/src/revlog/filelog.rs	Wed Sep 25 18:10:03 2024 +0200
+++ b/rust/hg-core/src/revlog/filelog.rs	Wed Sep 25 18:24:15 2024 +0200
@@ -11,10 +11,11 @@
 use crate::utils::SliceExt;
 use crate::Graph;
 use crate::GraphError;
-use crate::RevlogOpenOptions;
 use crate::UncheckedRevision;
 use std::path::PathBuf;
 
+use super::options::RevlogOpenOptions;
+
 /// A specialized `Revlog` to work with file data logs.
 pub struct Filelog {
     /// The generic `revlog` format.
--- a/rust/hg-core/src/revlog/manifest.rs	Wed Sep 25 18:10:03 2024 +0200
+++ b/rust/hg-core/src/revlog/manifest.rs	Wed Sep 25 18:24:15 2024 +0200
@@ -6,9 +6,9 @@
 use crate::utils::hg_path::HgPath;
 use crate::utils::SliceExt;
 use crate::vfs::VfsImpl;
-use crate::{
-    Graph, GraphError, Revision, RevlogOpenOptions, UncheckedRevision,
-};
+use crate::{Graph, GraphError, Revision, UncheckedRevision};
+
+use super::options::RevlogOpenOptions;
 
 /// A specialized `Revlog` to work with `manifest` data format.
 pub struct Manifestlog {
--- a/rust/hg-core/src/revlog/mod.rs	Wed Sep 25 18:10:03 2024 +0200
+++ b/rust/hg-core/src/revlog/mod.rs	Wed Sep 25 18:24:15 2024 +0200
@@ -9,18 +9,19 @@
 pub mod nodemap;
 mod nodemap_docket;
 pub mod path_encode;
-use compression::{uncompressed_zstd_data, CompressionConfig};
+use compression::uncompressed_zstd_data;
 pub use node::{FromHexError, Node, NodePrefix};
+use options::RevlogOpenOptions;
 pub mod changelog;
 pub mod compression;
 pub mod file_io;
 pub mod filelog;
 pub mod index;
 pub mod manifest;
+pub mod options;
 pub mod patch;
 
 use std::borrow::Cow;
-use std::collections::HashSet;
 use std::io::Read;
 use std::ops::Deref;
 use std::path::Path;
@@ -33,12 +34,8 @@
 use super::index::Index;
 use super::index::INDEX_ENTRY_SIZE;
 use super::nodemap::{NodeMap, NodeMapError};
-use crate::config::{Config, ResourceProfileValue};
 use crate::errors::HgError;
 use crate::exit_codes;
-use crate::requirements::{
-    GENERALDELTA_REQUIREMENT, NARROW_REQUIREMENT, SPARSEREVLOG_REQUIREMENT,
-};
 use crate::vfs::VfsImpl;
 
 /// As noted in revlog.c, revision numbers are actually encoded in
@@ -259,255 +256,6 @@
     }
 }
 
-#[derive(Debug, Clone, Copy, PartialEq)]
-/// Holds configuration values about how the revlog data is read
-pub struct RevlogDataConfig {
-    /// Should we try to open the "pending" version of the revlog
-    pub try_pending: bool,
-    /// Should we try to open the "split" version of the revlog
-    pub try_split: bool,
-    /// When True, `indexfile` should be opened with `checkambig=True` at
-    /// writing time, to avoid file stat ambiguity
-    pub check_ambig: bool,
-    /// If true, use mmap instead of reading to deal with large indexes
-    pub mmap_large_index: bool,
-    /// How much data is considered large
-    pub mmap_index_threshold: Option<u64>,
-    /// How much data to read and cache into the raw revlog data cache
-    pub chunk_cache_size: u64,
-    /// The size of the uncompressed cache compared to the largest revision
-    /// seen
-    pub uncompressed_cache_factor: Option<f64>,
-    /// The number of chunks cached
-    pub uncompressed_cache_count: Option<u64>,
-    /// Allow sparse reading of the revlog data
-    pub with_sparse_read: bool,
-    /// Minimal density of a sparse read chunk
-    pub sr_density_threshold: f64,
-    /// Minimal size of the data we skip when performing sparse reads
-    pub sr_min_gap_size: u64,
-    /// Whether deltas are encoded against arbitrary bases
-    pub general_delta: bool,
-}
-
-impl RevlogDataConfig {
-    pub fn new(
-        config: &Config,
-        requirements: &HashSet<String>,
-    ) -> Result<Self, HgError> {
-        let mut data_config = Self::default();
-        if let Some(chunk_cache_size) =
-            config.get_byte_size(b"format", b"chunkcachesize")?
-        {
-            data_config.chunk_cache_size = chunk_cache_size;
-        }
-
-        let memory_profile = config.get_resource_profile(Some("memory"));
-        if memory_profile.value >= ResourceProfileValue::Medium {
-            data_config.uncompressed_cache_count = Some(10_000);
-            data_config.uncompressed_cache_factor = Some(4.0);
-            if memory_profile.value >= ResourceProfileValue::High {
-                data_config.uncompressed_cache_factor = Some(10.0)
-            }
-        }
-
-        if let Some(mmap_index_threshold) = config
-            .get_byte_size(b"storage", b"revlog.mmap.index:size-threshold")?
-        {
-            data_config.mmap_index_threshold = Some(mmap_index_threshold);
-        }
-
-        let with_sparse_read =
-            config.get_bool(b"experimental", b"sparse-read")?;
-        if let Some(sr_density_threshold) = config
-            .get_f64(b"experimental", b"sparse-read.density-threshold")?
-        {
-            data_config.sr_density_threshold = sr_density_threshold;
-        }
-        data_config.with_sparse_read = with_sparse_read;
-        if let Some(sr_min_gap_size) = config
-            .get_byte_size(b"experimental", b"sparse-read.min-gap-size")?
-        {
-            data_config.sr_min_gap_size = sr_min_gap_size;
-        }
-
-        data_config.with_sparse_read =
-            requirements.contains(SPARSEREVLOG_REQUIREMENT);
-
-        Ok(data_config)
-    }
-}
-
-impl Default for RevlogDataConfig {
-    fn default() -> Self {
-        Self {
-            chunk_cache_size: 65536,
-            sr_density_threshold: 0.50,
-            sr_min_gap_size: 262144,
-            try_pending: Default::default(),
-            try_split: Default::default(),
-            check_ambig: Default::default(),
-            mmap_large_index: Default::default(),
-            mmap_index_threshold: Default::default(),
-            uncompressed_cache_factor: Default::default(),
-            uncompressed_cache_count: Default::default(),
-            with_sparse_read: Default::default(),
-            general_delta: Default::default(),
-        }
-    }
-}
-
-#[derive(Debug, Clone, Copy, PartialEq)]
-/// Holds configuration values about how new deltas are computed.
-///
-/// Some attributes are duplicated from [`RevlogDataConfig`] to help having
-/// each object self contained.
-pub struct RevlogDeltaConfig {
-    /// Whether deltas can be encoded against arbitrary bases
-    pub general_delta: bool,
-    /// Allow sparse writing of the revlog data
-    pub sparse_revlog: bool,
-    /// Maximum length of a delta chain
-    pub max_chain_len: Option<u64>,
-    /// Maximum distance between a delta chain's start and end
-    pub max_deltachain_span: Option<u64>,
-    /// If `upper_bound_comp` is not None, this is the expected maximal
-    /// gain from compression for the data content
-    pub upper_bound_comp: Option<f64>,
-    /// Should we try a delta against both parents
-    pub delta_both_parents: bool,
-    /// Test delta base candidate groups by chunks of this maximal size
-    pub candidate_group_chunk_size: u64,
-    /// Should we display debug information about delta computation
-    pub debug_delta: bool,
-    /// Trust incoming deltas by default
-    pub lazy_delta: bool,
-    /// Trust the base of incoming deltas by default
-    pub lazy_delta_base: bool,
-}
-impl RevlogDeltaConfig {
-    pub fn new(
-        config: &Config,
-        requirements: &HashSet<String>,
-        revlog_type: RevlogType,
-    ) -> Result<Self, HgError> {
-        let mut delta_config = Self {
-            delta_both_parents: config
-                .get_option_no_default(
-                    b"storage",
-                    b"revlog.optimize-delta-parent-choice",
-                )?
-                .unwrap_or(true),
-            candidate_group_chunk_size: config
-                .get_u64(
-                    b"storage",
-                    b"revlog.delta-parent-search.candidate-group-chunk-size",
-                )?
-                .unwrap_or_default(),
-            ..Default::default()
-        };
-
-        delta_config.debug_delta =
-            config.get_bool(b"debug", b"revlog.debug-delta")?;
-
-        delta_config.general_delta =
-            requirements.contains(GENERALDELTA_REQUIREMENT);
-
-        let lazy_delta =
-            config.get_bool(b"storage", b"revlog.reuse-external-delta")?;
-
-        if revlog_type == RevlogType::Manifestlog {
-            // upper bound of what we expect from compression
-            // (real life value seems to be 3)
-            delta_config.upper_bound_comp = Some(3.0)
-        }
-
-        let mut lazy_delta_base = false;
-        if lazy_delta {
-            lazy_delta_base = match config.get_option_no_default(
-                b"storage",
-                b"revlog.reuse-external-delta-parent",
-            )? {
-                Some(base) => base,
-                None => config.get_bool(b"format", b"generaldelta")?,
-            };
-        }
-        delta_config.lazy_delta = lazy_delta;
-        delta_config.lazy_delta_base = lazy_delta_base;
-
-        delta_config.max_deltachain_span =
-            match config.get_i64(b"experimental", b"maxdeltachainspan")? {
-                Some(span) => {
-                    if span < 0 {
-                        None
-                    } else {
-                        Some(span as u64)
-                    }
-                }
-                None => None,
-            };
-
-        delta_config.sparse_revlog =
-            requirements.contains(SPARSEREVLOG_REQUIREMENT);
-
-        delta_config.max_chain_len =
-            config.get_byte_size_no_default(b"format", b"maxchainlen")?;
-
-        Ok(delta_config)
-    }
-}
-
-impl Default for RevlogDeltaConfig {
-    fn default() -> Self {
-        Self {
-            delta_both_parents: true,
-            lazy_delta: true,
-            general_delta: Default::default(),
-            sparse_revlog: Default::default(),
-            max_chain_len: Default::default(),
-            max_deltachain_span: Default::default(),
-            upper_bound_comp: Default::default(),
-            candidate_group_chunk_size: Default::default(),
-            debug_delta: Default::default(),
-            lazy_delta_base: Default::default(),
-        }
-    }
-}
-
-#[derive(Debug, Default, Clone, Copy, PartialEq)]
-/// Holds configuration values about the available revlog features
-pub struct RevlogFeatureConfig {
-    /// The compression engine and its options
-    pub compression_engine: CompressionConfig,
-    /// Can we use censor on this revlog
-    pub censorable: bool,
-    /// Does this revlog use the "side data" feature
-    pub has_side_data: bool,
-    /// Might remove this configuration once the rank computation has no
-    /// impact
-    pub compute_rank: bool,
-    /// Parent order is supposed to be semantically irrelevant, so we
-    /// normally re-sort parents to ensure that the first parent is non-null,
-    /// if there is a non-null parent at all.
-    /// filelog abuses the parent order as a flag to mark some instances of
-    /// meta-encoded files, so allow it to disable this behavior.
-    pub canonical_parent_order: bool,
-    /// Can ellipsis commit be used
-    pub enable_ellipsis: bool,
-}
-impl RevlogFeatureConfig {
-    pub fn new(
-        config: &Config,
-        requirements: &HashSet<String>,
-    ) -> Result<Self, HgError> {
-        Ok(Self {
-            compression_engine: CompressionConfig::new(config, requirements)?,
-            enable_ellipsis: requirements.contains(NARROW_REQUIREMENT),
-            ..Default::default()
-        })
-    }
-}
-
 /// Read only implementation of revlog.
 pub struct Revlog {
     /// When index and data are not interleaved: bytes of the revlog index.
@@ -526,90 +274,6 @@
     }
 }
 
-#[derive(Debug, Copy, Clone, PartialEq)]
-pub enum RevlogVersionOptions {
-    V0,
-    V1 { general_delta: bool, inline: bool },
-    V2,
-    ChangelogV2 { compute_rank: bool },
-}
-
-/// Options to govern how a revlog should be opened, usually from the
-/// repository configuration or requirements.
-#[derive(Debug, Copy, Clone)]
-pub struct RevlogOpenOptions {
-    /// The revlog version, along with any option specific to this version
-    pub version: RevlogVersionOptions,
-    /// Whether the revlog uses a persistent nodemap.
-    pub use_nodemap: bool,
-    pub delta_config: RevlogDeltaConfig,
-    pub data_config: RevlogDataConfig,
-    pub feature_config: RevlogFeatureConfig,
-}
-
-#[cfg(test)]
-impl Default for RevlogOpenOptions {
-    fn default() -> Self {
-        Self {
-            version: RevlogVersionOptions::V1 {
-                general_delta: true,
-                inline: false,
-            },
-            use_nodemap: true,
-            data_config: Default::default(),
-            delta_config: Default::default(),
-            feature_config: Default::default(),
-        }
-    }
-}
-
-impl RevlogOpenOptions {
-    pub fn new(
-        inline: bool,
-        data_config: RevlogDataConfig,
-        delta_config: RevlogDeltaConfig,
-        feature_config: RevlogFeatureConfig,
-    ) -> Self {
-        Self {
-            version: RevlogVersionOptions::V1 {
-                general_delta: data_config.general_delta,
-                inline,
-            },
-            use_nodemap: false,
-            data_config,
-            delta_config,
-            feature_config,
-        }
-    }
-
-    pub fn index_header(&self) -> index::IndexHeader {
-        index::IndexHeader {
-            header_bytes: match self.version {
-                RevlogVersionOptions::V0 => [0, 0, 0, 0],
-                RevlogVersionOptions::V1 {
-                    general_delta,
-                    inline,
-                } => [
-                    0,
-                    if general_delta && inline {
-                        3
-                    } else if general_delta {
-                        2
-                    } else {
-                        u8::from(inline)
-                    },
-                    0,
-                    1,
-                ],
-                RevlogVersionOptions::V2 => 0xDEADu32.to_be_bytes(),
-                RevlogVersionOptions::ChangelogV2 { compute_rank: _ } => {
-                    0xD34Du32.to_be_bytes()
-                }
-            },
-        }
-    }
-}
-
 impl Revlog {
     /// Open a revlog index file.
     ///
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rust/hg-core/src/revlog/options.rs	Wed Sep 25 18:24:15 2024 +0200
@@ -0,0 +1,393 @@
+//! Helpers for the revlog config and opening options
+
+use std::collections::HashSet;
+
+use crate::{
+    config::{Config, ResourceProfileValue},
+    errors::HgError,
+    requirements::{
+        CHANGELOGV2_REQUIREMENT, GENERALDELTA_REQUIREMENT, NARROW_REQUIREMENT,
+        NODEMAP_REQUIREMENT, REVLOGV1_REQUIREMENT, REVLOGV2_REQUIREMENT,
+        SPARSEREVLOG_REQUIREMENT,
+    },
+};
+
+use super::{compression::CompressionConfig, RevlogType};
+
+const DEFAULT_CHUNK_CACHE_SIZE: u64 = 65536;
+const DEFAULT_SPARSE_READ_DENSITY_THRESHOLD: f64 = 0.50;
+const DEFAULT_SPARSE_READ_MIN_GAP_SIZE: u64 = 262144;
+
+/// The known revlog versions and their options
+#[derive(Debug, Copy, Clone, PartialEq)]
+pub enum RevlogVersionOptions {
+    V0,
+    V1 { general_delta: bool, inline: bool },
+    V2,
+    ChangelogV2 { compute_rank: bool },
+}
+
+/// Options to govern how a revlog should be opened, usually from the
+/// repository configuration or requirements.
+#[derive(Debug, Copy, Clone)]
+pub struct RevlogOpenOptions {
+    /// The revlog version, along with any option specific to this version
+    pub version: RevlogVersionOptions,
+    /// Whether the revlog uses a persistent nodemap.
+    pub use_nodemap: bool,
+    pub delta_config: RevlogDeltaConfig,
+    pub data_config: RevlogDataConfig,
+    pub feature_config: RevlogFeatureConfig,
+}
+
+#[cfg(test)]
+impl Default for RevlogOpenOptions {
+    fn default() -> Self {
+        Self {
+            version: RevlogVersionOptions::V1 {
+                general_delta: true,
+                inline: false,
+            },
+            use_nodemap: true,
+            data_config: Default::default(),
+            delta_config: Default::default(),
+            feature_config: Default::default(),
+        }
+    }
+}
+
+impl RevlogOpenOptions {
+    pub fn new(
+        inline: bool,
+        data_config: RevlogDataConfig,
+        delta_config: RevlogDeltaConfig,
+        feature_config: RevlogFeatureConfig,
+    ) -> Self {
+        Self {
+            version: RevlogVersionOptions::V1 {
+                general_delta: data_config.general_delta,
+                inline,
+            },
+            use_nodemap: false,
+            data_config,
+            delta_config,
+            feature_config,
+        }
+    }
+
+    pub fn index_header(&self) -> super::index::IndexHeader {
+        super::index::IndexHeader {
+            header_bytes: match self.version {
+                RevlogVersionOptions::V0 => [0, 0, 0, 0],
+                RevlogVersionOptions::V1 {
+                    general_delta,
+                    inline,
+                } => [
+                    0,
+                    if general_delta && inline {
+                        3
+                    } else if general_delta {
+                        2
+                    } else {
+                        u8::from(inline)
+                    },
+                    0,
+                    1,
+                ],
+                RevlogVersionOptions::V2 => 0xDEADu32.to_be_bytes(),
+                RevlogVersionOptions::ChangelogV2 { compute_rank: _ } => {
+                    0xD34Du32.to_be_bytes()
+                }
+            },
+        }
+    }
+}
+
+#[derive(Debug, Clone, Copy, PartialEq)]
+/// Holds configuration values about how the revlog data is read
+pub struct RevlogDataConfig {
+    /// Should we try to open the "pending" version of the revlog
+    pub try_pending: bool,
+    /// Should we try to open the "split" version of the revlog
+    pub try_split: bool,
+    /// When True, `indexfile` should be opened with `checkambig=True` at
+    /// writing time, to avoid file stat ambiguity
+    pub check_ambig: bool,
+    /// If true, use mmap instead of reading to deal with large indexes
+    pub mmap_large_index: bool,
+    /// How much data is considered large
+    pub mmap_index_threshold: Option<u64>,
+    /// How much data to read and cache into the raw revlog data cache
+    pub chunk_cache_size: u64,
+    /// The size of the uncompressed cache compared to the largest revision
+    /// seen
+    pub uncompressed_cache_factor: Option<f64>,
+    /// The number of chunks cached
+    pub uncompressed_cache_count: Option<u64>,
+    /// Allow sparse reading of the revlog data
+    pub with_sparse_read: bool,
+    /// Minimal density of a sparse read chunk
+    pub sr_density_threshold: f64,
+    /// Minimal size of the data we skip when performing sparse reads
+    pub sr_min_gap_size: u64,
+    /// Whether deltas are encoded against arbitrary bases
+    pub general_delta: bool,
+}
+
+impl RevlogDataConfig {
+    pub fn new(
+        config: &Config,
+        requirements: &HashSet<String>,
+    ) -> Result<Self, HgError> {
+        let mut data_config = Self::default();
+        if let Some(chunk_cache_size) =
+            config.get_byte_size(b"format", b"chunkcachesize")?
+        {
+            data_config.chunk_cache_size = chunk_cache_size;
+        }
+
+        let memory_profile = config.get_resource_profile(Some("memory"));
+        if memory_profile.value >= ResourceProfileValue::Medium {
+            data_config.uncompressed_cache_count = Some(10_000);
+            data_config.uncompressed_cache_factor = Some(4.0);
+            if memory_profile.value >= ResourceProfileValue::High {
+                data_config.uncompressed_cache_factor = Some(10.0)
+            }
+        }
+
+        if let Some(mmap_index_threshold) = config
+            .get_byte_size(b"storage", b"revlog.mmap.index:size-threshold")?
+        {
+            data_config.mmap_index_threshold = Some(mmap_index_threshold);
+        }
+
+        let with_sparse_read =
+            config.get_bool(b"experimental", b"sparse-read")?;
+        if let Some(sr_density_threshold) = config
+            .get_f64(b"experimental", b"sparse-read.density-threshold")?
+        {
+            data_config.sr_density_threshold = sr_density_threshold;
+        }
+        data_config.with_sparse_read = with_sparse_read;
+        if let Some(sr_min_gap_size) = config
+            .get_byte_size(b"experimental", b"sparse-read.min-gap-size")?
+        {
+            data_config.sr_min_gap_size = sr_min_gap_size;
+        }
+
+        data_config.with_sparse_read =
+            requirements.contains(SPARSEREVLOG_REQUIREMENT);
+
+        Ok(data_config)
+    }
+}
+
+impl Default for RevlogDataConfig {
+    fn default() -> Self {
+        Self {
+            chunk_cache_size: DEFAULT_CHUNK_CACHE_SIZE,
+            sr_density_threshold: DEFAULT_SPARSE_READ_DENSITY_THRESHOLD,
+            sr_min_gap_size: DEFAULT_SPARSE_READ_MIN_GAP_SIZE,
+            try_pending: Default::default(),
+            try_split: Default::default(),
+            check_ambig: Default::default(),
+            mmap_large_index: Default::default(),
+            mmap_index_threshold: Default::default(),
+            uncompressed_cache_factor: Default::default(),
+            uncompressed_cache_count: Default::default(),
+            with_sparse_read: Default::default(),
+            general_delta: Default::default(),
+        }
+    }
+}
+
+#[derive(Debug, Clone, Copy, PartialEq)]
+/// Holds configuration values about how new deltas are computed.
+///
+/// Some attributes are duplicated from [`RevlogDataConfig`] to help having
+/// each object self contained.
+pub struct RevlogDeltaConfig {
+    /// Whether deltas can be encoded against arbitrary bases
+    pub general_delta: bool,
+    /// Allow sparse writing of the revlog data
+    pub sparse_revlog: bool,
+    /// Maximum length of a delta chain
+    pub max_chain_len: Option<u64>,
+    /// Maximum distance between a delta chain's start and end
+    pub max_deltachain_span: Option<u64>,
+    /// If `upper_bound_comp` is not None, this is the expected maximal
+    /// gain from compression for the data content
+    pub upper_bound_comp: Option<f64>,
+    /// Should we try a delta against both parents
+    pub delta_both_parents: bool,
+    /// Test delta base candidate groups by chunks of this maximal size
+    pub candidate_group_chunk_size: u64,
+    /// Should we display debug information about delta computation
+    pub debug_delta: bool,
+    /// Trust incoming deltas by default
+    pub lazy_delta: bool,
+    /// Trust the base of incoming deltas by default
+    pub lazy_delta_base: bool,
+}
+
+impl RevlogDeltaConfig {
+    /// Build the delta configuration for a revlog of type `revlog_type`
+    /// from the user `config` and the repository `requirements`.
+    ///
+    /// The value starts from `Default::default()` and individual fields are
+    /// then overridden from config items and requirements.
+    ///
+    /// # Errors
+    ///
+    /// Any error returned by a `config` accessor is propagated unchanged.
+    pub fn new(
+        config: &Config,
+        requirements: &HashSet<String>,
+        revlog_type: RevlogType,
+    ) -> Result<Self, HgError> {
+        let mut delta_config = Self {
+            // Enabled unless the config item is explicitly set.
+            delta_both_parents: config
+                .get_option_no_default(
+                    b"storage",
+                    b"revlog.optimize-delta-parent-choice",
+                )?
+                .unwrap_or(true),
+            // Falls back to the type's default value when unset.
+            candidate_group_chunk_size: config
+                .get_u64(
+                    b"storage",
+                    b"revlog.delta-parent-search.candidate-group-chunk-size",
+                )?
+                .unwrap_or_default(),
+            ..Default::default()
+        };
+
+        delta_config.debug_delta =
+            config.get_bool(b"debug", b"revlog.debug-delta")?;
+
+        // General delta is driven by the repository requirements, not by
+        // the config.
+        delta_config.general_delta =
+            requirements.contains(GENERALDELTA_REQUIREMENT);
+
+        let lazy_delta =
+            config.get_bool(b"storage", b"revlog.reuse-external-delta")?;
+
+        if revlog_type == RevlogType::Manifestlog {
+            // upper bound of what we expect from compression
+            // (real life value seems to be 3)
+            delta_config.upper_bound_comp = Some(3.0)
+        }
+
+        // `lazy_delta_base` is only looked up when `lazy_delta` is enabled;
+        // otherwise it stays `false`. An explicit
+        // `storage.revlog.reuse-external-delta-parent` wins, and when it is
+        // unset the value of `format.generaldelta` is used.
+        // NOTE(review): the fallback uses `format.generaldelta` as-is; check
+        // against the Python revlog option resolution whether it is meant
+        // to be negated.
+        let mut lazy_delta_base = false;
+        if lazy_delta {
+            lazy_delta_base = match config.get_option_no_default(
+                b"storage",
+                b"revlog.reuse-external-delta-parent",
+            )? {
+                Some(base) => base,
+                None => config.get_bool(b"format", b"generaldelta")?,
+            };
+        }
+        delta_config.lazy_delta = lazy_delta;
+        delta_config.lazy_delta_base = lazy_delta_base;
+
+        // A negative span maps to `None`, exactly like an unset option, so
+        // the `as u64` cast below only ever sees non-negative values.
+        delta_config.max_deltachain_span =
+            match config.get_i64(b"experimental", b"maxdeltachainspan")? {
+                Some(span) => {
+                    if span < 0 {
+                        None
+                    } else {
+                        Some(span as u64)
+                    }
+                }
+                None => None,
+            };
+
+        // Sparse revlog is driven by the repository requirements as well.
+        delta_config.sparse_revlog =
+            requirements.contains(SPARSEREVLOG_REQUIREMENT);
+
+        delta_config.max_chain_len =
+            config.get_byte_size_no_default(b"format", b"maxchainlen")?;
+
+        Ok(delta_config)
+    }
+}
+
+impl Default for RevlogDeltaConfig {
+    /// Baseline delta configuration: `delta_both_parents` and `lazy_delta`
+    /// are enabled, every other field takes its type's default value.
+    ///
+    /// This impl is written by hand because two of the fields default to
+    /// `true`.
+    fn default() -> Self {
+        Self {
+            delta_both_parents: true,
+            lazy_delta: true,
+            general_delta: Default::default(),
+            sparse_revlog: Default::default(),
+            max_chain_len: Default::default(),
+            max_deltachain_span: Default::default(),
+            upper_bound_comp: Default::default(),
+            candidate_group_chunk_size: Default::default(),
+            debug_delta: Default::default(),
+            lazy_delta_base: Default::default(),
+        }
+    }
+}
+
+#[derive(Debug, Default, Clone, Copy, PartialEq)]
+/// Holds configuration values about the available revlog features.
+pub struct RevlogFeatureConfig {
+    /// The compression engine and its options.
+    pub compression_engine: CompressionConfig,
+    /// Whether censoring can be used on this revlog.
+    pub censorable: bool,
+    /// Whether this revlog uses the "side data" feature.
+    pub has_side_data: bool,
+    /// This configuration might be removed once the rank computation has no
+    /// impact.
+    pub compute_rank: bool,
+    /// Parent order is supposed to be semantically irrelevant, so we
+    /// normally re-sort parents to ensure that the first parent is non-null,
+    /// if there is a non-null parent at all.
+    /// Filelog abuses the parent order as a flag to mark some instances of
+    /// meta-encoded files, so allow it to disable this behavior.
+    pub canonical_parent_order: bool,
+    /// Whether ellipsis commits can be used.
+    pub enable_ellipsis: bool,
+}
+
+impl RevlogFeatureConfig {
+    /// Build the feature configuration from the user `config` and the
+    /// repository `requirements`.
+    ///
+    /// Only two fields are computed here: `compression_engine`, through
+    /// `CompressionConfig::new`, and `enable_ellipsis`, which is set when
+    /// `NARROW_REQUIREMENT` is present. Every other field keeps its
+    /// `Default` value.
+    ///
+    /// # Errors
+    ///
+    /// Propagates any error returned by `CompressionConfig::new`.
+    pub fn new(
+        config: &Config,
+        requirements: &HashSet<String>,
+    ) -> Result<Self, HgError> {
+        Ok(Self {
+            compression_engine: CompressionConfig::new(config, requirements)?,
+            enable_ellipsis: requirements.contains(NARROW_REQUIREMENT),
+            ..Default::default()
+        })
+    }
+}
+
+/// Return the default options for a revlog of `revlog_type` according to the
+/// current config and requirements.
+///
+/// The revlog version is picked from `requirements`, first match wins:
+/// changelog-v2 (only when `revlog_type` is the changelog), then revlog-v2,
+/// then revlog-v1, and finally v0 when none of these requirements is
+/// present.
+///
+/// # Errors
+///
+/// Propagates any error returned while reading `config` or while building
+/// the delta, data and feature configurations.
+pub fn default_revlog_options(
+    config: &Config,
+    requirements: &HashSet<String>,
+    revlog_type: RevlogType,
+) -> Result<RevlogOpenOptions, HgError> {
+    let is_changelog = revlog_type == RevlogType::Changelog;
+    let version =
+        if is_changelog && requirements.contains(CHANGELOGV2_REQUIREMENT) {
+            // The changelog-v2 requirement is only honored for the
+            // changelog itself; other revlog types fall through.
+            let compute_rank = config
+                .get_bool(b"experimental", b"changelog-v2.compute-rank")?;
+            RevlogVersionOptions::ChangelogV2 { compute_rank }
+        } else if requirements.contains(REVLOGV2_REQUIREMENT) {
+            RevlogVersionOptions::V2
+        } else if requirements.contains(REVLOGV1_REQUIREMENT) {
+            RevlogVersionOptions::V1 {
+                general_delta: requirements.contains(GENERALDELTA_REQUIREMENT),
+                // `inline` is set for every revlog type but the changelog.
+                inline: !is_changelog,
+            }
+        } else {
+            RevlogVersionOptions::V0
+        };
+    Ok(RevlogOpenOptions {
+        version,
+        // We don't need to dance around the slow path like in the Python
+        // implementation since we know we have access to the fast code.
+        use_nodemap: requirements.contains(NODEMAP_REQUIREMENT),
+        delta_config: RevlogDeltaConfig::new(
+            config,
+            requirements,
+            revlog_type,
+        )?,
+        data_config: RevlogDataConfig::new(config, requirements)?,
+        feature_config: RevlogFeatureConfig::new(config, requirements)?,
+    })
+}
--- a/rust/hg-core/src/update.rs	Wed Sep 25 18:10:03 2024 +0200
+++ b/rust/hg-core/src/update.rs	Wed Sep 25 18:24:15 2024 +0200
@@ -19,6 +19,7 @@
     narrow,
     node::NULL_NODE,
     operations::{list_rev_tracked_files, ExpandedManifestEntry},
+    options::{default_revlog_options, RevlogOpenOptions},
     progress::Progress,
     repo::Repo,
     sparse,
@@ -28,7 +29,7 @@
         path_auditor::PathAuditor,
     },
     vfs::{is_on_nfs_mount, VfsImpl},
-    DirstateParents, RevlogError, RevlogOpenOptions, UncheckedRevision,
+    DirstateParents, RevlogError, UncheckedRevision,
 };
 use crossbeam_channel::{Receiver, Sender};
 use rayon::prelude::*;
@@ -89,7 +90,11 @@
         return Ok(0);
     }
     let store_vfs = &repo.store_vfs();
-    let options = repo.default_revlog_options(crate::RevlogType::Filelog)?;
+    let options = default_revlog_options(
+        repo.config(),
+        repo.requirements(),
+        crate::RevlogType::Filelog,
+    )?;
     let (errors_sender, errors_receiver) = crossbeam_channel::unbounded();
     let (files_sender, files_receiver) = crossbeam_channel::unbounded();
     let working_directory_path = &repo.working_directory_path();
--- a/rust/rhg/src/commands/status.rs	Wed Sep 25 18:10:03 2024 +0200
+++ b/rust/rhg/src/commands/status.rs	Wed Sep 25 18:24:15 2024 +0200
@@ -23,16 +23,17 @@
 use hg::manifest::Manifest;
 use hg::matchers::{AlwaysMatcher, IntersectionMatcher};
 use hg::repo::Repo;
+use hg::revlog::options::{default_revlog_options, RevlogOpenOptions};
 use hg::utils::debug::debug_wait_for_file;
 use hg::utils::files::{
     get_bytes_from_os_str, get_bytes_from_os_string, get_path_from_bytes,
 };
 use hg::utils::hg_path::{hg_path_to_path_buf, HgPath};
+use hg::DirstateStatus;
 use hg::Revision;
 use hg::StatusError;
 use hg::StatusOptions;
 use hg::{self, narrow, sparse};
-use hg::{DirstateStatus, RevlogOpenOptions};
 use hg::{PatternFileWarning, RevlogType};
 use log::info;
 use rayon::prelude::*;
@@ -383,8 +384,11 @@
             })?;
             let working_directory_vfs = repo.working_directory_vfs();
             let store_vfs = repo.store_vfs();
-            let revlog_open_options =
-                repo.default_revlog_options(RevlogType::Manifestlog)?;
+            let revlog_open_options = default_revlog_options(
+                repo.config(),
+                repo.requirements(),
+                RevlogType::Manifestlog,
+            )?;
             let res: Vec<_> = take(&mut ds_status.unsure)
                 .into_par_iter()
                 .map(|to_check| {