Mercurial > hg
changeset 52160:039b7caeb4d9
rust-revlog: introduce an `options` module
This groups all of the revlog options code in a single module and makes
`revlog/mod.rs` more readable.
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Wed, 25 Sep 2024 18:24:15 +0200 |
parents | 426696af24d3 |
children | 46c68c0fe137 |
files | rust/hg-core/src/operations/debugdata.rs rust/hg-core/src/repo.rs rust/hg-core/src/revlog/changelog.rs rust/hg-core/src/revlog/filelog.rs rust/hg-core/src/revlog/manifest.rs rust/hg-core/src/revlog/mod.rs rust/hg-core/src/revlog/options.rs rust/hg-core/src/update.rs rust/rhg/src/commands/status.rs |
diffstat | 9 files changed, 447 insertions(+), 421 deletions(-) [+] |
line wrap: on
line diff
--- a/rust/hg-core/src/operations/debugdata.rs Wed Sep 25 18:10:03 2024 +0200 +++ b/rust/hg-core/src/operations/debugdata.rs Wed Sep 25 18:24:15 2024 +0200 @@ -7,6 +7,7 @@ use crate::errors::HgError; use crate::repo::Repo; +use crate::revlog::options::default_revlog_options; use crate::revlog::Revlog; use crate::{exit_codes, RevlogError, RevlogType}; @@ -31,7 +32,11 @@ &repo.store_vfs(), index_file, None, - repo.default_revlog_options(RevlogType::Changelog)?, + default_revlog_options( + repo.config(), + repo.requirements(), + RevlogType::Changelog, + )?, )?; let rev = crate::revset::resolve_rev_number_or_hex_prefix(revset, &revlog)?;
--- a/rust/hg-core/src/repo.rs Wed Sep 25 18:10:03 2024 +0200 +++ b/rust/hg-core/src/repo.rs Wed Sep 25 18:24:15 2024 +0200 @@ -10,11 +10,8 @@ use crate::errors::{HgError, IoResultExt}; use crate::lock::{try_with_lock_no_wait, LockError}; use crate::manifest::{Manifest, Manifestlog}; -use crate::requirements::{ - CHANGELOGV2_REQUIREMENT, DIRSTATE_TRACKED_HINT_V1, - GENERALDELTA_REQUIREMENT, NODEMAP_REQUIREMENT, REVLOGV1_REQUIREMENT, - REVLOGV2_REQUIREMENT, -}; +use crate::options::default_revlog_options; +use crate::requirements::DIRSTATE_TRACKED_HINT_V1; use crate::revlog::filelog::Filelog; use crate::revlog::RevlogError; use crate::utils::debug::debug_wait_for_file_or_print; @@ -22,11 +19,10 @@ use crate::utils::hg_path::HgPath; use crate::utils::SliceExt; use crate::vfs::{is_dir, is_file, VfsImpl}; +use crate::DirstateError; use crate::{ - exit_codes, requirements, NodePrefix, RevlogDataConfig, RevlogDeltaConfig, - RevlogFeatureConfig, RevlogType, RevlogVersionOptions, UncheckedRevision, + exit_codes, requirements, NodePrefix, RevlogType, UncheckedRevision, }; -use crate::{DirstateError, RevlogOpenOptions}; use std::cell::{Ref, RefCell, RefMut}; use std::collections::HashSet; use std::io::Seek; @@ -577,7 +573,11 @@ fn new_changelog(&self) -> Result<Changelog, HgError> { Changelog::open( &self.store_vfs(), - self.default_revlog_options(RevlogType::Changelog)?, + default_revlog_options( + self.config(), + self.requirements(), + RevlogType::Changelog, + )?, ) } @@ -592,7 +592,11 @@ fn new_manifestlog(&self) -> Result<Manifestlog, HgError> { Manifestlog::open( &self.store_vfs(), - self.default_revlog_options(RevlogType::Manifestlog)?, + default_revlog_options( + self.config(), + self.requirements(), + RevlogType::Manifestlog, + )?, ) } @@ -642,7 +646,11 @@ Filelog::open( self, path, - self.default_revlog_options(RevlogType::Filelog)?, + default_revlog_options( + self.config(), + self.requirements(), + RevlogType::Filelog, + )?, ) } /// Write to disk any updates that 
were made through `dirstate_map_mut`. @@ -792,50 +800,6 @@ Ok(()) } - pub fn default_revlog_options( - &self, - revlog_type: RevlogType, - ) -> Result<RevlogOpenOptions, HgError> { - let requirements = self.requirements(); - let is_changelog = revlog_type == RevlogType::Changelog; - let version = if is_changelog - && requirements.contains(CHANGELOGV2_REQUIREMENT) - { - let compute_rank = self - .config() - .get_bool(b"experimental", b"changelog-v2.compute-rank")?; - RevlogVersionOptions::ChangelogV2 { compute_rank } - } else if requirements.contains(REVLOGV2_REQUIREMENT) { - RevlogVersionOptions::V2 - } else if requirements.contains(REVLOGV1_REQUIREMENT) { - RevlogVersionOptions::V1 { - general_delta: requirements.contains(GENERALDELTA_REQUIREMENT), - inline: !is_changelog, - } - } else { - RevlogVersionOptions::V0 - }; - Ok(RevlogOpenOptions { - version, - // We don't need to dance around the slow path like in the Python - // implementation since we know we have access to the fast code. - use_nodemap: requirements.contains(NODEMAP_REQUIREMENT), - delta_config: RevlogDeltaConfig::new( - self.config(), - self.requirements(), - revlog_type, - )?, - data_config: RevlogDataConfig::new( - self.config(), - self.requirements(), - )?, - feature_config: RevlogFeatureConfig::new( - self.config(), - requirements, - )?, - }) - } - pub fn node(&self, rev: UncheckedRevision) -> Option<crate::Node> { self.changelog() .ok()
--- a/rust/hg-core/src/revlog/changelog.rs Wed Sep 25 18:10:03 2024 +0200 +++ b/rust/hg-core/src/revlog/changelog.rs Wed Sep 25 18:24:15 2024 +0200 @@ -14,7 +14,9 @@ use crate::revlog::{Revlog, RevlogEntry, RevlogError}; use crate::utils::hg_path::HgPath; use crate::vfs::VfsImpl; -use crate::{Graph, GraphError, RevlogOpenOptions, UncheckedRevision}; +use crate::{Graph, GraphError, UncheckedRevision}; + +use super::options::RevlogOpenOptions; /// A specialized `Revlog` to work with changelog data format. pub struct Changelog { @@ -504,10 +506,7 @@ mod tests { use super::*; use crate::vfs::VfsImpl; - use crate::{ - RevlogDataConfig, RevlogDeltaConfig, RevlogFeatureConfig, - NULL_REVISION, - }; + use crate::NULL_REVISION; use pretty_assertions::assert_eq; #[test] @@ -571,18 +570,9 @@ }; std::fs::write(temp.path().join("foo.i"), b"").unwrap(); std::fs::write(temp.path().join("foo.d"), b"").unwrap(); - let revlog = Revlog::open( - &vfs, - "foo.i", - None, - RevlogOpenOptions::new( - false, - RevlogDataConfig::default(), - RevlogDeltaConfig::default(), - RevlogFeatureConfig::default(), - ), - ) - .unwrap(); + let revlog = + Revlog::open(&vfs, "foo.i", None, RevlogOpenOptions::default()) + .unwrap(); let changelog = Changelog { revlog }; assert_eq!(
--- a/rust/hg-core/src/revlog/filelog.rs Wed Sep 25 18:10:03 2024 +0200 +++ b/rust/hg-core/src/revlog/filelog.rs Wed Sep 25 18:24:15 2024 +0200 @@ -11,10 +11,11 @@ use crate::utils::SliceExt; use crate::Graph; use crate::GraphError; -use crate::RevlogOpenOptions; use crate::UncheckedRevision; use std::path::PathBuf; +use super::options::RevlogOpenOptions; + /// A specialized `Revlog` to work with file data logs. pub struct Filelog { /// The generic `revlog` format.
--- a/rust/hg-core/src/revlog/manifest.rs Wed Sep 25 18:10:03 2024 +0200 +++ b/rust/hg-core/src/revlog/manifest.rs Wed Sep 25 18:24:15 2024 +0200 @@ -6,9 +6,9 @@ use crate::utils::hg_path::HgPath; use crate::utils::SliceExt; use crate::vfs::VfsImpl; -use crate::{ - Graph, GraphError, Revision, RevlogOpenOptions, UncheckedRevision, -}; +use crate::{Graph, GraphError, Revision, UncheckedRevision}; + +use super::options::RevlogOpenOptions; /// A specialized `Revlog` to work with `manifest` data format. pub struct Manifestlog {
--- a/rust/hg-core/src/revlog/mod.rs Wed Sep 25 18:10:03 2024 +0200 +++ b/rust/hg-core/src/revlog/mod.rs Wed Sep 25 18:24:15 2024 +0200 @@ -9,18 +9,19 @@ pub mod nodemap; mod nodemap_docket; pub mod path_encode; -use compression::{uncompressed_zstd_data, CompressionConfig}; +use compression::uncompressed_zstd_data; pub use node::{FromHexError, Node, NodePrefix}; +use options::RevlogOpenOptions; pub mod changelog; pub mod compression; pub mod file_io; pub mod filelog; pub mod index; pub mod manifest; +pub mod options; pub mod patch; use std::borrow::Cow; -use std::collections::HashSet; use std::io::Read; use std::ops::Deref; use std::path::Path; @@ -33,12 +34,8 @@ use super::index::Index; use super::index::INDEX_ENTRY_SIZE; use super::nodemap::{NodeMap, NodeMapError}; -use crate::config::{Config, ResourceProfileValue}; use crate::errors::HgError; use crate::exit_codes; -use crate::requirements::{ - GENERALDELTA_REQUIREMENT, NARROW_REQUIREMENT, SPARSEREVLOG_REQUIREMENT, -}; use crate::vfs::VfsImpl; /// As noted in revlog.c, revision numbers are actually encoded in @@ -259,255 +256,6 @@ } } -#[derive(Debug, Clone, Copy, PartialEq)] -/// Holds configuration values about how the revlog data is read -pub struct RevlogDataConfig { - /// Should we try to open the "pending" version of the revlog - pub try_pending: bool, - /// Should we try to open the "split" version of the revlog - pub try_split: bool, - /// When True, `indexfile` should be opened with `checkambig=True` at - /// writing time, to avoid file stat ambiguity - pub check_ambig: bool, - /// If true, use mmap instead of reading to deal with large indexes - pub mmap_large_index: bool, - /// How much data is considered large - pub mmap_index_threshold: Option<u64>, - /// How much data to read and cache into the raw revlog data cache - pub chunk_cache_size: u64, - /// The size of the uncompressed cache compared to the largest revision - /// seen - pub uncompressed_cache_factor: Option<f64>, - /// The number of 
chunks cached - pub uncompressed_cache_count: Option<u64>, - /// Allow sparse reading of the revlog data - pub with_sparse_read: bool, - /// Minimal density of a sparse read chunk - pub sr_density_threshold: f64, - /// Minimal size of the data we skip when performing sparse reads - pub sr_min_gap_size: u64, - /// Whether deltas are encoded against arbitrary bases - pub general_delta: bool, -} - -impl RevlogDataConfig { - pub fn new( - config: &Config, - requirements: &HashSet<String>, - ) -> Result<Self, HgError> { - let mut data_config = Self::default(); - if let Some(chunk_cache_size) = - config.get_byte_size(b"format", b"chunkcachesize")? - { - data_config.chunk_cache_size = chunk_cache_size; - } - - let memory_profile = config.get_resource_profile(Some("memory")); - if memory_profile.value >= ResourceProfileValue::Medium { - data_config.uncompressed_cache_count = Some(10_000); - data_config.uncompressed_cache_factor = Some(4.0); - if memory_profile.value >= ResourceProfileValue::High { - data_config.uncompressed_cache_factor = Some(10.0) - } - } - - if let Some(mmap_index_threshold) = config - .get_byte_size(b"storage", b"revlog.mmap.index:size-threshold")? - { - data_config.mmap_index_threshold = Some(mmap_index_threshold); - } - - let with_sparse_read = - config.get_bool(b"experimental", b"sparse-read")?; - if let Some(sr_density_threshold) = config - .get_f64(b"experimental", b"sparse-read.density-threshold")? - { - data_config.sr_density_threshold = sr_density_threshold; - } - data_config.with_sparse_read = with_sparse_read; - if let Some(sr_min_gap_size) = config - .get_byte_size(b"experimental", b"sparse-read.min-gap-size")? 
- { - data_config.sr_min_gap_size = sr_min_gap_size; - } - - data_config.with_sparse_read = - requirements.contains(SPARSEREVLOG_REQUIREMENT); - - Ok(data_config) - } -} - -impl Default for RevlogDataConfig { - fn default() -> Self { - Self { - chunk_cache_size: 65536, - sr_density_threshold: 0.50, - sr_min_gap_size: 262144, - try_pending: Default::default(), - try_split: Default::default(), - check_ambig: Default::default(), - mmap_large_index: Default::default(), - mmap_index_threshold: Default::default(), - uncompressed_cache_factor: Default::default(), - uncompressed_cache_count: Default::default(), - with_sparse_read: Default::default(), - general_delta: Default::default(), - } - } -} - -#[derive(Debug, Clone, Copy, PartialEq)] -/// Holds configuration values about how new deltas are computed. -/// -/// Some attributes are duplicated from [`RevlogDataConfig`] to help having -/// each object self contained. -pub struct RevlogDeltaConfig { - /// Whether deltas can be encoded against arbitrary bases - pub general_delta: bool, - /// Allow sparse writing of the revlog data - pub sparse_revlog: bool, - /// Maximum length of a delta chain - pub max_chain_len: Option<u64>, - /// Maximum distance between a delta chain's start and end - pub max_deltachain_span: Option<u64>, - /// If `upper_bound_comp` is not None, this is the expected maximal - /// gain from compression for the data content - pub upper_bound_comp: Option<f64>, - /// Should we try a delta against both parents - pub delta_both_parents: bool, - /// Test delta base candidate groups by chunks of this maximal size - pub candidate_group_chunk_size: u64, - /// Should we display debug information about delta computation - pub debug_delta: bool, - /// Trust incoming deltas by default - pub lazy_delta: bool, - /// Trust the base of incoming deltas by default - pub lazy_delta_base: bool, -} -impl RevlogDeltaConfig { - pub fn new( - config: &Config, - requirements: &HashSet<String>, - revlog_type: RevlogType, - ) -> 
Result<Self, HgError> { - let mut delta_config = Self { - delta_both_parents: config - .get_option_no_default( - b"storage", - b"revlog.optimize-delta-parent-choice", - )? - .unwrap_or(true), - candidate_group_chunk_size: config - .get_u64( - b"storage", - b"revlog.delta-parent-search.candidate-group-chunk-size", - )? - .unwrap_or_default(), - ..Default::default() - }; - - delta_config.debug_delta = - config.get_bool(b"debug", b"revlog.debug-delta")?; - - delta_config.general_delta = - requirements.contains(GENERALDELTA_REQUIREMENT); - - let lazy_delta = - config.get_bool(b"storage", b"revlog.reuse-external-delta")?; - - if revlog_type == RevlogType::Manifestlog { - // upper bound of what we expect from compression - // (real life value seems to be 3) - delta_config.upper_bound_comp = Some(3.0) - } - - let mut lazy_delta_base = false; - if lazy_delta { - lazy_delta_base = match config.get_option_no_default( - b"storage", - b"revlog.reuse-external-delta-parent", - )? { - Some(base) => base, - None => config.get_bool(b"format", b"generaldelta")?, - }; - } - delta_config.lazy_delta = lazy_delta; - delta_config.lazy_delta_base = lazy_delta_base; - - delta_config.max_deltachain_span = - match config.get_i64(b"experimental", b"maxdeltachainspan")? 
{ - Some(span) => { - if span < 0 { - None - } else { - Some(span as u64) - } - } - None => None, - }; - - delta_config.sparse_revlog = - requirements.contains(SPARSEREVLOG_REQUIREMENT); - - delta_config.max_chain_len = - config.get_byte_size_no_default(b"format", b"maxchainlen")?; - - Ok(delta_config) - } -} - -impl Default for RevlogDeltaConfig { - fn default() -> Self { - Self { - delta_both_parents: true, - lazy_delta: true, - general_delta: Default::default(), - sparse_revlog: Default::default(), - max_chain_len: Default::default(), - max_deltachain_span: Default::default(), - upper_bound_comp: Default::default(), - candidate_group_chunk_size: Default::default(), - debug_delta: Default::default(), - lazy_delta_base: Default::default(), - } - } -} - -#[derive(Debug, Default, Clone, Copy, PartialEq)] -/// Holds configuration values about the available revlog features -pub struct RevlogFeatureConfig { - /// The compression engine and its options - pub compression_engine: CompressionConfig, - /// Can we use censor on this revlog - pub censorable: bool, - /// Does this revlog use the "side data" feature - pub has_side_data: bool, - /// Might remove this configuration once the rank computation has no - /// impact - pub compute_rank: bool, - /// Parent order is supposed to be semantically irrelevant, so we - /// normally re-sort parents to ensure that the first parent is non-null, - /// if there is a non-null parent at all. - /// filelog abuses the parent order as a flag to mark some instances of - /// meta-encoded files, so allow it to disable this behavior. 
- pub canonical_parent_order: bool, - /// Can ellipsis commit be used - pub enable_ellipsis: bool, -} -impl RevlogFeatureConfig { - pub fn new( - config: &Config, - requirements: &HashSet<String>, - ) -> Result<Self, HgError> { - Ok(Self { - compression_engine: CompressionConfig::new(config, requirements)?, - enable_ellipsis: requirements.contains(NARROW_REQUIREMENT), - ..Default::default() - }) - } -} - /// Read only implementation of revlog. pub struct Revlog { /// When index and data are not interleaved: bytes of the revlog index. @@ -526,90 +274,6 @@ } } -#[derive(Debug, Copy, Clone, PartialEq)] -pub enum RevlogVersionOptions { - V0, - V1 { general_delta: bool, inline: bool }, - V2, - ChangelogV2 { compute_rank: bool }, -} - -/// Options to govern how a revlog should be opened, usually from the -/// repository configuration or requirements. -#[derive(Debug, Copy, Clone)] -pub struct RevlogOpenOptions { - /// The revlog version, along with any option specific to this version - pub version: RevlogVersionOptions, - /// Whether the revlog uses a persistent nodemap. 
- pub use_nodemap: bool, - pub delta_config: RevlogDeltaConfig, - pub data_config: RevlogDataConfig, - pub feature_config: RevlogFeatureConfig, -} - -#[cfg(test)] -impl Default for RevlogOpenOptions { - fn default() -> Self { - Self { - version: RevlogVersionOptions::V1 { - general_delta: true, - inline: false, - }, - use_nodemap: true, - data_config: Default::default(), - delta_config: Default::default(), - feature_config: Default::default(), - } - } -} - -impl RevlogOpenOptions { - pub fn new( - inline: bool, - data_config: RevlogDataConfig, - delta_config: RevlogDeltaConfig, - feature_config: RevlogFeatureConfig, - ) -> Self { - Self { - version: RevlogVersionOptions::V1 { - general_delta: data_config.general_delta, - inline, - }, - use_nodemap: false, - data_config, - delta_config, - feature_config, - } - } - - pub fn index_header(&self) -> index::IndexHeader { - index::IndexHeader { - header_bytes: match self.version { - RevlogVersionOptions::V0 => [0, 0, 0, 0], - RevlogVersionOptions::V1 { - general_delta, - inline, - } => [ - 0, - if general_delta && inline { - 3 - } else if general_delta { - 2 - } else { - u8::from(inline) - }, - 0, - 1, - ], - RevlogVersionOptions::V2 => 0xDEADu32.to_be_bytes(), - RevlogVersionOptions::ChangelogV2 { compute_rank: _ } => { - 0xD34Du32.to_be_bytes() - } - }, - } - } -} - impl Revlog { /// Open a revlog index file. ///
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rust/hg-core/src/revlog/options.rs Wed Sep 25 18:24:15 2024 +0200 @@ -0,0 +1,393 @@ +//! Helpers for the revlog config and opening options + +use std::collections::HashSet; + +use crate::{ + config::{Config, ResourceProfileValue}, + errors::HgError, + requirements::{ + CHANGELOGV2_REQUIREMENT, GENERALDELTA_REQUIREMENT, NARROW_REQUIREMENT, + NODEMAP_REQUIREMENT, REVLOGV1_REQUIREMENT, REVLOGV2_REQUIREMENT, + SPARSEREVLOG_REQUIREMENT, + }, +}; + +use super::{compression::CompressionConfig, RevlogType}; + +const DEFAULT_CHUNK_CACHE_SIZE: u64 = 65536; +const DEFAULT_SPARSE_READ_DENSITY_THRESHOLD: f64 = 0.50; +const DEFAULT_SPARSE_READ_MIN_GAP_SIZE: u64 = 262144; + +/// The known revlog versions and their options +#[derive(Debug, Copy, Clone, PartialEq)] +pub enum RevlogVersionOptions { + V0, + V1 { general_delta: bool, inline: bool }, + V2, + ChangelogV2 { compute_rank: bool }, +} + +/// Options to govern how a revlog should be opened, usually from the +/// repository configuration or requirements. +#[derive(Debug, Copy, Clone)] +pub struct RevlogOpenOptions { + /// The revlog version, along with any option specific to this version + pub version: RevlogVersionOptions, + /// Whether the revlog uses a persistent nodemap. 
+ pub use_nodemap: bool, + pub delta_config: RevlogDeltaConfig, + pub data_config: RevlogDataConfig, + pub feature_config: RevlogFeatureConfig, +} + +#[cfg(test)] +impl Default for RevlogOpenOptions { + fn default() -> Self { + Self { + version: RevlogVersionOptions::V1 { + general_delta: true, + inline: false, + }, + use_nodemap: true, + data_config: Default::default(), + delta_config: Default::default(), + feature_config: Default::default(), + } + } +} + +impl RevlogOpenOptions { + pub fn new( + inline: bool, + data_config: RevlogDataConfig, + delta_config: RevlogDeltaConfig, + feature_config: RevlogFeatureConfig, + ) -> Self { + Self { + version: RevlogVersionOptions::V1 { + general_delta: data_config.general_delta, + inline, + }, + use_nodemap: false, + data_config, + delta_config, + feature_config, + } + } + + pub fn index_header(&self) -> super::index::IndexHeader { + super::index::IndexHeader { + header_bytes: match self.version { + RevlogVersionOptions::V0 => [0, 0, 0, 0], + RevlogVersionOptions::V1 { + general_delta, + inline, + } => [ + 0, + if general_delta && inline { + 3 + } else if general_delta { + 2 + } else { + u8::from(inline) + }, + 0, + 1, + ], + RevlogVersionOptions::V2 => 0xDEADu32.to_be_bytes(), + RevlogVersionOptions::ChangelogV2 { compute_rank: _ } => { + 0xD34Du32.to_be_bytes() + } + }, + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq)] +/// Holds configuration values about how the revlog data is read +pub struct RevlogDataConfig { + /// Should we try to open the "pending" version of the revlog + pub try_pending: bool, + /// Should we try to open the "split" version of the revlog + pub try_split: bool, + /// When True, `indexfile` should be opened with `checkambig=True` at + /// writing time, to avoid file stat ambiguity + pub check_ambig: bool, + /// If true, use mmap instead of reading to deal with large indexes + pub mmap_large_index: bool, + /// How much data is considered large + pub mmap_index_threshold: Option<u64>, + /// How 
much data to read and cache into the raw revlog data cache + pub chunk_cache_size: u64, + /// The size of the uncompressed cache compared to the largest revision + /// seen + pub uncompressed_cache_factor: Option<f64>, + /// The number of chunks cached + pub uncompressed_cache_count: Option<u64>, + /// Allow sparse reading of the revlog data + pub with_sparse_read: bool, + /// Minimal density of a sparse read chunk + pub sr_density_threshold: f64, + /// Minimal size of the data we skip when performing sparse reads + pub sr_min_gap_size: u64, + /// Whether deltas are encoded against arbitrary bases + pub general_delta: bool, +} + +impl RevlogDataConfig { + pub fn new( + config: &Config, + requirements: &HashSet<String>, + ) -> Result<Self, HgError> { + let mut data_config = Self::default(); + if let Some(chunk_cache_size) = + config.get_byte_size(b"format", b"chunkcachesize")? + { + data_config.chunk_cache_size = chunk_cache_size; + } + + let memory_profile = config.get_resource_profile(Some("memory")); + if memory_profile.value >= ResourceProfileValue::Medium { + data_config.uncompressed_cache_count = Some(10_000); + data_config.uncompressed_cache_factor = Some(4.0); + if memory_profile.value >= ResourceProfileValue::High { + data_config.uncompressed_cache_factor = Some(10.0) + } + } + + if let Some(mmap_index_threshold) = config + .get_byte_size(b"storage", b"revlog.mmap.index:size-threshold")? + { + data_config.mmap_index_threshold = Some(mmap_index_threshold); + } + + let with_sparse_read = + config.get_bool(b"experimental", b"sparse-read")?; + if let Some(sr_density_threshold) = config + .get_f64(b"experimental", b"sparse-read.density-threshold")? + { + data_config.sr_density_threshold = sr_density_threshold; + } + data_config.with_sparse_read = with_sparse_read; + if let Some(sr_min_gap_size) = config + .get_byte_size(b"experimental", b"sparse-read.min-gap-size")? 
+ { + data_config.sr_min_gap_size = sr_min_gap_size; + } + + data_config.with_sparse_read = + requirements.contains(SPARSEREVLOG_REQUIREMENT); + + Ok(data_config) + } +} + +impl Default for RevlogDataConfig { + fn default() -> Self { + Self { + chunk_cache_size: DEFAULT_CHUNK_CACHE_SIZE, + sr_density_threshold: DEFAULT_SPARSE_READ_DENSITY_THRESHOLD, + sr_min_gap_size: DEFAULT_SPARSE_READ_MIN_GAP_SIZE, + try_pending: Default::default(), + try_split: Default::default(), + check_ambig: Default::default(), + mmap_large_index: Default::default(), + mmap_index_threshold: Default::default(), + uncompressed_cache_factor: Default::default(), + uncompressed_cache_count: Default::default(), + with_sparse_read: Default::default(), + general_delta: Default::default(), + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq)] +/// Holds configuration values about how new deltas are computed. +/// +/// Some attributes are duplicated from [`RevlogDataConfig`] to help having +/// each object self contained. 
+pub struct RevlogDeltaConfig { + /// Whether deltas can be encoded against arbitrary bases + pub general_delta: bool, + /// Allow sparse writing of the revlog data + pub sparse_revlog: bool, + /// Maximum length of a delta chain + pub max_chain_len: Option<u64>, + /// Maximum distance between a delta chain's start and end + pub max_deltachain_span: Option<u64>, + /// If `upper_bound_comp` is not None, this is the expected maximal + /// gain from compression for the data content + pub upper_bound_comp: Option<f64>, + /// Should we try a delta against both parents + pub delta_both_parents: bool, + /// Test delta base candidate groups by chunks of this maximal size + pub candidate_group_chunk_size: u64, + /// Should we display debug information about delta computation + pub debug_delta: bool, + /// Trust incoming deltas by default + pub lazy_delta: bool, + /// Trust the base of incoming deltas by default + pub lazy_delta_base: bool, +} + +impl RevlogDeltaConfig { + pub fn new( + config: &Config, + requirements: &HashSet<String>, + revlog_type: RevlogType, + ) -> Result<Self, HgError> { + let mut delta_config = Self { + delta_both_parents: config + .get_option_no_default( + b"storage", + b"revlog.optimize-delta-parent-choice", + )? + .unwrap_or(true), + candidate_group_chunk_size: config + .get_u64( + b"storage", + b"revlog.delta-parent-search.candidate-group-chunk-size", + )? 
+ .unwrap_or_default(), + ..Default::default() + }; + + delta_config.debug_delta = + config.get_bool(b"debug", b"revlog.debug-delta")?; + + delta_config.general_delta = + requirements.contains(GENERALDELTA_REQUIREMENT); + + let lazy_delta = + config.get_bool(b"storage", b"revlog.reuse-external-delta")?; + + if revlog_type == RevlogType::Manifestlog { + // upper bound of what we expect from compression + // (real life value seems to be 3) + delta_config.upper_bound_comp = Some(3.0) + } + + let mut lazy_delta_base = false; + if lazy_delta { + lazy_delta_base = match config.get_option_no_default( + b"storage", + b"revlog.reuse-external-delta-parent", + )? { + Some(base) => base, + None => config.get_bool(b"format", b"generaldelta")?, + }; + } + delta_config.lazy_delta = lazy_delta; + delta_config.lazy_delta_base = lazy_delta_base; + + delta_config.max_deltachain_span = + match config.get_i64(b"experimental", b"maxdeltachainspan")? { + Some(span) => { + if span < 0 { + None + } else { + Some(span as u64) + } + } + None => None, + }; + + delta_config.sparse_revlog = + requirements.contains(SPARSEREVLOG_REQUIREMENT); + + delta_config.max_chain_len = + config.get_byte_size_no_default(b"format", b"maxchainlen")?; + + Ok(delta_config) + } +} + +impl Default for RevlogDeltaConfig { + fn default() -> Self { + Self { + delta_both_parents: true, + lazy_delta: true, + general_delta: Default::default(), + sparse_revlog: Default::default(), + max_chain_len: Default::default(), + max_deltachain_span: Default::default(), + upper_bound_comp: Default::default(), + candidate_group_chunk_size: Default::default(), + debug_delta: Default::default(), + lazy_delta_base: Default::default(), + } + } +} + +#[derive(Debug, Default, Clone, Copy, PartialEq)] +/// Holds configuration values about the available revlog features +pub struct RevlogFeatureConfig { + /// The compression engine and its options + pub compression_engine: CompressionConfig, + /// Can we use censor on this revlog + pub 
censorable: bool, + /// Does this revlog use the "side data" feature + pub has_side_data: bool, + /// Might remove this configuration once the rank computation has no + /// impact + pub compute_rank: bool, + /// Parent order is supposed to be semantically irrelevant, so we + /// normally re-sort parents to ensure that the first parent is non-null, + /// if there is a non-null parent at all. + /// filelog abuses the parent order as a flag to mark some instances of + /// meta-encoded files, so allow it to disable this behavior. + pub canonical_parent_order: bool, + /// Can ellipsis commit be used + pub enable_ellipsis: bool, +} + +impl RevlogFeatureConfig { + pub fn new( + config: &Config, + requirements: &HashSet<String>, + ) -> Result<Self, HgError> { + Ok(Self { + compression_engine: CompressionConfig::new(config, requirements)?, + enable_ellipsis: requirements.contains(NARROW_REQUIREMENT), + ..Default::default() + }) + } +} + +/// Return the default options for a revlog of `revlog_type` according to the +/// current config and requirements. +pub fn default_revlog_options( + config: &Config, + requirements: &HashSet<String>, + revlog_type: RevlogType, +) -> Result<RevlogOpenOptions, HgError> { + let is_changelog = revlog_type == RevlogType::Changelog; + let version = + if is_changelog && requirements.contains(CHANGELOGV2_REQUIREMENT) { + let compute_rank = config + .get_bool(b"experimental", b"changelog-v2.compute-rank")?; + RevlogVersionOptions::ChangelogV2 { compute_rank } + } else if requirements.contains(REVLOGV2_REQUIREMENT) { + RevlogVersionOptions::V2 + } else if requirements.contains(REVLOGV1_REQUIREMENT) { + RevlogVersionOptions::V1 { + general_delta: requirements.contains(GENERALDELTA_REQUIREMENT), + inline: !is_changelog, + } + } else { + RevlogVersionOptions::V0 + }; + Ok(RevlogOpenOptions { + version, + // We don't need to dance around the slow path like in the Python + // implementation since we know we have access to the fast code. 
+ use_nodemap: requirements.contains(NODEMAP_REQUIREMENT), + delta_config: RevlogDeltaConfig::new( + config, + requirements, + revlog_type, + )?, + data_config: RevlogDataConfig::new(config, requirements)?, + feature_config: RevlogFeatureConfig::new(config, requirements)?, + }) +}
--- a/rust/hg-core/src/update.rs Wed Sep 25 18:10:03 2024 +0200 +++ b/rust/hg-core/src/update.rs Wed Sep 25 18:24:15 2024 +0200 @@ -19,6 +19,7 @@ narrow, node::NULL_NODE, operations::{list_rev_tracked_files, ExpandedManifestEntry}, + options::{default_revlog_options, RevlogOpenOptions}, progress::Progress, repo::Repo, sparse, @@ -28,7 +29,7 @@ path_auditor::PathAuditor, }, vfs::{is_on_nfs_mount, VfsImpl}, - DirstateParents, RevlogError, RevlogOpenOptions, UncheckedRevision, + DirstateParents, RevlogError, UncheckedRevision, }; use crossbeam_channel::{Receiver, Sender}; use rayon::prelude::*; @@ -89,7 +90,11 @@ return Ok(0); } let store_vfs = &repo.store_vfs(); - let options = repo.default_revlog_options(crate::RevlogType::Filelog)?; + let options = default_revlog_options( + repo.config(), + repo.requirements(), + crate::RevlogType::Filelog, + )?; let (errors_sender, errors_receiver) = crossbeam_channel::unbounded(); let (files_sender, files_receiver) = crossbeam_channel::unbounded(); let working_directory_path = &repo.working_directory_path();
--- a/rust/rhg/src/commands/status.rs Wed Sep 25 18:10:03 2024 +0200 +++ b/rust/rhg/src/commands/status.rs Wed Sep 25 18:24:15 2024 +0200 @@ -23,16 +23,17 @@ use hg::manifest::Manifest; use hg::matchers::{AlwaysMatcher, IntersectionMatcher}; use hg::repo::Repo; +use hg::revlog::options::{default_revlog_options, RevlogOpenOptions}; use hg::utils::debug::debug_wait_for_file; use hg::utils::files::{ get_bytes_from_os_str, get_bytes_from_os_string, get_path_from_bytes, }; use hg::utils::hg_path::{hg_path_to_path_buf, HgPath}; +use hg::DirstateStatus; use hg::Revision; use hg::StatusError; use hg::StatusOptions; use hg::{self, narrow, sparse}; -use hg::{DirstateStatus, RevlogOpenOptions}; use hg::{PatternFileWarning, RevlogType}; use log::info; use rayon::prelude::*; @@ -383,8 +384,11 @@ })?; let working_directory_vfs = repo.working_directory_vfs(); let store_vfs = repo.store_vfs(); - let revlog_open_options = - repo.default_revlog_options(RevlogType::Manifestlog)?; + let revlog_open_options = default_revlog_options( + repo.config(), + repo.requirements(), + RevlogType::Manifestlog, + )?; let res: Vec<_> = take(&mut ds_status.unsure) .into_par_iter() .map(|to_check| {