Mercurial > hg
changeset 51865:0604673428b7
rust-revlog: add revlog-specific config objects
These will be used by the upcoming Rust `InnerRevlog` to better centralize
config information that is relevant to revlogs.
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Wed, 19 Jun 2024 12:25:12 +0200 |
parents | 1c5810ce737e |
children | 09ece563609a |
files | rust/hg-core/src/revlog/mod.rs |
diffstat | 1 files changed, 383 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- a/rust/hg-core/src/revlog/mod.rs Thu Sep 12 16:27:58 2024 -0400 +++ b/rust/hg-core/src/revlog/mod.rs Wed Jun 19 12:25:12 2024 +0200 @@ -17,6 +17,7 @@ pub mod patch; use std::borrow::Cow; +use std::collections::HashSet; use std::io::Read; use std::ops::Deref; use std::path::Path; @@ -31,7 +32,12 @@ use super::index::Index; use super::index::INDEX_ENTRY_SIZE; use super::nodemap::{NodeMap, NodeMapError}; +use crate::config::{Config, ResourceProfileValue}; use crate::errors::HgError; +use crate::exit_codes; +use crate::requirements::{ + GENERALDELTA_REQUIREMENT, NARROW_REQUIREMENT, SPARSEREVLOG_REQUIREMENT, +}; use crate::vfs::Vfs; /// As noted in revlog.c, revision numbers are actually encoded in @@ -217,6 +223,383 @@ } } +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum RevlogType { + Changelog, + Manifestlog, + Filelog, +} + +impl TryFrom<usize> for RevlogType { + type Error = HgError; + + fn try_from(value: usize) -> Result<Self, Self::Error> { + match value { + 1001 => Ok(Self::Changelog), + 1002 => Ok(Self::Manifestlog), + 1003 => Ok(Self::Filelog), + t => Err(HgError::abort( + format!("Unknown revlog type {}", t), + exit_codes::ABORT, + None, + )), + } + } +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum CompressionEngine { + Zlib { + /// Between 0 and 9 included + level: u32, + }, + Zstd { + /// Between 0 and 22 included + level: u32, + /// Never used in practice for now + threads: u32, + }, + /// No compression is performed + None, +} +impl CompressionEngine { + pub fn set_level(&mut self, new_level: usize) -> Result<(), HgError> { + match self { + CompressionEngine::Zlib { level } => { + if new_level > 9 { + return Err(HgError::abort( + format!( + "invalid compression zlib compression level {}", + new_level + ), + exit_codes::ABORT, + None, + )); + } + *level = new_level as u32; + } + CompressionEngine::Zstd { level, .. } => { + if new_level > 22 { + return Err(HgError::abort( + format!( + "invalid compression zstd compression level {}", + new_level + ), + exit_codes::ABORT, + None, + )); + } + *level = new_level as u32; + } + CompressionEngine::None => {} + } + Ok(()) + } + + pub fn zstd( + zstd_level: Option<u32>, + ) -> Result<CompressionEngine, HgError> { + let mut engine = CompressionEngine::Zstd { + level: 3, + threads: 0, + }; + if let Some(level) = zstd_level { + engine.set_level(level as usize)?; + } + Ok(engine) + } +} + +impl Default for CompressionEngine { + fn default() -> Self { + Self::Zlib { level: 6 } + } +} + +#[derive(Debug, Clone, Copy, PartialEq)] +/// Holds configuration values about how the revlog data is read +pub struct RevlogDataConfig { + /// Should we try to open the "pending" version of the revlog + pub try_pending: bool, + /// Should we try to open the "split" version of the revlog + pub try_split: bool, + /// When True, `indexfile` should be opened with `checkambig=True` at + /// writing time, to avoid file stat ambiguity + pub check_ambig: bool, + /// If true, use mmap instead of reading to deal with large indexes + pub mmap_large_index: bool, + /// How much data is considered large + pub mmap_index_threshold: Option<u64>, + /// How much data to read and cache into the raw revlog data cache + pub chunk_cache_size: u64, + /// The size of the uncompressed cache compared to the largest revision + /// seen + pub uncompressed_cache_factor: Option<f64>, + /// The number of chunks cached + pub uncompressed_cache_count: Option<u64>, + /// Allow sparse reading of the revlog data + pub with_sparse_read: bool, + /// Minimal density of a sparse read chunk + pub sr_density_threshold: f64, + /// Minimal size of the data we skip when performing sparse reads + pub sr_min_gap_size: u64, + /// Whether deltas are encoded against arbitrary bases + pub general_delta: bool, +} + +impl RevlogDataConfig { + pub fn new( + config: &Config, + requirements: &HashSet<String>, + ) -> Result<Self, HgError> { + let mut data_config = Self::default(); + if let Some(chunk_cache_size) = + config.get_byte_size(b"format", b"chunkcachesize")? + { + data_config.chunk_cache_size = chunk_cache_size; + } + + let memory_profile = config.get_resource_profile(Some("memory")); + if memory_profile.value >= ResourceProfileValue::Medium { + data_config.uncompressed_cache_count = Some(10_000); + data_config.uncompressed_cache_factor = Some(4.0); + if memory_profile.value >= ResourceProfileValue::High { + data_config.uncompressed_cache_factor = Some(10.0) + } + } + + if let Some(mmap_index_threshold) = + config.get_byte_size(b"experimental", b"mmapindexthreshold")? + { + data_config.mmap_index_threshold = Some(mmap_index_threshold); + } + + let with_sparse_read = + config.get_bool(b"experimental", b"sparse-read")?; + if let Some(sr_density_threshold) = config + .get_f64(b"experimental", b"sparse-read.density-threshold")? + { + data_config.sr_density_threshold = sr_density_threshold; + } + data_config.with_sparse_read = with_sparse_read; + if let Some(sr_min_gap_size) = config + .get_byte_size(b"experimental", b"sparse-read.min-gap-size")? + { + data_config.sr_min_gap_size = sr_min_gap_size; + } + + data_config.with_sparse_read = + requirements.contains(SPARSEREVLOG_REQUIREMENT); + + Ok(data_config) + } +} + +impl Default for RevlogDataConfig { + fn default() -> Self { + Self { + chunk_cache_size: 65536, + sr_density_threshold: 0.50, + sr_min_gap_size: 262144, + try_pending: Default::default(), + try_split: Default::default(), + check_ambig: Default::default(), + mmap_large_index: Default::default(), + mmap_index_threshold: Default::default(), + uncompressed_cache_factor: Default::default(), + uncompressed_cache_count: Default::default(), + with_sparse_read: Default::default(), + general_delta: Default::default(), + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq)] +/// Holds configuration values about how new deltas are computed. +/// +/// Some attributes are duplicated from [`RevlogDataConfig`] to help having +/// each object self contained. +pub struct RevlogDeltaConfig { + /// Whether deltas can be encoded against arbitrary bases + pub general_delta: bool, + /// Allow sparse writing of the revlog data + pub sparse_revlog: bool, + /// Maximum length of a delta chain + pub max_chain_len: Option<u64>, + /// Maximum distance between a delta chain's start and end + pub max_deltachain_span: Option<u64>, + /// If `upper_bound_comp` is not None, this is the expected maximal + /// gain from compression for the data content + pub upper_bound_comp: Option<f64>, + /// Should we try a delta against both parents + pub delta_both_parents: bool, + /// Test delta base candidate groups by chunks of this maximal size + pub candidate_group_chunk_size: u64, + /// Should we display debug information about delta computation + pub debug_delta: bool, + /// Trust incoming deltas by default + pub lazy_delta: bool, + /// Trust the base of incoming deltas by default + pub lazy_delta_base: bool, +} +impl RevlogDeltaConfig { + pub fn new( + config: &Config, + requirements: &HashSet<String>, + revlog_type: RevlogType, + ) -> Result<Self, HgError> { + let mut delta_config = Self { + delta_both_parents: config + .get_option_no_default( + b"storage", + b"revlog.optimize-delta-parent-choice", + )? + .unwrap_or(true), + candidate_group_chunk_size: config + .get_u64( + b"storage", + b"revlog.delta-parent-search.candidate-group-chunk-size", + )? + .unwrap_or_default(), + ..Default::default() + }; + + delta_config.debug_delta = + config.get_bool(b"debug", b"revlog.debug-delta")?; + + delta_config.general_delta = + requirements.contains(GENERALDELTA_REQUIREMENT); + + let lazy_delta = + config.get_bool(b"storage", b"revlog.reuse-external-delta")?; + + if revlog_type == RevlogType::Manifestlog { + // upper bound of what we expect from compression + // (real life value seems to be 3) + delta_config.upper_bound_comp = Some(3.0) + } + + let mut lazy_delta_base = false; + if lazy_delta { + lazy_delta_base = match config.get_option_no_default( + b"storage", + b"revlog.reuse-external-delta-parent", + )? { + Some(base) => base, + None => config.get_bool(b"format", b"generaldelta")?, + }; + } + delta_config.lazy_delta = lazy_delta; + delta_config.lazy_delta_base = lazy_delta_base; + + delta_config.max_deltachain_span = + match config.get_i64(b"experimental", b"maxdeltachainspan")? { + Some(span) => { + if span < 0 { + None + } else { + Some(span as u64) + } + } + None => None, + }; + + delta_config.sparse_revlog = + requirements.contains(SPARSEREVLOG_REQUIREMENT); + + delta_config.max_chain_len = + config.get_byte_size_no_default(b"format", b"maxchainlen")?; + + Ok(delta_config) + } +} + +impl Default for RevlogDeltaConfig { + fn default() -> Self { + Self { + delta_both_parents: true, + lazy_delta: true, + general_delta: Default::default(), + sparse_revlog: Default::default(), + max_chain_len: Default::default(), + max_deltachain_span: Default::default(), + upper_bound_comp: Default::default(), + candidate_group_chunk_size: Default::default(), + debug_delta: Default::default(), + lazy_delta_base: Default::default(), + } + } +} + +#[derive(Debug, Default, Clone, Copy, PartialEq)] +/// Holds configuration values about the available revlog features +pub struct RevlogFeatureConfig { + /// The compression engine and its options + pub compression_engine: CompressionEngine, + /// Can we use censor on this revlog + pub censorable: bool, + /// Does this revlog use the "side data" feature + pub has_side_data: bool, + /// Might remove this configuration once the rank computation has no + /// impact + pub compute_rank: bool, + /// Parent order is supposed to be semantically irrelevant, so we + /// normally re-sort parents to ensure that the first parent is non-null, + /// if there is a non-null parent at all. + /// filelog abuses the parent order as a flag to mark some instances of + /// meta-encoded files, so allow it to disable this behavior. + pub canonical_parent_order: bool, + /// Can ellipsis commit be used + pub enable_ellipsis: bool, +} +impl RevlogFeatureConfig { + pub fn new( + config: &Config, + requirements: &HashSet<String>, + ) -> Result<Self, HgError> { + let mut feature_config = Self::default(); + + let zlib_level = config.get_u32(b"storage", b"revlog.zlib.level")?; + let zstd_level = config.get_u32(b"storage", b"revlog.zstd.level")?; + + feature_config.compression_engine = CompressionEngine::default(); + + for requirement in requirements { + if requirement.starts_with("revlog-compression-") + || requirement.starts_with("exp-compression-") + { + let split = &mut requirement.splitn(3, '-'); + split.next(); + split.next(); + feature_config.compression_engine = match split.next().unwrap() + { + "zstd" => CompressionEngine::zstd(zstd_level)?, + e => { + return Err(HgError::UnsupportedFeature(format!( + "Unsupported compression engine '{e}'" + ))) + } + }; + } + } + if let Some(level) = zlib_level { + if matches!( + feature_config.compression_engine, + CompressionEngine::Zlib { .. } + ) { + feature_config + .compression_engine + .set_level(level as usize)?; + } + } + + feature_config.enable_ellipsis = + requirements.contains(NARROW_REQUIREMENT); + + Ok(feature_config) + } +} + /// Read only implementation of revlog. pub struct Revlog { /// When index and data are not interleaved: bytes of the revlog index.