rust-revlog: add revlog-specific config objects
authorRaphaël Gomès <rgomes@octobus.net>
Wed, 19 Jun 2024 12:25:12 +0200
changeset 51865 0604673428b7
parent 51864 1c5810ce737e
child 51866 09ece563609a
rust-revlog: add revlog-specific config objects These will be used by the upcoming Rust `InnerRevlog` to better centralize config information that is relevant to revlogs.
rust/hg-core/src/revlog/mod.rs
--- a/rust/hg-core/src/revlog/mod.rs	Thu Sep 12 16:27:58 2024 -0400
+++ b/rust/hg-core/src/revlog/mod.rs	Wed Jun 19 12:25:12 2024 +0200
@@ -17,6 +17,7 @@
 pub mod patch;
 
 use std::borrow::Cow;
+use std::collections::HashSet;
 use std::io::Read;
 use std::ops::Deref;
 use std::path::Path;
@@ -31,7 +32,12 @@
 use super::index::Index;
 use super::index::INDEX_ENTRY_SIZE;
 use super::nodemap::{NodeMap, NodeMapError};
+use crate::config::{Config, ResourceProfileValue};
 use crate::errors::HgError;
+use crate::exit_codes;
+use crate::requirements::{
+    GENERALDELTA_REQUIREMENT, NARROW_REQUIREMENT, SPARSEREVLOG_REQUIREMENT,
+};
 use crate::vfs::Vfs;
 
 /// As noted in revlog.c, revision numbers are actually encoded in
@@ -217,6 +223,383 @@
     }
 }
 
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+pub enum RevlogType {
+    Changelog,
+    Manifestlog,
+    Filelog,
+}
+
+impl TryFrom<usize> for RevlogType {
+    type Error = HgError;
+
+    fn try_from(value: usize) -> Result<Self, Self::Error> {
+        match value {
+            1001 => Ok(Self::Changelog),
+            1002 => Ok(Self::Manifestlog),
+            1003 => Ok(Self::Filelog),
+            t => Err(HgError::abort(
+                format!("Unknown revlog type {}", t),
+                exit_codes::ABORT,
+                None,
+            )),
+        }
+    }
+}
+
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+pub enum CompressionEngine {
+    Zlib {
+        /// Between 0 and 9 included
+        level: u32,
+    },
+    Zstd {
+        /// Between 0 and 22 included
+        level: u32,
+        /// Never used in practice for now
+        threads: u32,
+    },
+    /// No compression is performed
+    None,
+}
+impl CompressionEngine {
+    pub fn set_level(&mut self, new_level: usize) -> Result<(), HgError> {
+        match self {
+            CompressionEngine::Zlib { level } => {
+                if new_level > 9 {
+                    return Err(HgError::abort(
+                        format!(
+                            "invalid compression zlib compression level {}",
+                            new_level
+                        ),
+                        exit_codes::ABORT,
+                        None,
+                    ));
+                }
+                *level = new_level as u32;
+            }
+            CompressionEngine::Zstd { level, .. } => {
+                if new_level > 22 {
+                    return Err(HgError::abort(
+                        format!(
+                            "invalid compression zstd compression level {}",
+                            new_level
+                        ),
+                        exit_codes::ABORT,
+                        None,
+                    ));
+                }
+                *level = new_level as u32;
+            }
+            CompressionEngine::None => {}
+        }
+        Ok(())
+    }
+
+    pub fn zstd(
+        zstd_level: Option<u32>,
+    ) -> Result<CompressionEngine, HgError> {
+        let mut engine = CompressionEngine::Zstd {
+            level: 3,
+            threads: 0,
+        };
+        if let Some(level) = zstd_level {
+            engine.set_level(level as usize)?;
+        }
+        Ok(engine)
+    }
+}
+
+impl Default for CompressionEngine {
+    fn default() -> Self {
+        Self::Zlib { level: 6 }
+    }
+}
+
+#[derive(Debug, Clone, Copy, PartialEq)]
+/// Holds configuration values about how the revlog data is read
+pub struct RevlogDataConfig {
+    /// Should we try to open the "pending" version of the revlog
+    pub try_pending: bool,
+    /// Should we try to open the "split" version of the revlog
+    pub try_split: bool,
+    /// When True, `indexfile` should be opened with `checkambig=True` at
+    /// writing time, to avoid file stat ambiguity
+    pub check_ambig: bool,
+    /// If true, use mmap instead of reading to deal with large indexes
+    pub mmap_large_index: bool,
+    /// How much data is considered large
+    pub mmap_index_threshold: Option<u64>,
+    /// How much data to read and cache into the raw revlog data cache
+    pub chunk_cache_size: u64,
+    /// The size of the uncompressed cache compared to the largest revision
+    /// seen
+    pub uncompressed_cache_factor: Option<f64>,
+    /// The number of chunks cached
+    pub uncompressed_cache_count: Option<u64>,
+    /// Allow sparse reading of the revlog data
+    pub with_sparse_read: bool,
+    /// Minimal density of a sparse read chunk
+    pub sr_density_threshold: f64,
+    /// Minimal size of the data we skip when performing sparse reads
+    pub sr_min_gap_size: u64,
+    /// Whether deltas are encoded against arbitrary bases
+    pub general_delta: bool,
+}
+
+impl RevlogDataConfig {
+    pub fn new(
+        config: &Config,
+        requirements: &HashSet<String>,
+    ) -> Result<Self, HgError> {
+        let mut data_config = Self::default();
+        if let Some(chunk_cache_size) =
+            config.get_byte_size(b"format", b"chunkcachesize")?
+        {
+            data_config.chunk_cache_size = chunk_cache_size;
+        }
+
+        let memory_profile = config.get_resource_profile(Some("memory"));
+        if memory_profile.value >= ResourceProfileValue::Medium {
+            data_config.uncompressed_cache_count = Some(10_000);
+            data_config.uncompressed_cache_factor = Some(4.0);
+            if memory_profile.value >= ResourceProfileValue::High {
+                data_config.uncompressed_cache_factor = Some(10.0)
+            }
+        }
+
+        if let Some(mmap_index_threshold) =
+            config.get_byte_size(b"experimental", b"mmapindexthreshold")?
+        {
+            data_config.mmap_index_threshold = Some(mmap_index_threshold);
+        }
+
+        let with_sparse_read =
+            config.get_bool(b"experimental", b"sparse-read")?;
+        if let Some(sr_density_threshold) = config
+            .get_f64(b"experimental", b"sparse-read.density-threshold")?
+        {
+            data_config.sr_density_threshold = sr_density_threshold;
+        }
+        data_config.with_sparse_read = with_sparse_read;
+        if let Some(sr_min_gap_size) = config
+            .get_byte_size(b"experimental", b"sparse-read.min-gap-size")?
+        {
+            data_config.sr_min_gap_size = sr_min_gap_size;
+        }
+
+        data_config.with_sparse_read =
+            requirements.contains(SPARSEREVLOG_REQUIREMENT);
+
+        Ok(data_config)
+    }
+}
+
+impl Default for RevlogDataConfig {
+    fn default() -> Self {
+        Self {
+            chunk_cache_size: 65536,
+            sr_density_threshold: 0.50,
+            sr_min_gap_size: 262144,
+            try_pending: Default::default(),
+            try_split: Default::default(),
+            check_ambig: Default::default(),
+            mmap_large_index: Default::default(),
+            mmap_index_threshold: Default::default(),
+            uncompressed_cache_factor: Default::default(),
+            uncompressed_cache_count: Default::default(),
+            with_sparse_read: Default::default(),
+            general_delta: Default::default(),
+        }
+    }
+}
+
+#[derive(Debug, Clone, Copy, PartialEq)]
+/// Holds configuration values about how new deltas are computed.
+///
+/// Some attributes are duplicated from [`RevlogDataConfig`] to help having
+/// each object self contained.
+pub struct RevlogDeltaConfig {
+    /// Whether deltas can be encoded against arbitrary bases
+    pub general_delta: bool,
+    /// Allow sparse writing of the revlog data
+    pub sparse_revlog: bool,
+    /// Maximum length of a delta chain
+    pub max_chain_len: Option<u64>,
+    /// Maximum distance between a delta chain's start and end
+    pub max_deltachain_span: Option<u64>,
+    /// If `upper_bound_comp` is not None, this is the expected maximal
+    /// gain from compression for the data content
+    pub upper_bound_comp: Option<f64>,
+    /// Should we try a delta against both parents
+    pub delta_both_parents: bool,
+    /// Test delta base candidate groups by chunks of this maximal size
+    pub candidate_group_chunk_size: u64,
+    /// Should we display debug information about delta computation
+    pub debug_delta: bool,
+    /// Trust incoming deltas by default
+    pub lazy_delta: bool,
+    /// Trust the base of incoming deltas by default
+    pub lazy_delta_base: bool,
+}
+impl RevlogDeltaConfig {
+    pub fn new(
+        config: &Config,
+        requirements: &HashSet<String>,
+        revlog_type: RevlogType,
+    ) -> Result<Self, HgError> {
+        let mut delta_config = Self {
+            delta_both_parents: config
+                .get_option_no_default(
+                    b"storage",
+                    b"revlog.optimize-delta-parent-choice",
+                )?
+                .unwrap_or(true),
+            candidate_group_chunk_size: config
+                .get_u64(
+                    b"storage",
+                    b"revlog.delta-parent-search.candidate-group-chunk-size",
+                )?
+                .unwrap_or_default(),
+            ..Default::default()
+        };
+
+        delta_config.debug_delta =
+            config.get_bool(b"debug", b"revlog.debug-delta")?;
+
+        delta_config.general_delta =
+            requirements.contains(GENERALDELTA_REQUIREMENT);
+
+        let lazy_delta =
+            config.get_bool(b"storage", b"revlog.reuse-external-delta")?;
+
+        if revlog_type == RevlogType::Manifestlog {
+            // upper bound of what we expect from compression
+            // (real life value seems to be 3)
+            delta_config.upper_bound_comp = Some(3.0)
+        }
+
+        let mut lazy_delta_base = false;
+        if lazy_delta {
+            lazy_delta_base = match config.get_option_no_default(
+                b"storage",
+                b"revlog.reuse-external-delta-parent",
+            )? {
+                Some(base) => base,
+                None => config.get_bool(b"format", b"generaldelta")?,
+            };
+        }
+        delta_config.lazy_delta = lazy_delta;
+        delta_config.lazy_delta_base = lazy_delta_base;
+
+        delta_config.max_deltachain_span =
+            match config.get_i64(b"experimental", b"maxdeltachainspan")? {
+                Some(span) => {
+                    if span < 0 {
+                        None
+                    } else {
+                        Some(span as u64)
+                    }
+                }
+                None => None,
+            };
+
+        delta_config.sparse_revlog =
+            requirements.contains(SPARSEREVLOG_REQUIREMENT);
+
+        delta_config.max_chain_len =
+            config.get_byte_size_no_default(b"format", b"maxchainlen")?;
+
+        Ok(delta_config)
+    }
+}
+
+impl Default for RevlogDeltaConfig {
+    fn default() -> Self {
+        Self {
+            delta_both_parents: true,
+            lazy_delta: true,
+            general_delta: Default::default(),
+            sparse_revlog: Default::default(),
+            max_chain_len: Default::default(),
+            max_deltachain_span: Default::default(),
+            upper_bound_comp: Default::default(),
+            candidate_group_chunk_size: Default::default(),
+            debug_delta: Default::default(),
+            lazy_delta_base: Default::default(),
+        }
+    }
+}
+
+#[derive(Debug, Default, Clone, Copy, PartialEq)]
+/// Holds configuration values about the available revlog features
+pub struct RevlogFeatureConfig {
+    /// The compression engine and its options
+    pub compression_engine: CompressionEngine,
+    /// Can we use censor on this revlog
+    pub censorable: bool,
+    /// Does this revlog use the "side data" feature
+    pub has_side_data: bool,
+    /// Might remove this configuration once the rank computation has no
+    /// impact
+    pub compute_rank: bool,
+    /// Parent order is supposed to be semantically irrelevant, so we
+    /// normally re-sort parents to ensure that the first parent is non-null,
+    /// if there is a non-null parent at all.
+    /// filelog abuses the parent order as a flag to mark some instances of
+    /// meta-encoded files, so allow it to disable this behavior.
+    pub canonical_parent_order: bool,
+    /// Can ellipsis commit be used
+    pub enable_ellipsis: bool,
+}
+impl RevlogFeatureConfig {
+    pub fn new(
+        config: &Config,
+        requirements: &HashSet<String>,
+    ) -> Result<Self, HgError> {
+        let mut feature_config = Self::default();
+
+        let zlib_level = config.get_u32(b"storage", b"revlog.zlib.level")?;
+        let zstd_level = config.get_u32(b"storage", b"revlog.zstd.level")?;
+
+        feature_config.compression_engine = CompressionEngine::default();
+
+        for requirement in requirements {
+            if requirement.starts_with("revlog-compression-")
+                || requirement.starts_with("exp-compression-")
+            {
+                let split = &mut requirement.splitn(3, '-');
+                split.next();
+                split.next();
+                feature_config.compression_engine = match split.next().unwrap()
+                {
+                    "zstd" => CompressionEngine::zstd(zstd_level)?,
+                    e => {
+                        return Err(HgError::UnsupportedFeature(format!(
+                            "Unsupported compression engine '{e}'"
+                        )))
+                    }
+                };
+            }
+        }
+        if let Some(level) = zlib_level {
+            if matches!(
+                feature_config.compression_engine,
+                CompressionEngine::Zlib { .. }
+            ) {
+                feature_config
+                    .compression_engine
+                    .set_level(level as usize)?;
+            }
+        }
+
+        feature_config.enable_ellipsis =
+            requirements.contains(NARROW_REQUIREMENT);
+
+        Ok(feature_config)
+    }
+}
+
 /// Read only implementation of revlog.
 pub struct Revlog {
     /// When index and data are not interleaved: bytes of the revlog index.