view rust/hg-core/src/revlog/options.rs @ 52300:e2319309bed4

rust-revlog: rename `start` to `data_start` This is more explicit.
author Raphaël Gomès <rgomes@octobus.net>
date Mon, 04 Nov 2024 10:37:42 +0100
parents 84b5802ba7d3
children 33d8cb64e9da
line wrap: on
line source

//! Helpers for the revlog config and opening options

use std::collections::HashSet;

use crate::{
    config::{Config, ResourceProfileValue},
    errors::HgError,
    requirements::{
        CHANGELOGV2_REQUIREMENT, GENERALDELTA_REQUIREMENT, NARROW_REQUIREMENT,
        NODEMAP_REQUIREMENT, REVLOGV1_REQUIREMENT, REVLOGV2_REQUIREMENT,
        SPARSEREVLOG_REQUIREMENT,
    },
};

use super::{compression::CompressionConfig, RevlogType};

/// Default for [`RevlogDataConfig::chunk_cache_size`]; it is kept unless
/// `format.chunkcachesize` provides a value (presumably a number of bytes,
/// since the config value is read as a byte size).
const DEFAULT_CHUNK_CACHE_SIZE: u64 = 65536;
/// Default for [`RevlogDataConfig::sr_density_threshold`]; it is kept unless
/// `experimental.sparse-read.density-threshold` provides a value.
const DEFAULT_SPARSE_READ_DENSITY_THRESHOLD: f64 = 0.50;
/// Default for [`RevlogDataConfig::sr_min_gap_size`]; it is kept unless
/// `experimental.sparse-read.min-gap-size` provides a value (presumably a
/// number of bytes, since the config value is read as a byte size).
const DEFAULT_SPARSE_READ_MIN_GAP_SIZE: u64 = 262144;

/// The known revlog versions and their options
///
/// [`default_revlog_options`] picks the variant from the repository
/// requirements, and [`RevlogOpenOptions::index_header`] turns it into the
/// matching index header bytes.
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum RevlogVersionOptions {
    /// Revlog version 0; it has no version-specific option.
    V0,
    /// Revlog version 1. `general_delta` and `inline` are the two flags
    /// encoded in the index header for this version.
    V1 { general_delta: bool, inline: bool },
    /// Revlog version 2; it has no version-specific option.
    V2,
    /// Version 2 of the changelog. `compute_rank` mirrors the
    /// `experimental.changelog-v2.compute-rank` configuration.
    ChangelogV2 { compute_rank: bool },
}

/// Options to govern how a revlog should be opened, usually from the
/// repository configuration or requirements.
#[derive(Debug, Copy, Clone)]
pub struct RevlogOpenOptions {
    /// The revlog version, along with any option specific to this version
    pub version: RevlogVersionOptions,
    /// Whether the revlog uses a persistent nodemap.
    pub use_nodemap: bool,
    /// Configuration values about how new deltas are computed
    pub delta_config: RevlogDeltaConfig,
    /// Configuration values about how the revlog data is read
    pub data_config: RevlogDataConfig,
    /// Configuration values about the available revlog features
    pub feature_config: RevlogFeatureConfig,
}

/// Test-only defaults: a non-inline, general-delta version 1 revlog that uses
/// the persistent nodemap, with every sub-configuration at its own default.
#[cfg(test)]
impl Default for RevlogOpenOptions {
    fn default() -> Self {
        let version = RevlogVersionOptions::V1 {
            inline: false,
            general_delta: true,
        };
        Self {
            version,
            use_nodemap: true,
            delta_config: RevlogDeltaConfig::default(),
            data_config: RevlogDataConfig::default(),
            feature_config: RevlogFeatureConfig::default(),
        }
    }
}

impl RevlogOpenOptions {
    /// Build the options of a version 1 revlog from already resolved
    /// configurations.
    ///
    /// The general-delta flag of the version is copied from
    /// `data_config.general_delta` and the persistent nodemap is left
    /// disabled.
    pub fn new(
        inline: bool,
        data_config: RevlogDataConfig,
        delta_config: RevlogDeltaConfig,
        feature_config: RevlogFeatureConfig,
    ) -> Self {
        let general_delta = data_config.general_delta;
        let version = RevlogVersionOptions::V1 {
            general_delta,
            inline,
        };
        Self {
            version,
            use_nodemap: false,
            data_config,
            delta_config,
            feature_config,
        }
    }

    /// Return the index header matching the configured revlog version.
    pub fn index_header(&self) -> super::index::IndexHeader {
        let header_bytes = match self.version {
            RevlogVersionOptions::V0 => [0; 4],
            RevlogVersionOptions::V1 {
                general_delta,
                inline,
            } => {
                // Second byte: bit 0 is `inline`, bit 1 is `general_delta`.
                let flags = (u8::from(general_delta) << 1) | u8::from(inline);
                [0, flags, 0, 1]
            }
            RevlogVersionOptions::V2 => 0xDEADu32.to_be_bytes(),
            RevlogVersionOptions::ChangelogV2 { .. } => {
                0xD34Du32.to_be_bytes()
            }
        };
        super::index::IndexHeader { header_bytes }
    }
}

/// Whether memory maps can be pre-populated on the target platform.
///
/// Technically only Linux 2.5.46+ has `MAP_POPULATE` and only `2.6.23` on
/// private mappings, but if you're using such ancient Linux, you have other
/// problems.
#[cfg(target_os = "linux")]
const fn can_populate_mmap() -> bool {
    true
}

/// Whether memory maps can be pre-populated on the target platform.
///
/// There is a way of populating mmaps for Windows, but it would need testing.
#[cfg(not(target_os = "linux"))]
const fn can_populate_mmap() -> bool {
    false
}

#[derive(Debug, Clone, Copy, PartialEq)]
/// Holds configuration values about how the revlog data is read
///
/// [`RevlogDataConfig::new`] resolves it from the repository configuration
/// and requirements, starting from the [`Default`] values.
pub struct RevlogDataConfig {
    /// Should we try to open the "pending" version of the revlog
    pub try_pending: bool,
    /// Should we try to open the "split" version of the revlog
    pub try_split: bool,
    /// When True, `indexfile` should be opened with `checkambig=True` at
    /// writing time, to avoid file stat ambiguity
    pub check_ambig: bool,
    /// If true, use mmap instead of reading to deal with large indexes
    pub mmap_large_index: bool,
    /// How much data is considered large
    /// (config: `storage.revlog.mmap.index:size-threshold`)
    pub mmap_index_threshold: Option<u64>,
    /// How much data to read and cache into the raw revlog data cache
    /// (config: `format.chunkcachesize`)
    pub chunk_cache_size: u64,
    /// The size of the uncompressed cache compared to the largest revision
    /// seen (only set for the medium and high "memory" resource profiles)
    pub uncompressed_cache_factor: Option<f64>,
    /// The number of chunks cached (only set for the medium and high
    /// "memory" resource profiles)
    pub uncompressed_cache_count: Option<u64>,
    /// Allow sparse reading of the revlog data
    pub with_sparse_read: bool,
    /// Minimal density of a sparse read chunk
    /// (config: `experimental.sparse-read.density-threshold`)
    pub sr_density_threshold: f64,
    /// Minimal size of the data we skip when performing sparse reads
    /// (config: `experimental.sparse-read.min-gap-size`)
    pub sr_min_gap_size: u64,
    /// Whether deltas are encoded against arbitrary bases
    pub general_delta: bool,
}

impl RevlogDataConfig {
    /// Resolve the data configuration from the repository `config` and
    /// `requirements`.
    ///
    /// Starts from the [`Default`] values and overrides the fields for which
    /// the configuration or the requirements provide a value.
    ///
    /// # Errors
    ///
    /// Returns the [`HgError`] raised by any of the configuration lookups.
    pub fn new(
        config: &Config,
        requirements: &HashSet<String>,
    ) -> Result<Self, HgError> {
        let mut data_config = Self::default();
        if let Some(chunk_cache_size) =
            config.get_byte_size(b"format", b"chunkcachesize")?
        {
            data_config.chunk_cache_size = chunk_cache_size;
        }

        // The uncompressed cache is only enabled for the more generous
        // memory profiles, with a bigger factor for the highest one.
        let memory_profile = config.get_resource_profile(Some("memory"));
        if memory_profile.value >= ResourceProfileValue::Medium {
            data_config.uncompressed_cache_count = Some(10_000);
            data_config.uncompressed_cache_factor = Some(4.0);
            if memory_profile.value >= ResourceProfileValue::High {
                data_config.uncompressed_cache_factor = Some(10.0)
            }
        }

        // Use mmap if requested, or by default if we can fully populate it
        let mmap_index = config
            .get_option_no_default(b"storage", b"revlog.mmap.index")?
            .unwrap_or(can_populate_mmap());
        if mmap_index {
            // Only mmap if above the requested size threshold. The threshold
            // must stay unset when mmap is disabled, so it is only looked up
            // behind this gate.
            data_config.mmap_index_threshold = config.get_byte_size(
                b"storage",
                b"revlog.mmap.index:size-threshold",
            )?;
        }

        let with_sparse_read =
            config.get_bool(b"experimental", b"sparse-read")?;
        if let Some(sr_density_threshold) = config
            .get_f64(b"experimental", b"sparse-read.density-threshold")?
        {
            data_config.sr_density_threshold = sr_density_threshold;
        }
        if let Some(sr_min_gap_size) = config
            .get_byte_size(b"experimental", b"sparse-read.min-gap-size")?
        {
            data_config.sr_min_gap_size = sr_min_gap_size;
        }

        // Sparse reading can be requested through the configuration, and is
        // always enabled for repositories using sparse revlogs. The
        // requirement must not discard an explicit configuration request.
        data_config.with_sparse_read = with_sparse_read
            || requirements.contains(SPARSEREVLOG_REQUIREMENT);

        Ok(data_config)
    }
}

impl Default for RevlogDataConfig {
    /// Every switch is off and every optional value is unset; only the
    /// chunk cache size and the two sparse-read tunables get a dedicated
    /// default.
    fn default() -> Self {
        Self {
            try_pending: false,
            try_split: false,
            check_ambig: false,
            mmap_large_index: false,
            mmap_index_threshold: None,
            chunk_cache_size: DEFAULT_CHUNK_CACHE_SIZE,
            uncompressed_cache_factor: None,
            uncompressed_cache_count: None,
            with_sparse_read: false,
            sr_density_threshold: DEFAULT_SPARSE_READ_DENSITY_THRESHOLD,
            sr_min_gap_size: DEFAULT_SPARSE_READ_MIN_GAP_SIZE,
            general_delta: false,
        }
    }
}

#[derive(Debug, Clone, Copy, PartialEq)]
/// Holds configuration values about how new deltas are computed.
///
/// Some attributes are duplicated from [`RevlogDataConfig`] to help having
/// each object self contained.
///
/// [`RevlogDeltaConfig::new`] resolves it from the repository configuration
/// and requirements.
pub struct RevlogDeltaConfig {
    /// Whether deltas can be encoded against arbitrary bases
    pub general_delta: bool,
    /// Allow sparse writing of the revlog data
    pub sparse_revlog: bool,
    /// Maximum length of a delta chain (config: `format.maxchainlen`)
    pub max_chain_len: Option<u64>,
    /// Maximum distance between a delta chain's start and end
    /// (config: `experimental.maxdeltachainspan`, a negative value means
    /// `None`)
    pub max_deltachain_span: Option<u64>,
    /// If `upper_bound_comp` is not None, this is the expected maximal
    /// gain from compression for the data content
    pub upper_bound_comp: Option<f64>,
    /// Should we try a delta against both parents
    /// (config: `storage.revlog.optimize-delta-parent-choice`)
    pub delta_both_parents: bool,
    /// Test delta base candidate groups by chunks of this maximal size
    /// (config:
    /// `storage.revlog.delta-parent-search.candidate-group-chunk-size`)
    pub candidate_group_chunk_size: u64,
    /// Should we display debug information about delta computation
    /// (config: `debug.revlog.debug-delta`)
    pub debug_delta: bool,
    /// Trust incoming deltas by default
    /// (config: `storage.revlog.reuse-external-delta`)
    pub lazy_delta: bool,
    /// Trust the base of incoming deltas by default
    /// (config: `storage.revlog.reuse-external-delta-parent`)
    pub lazy_delta_base: bool,
}

impl RevlogDeltaConfig {
    /// Resolve the delta configuration of a revlog of `revlog_type` from the
    /// repository `config` and `requirements`.
    ///
    /// # Errors
    ///
    /// Returns the [`HgError`] raised by any of the configuration lookups.
    pub fn new(
        config: &Config,
        requirements: &HashSet<String>,
        revlog_type: RevlogType,
    ) -> Result<Self, HgError> {
        let delta_both_parents = config
            .get_option_no_default(
                b"storage",
                b"revlog.optimize-delta-parent-choice",
            )?
            .unwrap_or(true);
        let candidate_group_chunk_size = config
            .get_u64(
                b"storage",
                b"revlog.delta-parent-search.candidate-group-chunk-size",
            )?
            .unwrap_or_default();
        let debug_delta = config.get_bool(b"debug", b"revlog.debug-delta")?;
        let lazy_delta =
            config.get_bool(b"storage", b"revlog.reuse-external-delta")?;

        // The base of incoming deltas is only ever trusted when incoming
        // deltas are themselves trusted; without an explicit setting, fall
        // back to the `format.generaldelta` configuration.
        let lazy_delta_base = if lazy_delta {
            let explicit: Option<bool> = config.get_option_no_default(
                b"storage",
                b"revlog.reuse-external-delta-parent",
            )?;
            match explicit {
                Some(base) => base,
                None => config.get_bool(b"format", b"generaldelta")?,
            }
        } else {
            false
        };

        // A negative span means "no limit".
        let max_deltachain_span = config
            .get_i64(b"experimental", b"maxdeltachainspan")?
            .filter(|span| *span >= 0)
            .map(|span| span as u64);

        let max_chain_len =
            config.get_byte_size_no_default(b"format", b"maxchainlen")?;

        // upper bound of what we expect from compression
        // (real life value seems to be 3)
        let upper_bound_comp = if revlog_type == RevlogType::Manifestlog {
            Some(3.0)
        } else {
            None
        };

        Ok(Self {
            general_delta: requirements.contains(GENERALDELTA_REQUIREMENT),
            sparse_revlog: requirements.contains(SPARSEREVLOG_REQUIREMENT),
            max_chain_len,
            max_deltachain_span,
            upper_bound_comp,
            delta_both_parents,
            candidate_group_chunk_size,
            debug_delta,
            lazy_delta,
            lazy_delta_base,
        })
    }
}

impl Default for RevlogDeltaConfig {
    /// Deltas against both parents and trusting incoming deltas are enabled;
    /// every other switch is off and every limit is unset.
    fn default() -> Self {
        Self {
            general_delta: false,
            sparse_revlog: false,
            max_chain_len: None,
            max_deltachain_span: None,
            upper_bound_comp: None,
            delta_both_parents: true,
            candidate_group_chunk_size: 0,
            debug_delta: false,
            lazy_delta: true,
            lazy_delta_base: false,
        }
    }
}

#[derive(Debug, Default, Clone, Copy, PartialEq)]
/// Holds configuration values about the available revlog features
///
/// [`RevlogFeatureConfig::new`] resolves the compression engine and the
/// ellipsis support from the repository configuration and requirements; the
/// other fields keep their [`Default`] value there.
pub struct RevlogFeatureConfig {
    /// The compression engine and its options
    pub compression_engine: CompressionConfig,
    /// Can we use censor on this revlog
    pub censorable: bool,
    /// Does this revlog use the "side data" feature
    pub has_side_data: bool,
    /// Might remove this configuration once the rank computation has no
    /// impact
    pub compute_rank: bool,
    /// Parent order is supposed to be semantically irrelevant, so we
    /// normally re-sort parents to ensure that the first parent is non-null,
    /// if there is a non-null parent at all.
    /// filelog abuses the parent order as a flag to mark some instances of
    /// meta-encoded files, so allow it to disable this behavior.
    pub canonical_parent_order: bool,
    /// Can ellipsis commit be used (true when the repository has the narrow
    /// requirement)
    pub enable_ellipsis: bool,
}

impl RevlogFeatureConfig {
    /// Resolve the feature configuration from the repository `config` and
    /// `requirements`.
    ///
    /// Only the compression engine and the ellipsis support are resolved
    /// here; every other field keeps its default value.
    ///
    /// # Errors
    ///
    /// Returns the [`HgError`] raised while resolving the compression
    /// configuration.
    pub fn new(
        config: &Config,
        requirements: &HashSet<String>,
    ) -> Result<Self, HgError> {
        let compression_engine =
            CompressionConfig::new(config, requirements)?;
        let enable_ellipsis = requirements.contains(NARROW_REQUIREMENT);
        Ok(Self {
            compression_engine,
            enable_ellipsis,
            ..Self::default()
        })
    }
}

/// Compute the options used to open a revlog of `revlog_type`, as dictated by
/// the current `config` and `requirements`.
///
/// The revlog version is selected from the requirements, by decreasing
/// priority: changelog-v2 (changelog only), revlog-v2, revlog-v1, and
/// finally version 0 when none of them is present.
///
/// # Errors
///
/// Returns the [`HgError`] raised by any of the configuration lookups.
pub fn default_revlog_options(
    config: &Config,
    requirements: &HashSet<String>,
    revlog_type: RevlogType,
) -> Result<RevlogOpenOptions, HgError> {
    let is_changelog = revlog_type == RevlogType::Changelog;
    let wants_changelog_v2 =
        is_changelog && requirements.contains(CHANGELOGV2_REQUIREMENT);

    let version = if wants_changelog_v2 {
        RevlogVersionOptions::ChangelogV2 {
            compute_rank: config
                .get_bool(b"experimental", b"changelog-v2.compute-rank")?,
        }
    } else if requirements.contains(REVLOGV2_REQUIREMENT) {
        RevlogVersionOptions::V2
    } else if requirements.contains(REVLOGV1_REQUIREMENT) {
        let general_delta = requirements.contains(GENERALDELTA_REQUIREMENT);
        RevlogVersionOptions::V1 {
            general_delta,
            inline: !is_changelog,
        }
    } else {
        RevlogVersionOptions::V0
    };

    // We don't need to dance around the slow path like in the Python
    // implementation since we know we have access to the fast code.
    let use_nodemap = requirements.contains(NODEMAP_REQUIREMENT);

    let delta_config =
        RevlogDeltaConfig::new(config, requirements, revlog_type)?;
    let data_config = RevlogDataConfig::new(config, requirements)?;
    let feature_config = RevlogFeatureConfig::new(config, requirements)?;

    Ok(RevlogOpenOptions {
        version,
        use_nodemap,
        delta_config,
        data_config,
        feature_config,
    })
}