# HG changeset patch
# User Raphaël Gomès
# Date 1658237865 -7200
# Node ID ffd4b1f1c9cb4a96dd7b8144c3b8299a0c3ac5d8
# Parent  85f5d11c77ddb25132782c678eb4af986744051a
rhg: add sparse support

diff -r 85f5d11c77dd -r ffd4b1f1c9cb rust/hg-core/src/lib.rs
--- a/rust/hg-core/src/lib.rs	Tue Jul 19 15:37:09 2022 +0200
+++ b/rust/hg-core/src/lib.rs	Tue Jul 19 15:37:45 2022 +0200
@@ -7,6 +7,7 @@
 mod ancestors;
 pub mod dagops;
 pub mod errors;
+pub mod sparse;
 pub use ancestors::{AncestorsIterator, MissingAncestors};
 pub mod dirstate;
 pub mod dirstate_tree;
diff -r 85f5d11c77dd -r ffd4b1f1c9cb rust/hg-core/src/sparse.rs
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rust/hg-core/src/sparse.rs	Tue Jul 19 15:37:45 2022 +0200
@@ -0,0 +1,339 @@
+use std::{collections::HashSet, path::Path};
+
+use format_bytes::{write_bytes, DisplayBytes};
+
+use crate::{
+    errors::HgError,
+    filepatterns::parse_pattern_file_contents,
+    matchers::{
+        AlwaysMatcher, DifferenceMatcher, IncludeMatcher, Matcher,
+        UnionMatcher,
+    },
+    operations::cat,
+    repo::Repo,
+    requirements::SPARSE_REQUIREMENT,
+    utils::{hg_path::HgPath, SliceExt},
+    IgnorePattern, PatternError, PatternFileWarning, PatternSyntax, Revision,
+    NULL_REVISION,
+};
+
+/// Command which is triggering the config read
+#[derive(Copy, Clone, Debug)]
+pub enum SparseConfigContext {
+    Sparse,
+    Narrow,
+}
+
+impl DisplayBytes for SparseConfigContext {
+    fn display_bytes(
+        &self,
+        output: &mut dyn std::io::Write,
+    ) -> std::io::Result<()> {
+        match self {
+            SparseConfigContext::Sparse => write_bytes!(output, b"sparse"),
+            SparseConfigContext::Narrow => write_bytes!(output, b"narrow"),
+        }
+    }
+}
+
+/// Possible warnings when reading sparse configuration
+#[derive(Debug, derive_more::From)]
+pub enum SparseWarning {
+    /// Warns about improper paths that start with "/"
+    RootWarning {
+        context: SparseConfigContext,
+        line: Vec<u8>,
+    },
+    /// Warns about a profile missing from the given changelog revision
+    ProfileNotFound { profile: Vec<u8>, rev: Revision },
+    #[from]
+    Pattern(PatternFileWarning),
+}
+
+/// Parsed sparse config
+#[derive(Debug, Default)]
+pub struct SparseConfig {
+    // Line-separated
+    includes: Vec<u8>,
+    // Line-separated
+    excludes: Vec<u8>,
+    profiles: HashSet<Vec<u8>>,
+    warnings: Vec<SparseWarning>,
+}
+
+/// All possible errors when reading sparse config
+#[derive(Debug, derive_more::From)]
+pub enum SparseConfigError {
+    IncludesAfterExcludes {
+        context: SparseConfigContext,
+    },
+    EntryOutsideSection {
+        context: SparseConfigContext,
+        line: Vec<u8>,
+    },
+    #[from]
+    HgError(HgError),
+    #[from]
+    PatternError(PatternError),
+}
+
+/// Parse sparse config file content.
+fn parse_config(
+    raw: &[u8],
+    context: SparseConfigContext,
+) -> Result<SparseConfig, SparseConfigError> {
+    let mut includes = vec![];
+    let mut excludes = vec![];
+    let mut profiles = HashSet::new();
+    let mut warnings = vec![];
+
+    #[derive(PartialEq, Eq)]
+    enum Current {
+        Includes,
+        Excludes,
+        None,
+    };
+
+    let mut current = Current::None;
+    let mut in_section = false;
+
+    for line in raw.split(|c| *c == b'\n') {
+        let line = line.trim();
+        if line.is_empty() || line[0] == b'#' {
+            // empty or comment line, skip
+            continue;
+        }
+        if line.starts_with(b"%include ") {
+            let profile = line[b"%include ".len()..].trim();
+            if !profile.is_empty() {
+                profiles.insert(profile.into());
+            }
+        } else if line == b"[include]" {
+            // Sections are ordered: once an `[exclude]` section has been
+            // seen, a later `[include]` section is rejected.
+            if in_section && current == Current::Excludes {
+                return Err(SparseConfigError::IncludesAfterExcludes {
+                    context,
+                });
+            }
+            in_section = true;
+            current = Current::Includes;
+            continue;
+        } else if line == b"[exclude]" {
+            in_section = true;
+            current = Current::Excludes;
+        } else {
+            if current == Current::None {
+                return Err(SparseConfigError::EntryOutsideSection {
+                    context,
+                    line: line.into(),
+                });
+            }
+            if line.trim().starts_with(b"/") {
+                warnings.push(SparseWarning::RootWarning {
+                    context,
+                    line: line.into(),
+                });
+                continue;
+            }
+            match current {
+                Current::Includes => {
+                    includes.push(b'\n');
+                    includes.extend(line.iter());
+                }
+                Current::Excludes => {
+                    excludes.push(b'\n');
+                    excludes.extend(line.iter());
+                }
+                Current::None => unreachable!(),
+            }
+        }
+    }
+
+    Ok(SparseConfig {
+        includes,
+        excludes,
+        profiles,
+        warnings,
+    })
+}
+
+/// Read the `tempsparse` file from the repository's hg vfs and split it on
+/// newlines. A missing or empty file yields no entries.
+fn read_temporary_includes(
+    repo: &Repo,
+) -> Result<Vec<Vec<u8>>, SparseConfigError> {
+    let raw = repo.hg_vfs().try_read("tempsparse")?.unwrap_or(vec![]);
+    if raw.is_empty() {
+        return Ok(vec![]);
+    }
+    Ok(raw.split(|c| *c == b'\n').map(ToOwned::to_owned).collect())
+}
+
+/// Obtain sparse checkout patterns for the given revision
+fn patterns_for_rev(
+    repo: &Repo,
+    rev: Revision,
+) -> Result<Option<SparseConfig>, SparseConfigError> {
+    if !repo.has_sparse() {
+        return Ok(None);
+    }
+    let raw = repo.hg_vfs().try_read("sparse")?.unwrap_or(vec![]);
+
+    if raw.is_empty() {
+        return Ok(None);
+    }
+
+    let mut config = parse_config(&raw, SparseConfigContext::Sparse)?;
+
+    if !config.profiles.is_empty() {
+        let mut profiles: Vec<Vec<u8>> = config.profiles.into_iter().collect();
+        let mut visited = HashSet::new();
+
+        while let Some(profile) = profiles.pop() {
+            if visited.contains(&profile) {
+                continue;
+            }
+            visited.insert(profile.to_owned());
+
+            let output =
+                cat(repo, &rev.to_string(), vec![HgPath::new(&profile)])
+                    .map_err(|_| {
+                        HgError::corrupted(format!(
+                            "dirstate points to non-existent parent node"
+                        ))
+                    })?;
+            if output.results.is_empty() {
+                config.warnings.push(SparseWarning::ProfileNotFound {
+                    profile: profile.to_owned(),
+                    rev,
+                });
+                // There is no content to parse for a missing profile.
+                continue;
+            }
+
+            let subconfig = parse_config(
+                &output.results[0].1,
+                SparseConfigContext::Sparse,
+            )?;
+            if !subconfig.includes.is_empty() {
+                config.includes.push(b'\n');
+                config.includes.extend(&subconfig.includes);
+            }
+            if !subconfig.excludes.is_empty() {
+                config.excludes.push(b'\n');
+                config.excludes.extend(&subconfig.excludes);
+            }
+            config.warnings.extend(subconfig.warnings.into_iter());
+            profiles.extend(subconfig.profiles.into_iter());
+        }
+
+        config.profiles = visited;
+    }
+
+    if !config.includes.is_empty() {
+        config.includes.extend(b"\n.hg*");
+    }
+
+    Ok(Some(config))
+}
+
+/// Obtain a matcher for sparse working directories.
+pub fn matcher(
+    repo: &Repo,
+) -> Result<(Box<dyn Matcher + Sync>, Vec<SparseWarning>), SparseConfigError> {
+    let mut warnings = vec![];
+    if !repo.requirements().contains(SPARSE_REQUIREMENT) {
+        return Ok((Box::new(AlwaysMatcher), warnings));
+    }
+
+    let parents = repo.dirstate_parents()?;
+    let mut revs = vec![];
+    let p1_rev =
+        repo.changelog()?
+            .rev_from_node(parents.p1.into())
+            .map_err(|_| {
+                HgError::corrupted(format!(
+                    "dirstate points to non-existent parent node"
+                ))
+            })?;
+    if p1_rev != NULL_REVISION {
+        revs.push(p1_rev)
+    }
+    let p2_rev =
+        repo.changelog()?
+            .rev_from_node(parents.p2.into())
+            .map_err(|_| {
+                HgError::corrupted(format!(
+                    "dirstate points to non-existent parent node"
+                ))
+            })?;
+    if p2_rev != NULL_REVISION {
+        revs.push(p2_rev)
+    }
+    let mut matchers = vec![];
+
+    for rev in revs.iter() {
+        // Propagate sparse config errors instead of dropping them.
+        if let Some(config) = patterns_for_rev(repo, *rev)? {
+            warnings.extend(config.warnings);
+            let mut m: Box<dyn Matcher + Sync> = Box::new(AlwaysMatcher);
+            if !config.includes.is_empty() {
+                let (patterns, subwarnings) = parse_pattern_file_contents(
+                    &config.includes,
+                    Path::new(""),
+                    Some(b"relglob:".as_ref()),
+                    false,
+                )?;
+                warnings.extend(subwarnings.into_iter().map(From::from));
+                m = Box::new(IncludeMatcher::new(patterns)?);
+            }
+            if !config.excludes.is_empty() {
+                let (patterns, subwarnings) = parse_pattern_file_contents(
+                    &config.excludes,
+                    Path::new(""),
+                    Some(b"relglob:".as_ref()),
+                    false,
+                )?;
+                warnings.extend(subwarnings.into_iter().map(From::from));
+                m = Box::new(DifferenceMatcher::new(
+                    m,
+                    Box::new(IncludeMatcher::new(patterns)?),
+                ));
+            }
+            matchers.push(m);
+        }
+    }
+    let result: Box<dyn Matcher + Sync> = match matchers.len() {
+        0 => Box::new(AlwaysMatcher),
+        1 => matchers.pop().expect("1 is equal to 0"),
+        _ => Box::new(UnionMatcher::new(matchers)),
+    };
+
+    let matcher =
+        force_include_matcher(result, &read_temporary_includes(repo)?)?;
+    Ok((matcher, warnings))
+}
+
+/// Returns a matcher that returns true for any of the forced includes before
+/// testing against the actual matcher
+fn force_include_matcher(
+    result: Box<dyn Matcher + Sync>,
+    temp_includes: &[Vec<u8>],
+) -> Result<Box<dyn Matcher + Sync>, PatternError> {
+    if temp_includes.is_empty() {
+        return Ok(result);
+    }
+    let forced_include_matcher = IncludeMatcher::new(
+        temp_includes
+            .into_iter()
+            .map(|include| {
+                IgnorePattern::new(PatternSyntax::Path, include, Path::new(""))
+            })
+            .collect(),
+    )?;
+    Ok(Box::new(UnionMatcher::new(vec![
+        Box::new(forced_include_matcher),
+        result,
+    ])))
+}
diff -r 85f5d11c77dd -r ffd4b1f1c9cb rust/hg-core/src/vfs.rs
--- a/rust/hg-core/src/vfs.rs	Tue Jul 19 15:37:09 2022 +0200
+++ b/rust/hg-core/src/vfs.rs	Tue Jul 19 15:37:45 2022 +0200
@@ -40,6 +40,23 @@
         std::fs::read(&path).when_reading_file(&path)
     }
 
+    /// Returns `Ok(None)` if the file does not exist.
+    pub fn try_read(
+        &self,
+        relative_path: impl AsRef<Path>,
+    ) -> Result<Option<Vec<u8>>, HgError> {
+        match self.read(relative_path) {
+            Err(e) => match &e {
+                HgError::IoError { error, .. } => match error.kind() {
+                    ErrorKind::NotFound => return Ok(None),
+                    _ => Err(e),
+                },
+                _ => Err(e),
+            },
+            Ok(v) => Ok(Some(v)),
+        }
+    }
+
     fn mmap_open_gen(
         &self,
         relative_path: impl AsRef<Path>,
diff -r 85f5d11c77dd -r ffd4b1f1c9cb rust/rhg/src/commands/status.rs
--- a/rust/rhg/src/commands/status.rs	Tue Jul 19 15:37:09 2022 +0200
+++ b/rust/rhg/src/commands/status.rs	Tue Jul 19 15:37:45 2022 +0200
@@ -18,8 +18,8 @@
 use hg::errors::{HgError, IoResultExt};
 use hg::lock::LockError;
 use hg::manifest::Manifest;
-use hg::matchers::AlwaysMatcher;
 use hg::repo::Repo;
+use hg::sparse::{matcher, SparseWarning};
 use hg::utils::files::get_bytes_from_os_string;
 use hg::utils::files::get_bytes_from_path;
 use hg::utils::files::get_path_from_bytes;
@@ -251,9 +251,9 @@
         };
     }
 
-    if repo.has_sparse() || repo.has_narrow() {
+    if repo.has_narrow() {
         return Err(CommandError::unsupported(
-            "rhg status is not supported for sparse checkouts or narrow clones yet"
+            "rhg status is not supported for narrow clones yet",
         ));
     }
 
@@ -366,9 +366,36 @@
             filesystem_time_at_status_start,
         ))
     };
+    let (matcher, sparse_warnings) = matcher(repo)?;
+
+    for warning in sparse_warnings {
+        match &warning {
+            SparseWarning::RootWarning { context, line } => {
+                let msg = format_bytes!(
+                    b"warning: {} profile cannot use paths \
+                    starting with /, ignoring {}\n",
+                    context,
+                    line
+                );
+                ui.write_stderr(&msg)?;
+            }
+            SparseWarning::ProfileNotFound { profile, rev } => {
+                let msg = format_bytes!(
+                    b"warning: sparse profile '{}' not found \
+                    in rev {} - ignoring it\n",
+                    profile,
+                    rev
+                );
+                ui.write_stderr(&msg)?;
+            }
+            SparseWarning::Pattern(e) => {
+                ui.write_stderr(&print_pattern_file_warning(e, &repo))?;
+            }
+        }
+    }
     let (fixup, mut dirstate_write_needed, filesystem_time_at_status_start) =
         dmap.with_status(
-            &AlwaysMatcher,
+            matcher.as_ref(),
             repo.working_directory_path().to_owned(),
             ignore_files(repo, config),
             options,
diff -r 85f5d11c77dd -r ffd4b1f1c9cb rust/rhg/src/error.rs
--- a/rust/rhg/src/error.rs	Tue Jul 19 15:37:09 2022 +0200
+++ b/rust/rhg/src/error.rs	Tue Jul 19 15:37:45 2022 +0200
@@ -8,6 +8,7 @@
 use hg::exit_codes;
 use hg::repo::RepoError;
 use hg::revlog::revlog::RevlogError;
+use hg::sparse::SparseConfigError;
 use hg::utils::files::get_bytes_from_path;
 use hg::{DirstateError, DirstateMapError, StatusError};
 use std::convert::From;
@@ -52,6 +53,18 @@
         }
     }
 
+    pub fn abort_with_exit_code_bytes(
+        message: impl AsRef<[u8]>,
+        detailed_exit_code: exit_codes::ExitCode,
+    ) -> Self {
+        // TODO: use this everywhere it makes sense instead of the string
+        // version.
+        CommandError::Abort {
+            message: message.as_ref().into(),
+            detailed_exit_code,
+        }
+    }
+
     pub fn unsupported(message: impl AsRef<str>) -> Self {
         CommandError::UnsupportedFeature {
             message: utf8_to_local(message.as_ref()).into(),
@@ -212,3 +225,33 @@
         HgError::from(error).into()
     }
 }
+
+impl From<SparseConfigError> for CommandError {
+    fn from(e: SparseConfigError) -> Self {
+        match e {
+            SparseConfigError::IncludesAfterExcludes { context } => {
+                Self::abort_with_exit_code_bytes(
+                    format_bytes!(
+                        b"{} config cannot have includes after excludes",
+                        context
+                    ),
+                    exit_codes::CONFIG_PARSE_ERROR_ABORT,
+                )
+            }
+            SparseConfigError::EntryOutsideSection { context, line } => {
+                Self::abort_with_exit_code_bytes(
+                    format_bytes!(
+                        b"{} config entry outside of section: {}",
+                        context,
+                        &line,
+                    ),
+                    exit_codes::CONFIG_PARSE_ERROR_ABORT,
+                )
+            }
+            SparseConfigError::HgError(e) => Self::from(e),
+            SparseConfigError::PatternError(e) => {
+                Self::unsupported(format!("{}", e))
+            }
+        }
+    }
+}
diff -r 85f5d11c77dd -r ffd4b1f1c9cb tests/test-rhg-sparse-narrow.t
--- a/tests/test-rhg-sparse-narrow.t	Tue Jul 19 15:37:09 2022 +0200
+++ b/tests/test-rhg-sparse-narrow.t	Tue Jul 19 15:37:45 2022 +0200
@@ -92,7 +92,7 @@
   $ touch dir2/q
   $ "$real_hg" status
   $ $NO_FALLBACK rhg --config rhg.status=true status
-  unsupported feature: rhg status is not supported for sparse checkouts or narrow clones yet
+  unsupported feature: rhg status is not supported for narrow clones yet
   [252]
 
 Adding "orphaned" index files: