# HG changeset patch # User Simon Sapin # Date 1622649823 -7200 # Node ID f6bb181c75f81290cffd090f440374e97ec8395f # Parent 777c3d2319130e1284eb67e30270bb7586d56531 rust: Parse "subinclude"d files along the way, not later When parsing a `.hgignore` file and encountering an `include:` line, the included file is parsed recursively right then in a depth-first fashion. With `subinclude:` however included files were parsed (recursively) much later. This changes it to be expanded during parsing, like `.hgignore`. The motivation for this is an upcoming changeset that needs to detect changes in which files are ignored or not. The plan is to hash all ignore files while they are being read, and store that hash in the dirstate (in v2 format). In order to allow a potential alternative implementations to read that format, the algorithm to compute that hash must be documented. Having a well-defined depth-first ordering for the tree of (sub-)included files makes that easier. Differential Revision: https://phab.mercurial-scm.org/D10834 diff -r 777c3d231913 -r f6bb181c75f8 rust/hg-core/src/filepatterns.rs --- a/rust/hg-core/src/filepatterns.rs Wed Jun 02 18:14:44 2021 +0200 +++ b/rust/hg-core/src/filepatterns.rs Wed Jun 02 18:03:43 2021 +0200 @@ -41,7 +41,7 @@ /// Appended to the regexp of globs const GLOB_SUFFIX: &[u8; 7] = b"(?:/|$)"; -#[derive(Debug, Copy, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq)] pub enum PatternSyntax { /// A regular expression Regexp, @@ -65,6 +65,14 @@ Include, /// A file of patterns to match against files under the same directory SubInclude, + /// SubInclude with the result of parsing the included file + /// + /// Note: there is no ExpandedInclude because that expansion can be done + /// in place by replacing the Include pattern by the included patterns. + /// SubInclude requires more handling. + /// + /// Note: `Box` is used to minimize size impact on other enum variants + ExpandedSubInclude(Box), } /// Transforms a glob pattern into a regex @@ -218,7 +226,9 @@ PatternSyntax::Glob | PatternSyntax::RootGlob => { [glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat() } - PatternSyntax::Include | PatternSyntax::SubInclude => unreachable!(), + PatternSyntax::Include + | PatternSyntax::SubInclude + | PatternSyntax::ExpandedSubInclude(_) => unreachable!(), } } @@ -441,10 +451,10 @@ pub type PatternResult = Result; /// Wrapper for `read_pattern_file` that also recursively expands `include:` -/// patterns. +/// and `subinclude:` patterns. /// -/// `subinclude:` is not treated as a special pattern here: unraveling them -/// needs to occur in the "ignore" phase. +/// The former are expanded in place, while `PatternSyntax::ExpandedSubInclude` +/// is used for the latter to form a tree of patterns. pub fn get_patterns_from_file( pattern_file: &Path, root_dir: &Path, @@ -453,18 +463,35 @@ let patterns = patterns .into_iter() .flat_map(|entry| -> PatternResult<_> { - let IgnorePattern { - syntax, pattern, .. - } = &entry; - Ok(match syntax { + Ok(match &entry.syntax { PatternSyntax::Include => { let inner_include = - root_dir.join(get_path_from_bytes(&pattern)); + root_dir.join(get_path_from_bytes(&entry.pattern)); let (inner_pats, inner_warnings) = get_patterns_from_file(&inner_include, root_dir)?; warnings.extend(inner_warnings); inner_pats } + PatternSyntax::SubInclude => { + let mut sub_include = SubInclude::new( + &root_dir, + &entry.pattern, + &entry.source, + )?; + let (inner_patterns, inner_warnings) = + get_patterns_from_file( + &sub_include.path, + &sub_include.root, + )?; + sub_include.included_patterns = inner_patterns; + warnings.extend(inner_warnings); + vec![IgnorePattern { + syntax: PatternSyntax::ExpandedSubInclude(Box::new( + sub_include, + )), + ..entry + }] + } _ => vec![entry], }) }) @@ -475,6 +502,7 @@ } /// Holds all the information needed to handle a `subinclude:` pattern. +#[derive(Debug, PartialEq, Eq, Clone)] pub struct SubInclude { /// Will be used for repository (hg) paths that start with this prefix. /// It is relative to the current working directory, so comparing against @@ -484,6 +512,8 @@ pub path: PathBuf, /// Folder in the filesystem where this it applies pub root: PathBuf, + + pub included_patterns: Vec, } impl SubInclude { @@ -513,29 +543,25 @@ })?, path: path.to_owned(), root: new_root.to_owned(), + included_patterns: Vec::new(), }) } } /// Separate and pre-process subincludes from other patterns for the "ignore" /// phase. -pub fn filter_subincludes<'a>( - ignore_patterns: &'a [IgnorePattern], - root_dir: &Path, -) -> Result<(Vec, Vec<&'a IgnorePattern>), HgPathError> { +pub fn filter_subincludes( + ignore_patterns: Vec, +) -> Result<(Vec>, Vec), HgPathError> { let mut subincludes = vec![]; let mut others = vec![]; - for ignore_pattern in ignore_patterns.iter() { - let IgnorePattern { - syntax, - pattern, - source, - } = ignore_pattern; - if *syntax == PatternSyntax::SubInclude { - subincludes.push(SubInclude::new(root_dir, pattern, &source)?); + for pattern in ignore_patterns { + if let PatternSyntax::ExpandedSubInclude(sub_include) = pattern.syntax + { + subincludes.push(sub_include); } else { - others.push(ignore_pattern) + others.push(pattern) } } Ok((subincludes, others)) diff -r 777c3d231913 -r f6bb181c75f8 rust/hg-core/src/matchers.rs --- a/rust/hg-core/src/matchers.rs Wed Jun 02 18:14:44 2021 +0200 +++ b/rust/hg-core/src/matchers.rs Wed Jun 02 18:03:43 2021 +0200 @@ -11,7 +11,7 @@ dirstate::dirs_multiset::DirsChildrenMultiset, filepatterns::{ build_single_regex, filter_subincludes, get_patterns_from_file, - PatternFileWarning, PatternResult, SubInclude, + PatternFileWarning, PatternResult, }, utils::{ files::find_dirs, @@ -237,7 +237,7 @@ /// /// /// let ignore_patterns = /// vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))]; -/// let (matcher, _) = IncludeMatcher::new(ignore_patterns, "".as_ref()).unwrap(); +/// let matcher = IncludeMatcher::new(ignore_patterns).unwrap(); /// /// /// assert_eq!(matcher.matches(HgPath::new(b"testing")), false); /// assert_eq!(matcher.matches(HgPath::new(b"this should work")), true); @@ -341,8 +341,8 @@ /// Returns the regex pattern and a function that matches an `HgPath` against /// said regex formed by the given ignore patterns. -fn build_regex_match<'a>( - ignore_patterns: &'a [&'a IgnorePattern], +fn build_regex_match( + ignore_patterns: &[IgnorePattern], ) -> PatternResult<(Vec, Box bool + Sync>)> { let mut regexps = vec![]; let mut exact_set = HashSet::new(); @@ -478,32 +478,25 @@ /// Returns a function that checks whether a given file (in the general sense) /// should be matched. fn build_match<'a, 'b>( - ignore_patterns: &'a [IgnorePattern], - root_dir: &Path, -) -> PatternResult<( - Vec, - Box bool + 'b + Sync>, - Vec, -)> { + ignore_patterns: Vec, +) -> PatternResult<(Vec, Box bool + 'b + Sync>)> { let mut match_funcs: Vec bool + Sync>> = vec![]; // For debugging and printing let mut patterns = vec![]; - let mut all_warnings = vec![]; - let (subincludes, ignore_patterns) = - filter_subincludes(ignore_patterns, root_dir)?; + let (subincludes, ignore_patterns) = filter_subincludes(ignore_patterns)?; if !subincludes.is_empty() { // Build prefix-based matcher functions for subincludes let mut submatchers = FastHashMap::default(); let mut prefixes = vec![]; - for SubInclude { prefix, root, path } in subincludes.into_iter() { - let (match_fn, warnings) = - get_ignore_function(vec![path.to_path_buf()], &root)?; - all_warnings.extend(warnings); - prefixes.push(prefix.to_owned()); - submatchers.insert(prefix.to_owned(), match_fn); + for sub_include in subincludes { + let matcher = IncludeMatcher::new(sub_include.included_patterns)?; + let match_fn = + Box::new(move |path: &HgPath| matcher.matches(path)); + prefixes.push(sub_include.prefix.clone()); + submatchers.insert(sub_include.prefix.clone(), match_fn); } let match_subinclude = move |filename: &HgPath| { @@ -556,14 +549,13 @@ } Ok(if match_funcs.len() == 1 { - (patterns, match_funcs.remove(0), all_warnings) + (patterns, match_funcs.remove(0)) } else { ( patterns, Box::new(move |f: &HgPath| -> bool { match_funcs.iter().any(|match_func| match_func(f)) }), - all_warnings, ) }) } @@ -588,8 +580,7 @@ all_patterns.extend(patterns.to_owned()); all_warnings.extend(warnings); } - let (matcher, warnings) = IncludeMatcher::new(all_patterns, root_dir)?; - all_warnings.extend(warnings); + let matcher = IncludeMatcher::new(all_patterns)?; Ok(( Box::new(move |path: &HgPath| matcher.matches(path)), all_warnings, @@ -597,34 +588,26 @@ } impl<'a> IncludeMatcher<'a> { - pub fn new( - ignore_patterns: Vec, - root_dir: &Path, - ) -> PatternResult<(Self, Vec)> { - let (patterns, match_fn, warnings) = - build_match(&ignore_patterns, root_dir)?; + pub fn new(ignore_patterns: Vec) -> PatternResult { let RootsDirsAndParents { roots, dirs, parents, } = roots_dirs_and_parents(&ignore_patterns)?; - let prefix = ignore_patterns.iter().any(|k| match k.syntax { PatternSyntax::Path | PatternSyntax::RelPath => true, _ => false, }); + let (patterns, match_fn) = build_match(ignore_patterns)?; - Ok(( - Self { - patterns, - match_fn, - prefix, - roots, - dirs, - parents, - }, - warnings, - )) + Ok(Self { + patterns, + match_fn, + prefix, + roots, + dirs, + parents, + }) } fn get_all_parents_children(&self) -> DirsChildrenMultiset { @@ -810,14 +793,11 @@ #[test] fn test_includematcher() { // VisitchildrensetPrefix - let (matcher, _) = IncludeMatcher::new( - vec![IgnorePattern::new( - PatternSyntax::RelPath, - b"dir/subdir", - Path::new(""), - )], - "".as_ref(), - ) + let matcher = IncludeMatcher::new(vec![IgnorePattern::new( + PatternSyntax::RelPath, + b"dir/subdir", + Path::new(""), + )]) .unwrap(); let mut set = HashSet::new(); @@ -848,14 +828,11 @@ ); // VisitchildrensetRootfilesin - let (matcher, _) = IncludeMatcher::new( - vec![IgnorePattern::new( - PatternSyntax::RootFiles, - b"dir/subdir", - Path::new(""), - )], - "".as_ref(), - ) + let matcher = IncludeMatcher::new(vec![IgnorePattern::new( + PatternSyntax::RootFiles, + b"dir/subdir", + Path::new(""), + )]) .unwrap(); let mut set = HashSet::new(); @@ -886,14 +863,11 @@ ); // VisitchildrensetGlob - let (matcher, _) = IncludeMatcher::new( - vec![IgnorePattern::new( - PatternSyntax::Glob, - b"dir/z*", - Path::new(""), - )], - "".as_ref(), - ) + let matcher = IncludeMatcher::new(vec![IgnorePattern::new( + PatternSyntax::Glob, + b"dir/z*", + Path::new(""), + )]) .unwrap(); let mut set = HashSet::new(); diff -r 777c3d231913 -r f6bb181c75f8 rust/hg-cpython/src/dirstate/status.rs --- a/rust/hg-cpython/src/dirstate/status.rs Wed Jun 02 18:14:44 2021 +0200 +++ b/rust/hg-cpython/src/dirstate/status.rs Wed Jun 02 18:03:43 2021 +0200 @@ -211,12 +211,9 @@ .collect(); let ignore_patterns = ignore_patterns?; - let mut all_warnings = vec![]; - let (matcher, warnings) = - IncludeMatcher::new(ignore_patterns, &root_dir) - .map_err(|e| handle_fallback(py, e.into()))?; - all_warnings.extend(warnings); + let matcher = IncludeMatcher::new(ignore_patterns) + .map_err(|e| handle_fallback(py, e.into()))?; let (status_res, warnings) = dmap .status( @@ -234,9 +231,7 @@ ) .map_err(|e| handle_fallback(py, e))?; - all_warnings.extend(warnings); - - build_response(py, status_res, all_warnings) + build_response(py, status_res, warnings) } e => Err(PyErr::new::( py,