diff rust/hg-core/src/matchers.rs @ 47379:f6bb181c75f8

rust: Parse "subinclude"d files along the way, not later. When parsing a `.hgignore` file and encountering an `include:` line, the included file is parsed recursively right then in a depth-first fashion. With `subinclude:` however included files were parsed (recursively) much later. This changes it to be expanded during parsing, like `.hgignore`. The motivation for this is an upcoming changeset that needs to detect changes in which files are ignored or not. The plan is to hash all ignore files while they are being read, and store that hash in the dirstate (in v2 format). In order to allow potential alternative implementations to read that format, the algorithm to compute that hash must be documented. Having a well-defined depth-first ordering for the tree of (sub-)included files makes that easier. Differential Revision: https://phab.mercurial-scm.org/D10834
author Simon Sapin <simon.sapin@octobus.net>
date Wed, 02 Jun 2021 18:03:43 +0200
parents 777c3d231913
children 0ef8231e413f
line wrap: on
line diff
--- a/rust/hg-core/src/matchers.rs	Wed Jun 02 18:14:44 2021 +0200
+++ b/rust/hg-core/src/matchers.rs	Wed Jun 02 18:03:43 2021 +0200
@@ -11,7 +11,7 @@
     dirstate::dirs_multiset::DirsChildrenMultiset,
     filepatterns::{
         build_single_regex, filter_subincludes, get_patterns_from_file,
-        PatternFileWarning, PatternResult, SubInclude,
+        PatternFileWarning, PatternResult,
     },
     utils::{
         files::find_dirs,
@@ -237,7 +237,7 @@
 /// ///
 /// let ignore_patterns =
 /// vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
-/// let (matcher, _) = IncludeMatcher::new(ignore_patterns, "".as_ref()).unwrap();
+/// let matcher = IncludeMatcher::new(ignore_patterns).unwrap();
 /// ///
 /// assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
 /// assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
@@ -341,8 +341,8 @@
 
 /// Returns the regex pattern and a function that matches an `HgPath` against
 /// said regex formed by the given ignore patterns.
-fn build_regex_match<'a>(
-    ignore_patterns: &'a [&'a IgnorePattern],
+fn build_regex_match(
+    ignore_patterns: &[IgnorePattern],
 ) -> PatternResult<(Vec<u8>, Box<dyn Fn(&HgPath) -> bool + Sync>)> {
     let mut regexps = vec![];
     let mut exact_set = HashSet::new();
@@ -478,32 +478,25 @@
 /// Returns a function that checks whether a given file (in the general sense)
 /// should be matched.
 fn build_match<'a, 'b>(
-    ignore_patterns: &'a [IgnorePattern],
-    root_dir: &Path,
-) -> PatternResult<(
-    Vec<u8>,
-    Box<dyn Fn(&HgPath) -> bool + 'b + Sync>,
-    Vec<PatternFileWarning>,
-)> {
+    ignore_patterns: Vec<IgnorePattern>,
+) -> PatternResult<(Vec<u8>, Box<dyn Fn(&HgPath) -> bool + 'b + Sync>)> {
     let mut match_funcs: Vec<Box<dyn Fn(&HgPath) -> bool + Sync>> = vec![];
     // For debugging and printing
     let mut patterns = vec![];
-    let mut all_warnings = vec![];
 
-    let (subincludes, ignore_patterns) =
-        filter_subincludes(ignore_patterns, root_dir)?;
+    let (subincludes, ignore_patterns) = filter_subincludes(ignore_patterns)?;
 
     if !subincludes.is_empty() {
         // Build prefix-based matcher functions for subincludes
         let mut submatchers = FastHashMap::default();
         let mut prefixes = vec![];
 
-        for SubInclude { prefix, root, path } in subincludes.into_iter() {
-            let (match_fn, warnings) =
-                get_ignore_function(vec![path.to_path_buf()], &root)?;
-            all_warnings.extend(warnings);
-            prefixes.push(prefix.to_owned());
-            submatchers.insert(prefix.to_owned(), match_fn);
+        for sub_include in subincludes {
+            let matcher = IncludeMatcher::new(sub_include.included_patterns)?;
+            let match_fn =
+                Box::new(move |path: &HgPath| matcher.matches(path));
+            prefixes.push(sub_include.prefix.clone());
+            submatchers.insert(sub_include.prefix.clone(), match_fn);
         }
 
         let match_subinclude = move |filename: &HgPath| {
@@ -556,14 +549,13 @@
     }
 
     Ok(if match_funcs.len() == 1 {
-        (patterns, match_funcs.remove(0), all_warnings)
+        (patterns, match_funcs.remove(0))
     } else {
         (
             patterns,
             Box::new(move |f: &HgPath| -> bool {
                 match_funcs.iter().any(|match_func| match_func(f))
             }),
-            all_warnings,
         )
     })
 }
@@ -588,8 +580,7 @@
         all_patterns.extend(patterns.to_owned());
         all_warnings.extend(warnings);
     }
-    let (matcher, warnings) = IncludeMatcher::new(all_patterns, root_dir)?;
-    all_warnings.extend(warnings);
+    let matcher = IncludeMatcher::new(all_patterns)?;
     Ok((
         Box::new(move |path: &HgPath| matcher.matches(path)),
         all_warnings,
@@ -597,34 +588,26 @@
 }
 
 impl<'a> IncludeMatcher<'a> {
-    pub fn new(
-        ignore_patterns: Vec<IgnorePattern>,
-        root_dir: &Path,
-    ) -> PatternResult<(Self, Vec<PatternFileWarning>)> {
-        let (patterns, match_fn, warnings) =
-            build_match(&ignore_patterns, root_dir)?;
+    pub fn new(ignore_patterns: Vec<IgnorePattern>) -> PatternResult<Self> {
         let RootsDirsAndParents {
             roots,
             dirs,
             parents,
         } = roots_dirs_and_parents(&ignore_patterns)?;
-
         let prefix = ignore_patterns.iter().any(|k| match k.syntax {
             PatternSyntax::Path | PatternSyntax::RelPath => true,
             _ => false,
         });
+        let (patterns, match_fn) = build_match(ignore_patterns)?;
 
-        Ok((
-            Self {
-                patterns,
-                match_fn,
-                prefix,
-                roots,
-                dirs,
-                parents,
-            },
-            warnings,
-        ))
+        Ok(Self {
+            patterns,
+            match_fn,
+            prefix,
+            roots,
+            dirs,
+            parents,
+        })
     }
 
     fn get_all_parents_children(&self) -> DirsChildrenMultiset {
@@ -810,14 +793,11 @@
     #[test]
     fn test_includematcher() {
         // VisitchildrensetPrefix
-        let (matcher, _) = IncludeMatcher::new(
-            vec![IgnorePattern::new(
-                PatternSyntax::RelPath,
-                b"dir/subdir",
-                Path::new(""),
-            )],
-            "".as_ref(),
-        )
+        let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
+            PatternSyntax::RelPath,
+            b"dir/subdir",
+            Path::new(""),
+        )])
         .unwrap();
 
         let mut set = HashSet::new();
@@ -848,14 +828,11 @@
         );
 
         // VisitchildrensetRootfilesin
-        let (matcher, _) = IncludeMatcher::new(
-            vec![IgnorePattern::new(
-                PatternSyntax::RootFiles,
-                b"dir/subdir",
-                Path::new(""),
-            )],
-            "".as_ref(),
-        )
+        let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
+            PatternSyntax::RootFiles,
+            b"dir/subdir",
+            Path::new(""),
+        )])
         .unwrap();
 
         let mut set = HashSet::new();
@@ -886,14 +863,11 @@
         );
 
         // VisitchildrensetGlob
-        let (matcher, _) = IncludeMatcher::new(
-            vec![IgnorePattern::new(
-                PatternSyntax::Glob,
-                b"dir/z*",
-                Path::new(""),
-            )],
-            "".as_ref(),
-        )
+        let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
+            PatternSyntax::Glob,
+            b"dir/z*",
+            Path::new(""),
+        )])
         .unwrap();
 
         let mut set = HashSet::new();