rust/hg-core/src/matchers.rs
changeset 44532 c697638e0e91
parent 44531 a21881b40942
child 44534 483fce658e43
equal deleted inserted replaced
44531:a21881b40942 44532:c697638e0e91
     8 //! Structs and types for matching files and directories.
     8 //! Structs and types for matching files and directories.
     9 
     9 
    10 #[cfg(feature = "with-re2")]
    10 #[cfg(feature = "with-re2")]
    11 use crate::re2::Re2;
    11 use crate::re2::Re2;
    12 use crate::{
    12 use crate::{
    13     filepatterns::{build_single_regex, PatternResult},
    13     dirstate::dirs_multiset::DirsChildrenMultiset,
    14     utils::hg_path::{HgPath, HgPathBuf},
    14     filepatterns::{
    15     DirsMultiset, DirstateMapError, IgnorePattern, PatternError,
    15         build_single_regex, filter_subincludes, get_patterns_from_file,
       
    16         PatternFileWarning, PatternResult, SubInclude,
       
    17     },
       
    18     utils::{
       
    19         files::find_dirs,
       
    20         hg_path::{HgPath, HgPathBuf},
       
    21         Escaped,
       
    22     },
       
    23     DirsMultiset, DirstateMapError, FastHashMap, IgnorePattern, PatternError,
    16     PatternSyntax,
    24     PatternSyntax,
    17 };
    25 };
       
    26 
    18 use std::collections::HashSet;
    27 use std::collections::HashSet;
       
    28 use std::fmt::{Display, Error, Formatter};
    19 use std::iter::FromIterator;
    29 use std::iter::FromIterator;
    20 use std::ops::Deref;
    30 use std::ops::Deref;
       
    31 use std::path::Path;
    21 
    32 
    22 #[derive(Debug, PartialEq)]
    33 #[derive(Debug, PartialEq)]
    23 pub enum VisitChildrenSet<'a> {
    34 pub enum VisitChildrenSet<'a> {
    24     /// Don't visit anything
    35     /// Don't visit anything
    25     Empty,
    36     Empty,
   221     fn is_exact(&self) -> bool {
   232     fn is_exact(&self) -> bool {
   222         true
   233         true
   223     }
   234     }
   224 }
   235 }
   225 
   236 
       
   237 /// Matches files that are included in the ignore rules.
       
   238 #[cfg_attr(
       
   239     feature = "with-re2",
       
   240     doc = r##"
       
   241 ```
       
   242 use hg::{
       
   243     matchers::{IncludeMatcher, Matcher},
       
   244     IgnorePattern,
       
   245     PatternSyntax,
       
   246     utils::hg_path::HgPath
       
   247 };
       
   248 use std::path::Path;
       
   249 ///
       
   250 let ignore_patterns =
       
   251 vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
       
   252 let (matcher, _) = IncludeMatcher::new(ignore_patterns, "").unwrap();
       
   253 ///
       
   254 assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
       
   255 assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
       
   256 assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
       
   257 assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
       
   258 ```
       
   259 "##
       
   260 )]
       
   261 pub struct IncludeMatcher<'a> {
       
   262     patterns: Vec<u8>,
       
   263     match_fn: Box<dyn for<'r> Fn(&'r HgPath) -> bool + 'a + Sync>,
       
   264     /// Whether all the patterns match a prefix (i.e. recursively)
       
   265     prefix: bool,
       
   266     roots: HashSet<HgPathBuf>,
       
   267     dirs: HashSet<HgPathBuf>,
       
   268     parents: HashSet<HgPathBuf>,
       
   269 }
       
   270 
       
   271 impl<'a> Matcher for IncludeMatcher<'a> {
       
   272     fn file_set(&self) -> Option<&HashSet<&HgPath>> {
       
   273         None
       
   274     }
       
   275 
       
   276     fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool {
       
   277         false
       
   278     }
       
   279 
       
   280     fn matches(&self, filename: impl AsRef<HgPath>) -> bool {
       
   281         (self.match_fn)(filename.as_ref())
       
   282     }
       
   283 
       
   284     fn visit_children_set(
       
   285         &self,
       
   286         directory: impl AsRef<HgPath>,
       
   287     ) -> VisitChildrenSet {
       
   288         let dir = directory.as_ref();
       
   289         if self.prefix && self.roots.contains(dir) {
       
   290             return VisitChildrenSet::Recursive;
       
   291         }
       
   292         if self.roots.contains(HgPath::new(b""))
       
   293             || self.roots.contains(dir)
       
   294             || self.dirs.contains(dir)
       
   295             || find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir))
       
   296         {
       
   297             return VisitChildrenSet::This;
       
   298         }
       
   299 
       
   300         if self.parents.contains(directory.as_ref()) {
       
   301             let multiset = self.get_all_parents_children();
       
   302             if let Some(children) = multiset.get(dir) {
       
   303                 return VisitChildrenSet::Set(children.to_owned());
       
   304             }
       
   305         }
       
   306         VisitChildrenSet::Empty
       
   307     }
       
   308 
       
   309     fn matches_everything(&self) -> bool {
       
   310         false
       
   311     }
       
   312 
       
   313     fn is_exact(&self) -> bool {
       
   314         false
       
   315     }
       
   316 }
       
   317 
   226 #[cfg(feature = "with-re2")]
   318 #[cfg(feature = "with-re2")]
   227 /// Returns a function that matches an `HgPath` against the given regex
   319 /// Returns a function that matches an `HgPath` against the given regex
   228 /// pattern.
   320 /// pattern.
   229 ///
   321 ///
   230 /// This can fail when the pattern is invalid or not supported by the
   322 /// This can fail when the pattern is invalid or not supported by the
   359         dirs: HashSet::from_iter(dirs),
   451         dirs: HashSet::from_iter(dirs),
   360         parents,
   452         parents,
   361     })
   453     })
   362 }
   454 }
   363 
   455 
       
   456 /// Returns a function that checks whether a given file (in the general sense)
       
   457 /// should be matched.
       
   458 fn build_match<'a, 'b>(
       
   459     ignore_patterns: &'a [IgnorePattern],
       
   460     root_dir: impl AsRef<Path>,
       
   461 ) -> PatternResult<(
       
   462     Vec<u8>,
       
   463     Box<dyn Fn(&HgPath) -> bool + 'b + Sync>,
       
   464     Vec<PatternFileWarning>,
       
   465 )> {
       
   466     let mut match_funcs: Vec<Box<dyn Fn(&HgPath) -> bool + Sync>> = vec![];
       
   467     // For debugging and printing
       
   468     let mut patterns = vec![];
       
   469     let mut all_warnings = vec![];
       
   470 
       
   471     let (subincludes, ignore_patterns) =
       
   472         filter_subincludes(ignore_patterns, root_dir)?;
       
   473 
       
   474     if !subincludes.is_empty() {
       
   475         // Build prefix-based matcher functions for subincludes
       
   476         let mut submatchers = FastHashMap::default();
       
   477         let mut prefixes = vec![];
       
   478 
       
   479         for SubInclude { prefix, root, path } in subincludes.into_iter() {
       
   480             let (match_fn, warnings) = get_ignore_function(&[path], root)?;
       
   481             all_warnings.extend(warnings);
       
   482             prefixes.push(prefix.to_owned());
       
   483             submatchers.insert(prefix.to_owned(), match_fn);
       
   484         }
       
   485 
       
   486         let match_subinclude = move |filename: &HgPath| {
       
   487             for prefix in prefixes.iter() {
       
   488                 if let Some(rel) = filename.relative_to(prefix) {
       
   489                     if (submatchers.get(prefix).unwrap())(rel) {
       
   490                         return true;
       
   491                     }
       
   492                 }
       
   493             }
       
   494             false
       
   495         };
       
   496 
       
   497         match_funcs.push(Box::new(match_subinclude));
       
   498     }
       
   499 
       
   500     if !ignore_patterns.is_empty() {
       
   501         // Either do dumb matching if all patterns are rootfiles, or match
       
   502         // with a regex.
       
   503         if ignore_patterns
       
   504             .iter()
       
   505             .all(|k| k.syntax == PatternSyntax::RootFiles)
       
   506         {
       
   507             let dirs: HashSet<_> = ignore_patterns
       
   508                 .iter()
       
   509                 .map(|k| k.pattern.to_owned())
       
   510                 .collect();
       
   511             let mut dirs_vec: Vec<_> = dirs.iter().cloned().collect();
       
   512 
       
   513             let match_func = move |path: &HgPath| -> bool {
       
   514                 let path = path.as_bytes();
       
   515                 let i = path.iter().rfind(|a| **a == b'/');
       
   516                 let dir = if let Some(i) = i {
       
   517                     &path[..*i as usize]
       
   518                 } else {
       
   519                     b"."
       
   520                 };
       
   521                 dirs.contains(dir.deref())
       
   522             };
       
   523             match_funcs.push(Box::new(match_func));
       
   524 
       
   525             patterns.extend(b"rootfilesin: ");
       
   526             dirs_vec.sort();
       
   527             patterns.extend(dirs_vec.escaped_bytes());
       
   528         } else {
       
   529             let (new_re, match_func) = build_regex_match(&ignore_patterns)?;
       
   530             patterns = new_re;
       
   531             match_funcs.push(match_func)
       
   532         }
       
   533     }
       
   534 
       
   535     Ok(if match_funcs.len() == 1 {
       
   536         (patterns, match_funcs.remove(0), all_warnings)
       
   537     } else {
       
   538         (
       
   539             patterns,
       
   540             Box::new(move |f: &HgPath| -> bool {
       
   541                 match_funcs.iter().any(|match_func| match_func(f))
       
   542             }),
       
   543             all_warnings,
       
   544         )
       
   545     })
       
   546 }
       
   547 
       
   548 /// Parses all "ignore" files with their recursive includes and returns a
       
   549 /// function that checks whether a given file (in the general sense) should be
       
   550 /// ignored.
       
   551 pub fn get_ignore_function<'a>(
       
   552     all_pattern_files: &[impl AsRef<Path>],
       
   553     root_dir: impl AsRef<Path>,
       
   554 ) -> PatternResult<(
       
   555     impl for<'r> Fn(&'r HgPath) -> bool + Sync,
       
   556     Vec<PatternFileWarning>,
       
   557 )> {
       
   558     let mut all_patterns = vec![];
       
   559     let mut all_warnings = vec![];
       
   560 
       
   561     for pattern_file in all_pattern_files.into_iter() {
       
   562         let (patterns, warnings) =
       
   563             get_patterns_from_file(pattern_file, &root_dir)?;
       
   564 
       
   565         all_patterns.extend(patterns);
       
   566         all_warnings.extend(warnings);
       
   567     }
       
   568     let (matcher, warnings) = IncludeMatcher::new(all_patterns, root_dir)?;
       
   569     all_warnings.extend(warnings);
       
   570     Ok((move |path: &HgPath| matcher.matches(path), all_warnings))
       
   571 }
       
   572 
       
   573 impl<'a> IncludeMatcher<'a> {
       
   574     pub fn new(
       
   575         ignore_patterns: Vec<IgnorePattern>,
       
   576         root_dir: impl AsRef<Path>,
       
   577     ) -> PatternResult<(Self, Vec<PatternFileWarning>)> {
       
   578         let (patterns, match_fn, warnings) =
       
   579             build_match(&ignore_patterns, root_dir)?;
       
   580         let RootsDirsAndParents {
       
   581             roots,
       
   582             dirs,
       
   583             parents,
       
   584         } = roots_dirs_and_parents(&ignore_patterns)?;
       
   585 
       
   586         let prefix = ignore_patterns.iter().any(|k| match k.syntax {
       
   587             PatternSyntax::Path | PatternSyntax::RelPath => true,
       
   588             _ => false,
       
   589         });
       
   590 
       
   591         Ok((
       
   592             Self {
       
   593                 patterns,
       
   594                 match_fn,
       
   595                 prefix,
       
   596                 roots,
       
   597                 dirs,
       
   598                 parents,
       
   599             },
       
   600             warnings,
       
   601         ))
       
   602     }
       
   603 
       
   604     fn get_all_parents_children(&self) -> DirsChildrenMultiset {
       
   605         // TODO cache
       
   606         let thing = self
       
   607             .dirs
       
   608             .iter()
       
   609             .chain(self.roots.iter())
       
   610             .chain(self.parents.iter());
       
   611         DirsChildrenMultiset::new(thing, Some(&self.parents))
       
   612     }
       
   613 }
       
   614 
       
   615 impl<'a> Display for IncludeMatcher<'a> {
       
   616     fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
       
   617         write!(
       
   618             f,
       
   619             "IncludeMatcher(includes='{}')",
       
   620             String::from_utf8_lossy(&self.patterns.escaped_bytes())
       
   621         )
       
   622     }
       
   623 }
       
   624 
   364 #[cfg(test)]
   625 #[cfg(test)]
   365 mod tests {
   626 mod tests {
   366     use super::*;
   627     use super::*;
   367     use pretty_assertions::assert_eq;
   628     use pretty_assertions::assert_eq;
   368     use std::path::Path;
   629     use std::path::Path;
   507         assert_eq!(
   768         assert_eq!(
   508             matcher.visit_children_set(HgPath::new(b"folder")),
   769             matcher.visit_children_set(HgPath::new(b"folder")),
   509             VisitChildrenSet::Empty
   770             VisitChildrenSet::Empty
   510         );
   771         );
   511     }
   772     }
   512 }
   773 
       
   774     #[cfg(feature = "with-re2")]
       
   775     #[test]
       
   776     fn test_includematcher() {
       
   777         // VisitchildrensetPrefix
       
   778         let (matcher, _) = IncludeMatcher::new(
       
   779             vec![IgnorePattern::new(
       
   780                 PatternSyntax::RelPath,
       
   781                 b"dir/subdir",
       
   782                 Path::new(""),
       
   783             )],
       
   784             "",
       
   785         )
       
   786         .unwrap();
       
   787 
       
   788         let mut set = HashSet::new();
       
   789         set.insert(HgPath::new(b"dir"));
       
   790         assert_eq!(
       
   791             matcher.visit_children_set(HgPath::new(b"")),
       
   792             VisitChildrenSet::Set(set)
       
   793         );
       
   794 
       
   795         let mut set = HashSet::new();
       
   796         set.insert(HgPath::new(b"subdir"));
       
   797         assert_eq!(
       
   798             matcher.visit_children_set(HgPath::new(b"dir")),
       
   799             VisitChildrenSet::Set(set)
       
   800         );
       
   801         assert_eq!(
       
   802             matcher.visit_children_set(HgPath::new(b"dir/subdir")),
       
   803             VisitChildrenSet::Recursive
       
   804         );
       
   805         // OPT: This should probably be 'all' if its parent is?
       
   806         assert_eq!(
       
   807             matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
       
   808             VisitChildrenSet::This
       
   809         );
       
   810         assert_eq!(
       
   811             matcher.visit_children_set(HgPath::new(b"folder")),
       
   812             VisitChildrenSet::Empty
       
   813         );
       
   814 
       
   815         // VisitchildrensetRootfilesin
       
   816         let (matcher, _) = IncludeMatcher::new(
       
   817             vec![IgnorePattern::new(
       
   818                 PatternSyntax::RootFiles,
       
   819                 b"dir/subdir",
       
   820                 Path::new(""),
       
   821             )],
       
   822             "",
       
   823         )
       
   824         .unwrap();
       
   825 
       
   826         let mut set = HashSet::new();
       
   827         set.insert(HgPath::new(b"dir"));
       
   828         assert_eq!(
       
   829             matcher.visit_children_set(HgPath::new(b"")),
       
   830             VisitChildrenSet::Set(set)
       
   831         );
       
   832 
       
   833         let mut set = HashSet::new();
       
   834         set.insert(HgPath::new(b"subdir"));
       
   835         assert_eq!(
       
   836             matcher.visit_children_set(HgPath::new(b"dir")),
       
   837             VisitChildrenSet::Set(set)
       
   838         );
       
   839 
       
   840         assert_eq!(
       
   841             matcher.visit_children_set(HgPath::new(b"dir/subdir")),
       
   842             VisitChildrenSet::This
       
   843         );
       
   844         assert_eq!(
       
   845             matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
       
   846             VisitChildrenSet::Empty
       
   847         );
       
   848         assert_eq!(
       
   849             matcher.visit_children_set(HgPath::new(b"folder")),
       
   850             VisitChildrenSet::Empty
       
   851         );
       
   852 
       
   853         // VisitchildrensetGlob
       
   854         let (matcher, _) = IncludeMatcher::new(
       
   855             vec![IgnorePattern::new(
       
   856                 PatternSyntax::Glob,
       
   857                 b"dir/z*",
       
   858                 Path::new(""),
       
   859             )],
       
   860             "",
       
   861         )
       
   862         .unwrap();
       
   863 
       
   864         let mut set = HashSet::new();
       
   865         set.insert(HgPath::new(b"dir"));
       
   866         assert_eq!(
       
   867             matcher.visit_children_set(HgPath::new(b"")),
       
   868             VisitChildrenSet::Set(set)
       
   869         );
       
   870         assert_eq!(
       
   871             matcher.visit_children_set(HgPath::new(b"folder")),
       
   872             VisitChildrenSet::Empty
       
   873         );
       
   874         assert_eq!(
       
   875             matcher.visit_children_set(HgPath::new(b"dir")),
       
   876             VisitChildrenSet::This
       
   877         );
       
   878         // OPT: these should probably be set().
       
   879         assert_eq!(
       
   880             matcher.visit_children_set(HgPath::new(b"dir/subdir")),
       
   881             VisitChildrenSet::This
       
   882         );
       
   883         assert_eq!(
       
   884             matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
       
   885             VisitChildrenSet::This
       
   886         );
       
   887     }
       
   888 }