8 //! Structs and types for matching files and directories. |
8 //! Structs and types for matching files and directories. |
9 |
9 |
10 #[cfg(feature = "with-re2")] |
10 #[cfg(feature = "with-re2")] |
11 use crate::re2::Re2; |
11 use crate::re2::Re2; |
12 use crate::{ |
12 use crate::{ |
13 filepatterns::{build_single_regex, PatternResult}, |
13 dirstate::dirs_multiset::DirsChildrenMultiset, |
14 utils::hg_path::{HgPath, HgPathBuf}, |
14 filepatterns::{ |
15 DirsMultiset, DirstateMapError, IgnorePattern, PatternError, |
15 build_single_regex, filter_subincludes, get_patterns_from_file, |
|
16 PatternFileWarning, PatternResult, SubInclude, |
|
17 }, |
|
18 utils::{ |
|
19 files::find_dirs, |
|
20 hg_path::{HgPath, HgPathBuf}, |
|
21 Escaped, |
|
22 }, |
|
23 DirsMultiset, DirstateMapError, FastHashMap, IgnorePattern, PatternError, |
16 PatternSyntax, |
24 PatternSyntax, |
17 }; |
25 }; |
|
26 |
18 use std::collections::HashSet; |
27 use std::collections::HashSet; |
|
28 use std::fmt::{Display, Error, Formatter}; |
19 use std::iter::FromIterator; |
29 use std::iter::FromIterator; |
20 use std::ops::Deref; |
30 use std::ops::Deref; |
|
31 use std::path::Path; |
21 |
32 |
22 #[derive(Debug, PartialEq)] |
33 #[derive(Debug, PartialEq)] |
23 pub enum VisitChildrenSet<'a> { |
34 pub enum VisitChildrenSet<'a> { |
24 /// Don't visit anything |
35 /// Don't visit anything |
25 Empty, |
36 Empty, |
221 fn is_exact(&self) -> bool { |
232 fn is_exact(&self) -> bool { |
222 true |
233 true |
223 } |
234 } |
224 } |
235 } |
225 |
236 |
|
/// Matches files that are included in the ignore rules.
#[cfg_attr(
    feature = "with-re2",
    doc = r##"
```
use hg::{
    matchers::{IncludeMatcher, Matcher},
    IgnorePattern,
    PatternSyntax,
    utils::hg_path::HgPath
};
use std::path::Path;
///
let ignore_patterns =
vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
let (matcher, _) = IncludeMatcher::new(ignore_patterns, "").unwrap();
///
assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
```
"##
)]
pub struct IncludeMatcher<'a> {
    /// Byte representation of the patterns, kept for debugging and
    /// printing (this is what the `Display` implementation shows).
    patterns: Vec<u8>,
    /// Predicate called by `Matcher::matches` to decide whether a path is
    /// included.
    match_fn: Box<dyn for<'r> Fn(&'r HgPath) -> bool + 'a + Sync>,
    /// Whether all the patterns match a prefix (i.e. recursively)
    prefix: bool,
    /// Root directories of the patterns, as computed by
    /// `roots_dirs_and_parents`. A directory found here is always visited.
    roots: HashSet<HgPathBuf>,
    /// Additional directories to visit, as computed by
    /// `roots_dirs_and_parents`.
    // NOTE(review): presumably the directories of non-recursive patterns
    // (e.g. `rootfilesin`) -- confirm against `roots_dirs_and_parents`.
    dirs: HashSet<HgPathBuf>,
    /// Directories for which only a subset of the children needs to be
    /// visited (see `visit_children_set`).
    // NOTE(review): presumably the ancestors of `roots` and `dirs` --
    // confirm against `roots_dirs_and_parents`.
    parents: HashSet<HgPathBuf>,
}
|
270 |
|
271 impl<'a> Matcher for IncludeMatcher<'a> { |
|
272 fn file_set(&self) -> Option<&HashSet<&HgPath>> { |
|
273 None |
|
274 } |
|
275 |
|
276 fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool { |
|
277 false |
|
278 } |
|
279 |
|
280 fn matches(&self, filename: impl AsRef<HgPath>) -> bool { |
|
281 (self.match_fn)(filename.as_ref()) |
|
282 } |
|
283 |
|
284 fn visit_children_set( |
|
285 &self, |
|
286 directory: impl AsRef<HgPath>, |
|
287 ) -> VisitChildrenSet { |
|
288 let dir = directory.as_ref(); |
|
289 if self.prefix && self.roots.contains(dir) { |
|
290 return VisitChildrenSet::Recursive; |
|
291 } |
|
292 if self.roots.contains(HgPath::new(b"")) |
|
293 || self.roots.contains(dir) |
|
294 || self.dirs.contains(dir) |
|
295 || find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir)) |
|
296 { |
|
297 return VisitChildrenSet::This; |
|
298 } |
|
299 |
|
300 if self.parents.contains(directory.as_ref()) { |
|
301 let multiset = self.get_all_parents_children(); |
|
302 if let Some(children) = multiset.get(dir) { |
|
303 return VisitChildrenSet::Set(children.to_owned()); |
|
304 } |
|
305 } |
|
306 VisitChildrenSet::Empty |
|
307 } |
|
308 |
|
309 fn matches_everything(&self) -> bool { |
|
310 false |
|
311 } |
|
312 |
|
313 fn is_exact(&self) -> bool { |
|
314 false |
|
315 } |
|
316 } |
|
317 |
226 #[cfg(feature = "with-re2")] |
318 #[cfg(feature = "with-re2")] |
227 /// Returns a function that matches an `HgPath` against the given regex |
319 /// Returns a function that matches an `HgPath` against the given regex |
228 /// pattern. |
320 /// pattern. |
229 /// |
321 /// |
230 /// This can fail when the pattern is invalid or not supported by the |
322 /// This can fail when the pattern is invalid or not supported by the |
359 dirs: HashSet::from_iter(dirs), |
451 dirs: HashSet::from_iter(dirs), |
360 parents, |
452 parents, |
361 }) |
453 }) |
362 } |
454 } |
363 |
455 |
|
456 /// Returns a function that checks whether a given file (in the general sense) |
|
457 /// should be matched. |
|
458 fn build_match<'a, 'b>( |
|
459 ignore_patterns: &'a [IgnorePattern], |
|
460 root_dir: impl AsRef<Path>, |
|
461 ) -> PatternResult<( |
|
462 Vec<u8>, |
|
463 Box<dyn Fn(&HgPath) -> bool + 'b + Sync>, |
|
464 Vec<PatternFileWarning>, |
|
465 )> { |
|
466 let mut match_funcs: Vec<Box<dyn Fn(&HgPath) -> bool + Sync>> = vec![]; |
|
467 // For debugging and printing |
|
468 let mut patterns = vec![]; |
|
469 let mut all_warnings = vec![]; |
|
470 |
|
471 let (subincludes, ignore_patterns) = |
|
472 filter_subincludes(ignore_patterns, root_dir)?; |
|
473 |
|
474 if !subincludes.is_empty() { |
|
475 // Build prefix-based matcher functions for subincludes |
|
476 let mut submatchers = FastHashMap::default(); |
|
477 let mut prefixes = vec![]; |
|
478 |
|
479 for SubInclude { prefix, root, path } in subincludes.into_iter() { |
|
480 let (match_fn, warnings) = get_ignore_function(&[path], root)?; |
|
481 all_warnings.extend(warnings); |
|
482 prefixes.push(prefix.to_owned()); |
|
483 submatchers.insert(prefix.to_owned(), match_fn); |
|
484 } |
|
485 |
|
486 let match_subinclude = move |filename: &HgPath| { |
|
487 for prefix in prefixes.iter() { |
|
488 if let Some(rel) = filename.relative_to(prefix) { |
|
489 if (submatchers.get(prefix).unwrap())(rel) { |
|
490 return true; |
|
491 } |
|
492 } |
|
493 } |
|
494 false |
|
495 }; |
|
496 |
|
497 match_funcs.push(Box::new(match_subinclude)); |
|
498 } |
|
499 |
|
500 if !ignore_patterns.is_empty() { |
|
501 // Either do dumb matching if all patterns are rootfiles, or match |
|
502 // with a regex. |
|
503 if ignore_patterns |
|
504 .iter() |
|
505 .all(|k| k.syntax == PatternSyntax::RootFiles) |
|
506 { |
|
507 let dirs: HashSet<_> = ignore_patterns |
|
508 .iter() |
|
509 .map(|k| k.pattern.to_owned()) |
|
510 .collect(); |
|
511 let mut dirs_vec: Vec<_> = dirs.iter().cloned().collect(); |
|
512 |
|
513 let match_func = move |path: &HgPath| -> bool { |
|
514 let path = path.as_bytes(); |
|
515 let i = path.iter().rfind(|a| **a == b'/'); |
|
516 let dir = if let Some(i) = i { |
|
517 &path[..*i as usize] |
|
518 } else { |
|
519 b"." |
|
520 }; |
|
521 dirs.contains(dir.deref()) |
|
522 }; |
|
523 match_funcs.push(Box::new(match_func)); |
|
524 |
|
525 patterns.extend(b"rootfilesin: "); |
|
526 dirs_vec.sort(); |
|
527 patterns.extend(dirs_vec.escaped_bytes()); |
|
528 } else { |
|
529 let (new_re, match_func) = build_regex_match(&ignore_patterns)?; |
|
530 patterns = new_re; |
|
531 match_funcs.push(match_func) |
|
532 } |
|
533 } |
|
534 |
|
535 Ok(if match_funcs.len() == 1 { |
|
536 (patterns, match_funcs.remove(0), all_warnings) |
|
537 } else { |
|
538 ( |
|
539 patterns, |
|
540 Box::new(move |f: &HgPath| -> bool { |
|
541 match_funcs.iter().any(|match_func| match_func(f)) |
|
542 }), |
|
543 all_warnings, |
|
544 ) |
|
545 }) |
|
546 } |
|
547 |
|
548 /// Parses all "ignore" files with their recursive includes and returns a |
|
549 /// function that checks whether a given file (in the general sense) should be |
|
550 /// ignored. |
|
551 pub fn get_ignore_function<'a>( |
|
552 all_pattern_files: &[impl AsRef<Path>], |
|
553 root_dir: impl AsRef<Path>, |
|
554 ) -> PatternResult<( |
|
555 impl for<'r> Fn(&'r HgPath) -> bool + Sync, |
|
556 Vec<PatternFileWarning>, |
|
557 )> { |
|
558 let mut all_patterns = vec![]; |
|
559 let mut all_warnings = vec![]; |
|
560 |
|
561 for pattern_file in all_pattern_files.into_iter() { |
|
562 let (patterns, warnings) = |
|
563 get_patterns_from_file(pattern_file, &root_dir)?; |
|
564 |
|
565 all_patterns.extend(patterns); |
|
566 all_warnings.extend(warnings); |
|
567 } |
|
568 let (matcher, warnings) = IncludeMatcher::new(all_patterns, root_dir)?; |
|
569 all_warnings.extend(warnings); |
|
570 Ok((move |path: &HgPath| matcher.matches(path), all_warnings)) |
|
571 } |
|
572 |
|
573 impl<'a> IncludeMatcher<'a> { |
|
574 pub fn new( |
|
575 ignore_patterns: Vec<IgnorePattern>, |
|
576 root_dir: impl AsRef<Path>, |
|
577 ) -> PatternResult<(Self, Vec<PatternFileWarning>)> { |
|
578 let (patterns, match_fn, warnings) = |
|
579 build_match(&ignore_patterns, root_dir)?; |
|
580 let RootsDirsAndParents { |
|
581 roots, |
|
582 dirs, |
|
583 parents, |
|
584 } = roots_dirs_and_parents(&ignore_patterns)?; |
|
585 |
|
586 let prefix = ignore_patterns.iter().any(|k| match k.syntax { |
|
587 PatternSyntax::Path | PatternSyntax::RelPath => true, |
|
588 _ => false, |
|
589 }); |
|
590 |
|
591 Ok(( |
|
592 Self { |
|
593 patterns, |
|
594 match_fn, |
|
595 prefix, |
|
596 roots, |
|
597 dirs, |
|
598 parents, |
|
599 }, |
|
600 warnings, |
|
601 )) |
|
602 } |
|
603 |
|
604 fn get_all_parents_children(&self) -> DirsChildrenMultiset { |
|
605 // TODO cache |
|
606 let thing = self |
|
607 .dirs |
|
608 .iter() |
|
609 .chain(self.roots.iter()) |
|
610 .chain(self.parents.iter()); |
|
611 DirsChildrenMultiset::new(thing, Some(&self.parents)) |
|
612 } |
|
613 } |
|
614 |
|
615 impl<'a> Display for IncludeMatcher<'a> { |
|
616 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { |
|
617 write!( |
|
618 f, |
|
619 "IncludeMatcher(includes='{}')", |
|
620 String::from_utf8_lossy(&self.patterns.escaped_bytes()) |
|
621 ) |
|
622 } |
|
623 } |
|
624 |
364 #[cfg(test)] |
625 #[cfg(test)] |
365 mod tests { |
626 mod tests { |
366 use super::*; |
627 use super::*; |
367 use pretty_assertions::assert_eq; |
628 use pretty_assertions::assert_eq; |
368 use std::path::Path; |
629 use std::path::Path; |
507 assert_eq!( |
768 assert_eq!( |
508 matcher.visit_children_set(HgPath::new(b"folder")), |
769 matcher.visit_children_set(HgPath::new(b"folder")), |
509 VisitChildrenSet::Empty |
770 VisitChildrenSet::Empty |
510 ); |
771 ); |
511 } |
772 } |
512 } |
773 |
|
#[cfg(feature = "with-re2")]
#[test]
fn test_includematcher() {
    // VisitchildrensetPrefix
    let patterns = vec![IgnorePattern::new(
        PatternSyntax::RelPath,
        b"dir/subdir",
        Path::new(""),
    )];
    let (matcher, _) = IncludeMatcher::new(patterns, "").unwrap();

    // From the root, only `dir` leads to something included.
    let expected: HashSet<_> =
        vec![HgPath::new(b"dir")].into_iter().collect();
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"")),
        VisitChildrenSet::Set(expected)
    );

    // From `dir`, only `subdir` is of interest.
    let expected: HashSet<_> =
        vec![HgPath::new(b"subdir")].into_iter().collect();
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir")),
        VisitChildrenSet::Set(expected)
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir/subdir")),
        VisitChildrenSet::Recursive
    );
    // OPT: This should probably be 'all' if its parent is?
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
        VisitChildrenSet::This
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"folder")),
        VisitChildrenSet::Empty
    );

    // VisitchildrensetRootfilesin
    let patterns = vec![IgnorePattern::new(
        PatternSyntax::RootFiles,
        b"dir/subdir",
        Path::new(""),
    )];
    let (matcher, _) = IncludeMatcher::new(patterns, "").unwrap();

    let expected: HashSet<_> =
        vec![HgPath::new(b"dir")].into_iter().collect();
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"")),
        VisitChildrenSet::Set(expected)
    );

    let expected: HashSet<_> =
        vec![HgPath::new(b"subdir")].into_iter().collect();
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir")),
        VisitChildrenSet::Set(expected)
    );

    // `rootfilesin` is not recursive: the directory itself is visited,
    // but nothing below it.
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir/subdir")),
        VisitChildrenSet::This
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
        VisitChildrenSet::Empty
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"folder")),
        VisitChildrenSet::Empty
    );

    // VisitchildrensetGlob
    let patterns = vec![IgnorePattern::new(
        PatternSyntax::Glob,
        b"dir/z*",
        Path::new(""),
    )];
    let (matcher, _) = IncludeMatcher::new(patterns, "").unwrap();

    let expected: HashSet<_> =
        vec![HgPath::new(b"dir")].into_iter().collect();
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"")),
        VisitChildrenSet::Set(expected)
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"folder")),
        VisitChildrenSet::Empty
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir")),
        VisitChildrenSet::This
    );
    // OPT: these should probably be set().
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir/subdir")),
        VisitChildrenSet::This
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
        VisitChildrenSet::This
    );
}
|
888 } |