comparison rust/hg-core/src/matchers.rs @ 47379:f6bb181c75f8

rust: Parse "subinclude"d files along the way, not later. When parsing a `.hgignore` file and encountering an `include:` line, the included file is parsed recursively right then in a depth-first fashion. With `subinclude:`, however, included files were parsed (recursively) much later. This changes it to be expanded during parsing, like `.hgignore`. The motivation for this is an upcoming changeset that needs to detect changes in which files are ignored or not. The plan is to hash all ignore files while they are being read, and store that hash in the dirstate (in v2 format). In order to allow potential alternative implementations to read that format, the algorithm to compute that hash must be documented. Having a well-defined depth-first ordering for the tree of (sub-)included files makes that easier. Differential Revision: https://phab.mercurial-scm.org/D10834
author Simon Sapin <simon.sapin@octobus.net>
date Wed, 02 Jun 2021 18:03:43 +0200
parents 777c3d231913
children 0ef8231e413f
comparison
equal deleted inserted replaced
47378:777c3d231913 47379:f6bb181c75f8
9 9
10 use crate::{ 10 use crate::{
11 dirstate::dirs_multiset::DirsChildrenMultiset, 11 dirstate::dirs_multiset::DirsChildrenMultiset,
12 filepatterns::{ 12 filepatterns::{
13 build_single_regex, filter_subincludes, get_patterns_from_file, 13 build_single_regex, filter_subincludes, get_patterns_from_file,
14 PatternFileWarning, PatternResult, SubInclude, 14 PatternFileWarning, PatternResult,
15 }, 15 },
16 utils::{ 16 utils::{
17 files::find_dirs, 17 files::find_dirs,
18 hg_path::{HgPath, HgPathBuf}, 18 hg_path::{HgPath, HgPathBuf},
19 Escaped, 19 Escaped,
235 /// }; 235 /// };
236 /// use std::path::Path; 236 /// use std::path::Path;
237 /// /// 237 /// ///
238 /// let ignore_patterns = 238 /// let ignore_patterns =
239 /// vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))]; 239 /// vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
240 /// let (matcher, _) = IncludeMatcher::new(ignore_patterns, "".as_ref()).unwrap(); 240 /// let matcher = IncludeMatcher::new(ignore_patterns).unwrap();
241 /// /// 241 /// ///
242 /// assert_eq!(matcher.matches(HgPath::new(b"testing")), false); 242 /// assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
243 /// assert_eq!(matcher.matches(HgPath::new(b"this should work")), true); 243 /// assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
244 /// assert_eq!(matcher.matches(HgPath::new(b"this also")), true); 244 /// assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
245 /// assert_eq!(matcher.matches(HgPath::new(b"but not this")), false); 245 /// assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
339 Ok(move |path: &HgPath| re.is_match(path.as_bytes())) 339 Ok(move |path: &HgPath| re.is_match(path.as_bytes()))
340 } 340 }
341 341
342 /// Returns the regex pattern and a function that matches an `HgPath` against 342 /// Returns the regex pattern and a function that matches an `HgPath` against
343 /// said regex formed by the given ignore patterns. 343 /// said regex formed by the given ignore patterns.
344 fn build_regex_match<'a>( 344 fn build_regex_match(
345 ignore_patterns: &'a [&'a IgnorePattern], 345 ignore_patterns: &[IgnorePattern],
346 ) -> PatternResult<(Vec<u8>, Box<dyn Fn(&HgPath) -> bool + Sync>)> { 346 ) -> PatternResult<(Vec<u8>, Box<dyn Fn(&HgPath) -> bool + Sync>)> {
347 let mut regexps = vec![]; 347 let mut regexps = vec![];
348 let mut exact_set = HashSet::new(); 348 let mut exact_set = HashSet::new();
349 349
350 for pattern in ignore_patterns { 350 for pattern in ignore_patterns {
476 } 476 }
477 477
478 /// Returns a function that checks whether a given file (in the general sense) 478 /// Returns a function that checks whether a given file (in the general sense)
479 /// should be matched. 479 /// should be matched.
480 fn build_match<'a, 'b>( 480 fn build_match<'a, 'b>(
481 ignore_patterns: &'a [IgnorePattern], 481 ignore_patterns: Vec<IgnorePattern>,
482 root_dir: &Path, 482 ) -> PatternResult<(Vec<u8>, Box<dyn Fn(&HgPath) -> bool + 'b + Sync>)> {
483 ) -> PatternResult<(
484 Vec<u8>,
485 Box<dyn Fn(&HgPath) -> bool + 'b + Sync>,
486 Vec<PatternFileWarning>,
487 )> {
488 let mut match_funcs: Vec<Box<dyn Fn(&HgPath) -> bool + Sync>> = vec![]; 483 let mut match_funcs: Vec<Box<dyn Fn(&HgPath) -> bool + Sync>> = vec![];
489 // For debugging and printing 484 // For debugging and printing
490 let mut patterns = vec![]; 485 let mut patterns = vec![];
491 let mut all_warnings = vec![]; 486
492 487 let (subincludes, ignore_patterns) = filter_subincludes(ignore_patterns)?;
493 let (subincludes, ignore_patterns) =
494 filter_subincludes(ignore_patterns, root_dir)?;
495 488
496 if !subincludes.is_empty() { 489 if !subincludes.is_empty() {
497 // Build prefix-based matcher functions for subincludes 490 // Build prefix-based matcher functions for subincludes
498 let mut submatchers = FastHashMap::default(); 491 let mut submatchers = FastHashMap::default();
499 let mut prefixes = vec![]; 492 let mut prefixes = vec![];
500 493
501 for SubInclude { prefix, root, path } in subincludes.into_iter() { 494 for sub_include in subincludes {
502 let (match_fn, warnings) = 495 let matcher = IncludeMatcher::new(sub_include.included_patterns)?;
503 get_ignore_function(vec![path.to_path_buf()], &root)?; 496 let match_fn =
504 all_warnings.extend(warnings); 497 Box::new(move |path: &HgPath| matcher.matches(path));
505 prefixes.push(prefix.to_owned()); 498 prefixes.push(sub_include.prefix.clone());
506 submatchers.insert(prefix.to_owned(), match_fn); 499 submatchers.insert(sub_include.prefix.clone(), match_fn);
507 } 500 }
508 501
509 let match_subinclude = move |filename: &HgPath| { 502 let match_subinclude = move |filename: &HgPath| {
510 for prefix in prefixes.iter() { 503 for prefix in prefixes.iter() {
511 if let Some(rel) = filename.relative_to(prefix) { 504 if let Some(rel) = filename.relative_to(prefix) {
554 match_funcs.push(match_func) 547 match_funcs.push(match_func)
555 } 548 }
556 } 549 }
557 550
558 Ok(if match_funcs.len() == 1 { 551 Ok(if match_funcs.len() == 1 {
559 (patterns, match_funcs.remove(0), all_warnings) 552 (patterns, match_funcs.remove(0))
560 } else { 553 } else {
561 ( 554 (
562 patterns, 555 patterns,
563 Box::new(move |f: &HgPath| -> bool { 556 Box::new(move |f: &HgPath| -> bool {
564 match_funcs.iter().any(|match_func| match_func(f)) 557 match_funcs.iter().any(|match_func| match_func(f))
565 }), 558 }),
566 all_warnings,
567 ) 559 )
568 }) 560 })
569 } 561 }
570 562
571 /// Parses all "ignore" files with their recursive includes and returns a 563 /// Parses all "ignore" files with their recursive includes and returns a
586 get_patterns_from_file(pattern_file, root_dir)?; 578 get_patterns_from_file(pattern_file, root_dir)?;
587 579
588 all_patterns.extend(patterns.to_owned()); 580 all_patterns.extend(patterns.to_owned());
589 all_warnings.extend(warnings); 581 all_warnings.extend(warnings);
590 } 582 }
591 let (matcher, warnings) = IncludeMatcher::new(all_patterns, root_dir)?; 583 let matcher = IncludeMatcher::new(all_patterns)?;
592 all_warnings.extend(warnings);
593 Ok(( 584 Ok((
594 Box::new(move |path: &HgPath| matcher.matches(path)), 585 Box::new(move |path: &HgPath| matcher.matches(path)),
595 all_warnings, 586 all_warnings,
596 )) 587 ))
597 } 588 }
598 589
599 impl<'a> IncludeMatcher<'a> { 590 impl<'a> IncludeMatcher<'a> {
600 pub fn new( 591 pub fn new(ignore_patterns: Vec<IgnorePattern>) -> PatternResult<Self> {
601 ignore_patterns: Vec<IgnorePattern>,
602 root_dir: &Path,
603 ) -> PatternResult<(Self, Vec<PatternFileWarning>)> {
604 let (patterns, match_fn, warnings) =
605 build_match(&ignore_patterns, root_dir)?;
606 let RootsDirsAndParents { 592 let RootsDirsAndParents {
607 roots, 593 roots,
608 dirs, 594 dirs,
609 parents, 595 parents,
610 } = roots_dirs_and_parents(&ignore_patterns)?; 596 } = roots_dirs_and_parents(&ignore_patterns)?;
611
612 let prefix = ignore_patterns.iter().any(|k| match k.syntax { 597 let prefix = ignore_patterns.iter().any(|k| match k.syntax {
613 PatternSyntax::Path | PatternSyntax::RelPath => true, 598 PatternSyntax::Path | PatternSyntax::RelPath => true,
614 _ => false, 599 _ => false,
615 }); 600 });
616 601 let (patterns, match_fn) = build_match(ignore_patterns)?;
617 Ok(( 602
618 Self { 603 Ok(Self {
619 patterns, 604 patterns,
620 match_fn, 605 match_fn,
621 prefix, 606 prefix,
622 roots, 607 roots,
623 dirs, 608 dirs,
624 parents, 609 parents,
625 }, 610 })
626 warnings,
627 ))
628 } 611 }
629 612
630 fn get_all_parents_children(&self) -> DirsChildrenMultiset { 613 fn get_all_parents_children(&self) -> DirsChildrenMultiset {
631 // TODO cache 614 // TODO cache
632 let thing = self 615 let thing = self
808 } 791 }
809 792
810 #[test] 793 #[test]
811 fn test_includematcher() { 794 fn test_includematcher() {
812 // VisitchildrensetPrefix 795 // VisitchildrensetPrefix
813 let (matcher, _) = IncludeMatcher::new( 796 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
814 vec![IgnorePattern::new( 797 PatternSyntax::RelPath,
815 PatternSyntax::RelPath, 798 b"dir/subdir",
816 b"dir/subdir", 799 Path::new(""),
817 Path::new(""), 800 )])
818 )],
819 "".as_ref(),
820 )
821 .unwrap(); 801 .unwrap();
822 802
823 let mut set = HashSet::new(); 803 let mut set = HashSet::new();
824 set.insert(HgPath::new(b"dir")); 804 set.insert(HgPath::new(b"dir"));
825 assert_eq!( 805 assert_eq!(
846 matcher.visit_children_set(HgPath::new(b"folder")), 826 matcher.visit_children_set(HgPath::new(b"folder")),
847 VisitChildrenSet::Empty 827 VisitChildrenSet::Empty
848 ); 828 );
849 829
850 // VisitchildrensetRootfilesin 830 // VisitchildrensetRootfilesin
851 let (matcher, _) = IncludeMatcher::new( 831 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
852 vec![IgnorePattern::new( 832 PatternSyntax::RootFiles,
853 PatternSyntax::RootFiles, 833 b"dir/subdir",
854 b"dir/subdir", 834 Path::new(""),
855 Path::new(""), 835 )])
856 )],
857 "".as_ref(),
858 )
859 .unwrap(); 836 .unwrap();
860 837
861 let mut set = HashSet::new(); 838 let mut set = HashSet::new();
862 set.insert(HgPath::new(b"dir")); 839 set.insert(HgPath::new(b"dir"));
863 assert_eq!( 840 assert_eq!(
884 matcher.visit_children_set(HgPath::new(b"folder")), 861 matcher.visit_children_set(HgPath::new(b"folder")),
885 VisitChildrenSet::Empty 862 VisitChildrenSet::Empty
886 ); 863 );
887 864
888 // VisitchildrensetGlob 865 // VisitchildrensetGlob
889 let (matcher, _) = IncludeMatcher::new( 866 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
890 vec![IgnorePattern::new( 867 PatternSyntax::Glob,
891 PatternSyntax::Glob, 868 b"dir/z*",
892 b"dir/z*", 869 Path::new(""),
893 Path::new(""), 870 )])
894 )],
895 "".as_ref(),
896 )
897 .unwrap(); 871 .unwrap();
898 872
899 let mut set = HashSet::new(); 873 let mut set = HashSet::new();
900 set.insert(HgPath::new(b"dir")); 874 set.insert(HgPath::new(b"dir"));
901 assert_eq!( 875 assert_eq!(