comparison rust/hg-core/src/filepatterns.rs @ 50861:090658724abf

rust: de-hardcode glob_suffix. We're adding patternmatcher in a subsequent commit, and this needs to be different for includematcher and patternmatcher.
author Spencer Baugh <sbaugh@janestreet.com>
date Mon, 14 Aug 2023 09:25:36 -0400
parents 2b4bcdc948e7
children c112cc9effdc
comparison
equal deleted inserted replaced
50860:f50e71fdfcb4 50861:090658724abf
33 } 33 }
34 34
35 /// These are matched in order 35 /// These are matched in order
36 const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] = 36 const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] =
37 &[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")]; 37 &[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")];
38
39 /// Appended to the regexp of globs
40 const GLOB_SUFFIX: &[u8; 7] = b"(?:/|$)";
41 38
42 #[derive(Debug, Clone, PartialEq, Eq)] 39 #[derive(Debug, Clone, PartialEq, Eq)]
43 pub enum PatternSyntax { 40 pub enum PatternSyntax {
44 /// A regular expression 41 /// A regular expression
45 Regexp, 42 Regexp,
179 } 176 }
180 177
181 /// Builds the regex that corresponds to the given pattern. 178 /// Builds the regex that corresponds to the given pattern.
182 /// If within a `syntax: regexp` context, returns the pattern, 179 /// If within a `syntax: regexp` context, returns the pattern,
183 /// otherwise, returns the corresponding regex. 180 /// otherwise, returns the corresponding regex.
184 fn _build_single_regex(entry: &IgnorePattern) -> Vec<u8> { 181 fn _build_single_regex(entry: &IgnorePattern, glob_suffix: &[u8]) -> Vec<u8> {
185 let IgnorePattern { 182 let IgnorePattern {
186 syntax, pattern, .. 183 syntax, pattern, ..
187 } = entry; 184 } = entry;
188 if pattern.is_empty() { 185 if pattern.is_empty() {
189 return vec![]; 186 return vec![];
243 res 240 res
244 } 241 }
245 PatternSyntax::RelGlob => { 242 PatternSyntax::RelGlob => {
246 let glob_re = glob_to_re(pattern); 243 let glob_re = glob_to_re(pattern);
247 if let Some(rest) = glob_re.drop_prefix(b"[^/]*") { 244 if let Some(rest) = glob_re.drop_prefix(b"[^/]*") {
248 [b".*", rest, GLOB_SUFFIX].concat() 245 [b".*", rest, glob_suffix].concat()
249 } else { 246 } else {
250 [b"(?:.*/)?", glob_re.as_slice(), GLOB_SUFFIX].concat() 247 [b"(?:.*/)?", glob_re.as_slice(), glob_suffix].concat()
251 } 248 }
252 } 249 }
253 PatternSyntax::Glob | PatternSyntax::RootGlob => { 250 PatternSyntax::Glob | PatternSyntax::RootGlob => {
254 [glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat() 251 [glob_to_re(pattern).as_slice(), glob_suffix].concat()
255 } 252 }
256 PatternSyntax::Include 253 PatternSyntax::Include
257 | PatternSyntax::SubInclude 254 | PatternSyntax::SubInclude
258 | PatternSyntax::ExpandedSubInclude(_) 255 | PatternSyntax::ExpandedSubInclude(_)
259 | PatternSyntax::FilePath => unreachable!(), 256 | PatternSyntax::FilePath => unreachable!(),
307 304
308 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs 305 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs
309 /// that don't need to be transformed into a regex. 306 /// that don't need to be transformed into a regex.
310 pub fn build_single_regex( 307 pub fn build_single_regex(
311 entry: &IgnorePattern, 308 entry: &IgnorePattern,
309 glob_suffix: &[u8],
312 ) -> Result<Option<Vec<u8>>, PatternError> { 310 ) -> Result<Option<Vec<u8>>, PatternError> {
313 let IgnorePattern { 311 let IgnorePattern {
314 pattern, syntax, .. 312 pattern, syntax, ..
315 } = entry; 313 } = entry;
316 let pattern = match syntax { 314 let pattern = match syntax {
329 if is_simple_rootglob || syntax == &PatternSyntax::FilePath { 327 if is_simple_rootglob || syntax == &PatternSyntax::FilePath {
330 Ok(None) 328 Ok(None)
331 } else { 329 } else {
332 let mut entry = entry.clone(); 330 let mut entry = entry.clone();
333 entry.pattern = pattern; 331 entry.pattern = pattern;
334 Ok(Some(_build_single_regex(&entry))) 332 Ok(Some(_build_single_regex(&entry, glob_suffix)))
335 } 333 }
336 } 334 }
337 335
338 lazy_static! { 336 lazy_static! {
339 static ref SYNTAXES: FastHashMap<&'static [u8], PatternSyntax> = { 337 static ref SYNTAXES: FastHashMap<&'static [u8], PatternSyntax> = {
714 } 712 }
715 713
716 #[test] 714 #[test]
717 fn test_build_single_regex() { 715 fn test_build_single_regex() {
718 assert_eq!( 716 assert_eq!(
719 build_single_regex(&IgnorePattern::new( 717 build_single_regex(
720 PatternSyntax::RelGlob, 718 &IgnorePattern::new(
721 b"rust/target/", 719 PatternSyntax::RelGlob,
722 Path::new("") 720 b"rust/target/",
723 )) 721 Path::new("")
722 ),
723 b"(?:/|$)"
724 )
724 .unwrap(), 725 .unwrap(),
725 Some(br"(?:.*/)?rust/target(?:/|$)".to_vec()), 726 Some(br"(?:.*/)?rust/target(?:/|$)".to_vec()),
726 ); 727 );
727 assert_eq!( 728 assert_eq!(
728 build_single_regex(&IgnorePattern::new( 729 build_single_regex(
729 PatternSyntax::Regexp, 730 &IgnorePattern::new(
730 br"rust/target/\d+", 731 PatternSyntax::Regexp,
731 Path::new("") 732 br"rust/target/\d+",
732 )) 733 Path::new("")
734 ),
735 b"(?:/|$)"
736 )
733 .unwrap(), 737 .unwrap(),
734 Some(br"rust/target/\d+".to_vec()), 738 Some(br"rust/target/\d+".to_vec()),
735 ); 739 );
736 } 740 }
737 741
738 #[test] 742 #[test]
739 fn test_build_single_regex_shortcut() { 743 fn test_build_single_regex_shortcut() {
740 assert_eq!( 744 assert_eq!(
741 build_single_regex(&IgnorePattern::new( 745 build_single_regex(
742 PatternSyntax::RootGlob, 746 &IgnorePattern::new(
743 b"", 747 PatternSyntax::RootGlob,
744 Path::new("") 748 b"",
745 )) 749 Path::new("")
750 ),
751 b"(?:/|$)"
752 )
746 .unwrap(), 753 .unwrap(),
747 None, 754 None,
748 ); 755 );
749 assert_eq!( 756 assert_eq!(
750 build_single_regex(&IgnorePattern::new( 757 build_single_regex(
751 PatternSyntax::RootGlob, 758 &IgnorePattern::new(
752 b"whatever", 759 PatternSyntax::RootGlob,
753 Path::new("") 760 b"whatever",
754 )) 761 Path::new("")
762 ),
763 b"(?:/|$)"
764 )
755 .unwrap(), 765 .unwrap(),
756 None, 766 None,
757 ); 767 );
758 assert_eq!( 768 assert_eq!(
759 build_single_regex(&IgnorePattern::new( 769 build_single_regex(
760 PatternSyntax::RootGlob, 770 &IgnorePattern::new(
761 b"*.o", 771 PatternSyntax::RootGlob,
762 Path::new("") 772 b"*.o",
763 )) 773 Path::new("")
774 ),
775 b"(?:/|$)"
776 )
764 .unwrap(), 777 .unwrap(),
765 Some(br"[^/]*\.o(?:/|$)".to_vec()), 778 Some(br"[^/]*\.o(?:/|$)".to_vec()),
766 ); 779 );
767 } 780 }
768 781
769 #[test] 782 #[test]
770 fn test_build_single_relregex() { 783 fn test_build_single_relregex() {
771 assert_eq!( 784 assert_eq!(
772 build_single_regex(&IgnorePattern::new( 785 build_single_regex(
773 PatternSyntax::RelRegexp, 786 &IgnorePattern::new(
774 b"^ba{2}r", 787 PatternSyntax::RelRegexp,
775 Path::new("") 788 b"^ba{2}r",
776 )) 789 Path::new("")
790 ),
791 b"(?:/|$)"
792 )
777 .unwrap(), 793 .unwrap(),
778 Some(b"^ba{2}r".to_vec()), 794 Some(b"^ba{2}r".to_vec()),
779 ); 795 );
780 assert_eq!( 796 assert_eq!(
781 build_single_regex(&IgnorePattern::new( 797 build_single_regex(
782 PatternSyntax::RelRegexp, 798 &IgnorePattern::new(
783 b"ba{2}r", 799 PatternSyntax::RelRegexp,
784 Path::new("") 800 b"ba{2}r",
785 )) 801 Path::new("")
802 ),
803 b"(?:/|$)"
804 )
786 .unwrap(), 805 .unwrap(),
787 Some(b".*ba{2}r".to_vec()), 806 Some(b".*ba{2}r".to_vec()),
788 ); 807 );
789 assert_eq!( 808 assert_eq!(
790 build_single_regex(&IgnorePattern::new( 809 build_single_regex(
791 PatternSyntax::RelRegexp, 810 &IgnorePattern::new(
792 b"(?ia)ba{2}r", 811 PatternSyntax::RelRegexp,
793 Path::new("") 812 b"(?ia)ba{2}r",
794 )) 813 Path::new("")
814 ),
815 b"(?:/|$)"
816 )
795 .unwrap(), 817 .unwrap(),
796 Some(b"(?ia:.*ba{2}r)".to_vec()), 818 Some(b"(?ia:.*ba{2}r)".to_vec()),
797 ); 819 );
798 assert_eq!( 820 assert_eq!(
799 build_single_regex(&IgnorePattern::new( 821 build_single_regex(
800 PatternSyntax::RelRegexp, 822 &IgnorePattern::new(
801 b"(?ia)^ba{2}r", 823 PatternSyntax::RelRegexp,
802 Path::new("") 824 b"(?ia)^ba{2}r",
803 )) 825 Path::new("")
826 ),
827 b"(?:/|$)"
828 )
804 .unwrap(), 829 .unwrap(),
805 Some(b"(?ia:^ba{2}r)".to_vec()), 830 Some(b"(?ia:^ba{2}r)".to_vec()),
806 ); 831 );
807 } 832 }
808 } 833 }