Mercurial > hg
changeset 50861:090658724abf
rust: de-hardcode glob_suffix
We're adding patternmatcher in a subsequent commit, and this needs
to be different for includematcher and patternmatcher.
author | Spencer Baugh <sbaugh@janestreet.com> |
---|---|
date | Mon, 14 Aug 2023 09:25:36 -0400 |
parents | f50e71fdfcb4 |
children | 5efccea9cf38 |
files | rust/hg-core/src/filepatterns.rs rust/hg-core/src/matchers.rs |
diffstat | 2 files changed, 84 insertions(+), 56 deletions(-) [+] |
line wrap: on
line diff
--- a/rust/hg-core/src/filepatterns.rs Tue Aug 08 11:50:26 2023 -0400 +++ b/rust/hg-core/src/filepatterns.rs Mon Aug 14 09:25:36 2023 -0400 @@ -36,9 +36,6 @@ const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] = &[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")]; -/// Appended to the regexp of globs -const GLOB_SUFFIX: &[u8; 7] = b"(?:/|$)"; - #[derive(Debug, Clone, PartialEq, Eq)] pub enum PatternSyntax { /// A regular expression @@ -181,7 +178,7 @@ /// Builds the regex that corresponds to the given pattern. /// If within a `syntax: regexp` context, returns the pattern, /// otherwise, returns the corresponding regex. -fn _build_single_regex(entry: &IgnorePattern) -> Vec<u8> { +fn _build_single_regex(entry: &IgnorePattern, glob_suffix: &[u8]) -> Vec<u8> { let IgnorePattern { syntax, pattern, .. } = entry; @@ -245,13 +242,13 @@ PatternSyntax::RelGlob => { let glob_re = glob_to_re(pattern); if let Some(rest) = glob_re.drop_prefix(b"[^/]*") { - [b".*", rest, GLOB_SUFFIX].concat() + [b".*", rest, glob_suffix].concat() } else { - [b"(?:.*/)?", glob_re.as_slice(), GLOB_SUFFIX].concat() + [b"(?:.*/)?", glob_re.as_slice(), glob_suffix].concat() } } PatternSyntax::Glob | PatternSyntax::RootGlob => { - [glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat() + [glob_to_re(pattern).as_slice(), glob_suffix].concat() } PatternSyntax::Include | PatternSyntax::SubInclude @@ -309,6 +306,7 @@ /// that don't need to be transformed into a regex. pub fn build_single_regex( entry: &IgnorePattern, + glob_suffix: &[u8], ) -> Result<Option<Vec<u8>>, PatternError> { let IgnorePattern { pattern, syntax, .. 
@@ -331,7 +329,7 @@ } else { let mut entry = entry.clone(); entry.pattern = pattern; - Ok(Some(_build_single_regex(&entry))) + Ok(Some(_build_single_regex(&entry, glob_suffix))) } } @@ -716,20 +714,26 @@ #[test] fn test_build_single_regex() { assert_eq!( - build_single_regex(&IgnorePattern::new( - PatternSyntax::RelGlob, - b"rust/target/", - Path::new("") - )) + build_single_regex( + &IgnorePattern::new( + PatternSyntax::RelGlob, + b"rust/target/", + Path::new("") + ), + b"(?:/|$)" + ) .unwrap(), Some(br"(?:.*/)?rust/target(?:/|$)".to_vec()), ); assert_eq!( - build_single_regex(&IgnorePattern::new( - PatternSyntax::Regexp, - br"rust/target/\d+", - Path::new("") - )) + build_single_regex( + &IgnorePattern::new( + PatternSyntax::Regexp, + br"rust/target/\d+", + Path::new("") + ), + b"(?:/|$)" + ) .unwrap(), Some(br"rust/target/\d+".to_vec()), ); @@ -738,29 +742,38 @@ #[test] fn test_build_single_regex_shortcut() { assert_eq!( - build_single_regex(&IgnorePattern::new( - PatternSyntax::RootGlob, - b"", - Path::new("") - )) + build_single_regex( + &IgnorePattern::new( + PatternSyntax::RootGlob, + b"", + Path::new("") + ), + b"(?:/|$)" + ) .unwrap(), None, ); assert_eq!( - build_single_regex(&IgnorePattern::new( - PatternSyntax::RootGlob, - b"whatever", - Path::new("") - )) + build_single_regex( + &IgnorePattern::new( + PatternSyntax::RootGlob, + b"whatever", + Path::new("") + ), + b"(?:/|$)" + ) .unwrap(), None, ); assert_eq!( - build_single_regex(&IgnorePattern::new( - PatternSyntax::RootGlob, - b"*.o", - Path::new("") - )) + build_single_regex( + &IgnorePattern::new( + PatternSyntax::RootGlob, + b"*.o", + Path::new("") + ), + b"(?:/|$)" + ) .unwrap(), Some(br"[^/]*\.o(?:/|$)".to_vec()), ); @@ -769,38 +782,50 @@ #[test] fn test_build_single_relregex() { assert_eq!( - build_single_regex(&IgnorePattern::new( - PatternSyntax::RelRegexp, - b"^ba{2}r", - Path::new("") - )) + build_single_regex( + &IgnorePattern::new( + PatternSyntax::RelRegexp, + b"^ba{2}r", + Path::new("") 
+ ), + b"(?:/|$)" + ) .unwrap(), Some(b"^ba{2}r".to_vec()), ); assert_eq!( - build_single_regex(&IgnorePattern::new( - PatternSyntax::RelRegexp, - b"ba{2}r", - Path::new("") - )) + build_single_regex( + &IgnorePattern::new( + PatternSyntax::RelRegexp, + b"ba{2}r", + Path::new("") + ), + b"(?:/|$)" + ) .unwrap(), Some(b".*ba{2}r".to_vec()), ); assert_eq!( - build_single_regex(&IgnorePattern::new( - PatternSyntax::RelRegexp, - b"(?ia)ba{2}r", - Path::new("") - )) + build_single_regex( + &IgnorePattern::new( + PatternSyntax::RelRegexp, + b"(?ia)ba{2}r", + Path::new("") + ), + b"(?:/|$)" + ) .unwrap(), Some(b"(?ia:.*ba{2}r)".to_vec()), ); assert_eq!( - build_single_regex(&IgnorePattern::new( - PatternSyntax::RelRegexp, - b"(?ia)^ba{2}r", - Path::new("") - )) + build_single_regex( + &IgnorePattern::new( + PatternSyntax::RelRegexp, + b"(?ia)^ba{2}r", + Path::new("") + ), + b"(?:/|$)" + ) .unwrap(), Some(b"(?ia:^ba{2}r)".to_vec()), );
--- a/rust/hg-core/src/matchers.rs Tue Aug 08 11:50:26 2023 -0400 +++ b/rust/hg-core/src/matchers.rs Mon Aug 14 09:25:36 2023 -0400 @@ -654,12 +654,13 @@ /// said regex formed by the given ignore patterns. fn build_regex_match<'a, 'b>( ignore_patterns: &'a [IgnorePattern], + glob_suffix: &[u8], ) -> PatternResult<(Vec<u8>, IgnoreFnType<'b>)> { let mut regexps = vec![]; let mut exact_set = HashSet::new(); for pattern in ignore_patterns { - if let Some(re) = build_single_regex(pattern)? { + if let Some(re) = build_single_regex(pattern, glob_suffix)? { regexps.push(re); } else { let exact = normalize_path_bytes(&pattern.pattern); @@ -780,6 +781,7 @@ /// should be matched. fn build_match<'a>( ignore_patterns: Vec<IgnorePattern>, + glob_suffix: &[u8], ) -> PatternResult<(Vec<u8>, IgnoreFnType<'a>)> { let mut match_funcs: Vec<IgnoreFnType<'a>> = vec![]; // For debugging and printing @@ -843,7 +845,8 @@ dirs_vec.sort(); patterns.extend(dirs_vec.escaped_bytes()); } else { - let (new_re, match_func) = build_regex_match(&ignore_patterns)?; + let (new_re, match_func) = + build_regex_match(&ignore_patterns, glob_suffix)?; patterns = new_re; match_funcs.push(match_func) } @@ -922,7 +925,7 @@ let prefix = ignore_patterns.iter().all(|k| { matches!(k.syntax, PatternSyntax::Path | PatternSyntax::RelPath) }); - let (patterns, match_fn) = build_match(ignore_patterns)?; + let (patterns, match_fn) = build_match(ignore_patterns, b"(?:/|$)")?; Ok(Self { patterns,