changeset 50861:090658724abf

rust: de-hardcode glob_suffix We're adding patternmatcher in a subsequent commit, and this needs to be different for includematcher and patternmatcher.
author Spencer Baugh <sbaugh@janestreet.com>
date Mon, 14 Aug 2023 09:25:36 -0400
parents f50e71fdfcb4
children 5efccea9cf38
files rust/hg-core/src/filepatterns.rs rust/hg-core/src/matchers.rs
diffstat 2 files changed, 84 insertions(+), 56 deletions(-) [+]
line wrap: on
line diff
--- a/rust/hg-core/src/filepatterns.rs	Tue Aug 08 11:50:26 2023 -0400
+++ b/rust/hg-core/src/filepatterns.rs	Mon Aug 14 09:25:36 2023 -0400
@@ -36,9 +36,6 @@
 const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] =
     &[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")];
 
-/// Appended to the regexp of globs
-const GLOB_SUFFIX: &[u8; 7] = b"(?:/|$)";
-
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub enum PatternSyntax {
     /// A regular expression
@@ -181,7 +178,7 @@
 /// Builds the regex that corresponds to the given pattern.
 /// If within a `syntax: regexp` context, returns the pattern,
 /// otherwise, returns the corresponding regex.
-fn _build_single_regex(entry: &IgnorePattern) -> Vec<u8> {
+fn _build_single_regex(entry: &IgnorePattern, glob_suffix: &[u8]) -> Vec<u8> {
     let IgnorePattern {
         syntax, pattern, ..
     } = entry;
@@ -245,13 +242,13 @@
         PatternSyntax::RelGlob => {
             let glob_re = glob_to_re(pattern);
             if let Some(rest) = glob_re.drop_prefix(b"[^/]*") {
-                [b".*", rest, GLOB_SUFFIX].concat()
+                [b".*", rest, glob_suffix].concat()
             } else {
-                [b"(?:.*/)?", glob_re.as_slice(), GLOB_SUFFIX].concat()
+                [b"(?:.*/)?", glob_re.as_slice(), glob_suffix].concat()
             }
         }
         PatternSyntax::Glob | PatternSyntax::RootGlob => {
-            [glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat()
+            [glob_to_re(pattern).as_slice(), glob_suffix].concat()
         }
         PatternSyntax::Include
         | PatternSyntax::SubInclude
@@ -309,6 +306,7 @@
 /// that don't need to be transformed into a regex.
 pub fn build_single_regex(
     entry: &IgnorePattern,
+    glob_suffix: &[u8],
 ) -> Result<Option<Vec<u8>>, PatternError> {
     let IgnorePattern {
         pattern, syntax, ..
@@ -331,7 +329,7 @@
     } else {
         let mut entry = entry.clone();
         entry.pattern = pattern;
-        Ok(Some(_build_single_regex(&entry)))
+        Ok(Some(_build_single_regex(&entry, glob_suffix)))
     }
 }
 
@@ -716,20 +714,26 @@
     #[test]
     fn test_build_single_regex() {
         assert_eq!(
-            build_single_regex(&IgnorePattern::new(
-                PatternSyntax::RelGlob,
-                b"rust/target/",
-                Path::new("")
-            ))
+            build_single_regex(
+                &IgnorePattern::new(
+                    PatternSyntax::RelGlob,
+                    b"rust/target/",
+                    Path::new("")
+                ),
+                b"(?:/|$)"
+            )
             .unwrap(),
             Some(br"(?:.*/)?rust/target(?:/|$)".to_vec()),
         );
         assert_eq!(
-            build_single_regex(&IgnorePattern::new(
-                PatternSyntax::Regexp,
-                br"rust/target/\d+",
-                Path::new("")
-            ))
+            build_single_regex(
+                &IgnorePattern::new(
+                    PatternSyntax::Regexp,
+                    br"rust/target/\d+",
+                    Path::new("")
+                ),
+                b"(?:/|$)"
+            )
             .unwrap(),
             Some(br"rust/target/\d+".to_vec()),
         );
@@ -738,29 +742,38 @@
     #[test]
     fn test_build_single_regex_shortcut() {
         assert_eq!(
-            build_single_regex(&IgnorePattern::new(
-                PatternSyntax::RootGlob,
-                b"",
-                Path::new("")
-            ))
+            build_single_regex(
+                &IgnorePattern::new(
+                    PatternSyntax::RootGlob,
+                    b"",
+                    Path::new("")
+                ),
+                b"(?:/|$)"
+            )
             .unwrap(),
             None,
         );
         assert_eq!(
-            build_single_regex(&IgnorePattern::new(
-                PatternSyntax::RootGlob,
-                b"whatever",
-                Path::new("")
-            ))
+            build_single_regex(
+                &IgnorePattern::new(
+                    PatternSyntax::RootGlob,
+                    b"whatever",
+                    Path::new("")
+                ),
+                b"(?:/|$)"
+            )
             .unwrap(),
             None,
         );
         assert_eq!(
-            build_single_regex(&IgnorePattern::new(
-                PatternSyntax::RootGlob,
-                b"*.o",
-                Path::new("")
-            ))
+            build_single_regex(
+                &IgnorePattern::new(
+                    PatternSyntax::RootGlob,
+                    b"*.o",
+                    Path::new("")
+                ),
+                b"(?:/|$)"
+            )
             .unwrap(),
             Some(br"[^/]*\.o(?:/|$)".to_vec()),
         );
@@ -769,38 +782,50 @@
     #[test]
     fn test_build_single_relregex() {
         assert_eq!(
-            build_single_regex(&IgnorePattern::new(
-                PatternSyntax::RelRegexp,
-                b"^ba{2}r",
-                Path::new("")
-            ))
+            build_single_regex(
+                &IgnorePattern::new(
+                    PatternSyntax::RelRegexp,
+                    b"^ba{2}r",
+                    Path::new("")
+                ),
+                b"(?:/|$)"
+            )
             .unwrap(),
             Some(b"^ba{2}r".to_vec()),
         );
         assert_eq!(
-            build_single_regex(&IgnorePattern::new(
-                PatternSyntax::RelRegexp,
-                b"ba{2}r",
-                Path::new("")
-            ))
+            build_single_regex(
+                &IgnorePattern::new(
+                    PatternSyntax::RelRegexp,
+                    b"ba{2}r",
+                    Path::new("")
+                ),
+                b"(?:/|$)"
+            )
             .unwrap(),
             Some(b".*ba{2}r".to_vec()),
         );
         assert_eq!(
-            build_single_regex(&IgnorePattern::new(
-                PatternSyntax::RelRegexp,
-                b"(?ia)ba{2}r",
-                Path::new("")
-            ))
+            build_single_regex(
+                &IgnorePattern::new(
+                    PatternSyntax::RelRegexp,
+                    b"(?ia)ba{2}r",
+                    Path::new("")
+                ),
+                b"(?:/|$)"
+            )
             .unwrap(),
             Some(b"(?ia:.*ba{2}r)".to_vec()),
         );
         assert_eq!(
-            build_single_regex(&IgnorePattern::new(
-                PatternSyntax::RelRegexp,
-                b"(?ia)^ba{2}r",
-                Path::new("")
-            ))
+            build_single_regex(
+                &IgnorePattern::new(
+                    PatternSyntax::RelRegexp,
+                    b"(?ia)^ba{2}r",
+                    Path::new("")
+                ),
+                b"(?:/|$)"
+            )
             .unwrap(),
             Some(b"(?ia:^ba{2}r)".to_vec()),
         );
--- a/rust/hg-core/src/matchers.rs	Tue Aug 08 11:50:26 2023 -0400
+++ b/rust/hg-core/src/matchers.rs	Mon Aug 14 09:25:36 2023 -0400
@@ -654,12 +654,13 @@
 /// said regex formed by the given ignore patterns.
 fn build_regex_match<'a, 'b>(
     ignore_patterns: &'a [IgnorePattern],
+    glob_suffix: &[u8],
 ) -> PatternResult<(Vec<u8>, IgnoreFnType<'b>)> {
     let mut regexps = vec![];
     let mut exact_set = HashSet::new();
 
     for pattern in ignore_patterns {
-        if let Some(re) = build_single_regex(pattern)? {
+        if let Some(re) = build_single_regex(pattern, glob_suffix)? {
             regexps.push(re);
         } else {
             let exact = normalize_path_bytes(&pattern.pattern);
@@ -780,6 +781,7 @@
 /// should be matched.
 fn build_match<'a>(
     ignore_patterns: Vec<IgnorePattern>,
+    glob_suffix: &[u8],
 ) -> PatternResult<(Vec<u8>, IgnoreFnType<'a>)> {
     let mut match_funcs: Vec<IgnoreFnType<'a>> = vec![];
     // For debugging and printing
@@ -843,7 +845,8 @@
             dirs_vec.sort();
             patterns.extend(dirs_vec.escaped_bytes());
         } else {
-            let (new_re, match_func) = build_regex_match(&ignore_patterns)?;
+            let (new_re, match_func) =
+                build_regex_match(&ignore_patterns, glob_suffix)?;
             patterns = new_re;
             match_funcs.push(match_func)
         }
@@ -922,7 +925,7 @@
         let prefix = ignore_patterns.iter().all(|k| {
             matches!(k.syntax, PatternSyntax::Path | PatternSyntax::RelPath)
         });
-        let (patterns, match_fn) = build_match(ignore_patterns)?;
+        let (patterns, match_fn) = build_match(ignore_patterns, b"(?:/|$)")?;
 
         Ok(Self {
             patterns,