Mercurial > hg
changeset 50857:796b5d6693a4
rust: simplify pattern file parsing
Previously we kept the pattern syntax as a &[u8] until the last
possible moment, which meant we had to handle potential errors when
parsing that &[u8]. But such errors could never actually occur, given
the structure of the code.
Now we parse it eagerly (in two places) and pass it around as a
PatternSyntax, so we can delete some error handling code.
parse_one_pattern is particularly useful for parsing patterns passed
on the command line, which we'll support later in this series.
author | Spencer Baugh <sbaugh@janestreet.com> |
---|---|
date | Wed, 09 Aug 2023 18:08:28 -0400 |
parents | e037af7de2ce |
children | df6dfad5009a |
files | rust/hg-core/src/filepatterns.rs rust/hg-core/src/narrow.rs rust/hg-core/src/sparse.rs |
diffstat | 3 files changed, 89 insertions(+), 54 deletions(-) [+] |
line wrap: on
line diff
--- a/rust/hg-core/src/filepatterns.rs Wed Aug 02 09:57:29 2023 -0400 +++ b/rust/hg-core/src/filepatterns.rs Wed Aug 09 18:08:28 2023 -0400 @@ -335,17 +335,22 @@ } lazy_static! { - static ref SYNTAXES: FastHashMap<&'static [u8], &'static [u8]> = { + static ref SYNTAXES: FastHashMap<&'static [u8], PatternSyntax> = { let mut m = FastHashMap::default(); - m.insert(b"re".as_ref(), b"relre:".as_ref()); - m.insert(b"regexp".as_ref(), b"relre:".as_ref()); - m.insert(b"glob".as_ref(), b"relglob:".as_ref()); - m.insert(b"rootglob".as_ref(), b"rootglob:".as_ref()); - m.insert(b"include".as_ref(), b"include:".as_ref()); - m.insert(b"subinclude".as_ref(), b"subinclude:".as_ref()); - m.insert(b"path".as_ref(), b"path:".as_ref()); - m.insert(b"rootfilesin".as_ref(), b"rootfilesin:".as_ref()); + m.insert(b"re:".as_ref(), PatternSyntax::Regexp); + m.insert(b"regexp:".as_ref(), PatternSyntax::Regexp); + m.insert(b"path:".as_ref(), PatternSyntax::Path); + m.insert(b"filepath:".as_ref(), PatternSyntax::FilePath); + m.insert(b"relpath:".as_ref(), PatternSyntax::RelPath); + m.insert(b"rootfilesin:".as_ref(), PatternSyntax::RootFiles); + m.insert(b"relglob:".as_ref(), PatternSyntax::RelGlob); + m.insert(b"relre:".as_ref(), PatternSyntax::RelRegexp); + m.insert(b"glob:".as_ref(), PatternSyntax::Glob); + m.insert(b"rootglob:".as_ref(), PatternSyntax::RootGlob); + m.insert(b"include:".as_ref(), PatternSyntax::Include); + m.insert(b"subinclude:".as_ref(), PatternSyntax::SubInclude); + m }; } @@ -358,11 +363,37 @@ NoSuchFile(PathBuf), } +pub fn parse_one_pattern( + pattern: &[u8], + source: &Path, + default: PatternSyntax, +) -> IgnorePattern { + let mut pattern_bytes: &[u8] = pattern; + let mut syntax = default; + + for (s, val) in SYNTAXES.iter() { + if let Some(rest) = pattern_bytes.drop_prefix(s) { + syntax = val.clone(); + pattern_bytes = rest; + break; + } + } + + let pattern = pattern_bytes.to_vec(); + + IgnorePattern { + syntax, + pattern, + source: source.to_owned(), + } +} + pub fn 
parse_pattern_file_contents( lines: &[u8], file_path: &Path, - default_syntax_override: Option<&[u8]>, + default_syntax_override: Option<PatternSyntax>, warn: bool, + relativize: bool, ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> { let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap(); @@ -372,11 +403,9 @@ let mut warnings: Vec<PatternFileWarning> = vec![]; let mut current_syntax = - default_syntax_override.unwrap_or_else(|| b"relre:".as_ref()); + default_syntax_override.unwrap_or(PatternSyntax::RelRegexp); - for (line_number, mut line) in lines.split(|c| *c == b'\n').enumerate() { - let line_number = line_number + 1; - + for mut line in lines.split(|c| *c == b'\n') { let line_buf; if line.contains(&b'#') { if let Some(cap) = comment_regex.captures(line) { @@ -386,7 +415,7 @@ line = &line_buf; } - let mut line = line.trim_end(); + let line = line.trim_end(); if line.is_empty() { continue; @@ -395,46 +424,28 @@ if let Some(syntax) = line.drop_prefix(b"syntax:") { let syntax = syntax.trim(); - if let Some(rel_syntax) = SYNTAXES.get(syntax) { - current_syntax = rel_syntax; + if let Some(parsed) = + SYNTAXES.get([syntax, &b":"[..]].concat().as_slice()) + { + current_syntax = parsed.clone(); } else if warn { warnings.push(PatternFileWarning::InvalidSyntax( file_path.to_owned(), syntax.to_owned(), )); } - continue; + } else { + let pattern = parse_one_pattern( + line, + file_path, + current_syntax.clone(), + ); + inputs.push(if relativize { + pattern.to_relative() + } else { + pattern + }) } - - let mut line_syntax: &[u8] = current_syntax; - - for (s, rels) in SYNTAXES.iter() { - if let Some(rest) = line.drop_prefix(rels) { - line_syntax = rels; - line = rest; - break; - } - if let Some(rest) = line.drop_prefix(&[s, &b":"[..]].concat()) { - line_syntax = rels; - line = rest; - break; - } - } - - inputs.push(IgnorePattern::new( - parse_pattern_syntax(line_syntax).map_err(|e| match e { - PatternError::UnsupportedSyntax(syntax) => 
{ - PatternError::UnsupportedSyntaxInFile( - syntax, - file_path.to_string_lossy().into(), - line_number, - ) - } - _ => e, - })?, - line, - file_path, - )); } Ok((inputs, warnings)) } @@ -447,7 +458,7 @@ match std::fs::read(file_path) { Ok(contents) => { inspect_pattern_bytes(file_path, &contents); - parse_pattern_file_contents(&contents, file_path, None, warn) + parse_pattern_file_contents(&contents, file_path, None, warn, true) } Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(( vec![], @@ -473,6 +484,23 @@ source: source.to_owned(), } } + + pub fn to_relative(self) -> Self { + let Self { + syntax, + pattern, + source, + } = self; + Self { + syntax: match syntax { + PatternSyntax::Regexp => PatternSyntax::RelRegexp, + PatternSyntax::Glob => PatternSyntax::RelGlob, + x => x, + }, + pattern, + source, + } + } } pub type PatternResult<T> = Result<T, PatternError>; @@ -639,7 +667,8 @@ lines, Path::new("file_path"), None, - false + false, + true, ) .unwrap() .0, @@ -657,7 +686,8 @@ lines, Path::new("file_path"), None, - false + false, + true, ) .unwrap() .0, @@ -669,7 +699,8 @@ lines, Path::new("file_path"), None, - false + false, + true, ) .unwrap() .0,
--- a/rust/hg-core/src/narrow.rs Wed Aug 02 09:57:29 2023 -0400 +++ b/rust/hg-core/src/narrow.rs Wed Aug 09 18:08:28 2023 -0400 @@ -74,6 +74,7 @@ Path::new(""), None, false, + true, )?; warnings.extend(subwarnings.into_iter().map(From::from)); @@ -85,6 +86,7 @@ Path::new(""), None, false, + true, )?; if !patterns.is_empty() { warnings.extend(subwarnings.into_iter().map(From::from));
--- a/rust/hg-core/src/sparse.rs Wed Aug 02 09:57:29 2023 -0400 +++ b/rust/hg-core/src/sparse.rs Wed Aug 09 18:08:28 2023 -0400 @@ -282,7 +282,8 @@ let (patterns, subwarnings) = parse_pattern_file_contents( &config.includes, Path::new(""), - Some(b"glob:".as_ref()), + Some(PatternSyntax::Glob), + false, false, )?; warnings.extend(subwarnings.into_iter().map(From::from)); @@ -292,7 +293,8 @@ let (patterns, subwarnings) = parse_pattern_file_contents( &config.excludes, Path::new(""), - Some(b"glob:".as_ref()), + Some(PatternSyntax::Glob), + false, false, )?; warnings.extend(subwarnings.into_iter().map(From::from));