Mercurial > hg
changeset 51471:5633de951d34 stable
rust-matchers: raw regular expression builder
Extracting this `re_bytes_builder()` from `re_matcher()` makes it reusable
in more general cases than matching `HgPath` instances and would
help reduce code duplication in RHGitaly.
author | Georges Racinet <georges.racinet@octobus.net> |
---|---|
date | Mon, 11 Mar 2024 13:36:25 +0100 |
parents | 406b413e3cf2 |
children | 9da3fcc5f70f |
files | rust/hg-core/src/matchers.rs |
diffstat | 1 files changed, 16 insertions(+), 8 deletions(-) [+] |
line wrap: on
line diff
--- a/rust/hg-core/src/matchers.rs Mon Mar 11 13:23:18 2024 +0100 +++ b/rust/hg-core/src/matchers.rs Mon Mar 11 13:36:25 2024 +0100 @@ -737,14 +737,11 @@ } } -/// Returns a function that matches an `HgPath` against the given regex -/// pattern. +/// Return a `RegexBuilder` from a bytes pattern /// -/// This can fail when the pattern is invalid or not supported by the -/// underlying engine (the `regex` crate), for instance anything with -/// back-references. -#[logging_timer::time("trace")] -fn re_matcher(pattern: &[u8]) -> PatternResult<RegexMatcher> { +/// This works around the fact that even if it works on byte haysacks, +/// [`regex::bytes::Regex`] still uses UTF-8 patterns. +pub fn re_bytes_builder(pattern: &[u8]) -> regex::bytes::RegexBuilder { use std::io::Write; // The `regex` crate adds `.*` to the start and end of expressions if there @@ -764,7 +761,18 @@ // # Safety // This is safe because we escaped all non-ASCII bytes. let pattern_string = unsafe { String::from_utf8_unchecked(escaped_bytes) }; - let re = regex::bytes::RegexBuilder::new(&pattern_string) + regex::bytes::RegexBuilder::new(&pattern_string) +} + +/// Returns a function that matches an `HgPath` against the given regex +/// pattern. +/// +/// This can fail when the pattern is invalid or not supported by the +/// underlying engine (the `regex` crate), for instance anything with +/// back-references. +#[logging_timer::time("trace")] +fn re_matcher(pattern: &[u8]) -> PatternResult<RegexMatcher> { + let re = re_bytes_builder(pattern) .unicode(false) // Big repos with big `.hgignore` will hit the default limit and // incur a significant performance hit. One repo's `hg status` hit