# HG changeset patch # User Georges Racinet # Date 1710160585 -3600 # Node ID 5633de951d34976a44f0047ee7e6fa046d87fdd3 # Parent 406b413e3cf2964685ee639ce9597b35343e8144 rust-matchers: raw regular expression builder Extracting this `re_bytes_builder()` from `re_matcher()` makes it reusable in more general cases than matching `HgPath` instances and would help reduce code duplication in RHGitaly. diff -r 406b413e3cf2 -r 5633de951d34 rust/hg-core/src/matchers.rs --- a/rust/hg-core/src/matchers.rs Mon Mar 11 13:23:18 2024 +0100 +++ b/rust/hg-core/src/matchers.rs Mon Mar 11 13:36:25 2024 +0100 @@ -737,14 +737,11 @@ } } -/// Returns a function that matches an `HgPath` against the given regex -/// pattern. +/// Return a `RegexBuilder` from a bytes pattern /// -/// This can fail when the pattern is invalid or not supported by the -/// underlying engine (the `regex` crate), for instance anything with -/// back-references. -#[logging_timer::time("trace")] -fn re_matcher(pattern: &[u8]) -> PatternResult { +/// This works around the fact that even if it works on byte haystacks, +/// [`regex::bytes::Regex`] still uses UTF-8 patterns. +pub fn re_bytes_builder(pattern: &[u8]) -> regex::bytes::RegexBuilder { use std::io::Write; // The `regex` crate adds `.*` to the start and end of expressions if there @@ -764,7 +761,18 @@ // # Safety // This is safe because we escaped all non-ASCII bytes. let pattern_string = unsafe { String::from_utf8_unchecked(escaped_bytes) }; - let re = regex::bytes::RegexBuilder::new(&pattern_string) + regex::bytes::RegexBuilder::new(&pattern_string) +} + +/// Returns a function that matches an `HgPath` against the given regex +/// pattern. +/// +/// This can fail when the pattern is invalid or not supported by the +/// underlying engine (the `regex` crate), for instance anything with +/// back-references. 
+#[logging_timer::time("trace")] +fn re_matcher(pattern: &[u8]) -> PatternResult { + let re = re_bytes_builder(pattern) .unicode(false) // Big repos with big `.hgignore` will hit the default limit and // incur a significant performance hit. One repo's `hg status` hit