stringutil: add function to compile stringmatcher pattern into regexp
Prepares for adding a revset predicate for "grep --diff". The grep logic
needs a regexp object instead of a match function.
--- a/mercurial/utils/stringutil.py Wed Oct 14 22:10:48 2020 +0900
+++ b/mercurial/utils/stringutil.py Mon Oct 05 20:40:39 2020 +0900
@@ -376,6 +376,58 @@
raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind)
+def substringregexp(pattern, flags=0):
+    """Compile a string pattern, optionally prefixed with 're:' or
+    'literal:', into a regexp object. 'flags' goes to the regexp compiler
+    as is; an invalid 're:' pattern is reported as error.ParseError.
+
+    helper for tests:
+    >>> def test(pattern, *tests):
+    ...     regexp = substringregexp(pattern)
+    ...     return [bool(regexp.search(t)) for t in tests]
+    >>> def itest(pattern, *tests):
+    ...     regexp = substringregexp(pattern, remod.I)
+    ...     return [bool(regexp.search(t)) for t in tests]
+
+    without a prefix, the pattern is matched as a substring:
+    >>> test(b'bcde', b'abc', b'def', b'abcdefg')
+    [False, False, True]
+
+    regexp metacharacters in a substring pattern are escaped:
+    >>> substringregexp(b'.bc').pattern
+    '\\\\.bc'
+    >>> test(b'.bc', b'abc', b'def', b'abcdefg')
+    [False, False, False]
+
+    the 're:' prefix selects regexp matching:
+    >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
+    [False, False, True]
+
+    the 'literal:' prefix forces substring matching:
+    >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
+    [False, True]
+
+    case insensitive substring matching:
+    >>> itest(b'BCDE', b'abc', b'def', b'abcdefg')
+    [False, False, True]
+
+    case insensitive regexp matching:
+    >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
+    [False, False, True]
+    """
+    kind, body = _splitpattern(pattern)
+    if kind == b'literal':
+        # escape the text so regexp metacharacters only match themselves
+        return remod.compile(remod.escape(body), flags)
+    if kind != b're':
+        raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind)
+    try:
+        return remod.compile(body, flags)
+    except remod.error as exc:
+        msg = _(b'invalid regular expression: %s') % forcebytestr(exc)
+        raise error.ParseError(msg)
+
+
def shortuser(user):
"""Return a short representation of a user name or email address."""
f = user.find(b'@')