stringutil: add function to compile stringmatcher pattern into regexp
author Yuya Nishihara <yuya@tcha.org>
Mon, 05 Oct 2020 20:40:39 +0900
changeset 45724 ac39a8a214b1
parent 45723 edfc5820aae7
child 45725 99b8b73eb622
stringutil: add function to compile stringmatcher pattern into regexp. This prepares for adding a revset predicate for "grep --diff". The grep logic needs a regexp object instead of a match function.
mercurial/utils/stringutil.py
--- a/mercurial/utils/stringutil.py	Wed Oct 14 22:10:48 2020 +0900
+++ b/mercurial/utils/stringutil.py	Mon Oct 05 20:40:39 2020 +0900
@@ -376,6 +376,58 @@
     raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind)
 
 
+def substringregexp(pattern, flags=0):
+    """Compile a pattern with an optional 're:' or 'literal:' prefix into a
+    regexp object using flags; an invalid 're:' pattern raises ParseError.
+
+    helper for tests:
+    >>> def test(pattern, *tests):
+    ...     regexp = substringregexp(pattern)
+    ...     return [bool(regexp.search(t)) for t in tests]
+    >>> def itest(pattern, *tests):
+    ...     regexp = substringregexp(pattern, remod.I)
+    ...     return [bool(regexp.search(t)) for t in tests]
+
+    substring matching (no prefix):
+    >>> test(b'bcde', b'abc', b'def', b'abcdefg')
+    [False, False, True]
+
+    substring pattern should be escaped:
+    >>> substringregexp(b'.bc').pattern
+    '\\\\.bc'
+    >>> test(b'.bc', b'abc', b'def', b'abcdefg')
+    [False, False, False]
+
+    regex matching ('re:' prefix):
+    >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
+    [False, False, True]
+
+    force substring matches ('literal:' prefix):
+    >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
+    [False, True]
+
+    case insensitive literal matches:
+    >>> itest(b'BCDE', b'abc', b'def', b'abcdefg')
+    [False, False, True]
+
+    case insensitive regex matches:
+    >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
+    [False, False, True]
+    """
+    kind, pattern = _splitpattern(pattern)  # split off the kind prefix
+    if kind == b're':
+        try:
+            return remod.compile(pattern, flags)
+        except remod.error as e:
+            raise error.ParseError(
+                _(b'invalid regular expression: %s') % forcebytestr(e)
+            )
+    elif kind == b'literal':
+        return remod.compile(remod.escape(pattern), flags)  # match verbatim
+
+    raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind)
+
+
 def shortuser(user):
     """Return a short representation of a user name or email address."""
     f = user.find(b'@')