Mercurial > hg-stable
changeset 45724:ac39a8a214b1
stringutil: add function to compile stringmatcher pattern into regexp
Prepares for adding a revset predicate for "grep --diff". The grep logic
needs a regexp object instead of a match function.
author | Yuya Nishihara <yuya@tcha.org> |
---|---|
date | Mon, 05 Oct 2020 20:40:39 +0900 |
parents | edfc5820aae7 |
children | 99b8b73eb622 |
files | mercurial/utils/stringutil.py |
diffstat | 1 files changed, 52 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/utils/stringutil.py Wed Oct 14 22:10:48 2020 +0900
+++ b/mercurial/utils/stringutil.py Mon Oct 05 20:40:39 2020 +0900
@@ -376,6 +376,58 @@
     raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind)
 
 
+def substringregexp(pattern, flags=0):
+    """Build a regexp object from a string pattern possibly starting with
+    're:' or 'literal:' prefix.
+
+    helper for tests:
+    >>> def test(pattern, *tests):
+    ...     regexp = substringregexp(pattern)
+    ...     return [bool(regexp.search(t)) for t in tests]
+    >>> def itest(pattern, *tests):
+    ...     regexp = substringregexp(pattern, remod.I)
+    ...     return [bool(regexp.search(t)) for t in tests]
+
+    substring matching (no prefix):
+    >>> test(b'bcde', b'abc', b'def', b'abcdefg')
+    [False, False, True]
+
+    substring pattern should be escaped:
+    >>> substringregexp(b'.bc').pattern
+    '\\\\.bc'
+    >>> test(b'.bc', b'abc', b'def', b'abcdefg')
+    [False, False, False]
+
+    regex matching ('re:' prefix)
+    >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
+    [False, False, True]
+
+    force substring matches ('literal:' prefix)
+    >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
+    [False, True]
+
+    case insensitive literal matches
+    >>> itest(b'BCDE', b'abc', b'def', b'abcdefg')
+    [False, False, True]
+
+    case insensitive regex matches
+    >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
+    [False, False, True]
+    """
+    kind, pattern = _splitpattern(pattern)
+    if kind == b're':
+        try:
+            return remod.compile(pattern, flags)
+        except remod.error as e:
+            raise error.ParseError(
+                _(b'invalid regular expression: %s') % forcebytestr(e)
+            )
+    elif kind == b'literal':
+        return remod.compile(remod.escape(pattern), flags)
+
+    raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind)
+
+
 def shortuser(user):
     """Return a short representation of a user name or email address."""
     f = user.find(b'@')