# HG changeset patch # User Pierre-Yves David # Date 1616780245 -3600 # Node ID 112826b5947685d7554ec7bb8e445f696a5e089a # Parent 86b47ec1960abd22143665bcd84f5d28390f70aa re2: feed unicode string to re2 module when necessary My previous tests were using the `pyre2` Python project, which wraps the Google RE2 library in python as a `re2` module and accepts bytes as input. However the `fb-re2` Python project is also offering a wrapping of the Google RE2 library in python as a `re2` module and accepts only unicode on python3. So we detect this situation and convert things to unicode when necessary. Hooray… We should consider using a rust wrapping for regexp handling. We need regexps in Rust anyway and this gives us more control with fewer variants and more sanity. Differential Revision: https://phab.mercurial-scm.org/D10284 diff -r 86b47ec1960a -r 112826b59476 mercurial/util.py --- a/mercurial/util.py Sat Mar 13 02:09:23 2021 -0500 +++ b/mercurial/util.py Fri Mar 26 18:37:25 2021 +0100 @@ -2157,6 +2157,7 @@ return True +_re2_input = lambda x: x try: import re2 # pytype: disable=import-error @@ -2168,11 +2169,21 @@ class _re(object): def _checkre2(self): global _re2 + global _re2_input try: # check if match works, see issue3964 - _re2 = bool(re2.match(br'\[([^\[]+)\]', b'[ui]')) + check_pattern = br'\[([^\[]+)\]' + check_input = b'[ui]' + _re2 = bool(re2.match(check_pattern, check_input)) except ImportError: _re2 = False + except TypeError: + # the `pyre-2` project provides a re2 module that accept bytes + # the `fb-re2` project provides a re2 module that acccept sysstr + check_pattern = pycompat.sysstr(check_pattern) + check_input = pycompat.sysstr(check_input) + _re2 = bool(re2.match(check_pattern, check_input)) + _re2_input = pycompat.sysstr def compile(self, pat, flags=0): """Compile a regular expression, using re2 if possible @@ -2188,7 +2199,7 @@ if flags & remod.MULTILINE: pat = b'(?m)' + pat try: - return re2.compile(pat) + return re2.compile(_re2_input(pat)) except 
re2.error: pass return remod.compile(pat, flags)