Mercurial > hg
changeset 46802:112826b59476 stable
re2: feed unicode string to re2 module when necessary
My previous tests were using the `pyre2` Python project, which wraps the Google RE2 library
in Python as a `re2` module and accepts bytes as input. However, the `fb-re2`
Python project also offers a wrapping of the Google RE2 library in Python
as a `re2` module and accepts only unicode on Python 3. So we detect this
situation and convert things to unicode when necessary.
Hooray…
We should consider using a Rust wrapping for regexp handling. We need regexps
in Rust anyway, and this would give us more control with fewer variants and more
sanity.
Differential Revision: https://phab.mercurial-scm.org/D10284
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Fri, 26 Mar 2021 18:37:25 +0100 |
parents | 86b47ec1960a |
children | bc268ea9f984 |
files | mercurial/util.py |
diffstat | 1 files changed, 13 insertions(+), 2 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/util.py Sat Mar 13 02:09:23 2021 -0500 +++ b/mercurial/util.py Fri Mar 26 18:37:25 2021 +0100 @@ -2157,6 +2157,7 @@ return True +_re2_input = lambda x: x try: import re2 # pytype: disable=import-error @@ -2168,11 +2169,21 @@ class _re(object): def _checkre2(self): global _re2 + global _re2_input try: # check if match works, see issue3964 - _re2 = bool(re2.match(br'\[([^\[]+)\]', b'[ui]')) + check_pattern = br'\[([^\[]+)\]' + check_input = b'[ui]' + _re2 = bool(re2.match(check_pattern, check_input)) except ImportError: _re2 = False + except TypeError: + # the `pyre-2` project provides a re2 module that accept bytes + # the `fb-re2` project provides a re2 module that acccept sysstr + check_pattern = pycompat.sysstr(check_pattern) + check_input = pycompat.sysstr(check_input) + _re2 = bool(re2.match(check_pattern, check_input)) + _re2_input = pycompat.sysstr def compile(self, pat, flags=0): """Compile a regular expression, using re2 if possible @@ -2188,7 +2199,7 @@ if flags & remod.MULTILINE: pat = b'(?m)' + pat try: - return re2.compile(pat) + return re2.compile(_re2_input(pat)) except re2.error: pass return remod.compile(pat, flags)