re2: feed unicode string to re2 module when necessary stable
authorPierre-Yves David <pierre-yves.david@octobus.net>
Fri, 26 Mar 2021 18:37:25 +0100
branchstable
changeset 46802 112826b59476
parent 46793 86b47ec1960a
child 46810 bc268ea9f984
re2: feed unicode string to re2 module when necessary My previous test were using the `pyre2` Python project, that wrap the Google RE2 library in python as a `re2` module and accept bytes as input. However the `fb-re2` Python project is also offering a wrapping of the Google RE2 library in python as a `re2` module ans accept only unicode on python3. So we detect this situation and convert thing to unicode when necessary. Hooray… We should consider using a rust wrapping for regexp handling. We needs regexps in Rust anyway and this give use more control with less variants and more sanity. Differential Revision: https://phab.mercurial-scm.org/D10284
mercurial/util.py
--- a/mercurial/util.py	Sat Mar 13 02:09:23 2021 -0500
+++ b/mercurial/util.py	Fri Mar 26 18:37:25 2021 +0100
@@ -2157,6 +2157,7 @@
         return True
 
 
+_re2_input = lambda x: x
 try:
     import re2  # pytype: disable=import-error
 
@@ -2168,11 +2169,21 @@
 class _re(object):
     def _checkre2(self):
         global _re2
+        global _re2_input
         try:
             # check if match works, see issue3964
-            _re2 = bool(re2.match(br'\[([^\[]+)\]', b'[ui]'))
+            check_pattern = br'\[([^\[]+)\]'
+            check_input = b'[ui]'
+            _re2 = bool(re2.match(check_pattern, check_input))
         except ImportError:
             _re2 = False
+        except TypeError:
+            # the `pyre-2` project provides a re2 module that accept bytes
+            # the `fb-re2` project provides a re2 module that acccept sysstr
+            check_pattern = pycompat.sysstr(check_pattern)
+            check_input = pycompat.sysstr(check_input)
+            _re2 = bool(re2.match(check_pattern, check_input))
+            _re2_input = pycompat.sysstr
 
     def compile(self, pat, flags=0):
         """Compile a regular expression, using re2 if possible
@@ -2188,7 +2199,7 @@
             if flags & remod.MULTILINE:
                 pat = b'(?m)' + pat
             try:
-                return re2.compile(pat)
+                return re2.compile(_re2_input(pat))
             except re2.error:
                 pass
         return remod.compile(pat, flags)