comparison mercurial/util.py @ 46802:112826b59476 stable

re2: feed unicode string to re2 module when necessary My previous tests were using the `pyre2` Python project, which wraps the Google RE2 library in Python as a `re2` module and accepts bytes as input. However, the `fb-re2` Python project also offers a wrapping of the Google RE2 library in Python as a `re2` module, and it accepts only unicode on Python 3. So we detect this situation and convert things to unicode when necessary. Hooray… We should consider using a Rust wrapping for regexp handling. We need regexps in Rust anyway, and this gives us more control with fewer variants and more sanity. Differential Revision: https://phab.mercurial-scm.org/D10284
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Fri, 26 Mar 2021 18:37:25 +0100
parents ebc6d3484fdd
children 856820b497fc
comparison
equal deleted inserted replaced
46793:86b47ec1960a 46802:112826b59476
2155 return True 2155 return True
2156 except OSError: 2156 except OSError:
2157 return True 2157 return True
2158 2158
2159 2159
2160 _re2_input = lambda x: x
2160 try: 2161 try:
2161 import re2 # pytype: disable=import-error 2162 import re2 # pytype: disable=import-error
2162 2163
2163 _re2 = None 2164 _re2 = None
2164 except ImportError: 2165 except ImportError:
2166 2167
2167 2168
2168 class _re(object): 2169 class _re(object):
2169 def _checkre2(self): 2170 def _checkre2(self):
2170 global _re2 2171 global _re2
2172 global _re2_input
2171 try: 2173 try:
2172 # check if match works, see issue3964 2174 # check if match works, see issue3964
2173 _re2 = bool(re2.match(br'\[([^\[]+)\]', b'[ui]')) 2175 check_pattern = br'\[([^\[]+)\]'
2176 check_input = b'[ui]'
2177 _re2 = bool(re2.match(check_pattern, check_input))
2174 except ImportError: 2178 except ImportError:
2175 _re2 = False 2179 _re2 = False
2180 except TypeError:
2181 # the `pyre-2` project provides a re2 module that accept bytes
2182 # the `fb-re2` project provides a re2 module that acccept sysstr
2183 check_pattern = pycompat.sysstr(check_pattern)
2184 check_input = pycompat.sysstr(check_input)
2185 _re2 = bool(re2.match(check_pattern, check_input))
2186 _re2_input = pycompat.sysstr
2176 2187
2177 def compile(self, pat, flags=0): 2188 def compile(self, pat, flags=0):
2178 """Compile a regular expression, using re2 if possible 2189 """Compile a regular expression, using re2 if possible
2179 2190
2180 For best performance, use only re2-compatible regexp features. The 2191 For best performance, use only re2-compatible regexp features. The
2186 if flags & remod.IGNORECASE: 2197 if flags & remod.IGNORECASE:
2187 pat = b'(?i)' + pat 2198 pat = b'(?i)' + pat
2188 if flags & remod.MULTILINE: 2199 if flags & remod.MULTILINE:
2189 pat = b'(?m)' + pat 2200 pat = b'(?m)' + pat
2190 try: 2201 try:
2191 return re2.compile(pat) 2202 return re2.compile(_re2_input(pat))
2192 except re2.error: 2203 except re2.error:
2193 pass 2204 pass
2194 return remod.compile(pat, flags) 2205 return remod.compile(pat, flags)
2195 2206
2196 @propertycache 2207 @propertycache