changeset 40684:e6c9ef5e11a0

match: provide and use a quick way to escape a single byte. The previous function has a lot of overhead (including the cost of the function call itself). In the `_globre` case, we always escape a single byte, so we provide a dictionary dedicated to this use case. We use the dictionary directly to avoid a function call, since function calls are expensive in Python. Again, this yields a very significant performance gain: Before: ! wall 0.059793 comb 0.060000 user 0.060000 sys 0.000000 (median of 100) After: ! wall 0.020390 comb 0.020000 user 0.020000 sys 0.000000 (median of 146) Total improvement for the full series: Before: ! wall 0.153153 comb 0.150000 user 0.150000 sys 0.000000 (median of 66) After: ! wall 0.020390 comb 0.020000 user 0.020000 sys 0.000000 (median of 146)
author Boris Feld <boris.feld@octobus.net>
date Mon, 19 Nov 2018 18:54:44 +0000
parents d7936a9dad47
children e41f6c2e69c4
files mercurial/match.py mercurial/utils/stringutil.py
diffstat 2 files changed, 6 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/match.py	Mon Nov 19 16:50:21 2018 +0000
+++ b/mercurial/match.py	Mon Nov 19 18:54:44 2018 +0000
@@ -1057,14 +1057,14 @@
     i, n = 0, len(pat)
     res = ''
     group = 0
-    escape = util.stringutil.reescape
+    escape = util.stringutil.regexbytesescapemap.get
     def peek():
         return i < n and pat[i:i + 1]
     while i < n:
         c = pat[i:i + 1]
         i += 1
         if c not in '*?[{},\\':
-            res += escape(c)
+            res += escape(c, c)
         elif c == '*':
             if peek() == '*':
                 i += 1
@@ -1105,11 +1105,11 @@
             p = peek()
             if p:
                 i += 1
-                res += escape(p)
+                res += escape(p, p)
             else:
-                res += escape(c)
+                res += escape(c, c)
         else:
-            res += escape(c)
+            res += escape(c, c)
     return res
 
 def _regex(kind, pat, globsuffix):
--- a/mercurial/utils/stringutil.py	Mon Nov 19 16:50:21 2018 +0000
+++ b/mercurial/utils/stringutil.py	Mon Nov 19 18:54:44 2018 +0000
@@ -28,6 +28,7 @@
 # which was part of Python 3.7.
 _respecial = pycompat.bytestr(b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f')
 _regexescapemap = {ord(i): (b'\\' + i).decode('latin1') for i in _respecial}
+regexbytesescapemap = {i: (b'\\' + i) for i in _respecial}
 
 def reescape(pat):
     """Drop-in replacement for re.escape."""