encoding: add fast path to fromutf8b()/toutf8b() for ASCII strings
See the previous patch for why.
The added test may not seem to make much sense, because ASCII strings
never contain "\xed" and are always valid UTF-8.
(with mercurial repo)
$ export HGRCPATH=/dev/null HGPLAIN=
$ hg log --time --config experimental.stabilization=all -Tjson > /dev/null
(original)
time: real 6.830 secs (user 6.740+0.000 sys 0.080+0.000)
time: real 6.690 secs (user 6.650+0.000 sys 0.040+0.000)
time: real 6.700 secs (user 6.640+0.000 sys 0.060+0.000)
(fast jsonescape)
time: real 5.630 secs (user 5.550+0.000 sys 0.070+0.000)
time: real 5.700 secs (user 5.650+0.000 sys 0.050+0.000)
time: real 5.690 secs (user 5.640+0.000 sys 0.050+0.000)
(this patch)
time: real 5.190 secs (user 5.120+0.000 sys 0.070+0.000)
time: real 5.230 secs (user 5.170+0.000 sys 0.050+0.000)
time: real 5.220 secs (user 5.150+0.000 sys 0.070+0.000)
--- a/mercurial/encoding.py Sun Apr 23 13:06:23 2017 +0900
+++ b/mercurial/encoding.py Sun Apr 23 13:08:58 2017 +0900
@@ -494,6 +494,8 @@
internal surrogate encoding as a UTF-8 string.)
'''
+ if not isinstance(s, localstr) and isasciistr(s):
+ return s
if "\xed" not in s:
if isinstance(s, localstr):
return s._utf8
@@ -544,6 +546,8 @@
True
'''
+ if isasciistr(s):
+ return s
# fast path - look for uDxxx prefixes in s
if "\xed" not in s:
return s
--- a/tests/test-encoding-func.py Sun Apr 23 13:06:23 2017 +0900
+++ b/tests/test-encoding-func.py Sun Apr 23 13:08:58 2017 +0900
@@ -34,6 +34,12 @@
self.assertTrue(s is encoding.tolocal(s))
self.assertTrue(s is encoding.fromlocal(s))
+class Utf8bEncodingTest(unittest.TestCase):
+ def testasciifastpath(self):
+ s = b'\0' * 100
+ self.assertTrue(s is encoding.toutf8b(s))
+ self.assertTrue(s is encoding.fromutf8b(s))
+
if __name__ == '__main__':
import silenttestrunner
silenttestrunner.main(__name__)