# HG changeset patch
# User Arseniy Alekseyev <aalekseyev@janestreet.com>
# Date 1678102077 0
# Node ID 95acba2c29f6e90a7c19da0585a7b52efb416082
# Parent  bcf54837241d2efa0d4e0b727d7fe9b6433e3448
encoding: avoid quadratic time complexity when json-encoding non-UTF8 strings

Apparently the code uses "+=" with a bytes object, which is linear-time, so the
whole encoding is quadratic-time. This patch makes us use a bytearray object,
instead, which has a(n amortized-)constant-time append operation.

The encoding is still not particularly fast, but at least a 10MB file
takes tens of seconds, not many hours to encode.

diff -r bcf54837241d -r 95acba2c29f6 mercurial/encoding.py
--- a/mercurial/encoding.py	Wed Mar 08 11:01:11 2023 +0100
+++ b/mercurial/encoding.py	Mon Mar 06 11:27:57 2023 +0000
@@ -657,7 +657,7 @@
             pass
 
     s = pycompat.bytestr(s)
-    r = b""
+    r = bytearray()
     pos = 0
     l = len(s)
     while pos < l:
@@ -673,7 +673,7 @@
             c = unichr(0xDC00 + ord(s[pos])).encode('utf-8', _utf8strict)
             pos += 1
         r += c
-    return r
+    return bytes(r)
 
 
 def fromutf8b(s):
@@ -712,7 +712,7 @@
     # helper again to walk the string without "decoding" it.
 
     s = pycompat.bytestr(s)
-    r = b""
+    r = bytearray()
     pos = 0
     l = len(s)
     while pos < l:
@@ -722,4 +722,4 @@
         if b"\xed\xb0\x80" <= c <= b"\xed\xb3\xbf":
             c = pycompat.bytechr(ord(c.decode("utf-8", _utf8strict)) & 0xFF)
         r += c
-    return r
+    return bytes(r)