Mercurial > hg
view tests/test-minifileset.py @ 50400:95acba2c29f6
encoding: avoid quadratic time complexity when json-encoding non-UTF8 strings
Apparently the code uses "+=" with a bytes object, which is linear-time, so the
whole encoding is quadratic-time. This patch switches to a bytearray object
instead, which has an amortized constant-time append operation.
The encoding is still not particularly fast, but at least a 10MB file
takes tens of seconds, rather than many hours, to encode.
author | Arseniy Alekseyev <aalekseyev@janestreet.com> |
---|---|
date | Mon, 06 Mar 2023 11:27:57 +0000 |
parents | 6000f5b25c9b |
children |
line wrap: on
line source
from mercurial import minifileset


def check(text, truecases, falsecases):
    """Compile the fileset expression ``text`` and report misclassified cases.

    Every entry of ``truecases`` and ``falsecases`` is a tuple of positional
    arguments that is splatted into the compiled matcher (in this file they
    are all ``(path, size)`` pairs).

    One line is printed for each true case the matcher rejects and for each
    false case it accepts.  Nothing is printed when every expectation holds.
    """
    matcher = minifileset.compile(text)
    # True cases are walked before false cases so complaints come out in the
    # same order regardless of which group they belong to.
    expectations = (
        (truecases, True, 'unexpected: %r should include %r'),
        (falsecases, False, 'unexpected: %r should exclude %r'),
    )
    for cases, wanted, complaint in expectations:
        for args in cases:
            if bool(matcher(*args)) is not wanted:
                print(complaint % (text, args))


# Trivial predicates: all() is expected to accept every case, none() to
# reject every case.
check(b'all()', [(b'a.php', 123), (b'b.txt', 0)], [])
check(b'none()', [], [(b'a.php', 123), (b'b.txt', 0)])

# Seven stacked negations of all(): an odd count, so nothing should match.
check(b'!!!!((!(!!all())))', [], [(b'a.php', 123), (b'b.txt', 0)])

# Grouping of '&' and '|' with path and glob patterns.
check(
    b'"path:a" & (**.b | **.c)', [(b'a/b.b', 0), (b'a/c.c', 0)], [(b'b/c.c', 0)]
)
check(
    b'(path:a & **.b) | **.c', [(b'a/b.b', 0), (b'a/c.c', 0), (b'b/c.c', 0)], []
)

# Subtraction combined with a size() predicate.
check(
    b'**.bin - size("<20B")', [(b'b.bin', 21)], [(b'a.bin', 11), (b'b.txt', 21)]
)

# Symbolic ('!', '+') and keyword ('or') operators mixed in one expression.
check(
    b'!!**.bin or size(">20B") + "path:bin" or !size(">10")',
    [(b'a.bin', 11), (b'b.txt', 21), (b'bin/abc', 11)],
    [(b'a.notbin', 11), (b'b.txt', 11), (b'bin2/abc', 11)],
)

# A longer expression, written as two adjacent bytes literals, using size
# unit suffixes (KB, M) together with path inclusion and exclusion.
check(
    b'(**.php and size(">10KB")) | **.zip | ("path:bin" & !"path:bin/README") '
    b' | size(">1M")',
    [(b'a.php', 15000), (b'a.zip', 0), (b'bin/a', 0), (b'bin/README', 1e7)],
    [(b'a.php', 5000), (b'b.zip2', 0), (b't/bin/a', 0), (b'bin/README', 1)],
)