Mercurial > hg
view contrib/fuzz/fm1readmarkers_corpus.py @ 50400:95acba2c29f6
encoding: avoid quadratic time complexity when json-encoding non-UTF8 strings
Apparently the code uses "+=" with a bytes object, which is linear-time, so the
whole encoding is quadratic-time. This patch makes us use a bytearray object,
instead, which has a(n amortized-)constant-time append operation.
The encoding is still not particularly fast, but at least a 10MB file
takes tens of seconds, not many hours to encode.
author | Arseniy Alekseyev <aalekseyev@janestreet.com> |
---|---|
date | Mon, 06 Mar 2023 11:27:57 +0000 |
parents | 6000f5b25c9b |
children |
line wrap: on
line source
import argparse import zipfile ap = argparse.ArgumentParser() ap.add_argument("out", metavar="some.zip", type=str, nargs=1) args = ap.parse_args() with zipfile.ZipFile(args.out[0], "w", zipfile.ZIP_STORED) as zf: zf.writestr( 'smallish_obsstore', ( # header: fm1readmarkers should start at offset 1, and # read until byte 597. '1\x00597\x00' # body of obsstore file '\x01\x00\x00\x00vA\xd7\x02+C\x1a<)\x01,\x00\x00\x01\x03\x03\xe6' '\x92\xde)x\x16\xd1Xph\xc7\xa7[\xe5\xe2\x1a\xab\x1e6e\xaf\xc2\xae' '\xe7\xbc\x83\xe1\x88\xa5\xda\xce>O\xbd\x04\xe9\x03\xc4o\xeb\x03' '\x01\t\x05\x04\x1fef18operationamenduserAugie Fackler <raf@duri' 'n42.com>\x00\x00\x00vA\xd7\x02-\x8aD\xaf-\x01,\x00\x00\x01\x03\x03' '\x17*\xca\x8f\x9e}i\xe0i\xbb\xdf\x9fb\x03\xd2XG?\xd3h\x98\x89\x1a' '=2\xeb\xc3\xc5<\xb3\x9e\xcc\x0e;#\xee\xc3\x10ux\x03\x01\t\x05\x04' '\x1fef18operationamenduserAugie Fackler <raf@durin42.com>\x00\x00' '\x00vA\xd7\x02Mn\xd9%\xea\x01,\x00\x00\x01\x03\x03\x98\x89\x1a=' '2\xeb\xc3\xc5<\xb3\x9e\xcc\x0e;#\xee\xc3\x10ux\xe0*\xcaT\x86Z8J' '\x85)\x97\xff7\xcc)\xc1\x7f\x19\x0c\x01\x03\x01\t\x05\x04\x1fef' '18operationamenduserAugie Fackler <raf@durin42.com>\x00\x00\x00' 'yA\xd7\x02MtA\xbfj\x01,\x00\x00\x01\x03\x03\xe0*\xcaT\x86Z8J\x85' ')\x97\xff7\xcc)\xc1\x7f\x19\x0c\x01\x00\x94\x01\xa9\n\xf80\x92\xa3' 'j\xc5X\xb1\xc9:\xd51\xb8*\xa9\x03\x01\t\x08\x04\x1fef11operatio' 'nhistedituserAugie Fackler <raf@durin42.com>\x00\x00\x00yA\xd7\x02' 'MtA\xd4\xe1\x01,\x00\x00\x01\x03\x03"\xa5\xcb\x86\xb6\xf4\xbaO\xa0' 'sH\xe7?\xcb\x9b\xc2n\xcfI\x9e\x14\xf0D\xf0!\x18DN\xcd\x97\x016\xa5' '\xef\xa06\xcb\x884\x8a\x03\x01\t\x08\x04\x1fef14operationhisted' ), )