view contrib/fuzz/dirs_corpus.py @ 50400:95acba2c29f6

encoding: avoid quadratic time complexity when json-encoding non-UTF8 strings. Apparently the code uses "+=" with a bytes object, which is linear-time, so the whole encoding is quadratic-time. This patch makes us use a bytearray object instead, which has an (amortized) constant-time append operation. The encoding is still not particularly fast, but at least a 10MB file takes tens of seconds, not many hours, to encode.
author Arseniy Alekseyev <aalekseyev@janestreet.com>
date Mon, 06 Mar 2023 11:27:57 +0000
parents 6000f5b25c9b
children
line wrap: on
line source

import argparse
import zipfile

# Command line: exactly one positional argument, the path of the zip archive
# to create.
parser = argparse.ArgumentParser()
parser.add_argument("out", metavar="some.zip", type=str, nargs=1)
args = parser.parse_args()

# Paths stored in the "greek-tree" archive member, one per line.
greek_tree_paths = (
    "iota",
    "A/mu",
    "A/B/lambda",
    "A/B/E/alpha",
    "A/B/E/beta",
    "A/D/gamma",
    "A/D/G/pi",
    "A/D/G/rho",
    "A/D/G/tau",
    "A/D/H/chi",
    "A/D/H/omega",
    "A/D/H/psi",
)

# nargs=1 makes argparse hand back a one-element list; unpack the single path.
(out_path,) = args.out

# Write one uncompressed member whose content is the paths joined by "\n"
# (no trailing newline after the last path).
with zipfile.ZipFile(
    out_path, mode="w", compression=zipfile.ZIP_STORED
) as archive:
    archive.writestr("greek-tree", "\n".join(greek_tree_paths))