view contrib/fuzz/dirstate.cc @ 50400:95acba2c29f6

encoding: avoid quadratic time complexity when json-encoding non-UTF8 strings Apparently the code uses "+=" with a bytes object, which is linear-time, so the whole encoding is quadratic-time. This patch makes us use a bytearray object, instead, which has a(n amortized-)constant-time append operation. The encoding is still not particularly fast, but at least a 10MB file takes tens of seconds, not many hours to encode.
author Arseniy Alekseyev <aalekseyev@janestreet.com>
date Mon, 06 Mar 2023 11:27:57 +0000
parents 8766728dbce6
children
line wrap: on
line source

#include <Python.h>
#include <assert.h>
#include <stdlib.h>
#include <unistd.h>

#include <string>

#include "pyutil.h"

extern "C" {

static PYCODETYPE *code;

extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv)
{
	contrib::initpy(*argv[0]);
	code = (PYCODETYPE *)Py_CompileString(R"py(
try:
    dmap = {}
    copymap = {}
    p = parsers.parse_dirstate(dmap, copymap, data)
except Exception as e:
    pass
    # uncomment this print if you're editing this Python code
    # to debug failures.
    # print e
)py",
	                                      "fuzzer", Py_file_input);
	return 0;
}

int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size)
{
	PyObject *text =
	    PyBytes_FromStringAndSize((const char *)Data, (Py_ssize_t)Size);
	PyObject *locals = PyDict_New();
	PyDict_SetItemString(locals, "data", text);
	PyObject *res = PyEval_EvalCode(code, contrib::pyglobals(), locals);
	if (!res) {
		PyErr_Print();
	}
	Py_XDECREF(res);
	Py_DECREF(locals);
	Py_DECREF(text);
	return 0; // Non-zero return values are reserved for future use.
}
}