view contrib/fuzz/pyutil.cc @ 50400:95acba2c29f6

encoding: avoid quadratic time complexity when json-encoding non-UTF8 strings Apparently the code uses "+=" with a bytes object, which is linear-time, so the whole encoding is quadratic-time. This patch makes us use a bytearray object, instead, which has a(n amortized-)constant-time append operation. The encoding is still not particularly fast, but at least a 10MB file takes tens of seconds, not many hours to encode.
author Arseniy Alekseyev <aalekseyev@janestreet.com>
date Mon, 06 Mar 2023 11:27:57 +0000
parents ee5f27d7b9fb
children
line wrap: on
line source

#include "pyutil.h"

#include <iostream>
#include <string>

namespace contrib
{

#if PY_MAJOR_VERSION >= 3
#define HG_FUZZER_PY3 1
PyMODINIT_FUNC PyInit_parsers(void);
#else
PyMODINIT_FUNC initparsers(void);
#endif

static char cpypath[8192] = "\0";

static PyObject *mainmod;
static PyObject *globals;

void initpy(const char *cselfpath)
{
#ifdef HG_FUZZER_PY3
	const std::string subdir = "/sanpy/lib/python3.8";
#else
	const std::string subdir = "/sanpy/lib/python2.7";
#endif

	/* HACK ALERT: we need a full Python installation built without
	   pymalloc and with ASAN, so we dump one in
	   $OUT/sanpy/lib/python2.7. This helps us wire that up. */
	std::string selfpath(cselfpath);
	std::string pypath;
	auto pos = selfpath.rfind("/");
	if (pos == std::string::npos) {
		char wd[8192];
		if (!getcwd(wd, 8192)) {
			std::cerr << "Failed to call getcwd: errno " << errno
			          << std::endl;
			exit(1);
		}
		pypath = std::string(wd) + subdir;
	} else {
		pypath = selfpath.substr(0, pos) + subdir;
	}
	strncpy(cpypath, pypath.c_str(), pypath.size());
	setenv("PYTHONPATH", cpypath, 1);
	setenv("PYTHONNOUSERSITE", "1", 1);
	/* prevent Python from looking up users in the fuzz environment */
	setenv("PYTHONUSERBASE", cpypath, 1);
#ifdef HG_FUZZER_PY3
	std::wstring wcpypath(pypath.begin(), pypath.end());
	Py_SetPythonHome(wcpypath.c_str());
#else
	Py_SetPythonHome(cpypath);
#endif
	Py_InitializeEx(0);
	mainmod = PyImport_AddModule("__main__");
	globals = PyModule_GetDict(mainmod);

#ifdef HG_FUZZER_PY3
	PyObject *mod = PyInit_parsers();
#else
	initparsers();
	PyObject *mod = PyImport_ImportModule("parsers");
#endif

	PyDict_SetItemString(globals, "parsers", mod);
}

PyObject *pyglobals()
{
	return globals;
}

} // namespace contrib