Mercurial > hg
changeset 43859:8766728dbce6
fuzz: add support for fuzzing under either Python 2 or 3
This was more of a hairball than I hoped, but it appears to work. The hg-py3
branch of my oss-fuzz fork on GitHub has the remaining changes to switch us to
Python 3, but we may as well retain Python 2 fuzzing support for at least a
little while.
Differential Revision: https://phab.mercurial-scm.org/D7592
author | Augie Fackler <augie@google.com> |
---|---|
date | Mon, 09 Dec 2019 22:20:35 -0500 |
parents | b0867b7751ba |
children | 5e0f6451e2d2 |
files | contrib/fuzz/dirs.cc contrib/fuzz/dirstate.cc contrib/fuzz/fm1readmarkers.cc contrib/fuzz/fncache.cc contrib/fuzz/jsonescapeu8fast.cc contrib/fuzz/manifest.cc contrib/fuzz/pyutil.cc contrib/fuzz/pyutil.h contrib/fuzz/revlog.cc |
diffstat | 9 files changed, 64 insertions(+), 52 deletions(-) [+] |
line wrap: on
line diff
--- a/contrib/fuzz/dirs.cc Fri Nov 22 23:43:59 2019 -0500 +++ b/contrib/fuzz/dirs.cc Mon Dec 09 22:20:35 2019 -0500 @@ -9,16 +9,15 @@ extern "C" { -static PyCodeObject *code; +static PYCODETYPE *code; extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) { contrib::initpy(*argv[0]); - code = (PyCodeObject *)Py_CompileString(R"py( -from parsers import dirs + code = (PYCODETYPE *)Py_CompileString(R"py( try: files = mdata.split('\n') - d = dirs(files) + d = parsers.dirs(files) list(d) 'a' in d if files: @@ -29,7 +28,7 @@ # to debug failures. # print e )py", - "fuzzer", Py_file_input); + "fuzzer", Py_file_input); return 0; }
--- a/contrib/fuzz/dirstate.cc Fri Nov 22 23:43:59 2019 -0500 +++ b/contrib/fuzz/dirstate.cc Mon Dec 09 22:20:35 2019 -0500 @@ -9,24 +9,23 @@ extern "C" { -static PyCodeObject *code; +static PYCODETYPE *code; extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) { contrib::initpy(*argv[0]); - code = (PyCodeObject *)Py_CompileString(R"py( -from parsers import parse_dirstate + code = (PYCODETYPE *)Py_CompileString(R"py( try: dmap = {} copymap = {} - p = parse_dirstate(dmap, copymap, data) + p = parsers.parse_dirstate(dmap, copymap, data) except Exception as e: pass # uncomment this print if you're editing this Python code # to debug failures. # print e )py", - "fuzzer", Py_file_input); + "fuzzer", Py_file_input); return 0; }
--- a/contrib/fuzz/fm1readmarkers.cc Fri Nov 22 23:43:59 2019 -0500 +++ b/contrib/fuzz/fm1readmarkers.cc Mon Dec 09 22:20:35 2019 -0500 @@ -9,13 +9,12 @@ extern "C" { -static PyCodeObject *code; +static PYCODETYPE *code; extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) { contrib::initpy(*argv[0]); - code = (PyCodeObject *)Py_CompileString(R"py( -from parsers import fm1readmarkers + code = (PYCODETYPE *)Py_CompileString(R"py( def maybeint(s, default): try: return int(s) @@ -31,14 +30,14 @@ else: offset = stop = 0 offset, stop = maybeint(offset, 0), maybeint(stop, len(data)) - fm1readmarkers(data, offset, stop) + parsers.fm1readmarkers(data, offset, stop) except Exception as e: pass # uncomment this print if you're editing this Python code # to debug failures. # print e )py", - "fuzzer", Py_file_input); + "fuzzer", Py_file_input); return 0; }
--- a/contrib/fuzz/fncache.cc Fri Nov 22 23:43:59 2019 -0500 +++ b/contrib/fuzz/fncache.cc Mon Dec 09 22:20:35 2019 -0500 @@ -10,29 +10,20 @@ extern "C" { -static PyCodeObject *code; +static PYCODETYPE *code; extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) { contrib::initpy(*argv[0]); - code = (PyCodeObject *)Py_CompileString(R"py( -from parsers import ( - isasciistr, - asciilower, - asciiupper, - encodedir, - pathencode, - lowerencode, -) - + code = (PYCODETYPE *)Py_CompileString(R"py( try: for fn in ( - isasciistr, - asciilower, - asciiupper, - encodedir, - pathencode, - lowerencode, + parsers.isasciistr, + parsers.asciilower, + parsers.asciiupper, + parsers.encodedir, + parsers.pathencode, + parsers.lowerencode, ): try: fn(data) @@ -53,7 +44,7 @@ # to debug failures. # print(e) )py", - "fuzzer", Py_file_input); + "fuzzer", Py_file_input); if (!code) { std::cerr << "failed to compile Python code!" << std::endl; }
--- a/contrib/fuzz/jsonescapeu8fast.cc Fri Nov 22 23:43:59 2019 -0500 +++ b/contrib/fuzz/jsonescapeu8fast.cc Mon Dec 09 22:20:35 2019 -0500 @@ -11,23 +11,21 @@ extern "C" { -static PyCodeObject *code; +static PYCODETYPE *code; extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) { contrib::initpy(*argv[0]); - code = (PyCodeObject *)Py_CompileString(R"py( -from parsers import jsonescapeu8fast - + code = (PYCODETYPE *)Py_CompileString(R"py( try: - jsonescapeu8fast(data, paranoid) + parsers.jsonescapeu8fast(data, paranoid) except Exception as e: pass # uncomment this print if you're editing this Python code # to debug failures. # print(e) )py", - "fuzzer", Py_file_input); + "fuzzer", Py_file_input); if (!code) { std::cerr << "failed to compile Python code!" << std::endl; }
--- a/contrib/fuzz/manifest.cc Fri Nov 22 23:43:59 2019 -0500 +++ b/contrib/fuzz/manifest.cc Mon Dec 09 22:20:35 2019 -0500 @@ -9,15 +9,14 @@ extern "C" { -static PyCodeObject *code; +static PYCODETYPE *code; extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) { contrib::initpy(*argv[0]); - code = (PyCodeObject *)Py_CompileString(R"py( -from parsers import lazymanifest + code = (PYCODETYPE *)Py_CompileString(R"py( try: - lm = lazymanifest(mdata) + lm = parsers.lazymanifest(mdata) # iterate the whole thing, which causes the code to fully parse # every line in the manifest for e, _, _ in lm.iterentries(): @@ -41,7 +40,7 @@ # to debug failures. # print e )py", - "fuzzer", Py_file_input); + "fuzzer", Py_file_input); return 0; }
--- a/contrib/fuzz/pyutil.cc Fri Nov 22 23:43:59 2019 -0500 +++ b/contrib/fuzz/pyutil.cc Mon Dec 09 22:20:35 2019 -0500 @@ -6,17 +6,26 @@ namespace contrib { +#if PY_MAJOR_VERSION >= 3 +#define HG_FUZZER_PY3 1 +PyMODINIT_FUNC PyInit_parsers(void); +#else +PyMODINIT_FUNC initparsers(void); +#endif + static char cpypath[8192] = "\0"; static PyObject *mainmod; static PyObject *globals; -/* TODO: use Python 3 for this fuzzing? */ -PyMODINIT_FUNC initparsers(void); - void initpy(const char *cselfpath) { +#ifdef HG_FUZZER_PY3 + const std::string subdir = "/sanpy/lib/python3.7"; +#else const std::string subdir = "/sanpy/lib/python2.7"; +#endif + /* HACK ALERT: we need a full Python installation built without pymalloc and with ASAN, so we dump one in $OUT/sanpy/lib/python2.7. This helps us wire that up. */ @@ -39,11 +48,24 @@ setenv("PYTHONNOUSERSITE", "1", 1); /* prevent Python from looking up users in the fuzz environment */ setenv("PYTHONUSERBASE", cpypath, 1); +#ifdef HG_FUZZER_PY3 + std::wstring wcpypath(pypath.begin(), pypath.end()); + Py_SetPythonHome(wcpypath.c_str()); +#else Py_SetPythonHome(cpypath); +#endif Py_InitializeEx(0); mainmod = PyImport_AddModule("__main__"); globals = PyModule_GetDict(mainmod); + +#ifdef HG_FUZZER_PY3 + PyObject *mod = PyInit_parsers(); +#else initparsers(); + PyObject *mod = PyImport_ImportModule("parsers"); +#endif + + PyDict_SetItemString(globals, "parsers", mod); } PyObject *pyglobals()
--- a/contrib/fuzz/pyutil.h Fri Nov 22 23:43:59 2019 -0500 +++ b/contrib/fuzz/pyutil.h Mon Dec 09 22:20:35 2019 -0500 @@ -1,5 +1,11 @@ #include <Python.h> +#if PY_MAJOR_VERSION >= 3 +#define PYCODETYPE PyObject +#else +#define PYCODETYPE PyCodeObject +#endif + namespace contrib {
--- a/contrib/fuzz/revlog.cc Fri Nov 22 23:43:59 2019 -0500 +++ b/contrib/fuzz/revlog.cc Mon Dec 09 22:20:35 2019 -0500 @@ -9,16 +9,15 @@ extern "C" { -static PyCodeObject *code; +static PYCODETYPE *code; extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) { contrib::initpy(*argv[0]); - code = (PyCodeObject *)Py_CompileString(R"py( -from parsers import parse_index2 + code = (PYCODETYPE *)Py_CompileString(R"py( for inline in (True, False): try: - index, cache = parse_index2(data, inline) + index, cache = parsers.parse_index2(data, inline) index.slicechunktodensity(list(range(len(index))), 0.5, 262144) index.stats() index.findsnapshots({}, 0) @@ -35,7 +34,7 @@ # to debug failures. # print e )py", - "fuzzer", Py_file_input); + "fuzzer", Py_file_input); return 0; }