fuzz: add support for fuzzing under either Python 2 or 3
This was more of a hairball than I hoped, but it appears to work. The hg-py3
branch of my oss-fuzz fork on GitHub has the remaining changes to switch us to
Python 3, but we may as well retain Python 2 fuzzing support for at least a
little while.
Differential Revision: https://phab.mercurial-scm.org/D7592
--- a/contrib/fuzz/dirs.cc Fri Nov 22 23:43:59 2019 -0500
+++ b/contrib/fuzz/dirs.cc Mon Dec 09 22:20:35 2019 -0500
@@ -9,16 +9,15 @@
extern "C" {
-static PyCodeObject *code;
+static PYCODETYPE *code;
extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv)
{
contrib::initpy(*argv[0]);
- code = (PyCodeObject *)Py_CompileString(R"py(
-from parsers import dirs
+ code = (PYCODETYPE *)Py_CompileString(R"py(
try:
files = mdata.split('\n')
- d = dirs(files)
+ d = parsers.dirs(files)
list(d)
'a' in d
if files:
@@ -29,7 +28,7 @@
# to debug failures.
# print e
)py",
- "fuzzer", Py_file_input);
+ "fuzzer", Py_file_input);
return 0;
}
--- a/contrib/fuzz/dirstate.cc Fri Nov 22 23:43:59 2019 -0500
+++ b/contrib/fuzz/dirstate.cc Mon Dec 09 22:20:35 2019 -0500
@@ -9,24 +9,23 @@
extern "C" {
-static PyCodeObject *code;
+static PYCODETYPE *code;
extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv)
{
contrib::initpy(*argv[0]);
- code = (PyCodeObject *)Py_CompileString(R"py(
-from parsers import parse_dirstate
+ code = (PYCODETYPE *)Py_CompileString(R"py(
try:
dmap = {}
copymap = {}
- p = parse_dirstate(dmap, copymap, data)
+ p = parsers.parse_dirstate(dmap, copymap, data)
except Exception as e:
pass
# uncomment this print if you're editing this Python code
# to debug failures.
# print e
)py",
- "fuzzer", Py_file_input);
+ "fuzzer", Py_file_input);
return 0;
}
--- a/contrib/fuzz/fm1readmarkers.cc Fri Nov 22 23:43:59 2019 -0500
+++ b/contrib/fuzz/fm1readmarkers.cc Mon Dec 09 22:20:35 2019 -0500
@@ -9,13 +9,12 @@
extern "C" {
-static PyCodeObject *code;
+static PYCODETYPE *code;
extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv)
{
contrib::initpy(*argv[0]);
- code = (PyCodeObject *)Py_CompileString(R"py(
-from parsers import fm1readmarkers
+ code = (PYCODETYPE *)Py_CompileString(R"py(
def maybeint(s, default):
try:
return int(s)
@@ -31,14 +30,14 @@
else:
offset = stop = 0
offset, stop = maybeint(offset, 0), maybeint(stop, len(data))
- fm1readmarkers(data, offset, stop)
+ parsers.fm1readmarkers(data, offset, stop)
except Exception as e:
pass
# uncomment this print if you're editing this Python code
# to debug failures.
# print e
)py",
- "fuzzer", Py_file_input);
+ "fuzzer", Py_file_input);
return 0;
}
--- a/contrib/fuzz/fncache.cc Fri Nov 22 23:43:59 2019 -0500
+++ b/contrib/fuzz/fncache.cc Mon Dec 09 22:20:35 2019 -0500
@@ -10,29 +10,20 @@
extern "C" {
-static PyCodeObject *code;
+static PYCODETYPE *code;
extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv)
{
contrib::initpy(*argv[0]);
- code = (PyCodeObject *)Py_CompileString(R"py(
-from parsers import (
- isasciistr,
- asciilower,
- asciiupper,
- encodedir,
- pathencode,
- lowerencode,
-)
-
+ code = (PYCODETYPE *)Py_CompileString(R"py(
try:
for fn in (
- isasciistr,
- asciilower,
- asciiupper,
- encodedir,
- pathencode,
- lowerencode,
+ parsers.isasciistr,
+ parsers.asciilower,
+ parsers.asciiupper,
+ parsers.encodedir,
+ parsers.pathencode,
+ parsers.lowerencode,
):
try:
fn(data)
@@ -53,7 +44,7 @@
# to debug failures.
# print(e)
)py",
- "fuzzer", Py_file_input);
+ "fuzzer", Py_file_input);
if (!code) {
std::cerr << "failed to compile Python code!" << std::endl;
}
--- a/contrib/fuzz/jsonescapeu8fast.cc Fri Nov 22 23:43:59 2019 -0500
+++ b/contrib/fuzz/jsonescapeu8fast.cc Mon Dec 09 22:20:35 2019 -0500
@@ -11,23 +11,21 @@
extern "C" {
-static PyCodeObject *code;
+static PYCODETYPE *code;
extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv)
{
contrib::initpy(*argv[0]);
- code = (PyCodeObject *)Py_CompileString(R"py(
-from parsers import jsonescapeu8fast
-
+ code = (PYCODETYPE *)Py_CompileString(R"py(
try:
- jsonescapeu8fast(data, paranoid)
+ parsers.jsonescapeu8fast(data, paranoid)
except Exception as e:
pass
# uncomment this print if you're editing this Python code
# to debug failures.
# print(e)
)py",
- "fuzzer", Py_file_input);
+ "fuzzer", Py_file_input);
if (!code) {
std::cerr << "failed to compile Python code!" << std::endl;
}
--- a/contrib/fuzz/manifest.cc Fri Nov 22 23:43:59 2019 -0500
+++ b/contrib/fuzz/manifest.cc Mon Dec 09 22:20:35 2019 -0500
@@ -9,15 +9,14 @@
extern "C" {
-static PyCodeObject *code;
+static PYCODETYPE *code;
extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv)
{
contrib::initpy(*argv[0]);
- code = (PyCodeObject *)Py_CompileString(R"py(
-from parsers import lazymanifest
+ code = (PYCODETYPE *)Py_CompileString(R"py(
try:
- lm = lazymanifest(mdata)
+ lm = parsers.lazymanifest(mdata)
# iterate the whole thing, which causes the code to fully parse
# every line in the manifest
for e, _, _ in lm.iterentries():
@@ -41,7 +40,7 @@
# to debug failures.
# print e
)py",
- "fuzzer", Py_file_input);
+ "fuzzer", Py_file_input);
return 0;
}
--- a/contrib/fuzz/pyutil.cc Fri Nov 22 23:43:59 2019 -0500
+++ b/contrib/fuzz/pyutil.cc Mon Dec 09 22:20:35 2019 -0500
@@ -6,17 +6,26 @@
namespace contrib
{
+#if PY_MAJOR_VERSION >= 3
+#define HG_FUZZER_PY3 1
+PyMODINIT_FUNC PyInit_parsers(void);
+#else
+PyMODINIT_FUNC initparsers(void);
+#endif
+
static char cpypath[8192] = "\0";
static PyObject *mainmod;
static PyObject *globals;
-/* TODO: use Python 3 for this fuzzing? */
-PyMODINIT_FUNC initparsers(void);
-
void initpy(const char *cselfpath)
{
+#ifdef HG_FUZZER_PY3
+ const std::string subdir = "/sanpy/lib/python3.7";
+#else
const std::string subdir = "/sanpy/lib/python2.7";
+#endif
+
/* HACK ALERT: we need a full Python installation built without
pymalloc and with ASAN, so we dump one in
$OUT/sanpy/lib/python2.7. This helps us wire that up. */
@@ -39,11 +48,24 @@
setenv("PYTHONNOUSERSITE", "1", 1);
/* prevent Python from looking up users in the fuzz environment */
setenv("PYTHONUSERBASE", cpypath, 1);
+#ifdef HG_FUZZER_PY3
+ std::wstring wcpypath(pypath.begin(), pypath.end());
+ Py_SetPythonHome(wcpypath.c_str());
+#else
Py_SetPythonHome(cpypath);
+#endif
Py_InitializeEx(0);
mainmod = PyImport_AddModule("__main__");
globals = PyModule_GetDict(mainmod);
+
+#ifdef HG_FUZZER_PY3
+ PyObject *mod = PyInit_parsers();
+#else
initparsers();
+ PyObject *mod = PyImport_ImportModule("parsers");
+#endif
+
+ PyDict_SetItemString(globals, "parsers", mod);
}
PyObject *pyglobals()
--- a/contrib/fuzz/pyutil.h Fri Nov 22 23:43:59 2019 -0500
+++ b/contrib/fuzz/pyutil.h Mon Dec 09 22:20:35 2019 -0500
@@ -1,5 +1,11 @@
#include <Python.h>
+#if PY_MAJOR_VERSION >= 3
+#define PYCODETYPE PyObject
+#else
+#define PYCODETYPE PyCodeObject
+#endif
+
namespace contrib
{
--- a/contrib/fuzz/revlog.cc Fri Nov 22 23:43:59 2019 -0500
+++ b/contrib/fuzz/revlog.cc Mon Dec 09 22:20:35 2019 -0500
@@ -9,16 +9,15 @@
extern "C" {
-static PyCodeObject *code;
+static PYCODETYPE *code;
extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv)
{
contrib::initpy(*argv[0]);
- code = (PyCodeObject *)Py_CompileString(R"py(
-from parsers import parse_index2
+ code = (PYCODETYPE *)Py_CompileString(R"py(
for inline in (True, False):
try:
- index, cache = parse_index2(data, inline)
+ index, cache = parsers.parse_index2(data, inline)
index.slicechunktodensity(list(range(len(index))), 0.5, 262144)
index.stats()
index.findsnapshots({}, 0)
@@ -35,7 +34,7 @@
# to debug failures.
# print e
)py",
- "fuzzer", Py_file_input);
+ "fuzzer", Py_file_input);
return 0;
}