changeset 40053:8c692a6b5ad1

fuzz: new fuzzer for cext/manifest.c This is a bit messy, because lazymanifest is tightly coupled to the cpython API for performance reasons. As a result, we have to build a whole Python without pymalloc (so ASAN can help us out) and link against that. Then we have to use an embedded Python interpreter. We could manually drive the lazymanifest in C from that point, but experimentally just using PyEval_EvalCode isn't really any slower so we may as well do that and write the innermost guts of the fuzzer in Python. Leak detection is currently disabled for this fuzzer because there are a few global-lifetime things in our extensions that we more or less intentionally leak and I didn't want to take the detour to work around that for now. This should not be pushed to our repo until https://github.com/google/oss-fuzz/pull/1853 is merged, as this depends on having the Python tarball around. Differential Revision: https://phab.mercurial-scm.org/D4879
author Augie Fackler <augie@google.com>
date Thu, 06 Sep 2018 02:36:25 -0400
parents 55db747a21ad
children 801ccd8e67c0
files contrib/fuzz/Makefile contrib/fuzz/manifest.cc contrib/fuzz/manifest_corpus.py contrib/fuzz/manifest_fuzzer.options
diffstat 4 files changed, 159 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/contrib/fuzz/Makefile	Wed Oct 03 10:32:21 2018 -0700
+++ b/contrib/fuzz/Makefile	Thu Sep 06 02:36:25 2018 -0400
@@ -70,12 +70,62 @@
 	  fuzz-xdiffi.o fuzz-xprepare.o fuzz-xutils.o fuzzutil-oss-fuzz.o \
 	  -lFuzzingEngine -o $$OUT/xdiff_fuzzer
 
+# Build CPython without pymalloc, linked against ASAN, into $OUT/sanpy.
+$(OUT)/sanpy/bin/python:
+	cd /Python-2.7.15/ && ./configure --without-pymalloc --prefix=$$OUT/sanpy CFLAGS='-O1 -fno-omit-frame-pointer -g -fwrapv -fstack-protector-strong' LDFLAGS=-lasan  && ASAN_OPTIONS=detect_leaks=0 make && make install
+
+sanpy: $(OUT)/sanpy/bin/python
+
+# sanpy is order-only: python-config must exist, but must not force rebuilds.
+manifest.o: ../../mercurial/cext/manifest.c | sanpy
+	$(CC) $(CFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \
+	  -I../../mercurial -c -o $@ $<
+
+# sanpy is order-only: python-config must exist, but must not force rebuilds.
+charencode.o: ../../mercurial/cext/charencode.c | sanpy
+	$(CC) $(CFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \
+	  -I../../mercurial -c -o $@ $<
+
+# sanpy is order-only: python-config must exist, but must not force rebuilds.
+parsers.o: ../../mercurial/cext/parsers.c | sanpy
+	$(CC) $(CFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \
+	  -I../../mercurial -c -o $@ $<
+
+# sanpy is order-only: python-config must exist, but must not force rebuilds.
+dirs.o: ../../mercurial/cext/dirs.c | sanpy
+	$(CC) $(CFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \
+	  -I../../mercurial -c -o $@ $<
+
+# sanpy is order-only: python-config must exist, but must not force rebuilds.
+pathencode.o: ../../mercurial/cext/pathencode.c | sanpy
+	$(CC) $(CFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \
+	  -I../../mercurial -c -o $@ $<
+
+# sanpy is order-only: python-config must exist, but must not force rebuilds.
+revlog.o: ../../mercurial/cext/revlog.c | sanpy
+	$(CC) $(CFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \
+	  -I../../mercurial -c -o $@ $<
+
+# Link the embedded-Python fuzzer; $^ is manifest.cc plus the objects (sanpy is order-only).
+manifest_fuzzer: manifest.cc manifest.o charencode.o parsers.o dirs.o pathencode.o revlog.o | sanpy
+	$(CXX) $(CXXFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \
+	  -Wno-register -Wno-macro-redefined \
+	  -I../../mercurial $^ \
+	  -lFuzzingEngine `$$OUT/sanpy/bin/python-config --ldflags` \
+	  -o $$OUT/manifest_fuzzer
+
+manifest_corpus.zip: manifest_corpus.py  # writes $OUT/manifest_fuzzer_seed_corpus.zip
+	python $< $$OUT/manifest_fuzzer_seed_corpus.zip
+
+copy_options:  # copy every libFuzzer .options file into $OUT, where the fuzzers are written
+	cp *.options $$OUT
+
 clean:
 	$(RM) *.o *_fuzzer \
 	  bdiff \
 	  mpatch \
 	  xdiff
 
-oss-fuzz: bdiff_fuzzer mpatch_fuzzer mpatch_corpus.zip xdiff_fuzzer
+oss-fuzz: bdiff_fuzzer mpatch_fuzzer mpatch_corpus.zip xdiff_fuzzer manifest_fuzzer manifest_corpus.zip copy_options
 
-.PHONY: all clean oss-fuzz
+.PHONY: all clean oss-fuzz sanpy copy_options
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/contrib/fuzz/manifest.cc	Thu Sep 06 02:36:25 2018 -0400
@@ -0,0 +1,75 @@
+#include <Python.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <string>
+
+extern "C" {
+
+/* TODO: use Python 3 for this fuzzing? */
+PyMODINIT_FUNC initparsers(void);
+
+static char cpypath[8192] = "\0";
+
+extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv)
+{
+	const std::string subdir = "/sanpy/lib/python2.7";
+	/* HACK ALERT: we need a full Python installation built without
+	   pymalloc and with ASAN, so we dump one in
+	   $OUT/sanpy/lib/python2.7. This helps us wire that up. */
+	std::string selfpath((*argv)[0]);
+	char wd[8192];
+	auto pos = selfpath.rfind("/");
+	/* Bare program name: fall back to the cwd ("." if getcwd() fails). */
+	std::string dir = getcwd(wd, sizeof(wd)) ? wd : ".";
+	if (pos != std::string::npos) {
+		dir = selfpath.substr(0, pos);
+	}
+	std::string pypath = dir + subdir;
+	/* Bounded copy; the static buffer's zero fill keeps the final NUL. */
+	strncpy(cpypath, pypath.c_str(), sizeof(cpypath) - 1);
+	setenv("PYTHONPATH", cpypath, 1);
+	Py_SetPythonHome(cpypath);
+	return 0;
+}
+
+int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size)
+{
+	Py_InitializeEx(0);
+	initparsers();
+	PyObject *mtext =
+	    PyBytes_FromStringAndSize((const char *)Data, (Py_ssize_t)Size);
+	PyObject *globals = PyModule_GetDict(PyImport_AddModule("__main__"));
+	PyObject *locals = PyDict_New();
+	PyDict_SetItemString(locals, "mdata", mtext);
+	PyCodeObject *code = (PyCodeObject *)Py_CompileString(R"py(
+from parsers import lazymanifest
+lm = lazymanifest(mdata)
+try:
+  # iterate the whole thing, which causes the code to fully parse
+  # every line in the manifest
+  list(lm.iterentries())
+  lm[b'xyzzy'] = (b'\0' * 20, 'x')
+  # do an insert, text should change
+  assert lm.text() != mdata, "insert should change text and didn't: %r %r" % (lm.text(), mdata)
+  del lm[b'xyzzy']
+  # should be back to the same
+  assert lm.text() == mdata, "delete should have restored text but didn't: %r %r" % (lm.text(), mdata)
+except Exception as e:
+  pass
+  # uncomment this print if you're editing this Python code
+  # to debug failures.
+  # print e
+)py", "fuzzer", Py_file_input);
+	PyObject *res = code ? PyEval_EvalCode(code, globals, locals) : NULL;
+	if (!res)
+		PyErr_Clear(); /* e.g. lazymanifest() rejected the input */
+	Py_XDECREF(res); /* PyEval_EvalCode returns a new reference */
+	Py_XDECREF(code);
+	Py_DECREF(locals);
+	Py_DECREF(mtext);
+	Py_Finalize();
+	return 0; // Non-zero return values are reserved for future use.
+}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/contrib/fuzz/manifest_corpus.py	Thu Sep 06 02:36:25 2018 -0400
@@ -0,0 +1,30 @@
+from __future__ import absolute_import, print_function
+
+import argparse
+import zipfile
+
+ap = argparse.ArgumentParser()
+ap.add_argument("out", metavar="some.zip", type=str, nargs=1)
+args = ap.parse_args()
+
+with zipfile.ZipFile(args.out[0], "w", zipfile.ZIP_STORED) as zf:
+    # Use \x00, not \0: "\017f5..." would be read as the octal escape \017.
+    zf.writestr("manifest_zero",
+'''PKG-INFO\x009b3ed8f2b81095a13064402e930565f083346e9a
+README\x0080b6e76643dcb44d4bc729e932fc464b3e36dbe3
+hg\x00b6444347c629cc058d478023905cfb83b7f5bb9d
+mercurial/__init__.py\x00b80de5d138758541c5f05265ad144ab9fa86d1db
+mercurial/byterange.py\x0017f5a9fbd99622f31a392c33ac1e903925dc80ed
+mercurial/fancyopts.py\x00b6f52e23e356748c5039313d8b639cda16bf67ba
+mercurial/hg.py\x0023cc12f225f1b42f32dc0d897a4f95a38ddc8f4a
+mercurial/mdiff.py\x00a05f65c44bfbeec6a42336cd2ff0b30217899ca3
+mercurial/revlog.py\x00217bc3fde6d82c0210cf56aeae11d05a03f35b2b
+mercurial/transaction.py\x009d180df101dc14ce3dd582fd998b36c98b3e39aa
+notes.txt\x00703afcec5edb749cf5cec67831f554d6da13f2fb
+setup.py\x00ccf3f6daf0f13101ca73631f7a1769e328b472c9
+tkmerge\x003c922edb43a9c143682f7bc7b00f98b3c756ebe7
+''')
+    zf.writestr("badmanifest_shorthashes", "narf\x00aa\nnarf2\x00aaa\n")
+    zf.writestr("badmanifest_nonull",
+                "narf\x00cccccccccccccccccccccccccccccccccccccccc\n"
+                "narf2aaaaaaaaaaaaaaaaaaaa\n")
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/contrib/fuzz/manifest_fuzzer.options	Thu Sep 06 02:36:25 2018 -0400
@@ -0,0 +1,2 @@
+[libfuzzer]
+detect_leaks = 0