Mercurial > hg-stable
changeset 40054:8c692a6b5ad1
fuzz: new fuzzer for cext/manifest.c
This is a bit messy, because lazymanifest is tightly coupled to the
CPython API for performance reasons. As a result, we have to build a
whole Python without pymalloc (so ASAN can help us out) and link
against that. Then we have to use an embedded Python interpreter. We
could manually drive the lazymanifest in C from that point, but
experimentally, just using PyEval_EvalCode isn't really any slower, so
we may as well do that and write the innermost guts of the fuzzer in
Python.
Leak detection is currently disabled for this fuzzer because there are
a few global-lifetime things in our extensions that we more or less
intentionally leak and I didn't want to take the detour to work around
that for now.
This should not be pushed to our repo until
https://github.com/google/oss-fuzz/pull/1853 is merged, as this
depends on having the Python tarball around.
Differential Revision: https://phab.mercurial-scm.org/D4879
author | Augie Fackler <augie@google.com> |
---|---|
date | Thu, 06 Sep 2018 02:36:25 -0400 |
parents | 55db747a21ad |
children | 801ccd8e67c0 |
files | contrib/fuzz/Makefile contrib/fuzz/manifest.cc contrib/fuzz/manifest_corpus.py contrib/fuzz/manifest_fuzzer.options |
diffstat | 4 files changed, 159 insertions(+), 2 deletions(-) [+] |
line wrap: on
line diff
--- a/contrib/fuzz/Makefile Wed Oct 03 10:32:21 2018 -0700 +++ b/contrib/fuzz/Makefile Thu Sep 06 02:36:25 2018 -0400 @@ -70,12 +70,62 @@ fuzz-xdiffi.o fuzz-xprepare.o fuzz-xutils.o fuzzutil-oss-fuzz.o \ -lFuzzingEngine -o $$OUT/xdiff_fuzzer +# TODO use the $OUT env var instead of hardcoding /out +/out/sanpy/bin/python: + cd /Python-2.7.15/ && ./configure --without-pymalloc --prefix=$$OUT/sanpy CFLAGS='-O1 -fno-omit-frame-pointer -g -fwrapv -fstack-protector-strong' LDFLAGS=-lasan && ASAN_OPTIONS=detect_leaks=0 make && make install + +sanpy: /out/sanpy/bin/python + +manifest.o: sanpy ../../mercurial/cext/manifest.c + $(CC) $(CFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \ + -I../../mercurial \ + -c -o manifest.o ../../mercurial/cext/manifest.c + +charencode.o: sanpy ../../mercurial/cext/charencode.c + $(CC) $(CFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \ + -I../../mercurial \ + -c -o charencode.o ../../mercurial/cext/charencode.c + +parsers.o: sanpy ../../mercurial/cext/parsers.c + $(CC) $(CFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \ + -I../../mercurial \ + -c -o parsers.o ../../mercurial/cext/parsers.c + +dirs.o: sanpy ../../mercurial/cext/dirs.c + $(CC) $(CFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \ + -I../../mercurial \ + -c -o dirs.o ../../mercurial/cext/dirs.c + +pathencode.o: sanpy ../../mercurial/cext/pathencode.c + $(CC) $(CFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \ + -I../../mercurial \ + -c -o pathencode.o ../../mercurial/cext/pathencode.c + +revlog.o: sanpy ../../mercurial/cext/revlog.c + $(CC) $(CFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \ + -I../../mercurial \ + -c -o revlog.o ../../mercurial/cext/revlog.c + +manifest_fuzzer: sanpy manifest.cc manifest.o charencode.o parsers.o dirs.o pathencode.o revlog.o + $(CXX) $(CXXFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \ + -Wno-register -Wno-macro-redefined \ + -I../../mercurial manifest.cc \ + manifest.o charencode.o parsers.o dirs.o pathencode.o revlog.o \ + 
-lFuzzingEngine `$$OUT/sanpy/bin/python-config --ldflags` \ + -o $$OUT/manifest_fuzzer + +manifest_corpus.zip: + python manifest_corpus.py $$OUT/manifest_fuzzer_seed_corpus.zip + +copy_options: + cp *.options $$OUT + clean: $(RM) *.o *_fuzzer \ bdiff \ mpatch \ xdiff -oss-fuzz: bdiff_fuzzer mpatch_fuzzer mpatch_corpus.zip xdiff_fuzzer +oss-fuzz: bdiff_fuzzer mpatch_fuzzer mpatch_corpus.zip xdiff_fuzzer manifest_fuzzer manifest_corpus.zip copy_options -.PHONY: all clean oss-fuzz +.PHONY: all clean oss-fuzz sanpy copy_options
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/contrib/fuzz/manifest.cc Thu Sep 06 02:36:25 2018 -0400 @@ -0,0 +1,75 @@ +#include <Python.h> +#include <assert.h> +#include <stdlib.h> +#include <unistd.h> + +#include <string> + +extern "C" { + +/* TODO: use Python 3 for this fuzzing? */ +PyMODINIT_FUNC initparsers(void); + +static char cpypath[8192] = "\0"; + +extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) +{ + const std::string subdir = "/sanpy/lib/python2.7"; + /* HACK ALERT: we need a full Python installation built without + pymalloc and with ASAN, so we dump one in + $OUT/sanpy/lib/python2.7. This helps us wire that up. */ + std::string selfpath(*argv[0]); + std::string pypath; + auto pos = selfpath.rfind("/"); + if (pos == std::string::npos) { + char wd[8192]; + getcwd(wd, 8192); + pypath = std::string(wd) + subdir; + } else { + pypath = selfpath.substr(0, pos) + subdir; + } + strncpy(cpypath, pypath.c_str(), pypath.size()); + setenv("PYTHONPATH", cpypath, 1); + Py_SetPythonHome(cpypath); + return 0; +} + +int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) +{ + Py_InitializeEx(0); + initparsers(); + PyObject *mtext = + PyBytes_FromStringAndSize((const char *)Data, (Py_ssize_t)Size); + PyObject *mainmod = PyImport_AddModule("__main__"); + PyObject *globals = PyModule_GetDict(mainmod); + PyObject *locals = PyDict_New(); + PyDict_SetItemString(locals, "mdata", mtext); + PyCodeObject *code = + (PyCodeObject *)Py_CompileString(R"py( +from parsers import lazymanifest +lm = lazymanifest(mdata) +try: + # iterate the whole thing, which causes the code to fully parse + # every line in the manifest + list(lm.iterentries()) + lm[b'xyzzy'] = (b'\0' * 20, 'x') + # do an insert, text should change + assert lm.text() != mdata, "insert should change text and didn't: %r %r" % (lm.text(), mdata) + del lm[b'xyzzy'] + # should be back to the same + assert lm.text() == mdata, "delete should have restored text but didn't: %r %r" % (lm.text(), mdata) 
+except Exception as e: + pass + # uncomment this print if you're editing this Python code + # to debug failures. + # print e +)py", + "fuzzer", Py_file_input); + PyEval_EvalCode(code, globals, locals); + Py_DECREF(code); + Py_DECREF(locals); + Py_DECREF(mtext); + Py_Finalize(); + return 0; // Non-zero return values are reserved for future use. +} +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/contrib/fuzz/manifest_corpus.py Thu Sep 06 02:36:25 2018 -0400 @@ -0,0 +1,30 @@ +from __future__ import absolute_import, print_function + +import argparse +import zipfile + +ap = argparse.ArgumentParser() +ap.add_argument("out", metavar="some.zip", type=str, nargs=1) +args = ap.parse_args() + +with zipfile.ZipFile(args.out[0], "w", zipfile.ZIP_STORED) as zf: + zf.writestr("manifest_zero", +'''PKG-INFO\09b3ed8f2b81095a13064402e930565f083346e9a +README\080b6e76643dcb44d4bc729e932fc464b3e36dbe3 +hg\0b6444347c629cc058d478023905cfb83b7f5bb9d +mercurial/__init__.py\0b80de5d138758541c5f05265ad144ab9fa86d1db +mercurial/byterange.py\017f5a9fbd99622f31a392c33ac1e903925dc80ed +mercurial/fancyopts.py\0b6f52e23e356748c5039313d8b639cda16bf67ba +mercurial/hg.py\023cc12f225f1b42f32dc0d897a4f95a38ddc8f4a +mercurial/mdiff.py\0a05f65c44bfbeec6a42336cd2ff0b30217899ca3 +mercurial/revlog.py\0217bc3fde6d82c0210cf56aeae11d05a03f35b2b +mercurial/transaction.py\09d180df101dc14ce3dd582fd998b36c98b3e39aa +notes.txt\0703afcec5edb749cf5cec67831f554d6da13f2fb +setup.py\0ccf3f6daf0f13101ca73631f7a1769e328b472c9 +tkmerge\03c922edb43a9c143682f7bc7b00f98b3c756ebe7 +''') + zf.writestr("badmanifest_shorthashes", + "narf\0aa\nnarf2\0aaa\n") + zf.writestr("badmanifest_nonull", + "narf\0cccccccccccccccccccccccccccccccccccccccc\n" + "narf2aaaaaaaaaaaaaaaaaaaa\n")