view contrib/fuzz/manifest.cc @ 43394:d359dfc15aca stable

fsmonitor: handle unicode keys in tuples In Python 3, keys in the bset tuple are typically str, not bytes. PyBytes_AsString() would return NULL. But we weren't checking the return value and this would lead to a segfault. This commit makes the code type and Python version aware. The Python version specific code is to allow us to utilize a modern API for converting str -> char* without having to allocate an extra PyObject. FWIW I wanted to assume that keys were always str. However, there appear to be some bytes keys in some cases. I haven't debugged this further. Differential Revision: https://phab.mercurial-scm.org/D7210
author Gregory Szorc <gregory.szorc@gmail.com>
date Sat, 02 Nov 2019 14:17:48 -0700
parents d60bd5c71cbb
children 8766728dbce6
line wrap: on
line source

#include <Python.h>
#include <assert.h>
#include <stdlib.h>
#include <unistd.h>

#include "pyutil.h"

#include <string>

extern "C" {

static PyCodeObject *code;

extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv)
{
	contrib::initpy(*argv[0]);
	code = (PyCodeObject *)Py_CompileString(R"py(
from parsers import lazymanifest
try:
  lm = lazymanifest(mdata)
  # iterate the whole thing, which causes the code to fully parse
  # every line in the manifest
  for e, _, _ in lm.iterentries():
      # also exercise __getitem__ et al
      lm[e]
      e in lm
      (e + 'nope') in lm
  lm[b'xyzzy'] = (b'\0' * 20, 'x')
  # do an insert, text should change
  assert lm.text() != mdata, "insert should change text and didn't: %r %r" % (lm.text(), mdata)
  cloned = lm.filtercopy(lambda x: x != 'xyzzy')
  assert cloned.text() == mdata, 'cloned text should equal mdata'
  cloned.diff(lm)
  del lm[b'xyzzy']
  cloned.diff(lm)
  # should be back to the same
  assert lm.text() == mdata, "delete should have restored text but didn't: %r %r" % (lm.text(), mdata)
except Exception as e:
  pass
  # uncomment this print if you're editing this Python code
  # to debug failures.
  # print e
)py",
	                                        "fuzzer", Py_file_input);
	return 0;
}

int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size)
{
	// Don't allow fuzzer inputs larger than 100k, since we'll just bog
	// down and not accomplish much.
	if (Size > 100000) {
		return 0;
	}
	PyObject *mtext =
	    PyBytes_FromStringAndSize((const char *)Data, (Py_ssize_t)Size);
	PyObject *locals = PyDict_New();
	PyDict_SetItemString(locals, "mdata", mtext);
	PyObject *res = PyEval_EvalCode(code, contrib::pyglobals(), locals);
	if (!res) {
		PyErr_Print();
	}
	Py_XDECREF(res);
	Py_DECREF(locals);
	Py_DECREF(mtext);
	return 0; // Non-zero return values are reserved for future use.
}
}