annotate contrib/fuzz/fncache.cc @ 50400:95acba2c29f6

encoding: avoid quadratic time complexity when json-encoding non-UTF8 strings Apparently the code uses "+=" with a bytes object, which is linear-time, so the whole encoding is quadratic-time. This patch makes us use a bytearray object, instead, which has a(n amortized-)constant-time append operation. The encoding is still not particularly fast, but at least a 10MB file takes tens of seconds, not many hours to encode.
author Arseniy Alekseyev <aalekseyev@janestreet.com>
date Mon, 06 Mar 2023 11:27:57 +0000
parents 8766728dbce6
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
43152
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
1 #include <Python.h>
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
2 #include <assert.h>
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
3 #include <stdlib.h>
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
4 #include <unistd.h>
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
5
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
6 #include "pyutil.h"
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
7
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
8 #include <iostream>
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
9 #include <string>
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
10
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
11 extern "C" {
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
12
43859
8766728dbce6 fuzz: add support for fuzzing under either Python 2 or 3
Augie Fackler <augie@google.com>
parents: 43152
diff changeset
13 static PYCODETYPE *code;
43152
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
14
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
15 extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv)
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
16 {
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
17 contrib::initpy(*argv[0]);
43859
8766728dbce6 fuzz: add support for fuzzing under either Python 2 or 3
Augie Fackler <augie@google.com>
parents: 43152
diff changeset
18 code = (PYCODETYPE *)Py_CompileString(R"py(
43152
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
19 try:
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
20 for fn in (
43859
8766728dbce6 fuzz: add support for fuzzing under either Python 2 or 3
Augie Fackler <augie@google.com>
parents: 43152
diff changeset
21 parsers.isasciistr,
8766728dbce6 fuzz: add support for fuzzing under either Python 2 or 3
Augie Fackler <augie@google.com>
parents: 43152
diff changeset
22 parsers.asciilower,
8766728dbce6 fuzz: add support for fuzzing under either Python 2 or 3
Augie Fackler <augie@google.com>
parents: 43152
diff changeset
23 parsers.asciiupper,
8766728dbce6 fuzz: add support for fuzzing under either Python 2 or 3
Augie Fackler <augie@google.com>
parents: 43152
diff changeset
24 parsers.encodedir,
8766728dbce6 fuzz: add support for fuzzing under either Python 2 or 3
Augie Fackler <augie@google.com>
parents: 43152
diff changeset
25 parsers.pathencode,
8766728dbce6 fuzz: add support for fuzzing under either Python 2 or 3
Augie Fackler <augie@google.com>
parents: 43152
diff changeset
26 parsers.lowerencode,
43152
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
27 ):
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
28 try:
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
29 fn(data)
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
30 except UnicodeDecodeError:
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
31 pass # some functions emit this exception
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
32 except AttributeError:
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
33 # pathencode needs hashlib, which fails to import because the time
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
34 # module fails to import. We should try and fix that some day, but
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
35 # for now we at least get coverage on non-hashencoded codepaths.
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
36 if fn != pathencode:
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
37 raise
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
38 # uncomment this for debugging exceptions
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
39 # except Exception as e:
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
40 # raise Exception('%r: %r' % (fn, e))
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
41 except Exception as e:
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
42 pass
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
43 # uncomment this print if you're editing this Python code
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
44 # to debug failures.
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
45 # print(e)
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
46 )py",
43859
8766728dbce6 fuzz: add support for fuzzing under either Python 2 or 3
Augie Fackler <augie@google.com>
parents: 43152
diff changeset
47 "fuzzer", Py_file_input);
43152
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
48 if (!code) {
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
49 std::cerr << "failed to compile Python code!" << std::endl;
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
50 }
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
51 return 0;
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
52 }
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
53
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
54 int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size)
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
55 {
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
56 PyObject *mtext =
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
57 PyBytes_FromStringAndSize((const char *)Data, (Py_ssize_t)Size);
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
58 PyObject *locals = PyDict_New();
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
59 PyDict_SetItemString(locals, "data", mtext);
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
60 PyObject *res = PyEval_EvalCode(code, contrib::pyglobals(), locals);
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
61 if (!res) {
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
62 PyErr_Print();
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
63 }
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
64 Py_XDECREF(res);
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
65 Py_DECREF(locals);
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
66 Py_DECREF(mtext);
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
67 return 0; // Non-zero return values are reserved for future use.
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
68 }
b37dd26935ee fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff changeset
69 }