Mercurial > hg
annotate contrib/fuzz/fncache.cc @ 50400:95acba2c29f6
encoding: avoid quadratic time complexity when json-encoding non-UTF8 strings
Apparently the code uses "+=" with a bytes object, which is linear-time, so the
whole encoding is quadratic-time. This patch makes us use a bytearray object,
instead, which has a(n amortized-)constant-time append operation.
The encoding is still not particularly fast, but at least a 10MB file
takes tens of seconds, not many hours to encode.
author | Arseniy Alekseyev <aalekseyev@janestreet.com> |
---|---|
date | Mon, 06 Mar 2023 11:27:57 +0000 |
parents | 8766728dbce6 |
children |
rev | line source |
---|---|
43152
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
1 #include <Python.h> |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
2 #include <assert.h> |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
3 #include <stdlib.h> |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
4 #include <unistd.h> |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
5 |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
6 #include "pyutil.h" |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
7 |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
8 #include <iostream> |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
9 #include <string> |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
10 |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
11 extern "C" { |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
12 |
43859
8766728dbce6
fuzz: add support for fuzzing under either Python 2 or 3
Augie Fackler <augie@google.com>
parents:
43152
diff
changeset
|
// Compiled form of the embedded Python fuzz script: assigned by LLVMFuzzerInitialize via Py_CompileString and passed to PyEval_EvalCode on every LLVMFuzzerTestOneInput call.
13 static PYCODETYPE *code; |
43152
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
14 |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
15 extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
16 { |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
17 contrib::initpy(*argv[0]); |
43859
8766728dbce6
fuzz: add support for fuzzing under either Python 2 or 3
Augie Fackler <augie@google.com>
parents:
43152
diff
changeset
|
18 code = (PYCODETYPE *)Py_CompileString(R"py( |
43152
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
19 try: |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
20 for fn in ( |
43859
8766728dbce6
fuzz: add support for fuzzing under either Python 2 or 3
Augie Fackler <augie@google.com>
parents:
43152
diff
changeset
|
21 parsers.isasciistr, |
8766728dbce6
fuzz: add support for fuzzing under either Python 2 or 3
Augie Fackler <augie@google.com>
parents:
43152
diff
changeset
|
22 parsers.asciilower, |
8766728dbce6
fuzz: add support for fuzzing under either Python 2 or 3
Augie Fackler <augie@google.com>
parents:
43152
diff
changeset
|
23 parsers.asciiupper, |
8766728dbce6
fuzz: add support for fuzzing under either Python 2 or 3
Augie Fackler <augie@google.com>
parents:
43152
diff
changeset
|
24 parsers.encodedir, |
8766728dbce6
fuzz: add support for fuzzing under either Python 2 or 3
Augie Fackler <augie@google.com>
parents:
43152
diff
changeset
|
25 parsers.pathencode, |
8766728dbce6
fuzz: add support for fuzzing under either Python 2 or 3
Augie Fackler <augie@google.com>
parents:
43152
diff
changeset
|
26 parsers.lowerencode, |
43152
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
27 ): |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
28 try: |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
29 fn(data) |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
30 except UnicodeDecodeError: |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
31 pass # some functions emit this exception |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
32 except AttributeError: |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
33 # pathencode needs hashlib, which fails to import because the time |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
34 # module fails to import. We should try and fix that some day, but |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
35 # for now we at least get coverage on non-hashencoded codepaths. |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
36 if fn != pathencode: |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
37 raise |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
38 # uncomment this for debugging exceptions |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
39 # except Exception as e: |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
40 # raise Exception('%r: %r' % (fn, e)) |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
41 except Exception as e: |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
42 pass |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
43 # uncomment this print if you're editing this Python code |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
44 # to debug failures. |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
45 # print(e) |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
46 )py", |
43859
8766728dbce6
fuzz: add support for fuzzing under either Python 2 or 3
Augie Fackler <augie@google.com>
parents:
43152
diff
changeset
|
47 "fuzzer", Py_file_input); |
43152
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
48 if (!code) { |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
49 std::cerr << "failed to compile Python code!" << std::endl; |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
50 } |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
51 return 0; |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
52 } |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
53 |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
54 int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
55 { |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
56 PyObject *mtext = |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
57 PyBytes_FromStringAndSize((const char *)Data, (Py_ssize_t)Size); |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
58 PyObject *locals = PyDict_New(); |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
59 PyDict_SetItemString(locals, "data", mtext); |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
60 PyObject *res = PyEval_EvalCode(code, contrib::pyglobals(), locals); |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
61 if (!res) { |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
62 PyErr_Print(); |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
63 } |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
64 Py_XDECREF(res); |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
65 Py_DECREF(locals); |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
66 Py_DECREF(mtext); |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
67 return 0; // Non-zero return values are reserved for future use. |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
68 } |
b37dd26935ee
fuzz: new fuzzer for fncache-related functions
Augie Fackler <augie@google.com>
parents:
diff
changeset
|
69 } |