Mercurial > hg
view contrib/fuzz/xdiff.cc @ 50400:95acba2c29f6
encoding: avoid quadratic time complexity when json-encoding non-UTF8 strings
Apparently the code uses "+=" with a bytes object, which is linear-time, so the
whole encoding is quadratic-time. This patch makes us use a bytearray object,
instead, which has a(n amortized-)constant-time append operation.
The encoding is still not particularly fast, but at least a 10MB file
takes tens of seconds, not many hours to encode.
author | Arseniy Alekseyev <aalekseyev@janestreet.com> |
---|---|
date | Mon, 06 Mar 2023 11:27:57 +0000 |
parents | d37658efbec2 |
children |
line wrap: on
line source
/* * xdiff.cc - fuzzer harness for thirdparty/xdiff * * Copyright 2018, Google Inc. * * This software may be used and distributed according to the terms of * the GNU General Public License, incorporated herein by reference. */ #include "thirdparty/xdiff/xdiff.h" #include <inttypes.h> #include <stdlib.h> #include "FuzzedDataProvider.h" extern "C" { int LLVMFuzzerInitialize(int *argc, char ***argv) { return 0; } int hunk_consumer(long a1, long a2, long b1, long b2, void *priv) { // TODO: probably also test returning -1 from this when things break? return 0; } int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { // Don't allow fuzzer inputs larger than 100k, since we'll just bog // down and not accomplish much. if (Size > 100000) { return 0; } FuzzedDataProvider provider(Data, Size); std::string left = provider.ConsumeRandomLengthString(Size); std::string right = provider.ConsumeRemainingBytesAsString(); mmfile_t a, b; a.ptr = (char *)left.c_str(); a.size = left.size(); b.ptr = (char *)right.c_str(); b.size = right.size(); xpparam_t xpp = { XDF_INDENT_HEURISTIC, /* flags */ }; xdemitconf_t xecfg = { XDL_EMIT_BDIFFHUNK, /* flags */ hunk_consumer, /* hunk_consume_func */ }; xdemitcb_t ecb = { NULL, /* priv */ }; xdl_diff(&a, &b, &xpp, &xecfg, &ecb); return 0; // Non-zero return values are reserved for future use. } } // extern "C"