Mercurial > hg
changeset 38173:fa0ddd5e8fff
fuzz: extract some common utilities and use modern C++ idioms
Alex Gaynor suggested we should probably copy the left and right sides
of each diff into freshly allocated buffers so that AddressSanitizer can
detect over-reads in the diffing code, and I agree. Once I got into that,
I realized we should do things with C++17 idioms rather than keep using
malloc() and free(). This change is the result. I tried to split it into
smaller changes and failed.
Everything still compiles and works in the oss-fuzz container, so I
think we can count on C++17 being available!
Differential Revision: https://phab.mercurial-scm.org/D3675
author | Augie Fackler <augie@google.com> |
---|---|
date | Sat, 28 Apr 2018 22:18:50 -0400 |
parents | 24cc2969abae |
children | 36d55f90e2a3 |
files | contrib/fuzz/Makefile contrib/fuzz/bdiff.cc contrib/fuzz/fuzzutil.cc contrib/fuzz/fuzzutil.h contrib/fuzz/xdiff.cc |
diffstat | 5 files changed, 87 insertions(+), 38 deletions(-) [+] |
line wrap: on
line diff
--- a/contrib/fuzz/Makefile Sat Apr 28 22:13:33 2018 -0400 +++ b/contrib/fuzz/Makefile Sat Apr 28 22:18:50 2018 -0400 @@ -1,36 +1,42 @@ +fuzzutil.o: fuzzutil.cc fuzzutil.h + $$CXX $$CXXFLAGS -g -O1 -fsanitize=fuzzer-no-link,address \ + -std=c++17 \ + -I../../mercurial -c -o fuzzutil.o fuzzutil.cc + bdiff.o: ../../mercurial/bdiff.c - clang -g -O1 -fsanitize=fuzzer-no-link,address -c -o bdiff.o \ + $$CC $$CFLAGS -fsanitize=fuzzer-no-link,address -c -o bdiff.o \ ../../mercurial/bdiff.c -bdiff: bdiff.cc bdiff.o - clang -DHG_FUZZER_INCLUDE_MAIN=1 -g -O1 -fsanitize=fuzzer-no-link,address \ - -I../../mercurial bdiff.cc bdiff.o -o bdiff +bdiff: bdiff.cc bdiff.o fuzzutil.o + $$CXX $$CXXFLAGS -DHG_FUZZER_INCLUDE_MAIN=1 -g -O1 -fsanitize=fuzzer-no-link,address \ + -std=c++17 \ + -I../../mercurial bdiff.cc bdiff.o fuzzutil.o -o bdiff bdiff-oss-fuzz.o: ../../mercurial/bdiff.c $$CC $$CFLAGS -c -o bdiff-oss-fuzz.o ../../mercurial/bdiff.c -bdiff_fuzzer: bdiff.cc bdiff-oss-fuzz.o - $$CXX $$CXXFLAGS -std=c++11 -I../../mercurial bdiff.cc \ - bdiff-oss-fuzz.o -lFuzzingEngine -o $$OUT/bdiff_fuzzer +bdiff_fuzzer: bdiff.cc bdiff-oss-fuzz.o fuzzutil.o + $$CXX $$CXXFLAGS -std=c++17 -I../../mercurial bdiff.cc \ + bdiff-oss-fuzz.o fuzzutil.o -lFuzzingEngine -o $$OUT/bdiff_fuzzer x%.o: ../../mercurial/thirdparty/xdiff/x%.c ../../mercurial/thirdparty/xdiff/*.h - clang -g -O1 -fsanitize=fuzzer-no-link,address -c \ + $$CC -g -O1 -fsanitize=fuzzer-no-link,address -c \ -o $@ \ $< -xdiff: xdiff.cc xdiffi.o xprepare.o xutils.o - clang -DHG_FUZZER_INCLUDE_MAIN=1 -g -O1 -fsanitize=fuzzer-no-link,address \ +xdiff: xdiff.cc xdiffi.o xprepare.o xutils.o fuzzutil.o + $$CXX $$CXXFLAGS -DHG_FUZZER_INCLUDE_MAIN=1 -g -O1 -fsanitize=fuzzer-no-link,address \ -I../../mercurial xdiff.cc \ - xdiffi.o xprepare.o xutils.o -o xdiff + xdiffi.o xprepare.o xutils.o fuzzutil.o -o xdiff fuzz-x%.o: ../../mercurial/thirdparty/xdiff/x%.c ../../mercurial/thirdparty/xdiff/*.h $$CC $$CFLAGS -c \ -o $@ \ $< -xdiff_fuzzer: 
xdiff.cc fuzz-xdiffi.o fuzz-xprepare.o fuzz-xutils.o - $$CXX $$CXXFLAGS -std=c++11 -I../../mercurial xdiff.cc \ - fuzz-xdiffi.o fuzz-xprepare.o fuzz-xutils.o \ +xdiff_fuzzer: xdiff.cc fuzz-xdiffi.o fuzz-xprepare.o fuzz-xutils.o fuzzutil.o + $$CXX $$CXXFLAGS -std=c++17 -I../../mercurial xdiff.cc \ + fuzz-xdiffi.o fuzz-xprepare.o fuzz-xutils.o fuzzutil.o \ -lFuzzingEngine -o $$OUT/xdiff_fuzzer all: bdiff xdiff
--- a/contrib/fuzz/bdiff.cc Sat Apr 28 22:13:33 2018 -0400 +++ b/contrib/fuzz/bdiff.cc Sat Apr 28 22:18:50 2018 -0400 @@ -6,30 +6,25 @@ * This software may be used and distributed according to the terms of * the GNU General Public License, incorporated herein by reference. */ +#include <memory> #include <stdlib.h> +#include "fuzzutil.h" + extern "C" { #include "bdiff.h" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { - if (!Size) { + auto maybe_inputs = SplitInputs(Data, Size); + if (!maybe_inputs) { return 0; } - // figure out a random point in [0, Size] to split our input. - size_t split = Data[0] / 255.0 * Size; - - // left input to diff is data[1:split] - const uint8_t *left = Data + 1; - // which has len split-1 - size_t left_size = split - 1; - // right starts at the next byte after left ends - const uint8_t *right = left + left_size; - size_t right_size = Size - split; + auto inputs = std::move(maybe_inputs.value()); struct bdiff_line *a, *b; - int an = bdiff_splitlines((const char *)left, split - 1, &a); - int bn = bdiff_splitlines((const char *)right, right_size, &b); + int an = bdiff_splitlines(inputs.left.get(), inputs.left_size, &a); + int bn = bdiff_splitlines(inputs.right.get(), inputs.right_size, &b); struct bdiff_hunk l; bdiff_diff(a, an, b, bn, &l); free(a);
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/contrib/fuzz/fuzzutil.cc Sat Apr 28 22:18:50 2018 -0400 @@ -0,0 +1,26 @@ +#include "fuzzutil.h" + +#include <utility> + +std::optional<two_inputs> SplitInputs(const uint8_t *Data, size_t Size) +{ + if (!Size) { + return std::nullopt; + } + // figure out a random point in [0, Size] to split our input. + size_t left_size = (Data[0] / 255.0) * (Size - 1); + + // Copy inputs to new allocations so if bdiff over-reads + // AddressSanitizer can detect it. + std::unique_ptr<char[]> left(new char[left_size]); + memcpy(left.get(), Data + 1, left_size); + // right starts at the next byte after left ends + size_t right_size = Size - (left_size + 1); + std::unique_ptr<char[]> right(new char[right_size]); + memcpy(right.get(), Data + 1 + left_size, right_size); + LOG(2) << "inputs are " << left_size << " and " << right_size + << " bytes" << std::endl; + two_inputs result = {std::move(right), right_size, std::move(left), + left_size}; + return result; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/contrib/fuzz/fuzzutil.h Sat Apr 28 22:18:50 2018 -0400 @@ -0,0 +1,24 @@ +#ifndef CONTRIB_FUZZ_FUZZUTIL_H +#define CONTRIB_FUZZ_FUZZUTIL_H +#include <iostream> +#include <memory> +#include <optional> +#include <stdint.h> + +/* set DEBUG to 1 for a few debugging prints, or 2 for a lot */ +#define DEBUG 0 +#define LOG(level) \ + if (level <= DEBUG) \ + std::cout + +struct two_inputs { + std::unique_ptr<char[]> right; + size_t right_size; + std::unique_ptr<char[]> left; + size_t left_size; +}; + +/* Split a non-zero-length input into two inputs. */ +std::optional<two_inputs> SplitInputs(const uint8_t *Data, size_t Size); + +#endif /* CONTRIB_FUZZ_FUZZUTIL_H */
--- a/contrib/fuzz/xdiff.cc Sat Apr 28 22:13:33 2018 -0400 +++ b/contrib/fuzz/xdiff.cc Sat Apr 28 22:18:50 2018 -0400 @@ -10,6 +10,8 @@ #include <inttypes.h> #include <stdlib.h> +#include "fuzzutil.h" + extern "C" { int hunk_consumer(long a1, long a2, long b1, long b2, void *priv) @@ -20,21 +22,17 @@ int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { - if (!Size) { + auto maybe_inputs = SplitInputs(Data, Size); + if (!maybe_inputs) { return 0; } - // figure out a random point in [0, Size] to split our input. - size_t split = Data[0] / 255.0 * Size; - + auto inputs = std::move(maybe_inputs.value()); mmfile_t a, b; - // `a` input to diff is data[1:split] - a.ptr = (char *)Data + 1; - // which has len split-1 - a.size = split - 1; - // `b` starts at the next byte after `a` ends - b.ptr = a.ptr + a.size; - b.size = Size - split; + a.ptr = inputs.left.get(); + a.size = inputs.left_size; + b.ptr = inputs.right.get(); + b.size = inputs.right_size; xpparam_t xpp = { XDF_INDENT_HEURISTIC, /* flags */ };