fuzz: extract some common utilities and use modern C++ idioms
Alex Gaynor suggested we should probably copy the left and right sides
of diffs to new blocks so we can detect over-reads in the diffing
code, and I agree. Once I got into that, I realized we should do
things with C++17 idioms rather than keep using malloc() and
free(). This change is the result. I tried to split it more than this
and failed.
Everything still compiles and works in the oss-fuzz container, so I
think we can count on C++17 being available!
Differential Revision: https://phab.mercurial-scm.org/D3675
--- a/contrib/fuzz/Makefile Sat Apr 28 22:13:33 2018 -0400
+++ b/contrib/fuzz/Makefile Sat Apr 28 22:18:50 2018 -0400
@@ -1,36 +1,42 @@
+fuzzutil.o: fuzzutil.cc fuzzutil.h
+ $$CXX $$CXXFLAGS -g -O1 -fsanitize=fuzzer-no-link,address \
+ -std=c++17 \
+ -I../../mercurial -c -o fuzzutil.o fuzzutil.cc
+
bdiff.o: ../../mercurial/bdiff.c
- clang -g -O1 -fsanitize=fuzzer-no-link,address -c -o bdiff.o \
+ $$CC $$CFLAGS -fsanitize=fuzzer-no-link,address -c -o bdiff.o \
../../mercurial/bdiff.c
-bdiff: bdiff.cc bdiff.o
- clang -DHG_FUZZER_INCLUDE_MAIN=1 -g -O1 -fsanitize=fuzzer-no-link,address \
- -I../../mercurial bdiff.cc bdiff.o -o bdiff
+bdiff: bdiff.cc bdiff.o fuzzutil.o
+ $$CXX $$CXXFLAGS -DHG_FUZZER_INCLUDE_MAIN=1 -g -O1 -fsanitize=fuzzer-no-link,address \
+ -std=c++17 \
+ -I../../mercurial bdiff.cc bdiff.o fuzzutil.o -o bdiff
bdiff-oss-fuzz.o: ../../mercurial/bdiff.c
$$CC $$CFLAGS -c -o bdiff-oss-fuzz.o ../../mercurial/bdiff.c
-bdiff_fuzzer: bdiff.cc bdiff-oss-fuzz.o
- $$CXX $$CXXFLAGS -std=c++11 -I../../mercurial bdiff.cc \
- bdiff-oss-fuzz.o -lFuzzingEngine -o $$OUT/bdiff_fuzzer
+bdiff_fuzzer: bdiff.cc bdiff-oss-fuzz.o fuzzutil.o
+ $$CXX $$CXXFLAGS -std=c++17 -I../../mercurial bdiff.cc \
+ bdiff-oss-fuzz.o fuzzutil.o -lFuzzingEngine -o $$OUT/bdiff_fuzzer
x%.o: ../../mercurial/thirdparty/xdiff/x%.c ../../mercurial/thirdparty/xdiff/*.h
- clang -g -O1 -fsanitize=fuzzer-no-link,address -c \
+ $$CC -g -O1 -fsanitize=fuzzer-no-link,address -c \
-o $@ \
$<
-xdiff: xdiff.cc xdiffi.o xprepare.o xutils.o
- clang -DHG_FUZZER_INCLUDE_MAIN=1 -g -O1 -fsanitize=fuzzer-no-link,address \
+xdiff: xdiff.cc xdiffi.o xprepare.o xutils.o fuzzutil.o
+ $$CXX $$CXXFLAGS -std=c++17 -DHG_FUZZER_INCLUDE_MAIN=1 -g -O1 -fsanitize=fuzzer-no-link,address \
-I../../mercurial xdiff.cc \
- xdiffi.o xprepare.o xutils.o -o xdiff
+ xdiffi.o xprepare.o xutils.o fuzzutil.o -o xdiff
fuzz-x%.o: ../../mercurial/thirdparty/xdiff/x%.c ../../mercurial/thirdparty/xdiff/*.h
$$CC $$CFLAGS -c \
-o $@ \
$<
-xdiff_fuzzer: xdiff.cc fuzz-xdiffi.o fuzz-xprepare.o fuzz-xutils.o
- $$CXX $$CXXFLAGS -std=c++11 -I../../mercurial xdiff.cc \
- fuzz-xdiffi.o fuzz-xprepare.o fuzz-xutils.o \
+xdiff_fuzzer: xdiff.cc fuzz-xdiffi.o fuzz-xprepare.o fuzz-xutils.o fuzzutil.o
+ $$CXX $$CXXFLAGS -std=c++17 -I../../mercurial xdiff.cc \
+ fuzz-xdiffi.o fuzz-xprepare.o fuzz-xutils.o fuzzutil.o \
-lFuzzingEngine -o $$OUT/xdiff_fuzzer
all: bdiff xdiff
--- a/contrib/fuzz/bdiff.cc Sat Apr 28 22:13:33 2018 -0400
+++ b/contrib/fuzz/bdiff.cc Sat Apr 28 22:18:50 2018 -0400
@@ -6,30 +6,25 @@
* This software may be used and distributed according to the terms of
* the GNU General Public License, incorporated herein by reference.
*/
+#include <memory>
#include <stdlib.h>
+#include "fuzzutil.h"
+
extern "C" {
#include "bdiff.h"
int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size)
{
- if (!Size) {
+ auto maybe_inputs = SplitInputs(Data, Size);
+ if (!maybe_inputs) {
return 0;
}
- // figure out a random point in [0, Size] to split our input.
- size_t split = Data[0] / 255.0 * Size;
-
- // left input to diff is data[1:split]
- const uint8_t *left = Data + 1;
- // which has len split-1
- size_t left_size = split - 1;
- // right starts at the next byte after left ends
- const uint8_t *right = left + left_size;
- size_t right_size = Size - split;
+ auto inputs = std::move(maybe_inputs.value());
struct bdiff_line *a, *b;
- int an = bdiff_splitlines((const char *)left, split - 1, &a);
- int bn = bdiff_splitlines((const char *)right, right_size, &b);
+ int an = bdiff_splitlines(inputs.left.get(), inputs.left_size, &a);
+ int bn = bdiff_splitlines(inputs.right.get(), inputs.right_size, &b);
struct bdiff_hunk l;
bdiff_diff(a, an, b, bn, &l);
free(a);
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/contrib/fuzz/fuzzutil.cc Sat Apr 28 22:18:50 2018 -0400
@@ -0,0 +1,26 @@
+#include "fuzzutil.h"
+
+#include <utility>
+
+std::optional<two_inputs> SplitInputs(const uint8_t *Data, size_t Size)
+{
+ if (!Size) { // no selector byte to read, so there is nothing to split
+ return std::nullopt;
+ }
+ // Data[0] picks the split: scale it to a left length in [0, Size - 1].
+ size_t left_size = (Data[0] / 255.0) * (Size - 1);
+
+ // Copy inputs to new allocations so if the diffing code over-reads
+ // AddressSanitizer can detect it.
+ std::unique_ptr<char[]> left(new char[left_size]);
+ memcpy(left.get(), Data + 1, left_size);
+ // right is every byte after left; the selector byte Data[0] is in neither
+ size_t right_size = Size - (left_size + 1);
+ std::unique_ptr<char[]> right(new char[right_size]);
+ memcpy(right.get(), Data + 1 + left_size, right_size);
+ LOG(2) << "inputs are " << left_size << " and " << right_size
+ << " bytes" << std::endl;
+ two_inputs result = {std::move(right), right_size, std::move(left),
+ left_size}; // initializer order follows two_inputs: right, then left
+ return result;
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/contrib/fuzz/fuzzutil.h Sat Apr 28 22:18:50 2018 -0400
@@ -0,0 +1,24 @@
+#ifndef CONTRIB_FUZZ_FUZZUTIL_H
+#define CONTRIB_FUZZ_FUZZUTIL_H
+#include <iostream>
+#include <memory>
+#include <optional>
+#include <stdint.h>
+
+/* Set DEBUG to 1 for a few debugging prints, or 2 for a lot: LOG(level) streams to std::cout only when level <= DEBUG. It expands to a bare `if`, so do not follow a LOG() statement with `else`. */
+#define DEBUG 0
+#define LOG(level) \
+ if (level <= DEBUG) \
+ std::cout
+
+struct two_inputs { // the two buffers produced by SplitInputs()
+ std::unique_ptr<char[]> right; // owned copy of the bytes that follow the left part
+ size_t right_size; // number of bytes in right
+ std::unique_ptr<char[]> left; // owned copy of the bytes right after the first input byte
+ size_t left_size; // number of bytes in left
+};
+
+/* Split an input into two freshly allocated copies, using its first byte to choose the split point; returns std::nullopt when Size is 0. */
+std::optional<two_inputs> SplitInputs(const uint8_t *Data, size_t Size);
+
+#endif /* CONTRIB_FUZZ_FUZZUTIL_H */
--- a/contrib/fuzz/xdiff.cc Sat Apr 28 22:13:33 2018 -0400
+++ b/contrib/fuzz/xdiff.cc Sat Apr 28 22:18:50 2018 -0400
@@ -10,6 +10,8 @@
#include <inttypes.h>
#include <stdlib.h>
+#include "fuzzutil.h"
+
extern "C" {
int hunk_consumer(long a1, long a2, long b1, long b2, void *priv)
@@ -20,21 +22,17 @@
int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size)
{
- if (!Size) {
+ auto maybe_inputs = SplitInputs(Data, Size);
+ if (!maybe_inputs) {
return 0;
}
- // figure out a random point in [0, Size] to split our input.
- size_t split = Data[0] / 255.0 * Size;
-
+ auto inputs = std::move(maybe_inputs.value());
mmfile_t a, b;
- // `a` input to diff is data[1:split]
- a.ptr = (char *)Data + 1;
- // which has len split-1
- a.size = split - 1;
- // `b` starts at the next byte after `a` ends
- b.ptr = a.ptr + a.size;
- b.size = Size - split;
+ a.ptr = inputs.left.get();
+ a.size = inputs.left_size;
+ b.ptr = inputs.right.get();
+ b.size = inputs.right_size;
xpparam_t xpp = {
XDF_INDENT_HEURISTIC, /* flags */
};