changeset 38206:fa0ddd5e8fff

fuzz: extract some common utilities and use modern C++ idioms

Alex Gaynor suggested we should probably copy the left and right sides of diffs to new blocks so we can detect over-reads in the diffing code, and I agree. Once I got into that, I realized we should do things with C++17 idioms rather than keep using malloc() and free(). This change is the result. I tried to split it more than this and failed. Everything still compiles and works in the oss-fuzz container, so I think we can count on C++17 being available!

Differential Revision: https://phab.mercurial-scm.org/D3675
author Augie Fackler <augie@google.com>
date Sat, 28 Apr 2018 22:18:50 -0400
parents 24cc2969abae
children 36d55f90e2a3
files contrib/fuzz/Makefile contrib/fuzz/bdiff.cc contrib/fuzz/fuzzutil.cc contrib/fuzz/fuzzutil.h contrib/fuzz/xdiff.cc
diffstat 5 files changed, 87 insertions(+), 38 deletions(-) [+]
line wrap: on
line diff
--- a/contrib/fuzz/Makefile	Sat Apr 28 22:13:33 2018 -0400
+++ b/contrib/fuzz/Makefile	Sat Apr 28 22:18:50 2018 -0400
@@ -1,36 +1,42 @@
+fuzzutil.o: fuzzutil.cc fuzzutil.h
+	$$CXX $$CXXFLAGS -g -O1 -fsanitize=fuzzer-no-link,address \
+	  -std=c++17 \
+	  -I../../mercurial -c -o fuzzutil.o fuzzutil.cc
+
 bdiff.o: ../../mercurial/bdiff.c
-	clang -g -O1 -fsanitize=fuzzer-no-link,address -c -o bdiff.o \
+	$$CC $$CFLAGS -fsanitize=fuzzer-no-link,address -c -o bdiff.o \
 	  ../../mercurial/bdiff.c
 
-bdiff: bdiff.cc bdiff.o
-	clang -DHG_FUZZER_INCLUDE_MAIN=1 -g -O1 -fsanitize=fuzzer-no-link,address \
-	  -I../../mercurial bdiff.cc bdiff.o -o bdiff
+bdiff: bdiff.cc bdiff.o fuzzutil.o
+	$$CXX $$CXXFLAGS -DHG_FUZZER_INCLUDE_MAIN=1 -g -O1 -fsanitize=fuzzer-no-link,address \
+	  -std=c++17 \
+	  -I../../mercurial bdiff.cc bdiff.o fuzzutil.o -o bdiff
 
 bdiff-oss-fuzz.o: ../../mercurial/bdiff.c
 	$$CC $$CFLAGS -c -o bdiff-oss-fuzz.o ../../mercurial/bdiff.c
 
-bdiff_fuzzer: bdiff.cc bdiff-oss-fuzz.o
-	$$CXX $$CXXFLAGS -std=c++11 -I../../mercurial bdiff.cc \
-	  bdiff-oss-fuzz.o -lFuzzingEngine -o $$OUT/bdiff_fuzzer
+bdiff_fuzzer: bdiff.cc bdiff-oss-fuzz.o fuzzutil.o
+	$$CXX $$CXXFLAGS -std=c++17 -I../../mercurial bdiff.cc \
+	  bdiff-oss-fuzz.o fuzzutil.o -lFuzzingEngine -o $$OUT/bdiff_fuzzer
 
 x%.o: ../../mercurial/thirdparty/xdiff/x%.c ../../mercurial/thirdparty/xdiff/*.h
-	clang -g -O1 -fsanitize=fuzzer-no-link,address -c \
+	$$CC -g -O1 -fsanitize=fuzzer-no-link,address -c \
 	  -o $@ \
 	  $<
 
-xdiff: xdiff.cc xdiffi.o xprepare.o  xutils.o
-	clang -DHG_FUZZER_INCLUDE_MAIN=1 -g -O1 -fsanitize=fuzzer-no-link,address \
+xdiff: xdiff.cc xdiffi.o xprepare.o xutils.o fuzzutil.o
+	$$CXX $$CXXFLAGS -std=c++17 -DHG_FUZZER_INCLUDE_MAIN=1 -g -O1 -fsanitize=fuzzer-no-link,address \
 	  -I../../mercurial xdiff.cc \
-	  xdiffi.o xprepare.o xutils.o -o xdiff
+	  xdiffi.o xprepare.o xutils.o fuzzutil.o -o xdiff
 
 fuzz-x%.o: ../../mercurial/thirdparty/xdiff/x%.c ../../mercurial/thirdparty/xdiff/*.h
 	$$CC $$CFLAGS -c \
 	  -o $@ \
 	  $<
 
-xdiff_fuzzer: xdiff.cc fuzz-xdiffi.o fuzz-xprepare.o  fuzz-xutils.o
-	$$CXX $$CXXFLAGS -std=c++11 -I../../mercurial xdiff.cc \
-	  fuzz-xdiffi.o fuzz-xprepare.o fuzz-xutils.o \
+xdiff_fuzzer: xdiff.cc fuzz-xdiffi.o fuzz-xprepare.o fuzz-xutils.o fuzzutil.o
+	$$CXX $$CXXFLAGS -std=c++17 -I../../mercurial xdiff.cc \
+	  fuzz-xdiffi.o fuzz-xprepare.o fuzz-xutils.o fuzzutil.o \
 	  -lFuzzingEngine -o $$OUT/xdiff_fuzzer
 
 all: bdiff xdiff
--- a/contrib/fuzz/bdiff.cc	Sat Apr 28 22:13:33 2018 -0400
+++ b/contrib/fuzz/bdiff.cc	Sat Apr 28 22:18:50 2018 -0400
@@ -6,30 +6,25 @@
  * This software may be used and distributed according to the terms of
  * the GNU General Public License, incorporated herein by reference.
  */
+#include <memory>
 #include <stdlib.h>
 
+#include "fuzzutil.h"
+
 extern "C" {
 #include "bdiff.h"
 
 int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size)
 {
-	if (!Size) {
+	auto maybe_inputs = SplitInputs(Data, Size);
+	if (!maybe_inputs) {
 		return 0;
 	}
-	// figure out a random point in [0, Size] to split our input.
-	size_t split = Data[0] / 255.0 * Size;
-
-	// left input to diff is data[1:split]
-	const uint8_t *left = Data + 1;
-	// which has len split-1
-	size_t left_size = split - 1;
-	// right starts at the next byte after left ends
-	const uint8_t *right = left + left_size;
-	size_t right_size = Size - split;
+	auto inputs = std::move(maybe_inputs.value());
 
 	struct bdiff_line *a, *b;
-	int an = bdiff_splitlines((const char *)left, split - 1, &a);
-	int bn = bdiff_splitlines((const char *)right, right_size, &b);
+	int an = bdiff_splitlines(inputs.left.get(), inputs.left_size, &a);
+	int bn = bdiff_splitlines(inputs.right.get(), inputs.right_size, &b);
 	struct bdiff_hunk l;
 	bdiff_diff(a, an, b, bn, &l);
 	free(a);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/contrib/fuzz/fuzzutil.cc	Sat Apr 28 22:18:50 2018 -0400
@@ -0,0 +1,38 @@
+#include "fuzzutil.h"
+
+#include <cstring>
+#include <utility>
+
+/*
+ * Split a fuzzer input into a left and a right buffer.
+ *
+ * The first byte of Data picks the split point: it is scaled from
+ * [0, 255] onto [0, Size - 1], the number of payload bytes that follow
+ * it. Both halves are copied into separate heap allocations so that
+ * AddressSanitizer can detect an over-read of either one.
+ *
+ * Returns std::nullopt when Size is zero.
+ */
+std::optional<two_inputs> SplitInputs(const uint8_t *Data, size_t Size)
+{
+	if (!Size) {
+		return std::nullopt;
+	}
+	// Data[0] selects how many of the remaining Size - 1 bytes go to
+	// the left input, so left_size is in [0, Size - 1].
+	size_t left_size = (Data[0] / 255.0) * (Size - 1);
+
+	// Copy inputs to new allocations so if bdiff over-reads
+	// AddressSanitizer can detect it.
+	std::unique_ptr<char[]> left(new char[left_size]);
+	std::memcpy(left.get(), Data + 1, left_size);
+	// right starts at the next byte after left ends
+	size_t right_size = Size - (left_size + 1);
+	std::unique_ptr<char[]> right(new char[right_size]);
+	std::memcpy(right.get(), Data + 1 + left_size, right_size);
+	LOG(2) << "inputs are  " << left_size << " and " << right_size
+	       << " bytes" << std::endl;
+	two_inputs result = {std::move(right), right_size, std::move(left),
+	                     left_size};
+	return result;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/contrib/fuzz/fuzzutil.h	Sat Apr 28 22:18:50 2018 -0400
@@ -0,0 +1,39 @@
+#ifndef CONTRIB_FUZZ_FUZZUTIL_H
+#define CONTRIB_FUZZ_FUZZUTIL_H
+#include <iostream>
+#include <memory>
+#include <optional>
+#include <stddef.h>
+#include <stdint.h>
+
+/* set DEBUG to 1 for a few debugging prints, or 2 for a lot */
+#define DEBUG 0
+/*
+ * Stream-style debug logging: LOG(n) << ... prints to std::cout only when
+ * n <= DEBUG. The empty-then/else form makes the macro a complete
+ * if/else, so a caller's own `else` can never bind to the hidden `if`.
+ */
+#define LOG(level)                                                             \
+	if ((level) > DEBUG) {                                                 \
+	} else                                                                 \
+		std::cout
+
+/*
+ * The two buffers produced by SplitInputs(). Each buffer is owned by this
+ * struct and is exactly *_size bytes long; nothing adds a NUL terminator.
+ */
+struct two_inputs {
+	std::unique_ptr<char[]> right;
+	size_t right_size;
+	std::unique_ptr<char[]> left;
+	size_t left_size;
+};
+
+/*
+ * Split a fuzzer input into two inputs. The first byte of Data selects
+ * the split point and is not part of either input. Returns std::nullopt
+ * when Size is zero.
+ */
+std::optional<two_inputs> SplitInputs(const uint8_t *Data, size_t Size);
+
+#endif /* CONTRIB_FUZZ_FUZZUTIL_H */
--- a/contrib/fuzz/xdiff.cc	Sat Apr 28 22:13:33 2018 -0400
+++ b/contrib/fuzz/xdiff.cc	Sat Apr 28 22:18:50 2018 -0400
@@ -10,6 +10,8 @@
 #include <inttypes.h>
 #include <stdlib.h>
 
+#include "fuzzutil.h"
+
 extern "C" {
 
 int hunk_consumer(long a1, long a2, long b1, long b2, void *priv)
@@ -20,21 +22,17 @@
 
 int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size)
 {
-	if (!Size) {
+	auto maybe_inputs = SplitInputs(Data, Size);
+	if (!maybe_inputs) {
 		return 0;
 	}
-	// figure out a random point in [0, Size] to split our input.
-	size_t split = Data[0] / 255.0 * Size;
-
+	auto inputs = std::move(maybe_inputs.value());
 	mmfile_t a, b;
 
-	// `a` input to diff is data[1:split]
-	a.ptr = (char *)Data + 1;
-	// which has len split-1
-	a.size = split - 1;
-	// `b` starts at the next byte after `a` ends
-	b.ptr = a.ptr + a.size;
-	b.size = Size - split;
+	a.ptr = inputs.left.get();
+	a.size = inputs.left_size;
+	b.ptr = inputs.right.get();
+	b.size = inputs.right_size;
 	xpparam_t xpp = {
 	    XDF_INDENT_HEURISTIC, /* flags */
 	};