view contrib/fuzz/fuzzutil.cc @ 40978:42f59d3f714d

delta: exclude base candidate much smaller than the target If a revision's full text is that much bigger than a base candidate full text, we no longer consider that candidate. This solves a pathological case we encountered on a very specify repository. It contains a long series of changesets with a very small manifest (one file) co-existing with others changesets using a very large manifest. Without this filtering, we ended up considering a large number of tiny full snapshots as a potential base. It resulted in very large delta (the size of the full text) and mercurial spending 99% of its time compressing these deltas. The timing of a commit moved from about 400s to about 10s (still slow, but not ridiculously slow).
author Boris Feld <boris.feld@octobus.net>
date Mon, 17 Dec 2018 10:42:19 +0100
parents a1c0873a9990
children
line wrap: on
line source

#include "fuzzutil.h"

#include <cstring>
#include <utility>

contrib::optional<two_inputs> SplitInputs(const uint8_t *Data, size_t Size)
{
	if (!Size) {
		return contrib::nullopt;
	}
	// figure out a random point in [0, Size] to split our input.
	size_t left_size = (Data[0] / 255.0) * (Size - 1);

	// Copy inputs to new allocations so if bdiff over-reads
	// AddressSanitizer can detect it.
	std::unique_ptr<char[]> left(new char[left_size]);
	std::memcpy(left.get(), Data + 1, left_size);
	// right starts at the next byte after left ends
	size_t right_size = Size - (left_size + 1);
	std::unique_ptr<char[]> right(new char[right_size]);
	std::memcpy(right.get(), Data + 1 + left_size, right_size);
	LOG(2) << "inputs are  " << left_size << " and " << right_size
	       << " bytes" << std::endl;
	two_inputs result = {std::move(right), right_size, std::move(left),
	                     left_size};
	return result;
}