view mercurial/pure/bdiff.py @ 43813:5a9e2ae9899b

fuzz: use a more standard approach to allow local builds of fuzzers This is taken from the (improved since we started fuzzing) guide on ideal integrations. Rather than have our own wonky targets for building outside the fuzzer universe, we have a driver program we carry along and use when we're not using LibFuzzer. This will let us jettison a fair amount of goo. contrib/fuzz/standalone_fuzz_target_runner.cc is https://github.com/google/oss-fuzz/ file projects/example/my-api-repo/standalone from git revision c4579d9358a73ea5dbcc99cb985de1f2bf76dcf7, reformatted with out clang-format settings and a no-check-code comment added. It allows running a single test input through a fuzzer, rather than performing ongoing fuzzing as libfuzzer would. contrib/fuzz/FuzzedDataProvider.h is https://github.com/llvm/llvm-project/ file /compiler-rt/include/fuzzer/FuzzedDataProvider.h from git revision a44ef027ebca1598892ea9b104d6189aeb3bc2f0, reformatted with our clang-format settings and a no-check-code comment added. We can discard this if we instead want to add an hghave check for a new enough llvm that includes FuzzedDataProvder.h in the fuzzer headers. Differential Revision: https://phab.mercurial-scm.org/D7564
author Augie Fackler <augie@google.com>
date Fri, 06 Dec 2019 15:19:47 -0500
parents 687b865b95ad
children 68aedad4c11c
line wrap: on
line source

# bdiff.py - Python implementation of bdiff.c
#
# Copyright 2009 Matt Mackall <mpm@selenic.com> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import difflib
import re
import struct


def splitnewlines(text):
    '''like str.splitlines, but only split on newlines.'''
    lines = [l + b'\n' for l in text.split(b'\n')]
    if lines:
        if lines[-1] == b'\n':
            lines.pop()
        else:
            lines[-1] = lines[-1][:-1]
    return lines


def _normalizeblocks(a, b, blocks):
    prev = None
    r = []
    for curr in blocks:
        if prev is None:
            prev = curr
            continue
        shift = 0

        a1, b1, l1 = prev
        a1end = a1 + l1
        b1end = b1 + l1

        a2, b2, l2 = curr
        a2end = a2 + l2
        b2end = b2 + l2
        if a1end == a2:
            while (
                a1end + shift < a2end and a[a1end + shift] == b[b1end + shift]
            ):
                shift += 1
        elif b1end == b2:
            while (
                b1end + shift < b2end and a[a1end + shift] == b[b1end + shift]
            ):
                shift += 1
        r.append((a1, b1, l1 + shift))
        prev = a2 + shift, b2 + shift, l2 - shift
    r.append(prev)
    return r


def bdiff(a, b):
    a = bytes(a).splitlines(True)
    b = bytes(b).splitlines(True)

    if not a:
        s = b"".join(b)
        return s and (struct.pack(b">lll", 0, 0, len(s)) + s)

    bin = []
    p = [0]
    for i in a:
        p.append(p[-1] + len(i))

    d = difflib.SequenceMatcher(None, a, b).get_matching_blocks()
    d = _normalizeblocks(a, b, d)
    la = 0
    lb = 0
    for am, bm, size in d:
        s = b"".join(b[lb:bm])
        if am > la or s:
            bin.append(struct.pack(b">lll", p[la], p[am], len(s)) + s)
        la = am + size
        lb = bm + size

    return b"".join(bin)


def blocks(a, b):
    an = splitnewlines(a)
    bn = splitnewlines(b)
    d = difflib.SequenceMatcher(None, an, bn).get_matching_blocks()
    d = _normalizeblocks(an, bn, d)
    return [(i, i + n, j, j + n) for (i, j, n) in d]


def fixws(text, allws):
    if allws:
        text = re.sub(b'[ \t\r]+', b'', text)
    else:
        text = re.sub(b'[ \t\r]+', b' ', text)
        text = text.replace(b' \n', b'\n')
    return text