view mercurial/pure/bdiff.py @ 32064:f82cdd5ee57e stable

tests: work around flaky test failure on FreeBSD and relatives test-http-bad-server has a flaky output case that reproduces fairly often (20% or more of runs on my Mac) on BSD-family OSes. So that tests can be green for the 4.2 release, let's more or less give up on this output, and plan to pursue it in the future.
author Augie Fackler <augie@google.com>
date Sat, 22 Apr 2017 12:24:47 -0400
parents f2b334e6c7e0
children 3b88a7fa97d8
line wrap: on
line source

# bdiff.py - Python implementation of bdiff.c
#
# Copyright 2009 Matt Mackall <mpm@selenic.com> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import difflib
import re
import struct

from . import policy
policynocffi = policy.policynocffi
modulepolicy = policy.policy

def splitnewlines(text):
    '''like str.splitlines, but only split on newlines.'''
    lines = [l + '\n' for l in text.split('\n')]
    if lines:
        if lines[-1] == '\n':
            lines.pop()
        else:
            lines[-1] = lines[-1][:-1]
    return lines

def _normalizeblocks(a, b, blocks):
    prev = None
    r = []
    for curr in blocks:
        if prev is None:
            prev = curr
            continue
        shift = 0

        a1, b1, l1 = prev
        a1end = a1 + l1
        b1end = b1 + l1

        a2, b2, l2 = curr
        a2end = a2 + l2
        b2end = b2 + l2
        if a1end == a2:
            while (a1end + shift < a2end and
                   a[a1end + shift] == b[b1end + shift]):
                shift += 1
        elif b1end == b2:
            while (b1end + shift < b2end and
                   a[a1end + shift] == b[b1end + shift]):
                shift += 1
        r.append((a1, b1, l1 + shift))
        prev = a2 + shift, b2 + shift, l2 - shift
    r.append(prev)
    return r

def bdiff(a, b):
    a = bytes(a).splitlines(True)
    b = bytes(b).splitlines(True)

    if not a:
        s = "".join(b)
        return s and (struct.pack(">lll", 0, 0, len(s)) + s)

    bin = []
    p = [0]
    for i in a: p.append(p[-1] + len(i))

    d = difflib.SequenceMatcher(None, a, b).get_matching_blocks()
    d = _normalizeblocks(a, b, d)
    la = 0
    lb = 0
    for am, bm, size in d:
        s = "".join(b[lb:bm])
        if am > la or s:
            bin.append(struct.pack(">lll", p[la], p[am], len(s)) + s)
        la = am + size
        lb = bm + size

    return "".join(bin)

def blocks(a, b):
    an = splitnewlines(a)
    bn = splitnewlines(b)
    d = difflib.SequenceMatcher(None, an, bn).get_matching_blocks()
    d = _normalizeblocks(an, bn, d)
    return [(i, i + n, j, j + n) for (i, j, n) in d]

def fixws(text, allws):
    if allws:
        text = re.sub('[ \t\r]+', '', text)
    else:
        text = re.sub('[ \t\r]+', ' ', text)
        text = text.replace(' \n', '\n')
    return text

if modulepolicy not in policynocffi:
    try:
        from _bdiff_cffi import ffi, lib
    except ImportError:
        if modulepolicy == 'cffi': # strict cffi import
            raise
    else:
        def blocks(sa, sb):
            a = ffi.new("struct bdiff_line**")
            b = ffi.new("struct bdiff_line**")
            ac = ffi.new("char[]", str(sa))
            bc = ffi.new("char[]", str(sb))
            l = ffi.new("struct bdiff_hunk*")
            try:
                an = lib.bdiff_splitlines(ac, len(sa), a)
                bn = lib.bdiff_splitlines(bc, len(sb), b)
                if not a[0] or not b[0]:
                    raise MemoryError
                count = lib.bdiff_diff(a[0], an, b[0], bn, l)
                if count < 0:
                    raise MemoryError
                rl = [None] * count
                h = l.next
                i = 0
                while h:
                    rl[i] = (h.a1, h.a2, h.b1, h.b2)
                    h = h.next
                    i += 1
            finally:
                lib.free(a[0])
                lib.free(b[0])
                lib.bdiff_freehunks(l.next)
            return rl

        def bdiff(sa, sb):
            a = ffi.new("struct bdiff_line**")
            b = ffi.new("struct bdiff_line**")
            ac = ffi.new("char[]", str(sa))
            bc = ffi.new("char[]", str(sb))
            l = ffi.new("struct bdiff_hunk*")
            try:
                an = lib.bdiff_splitlines(ac, len(sa), a)
                bn = lib.bdiff_splitlines(bc, len(sb), b)
                if not a[0] or not b[0]:
                    raise MemoryError
                count = lib.bdiff_diff(a[0], an, b[0], bn, l)
                if count < 0:
                    raise MemoryError
                rl = []
                h = l.next
                la = lb = 0
                while h:
                    if h.a1 != la or h.b1 != lb:
                        lgt = (b[0] + h.b1).l - (b[0] + lb).l
                        rl.append(struct.pack(">lll", (a[0] + la).l - a[0].l,
                            (a[0] + h.a1).l - a[0].l, lgt))
                        rl.append(str(ffi.buffer((b[0] + lb).l, lgt)))
                    la = h.a2
                    lb = h.b2
                    h = h.next

            finally:
                lib.free(a[0])
                lib.free(b[0])
                lib.bdiff_freehunks(l.next)
            return "".join(rl)