mercurial/pure/bdiff.py
author Mads Kiilerich <madski@unity3d.com>
Tue, 15 Nov 2016 21:56:49 +0100
changeset 30432 3633403888ae
parent 30042 d24e03da24b5
child 31640 6d30699729dd
permissions -rw-r--r--
bdiff: give slight preference to appending lines [This change could be folded into the previous changeset to minimize the repo churn ...] The general preference to matches in the middle of bdiff ranges helps getting balanced recursion and efficient computation. But, as previous changes have shown, it might also give diffs that seems "obviously wrong". To mitigate that: If the best match on the A side starts at the beginning of the bdiff range, don't aim for the middle-most B side match but for the earliest. This will make the matches balanced (by both sides being "early") even though the bisection will be less balanced. Still, this case only apply if the *best* and middle-most match was fully unbalanced on the A side. Each recursion will thus even in this worst case reduce the problem significantly and we are not re-introducing the problem that was fixed in f1ca249696ed. The bundle size for 4.0 (hg bundle --base null -r 4.0 x.hg) happens to go from 22806817 to 22807275 bytes - a 0.002% increase. This make the recent test-bdiff.py changes give a more pretty output ... but they no longer show that the recursion is around middle matches (because it in these cases isn't).
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
     1
# bdiff.py - Python implementation of bdiff.c
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
     2
#
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
     3
# Copyright 2009 Matt Mackall <mpm@selenic.com> and others
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
     4
#
8225
46293a0c7e9f updated license to be explicit about GPL version 2
Martin Geisler <mg@lazybytes.net>
parents: 7944
diff changeset
     5
# This software may be used and distributed according to the terms of the
10263
25e572394f5c Update license to GPLv2+
Matt Mackall <mpm@selenic.com>
parents: 8225
diff changeset
     6
# GNU General Public License version 2 or any later version.
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
     7
27335
c4e3ff497f89 bdiff: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 15530
diff changeset
     8
from __future__ import absolute_import
c4e3ff497f89 bdiff: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 15530
diff changeset
     9
28389
9ab45fbe045e bdiff: (pure) support array.array arrays (issue5130)
timeless <timeless@mozdev.org>
parents: 27335
diff changeset
    10
import array
27335
c4e3ff497f89 bdiff: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 15530
diff changeset
    11
import difflib
c4e3ff497f89 bdiff: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 15530
diff changeset
    12
import re
c4e3ff497f89 bdiff: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 15530
diff changeset
    13
import struct
7944
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
    14
29833
a8933d992a71 bdiff: implement cffi version of blocks
Maciej Fijalkowski <fijall@gmail.com>
parents: 28389
diff changeset
    15
from . import policy
a8933d992a71 bdiff: implement cffi version of blocks
Maciej Fijalkowski <fijall@gmail.com>
parents: 28389
diff changeset
    16
policynocffi = policy.policynocffi
a8933d992a71 bdiff: implement cffi version of blocks
Maciej Fijalkowski <fijall@gmail.com>
parents: 28389
diff changeset
    17
modulepolicy = policy.policy
a8933d992a71 bdiff: implement cffi version of blocks
Maciej Fijalkowski <fijall@gmail.com>
parents: 28389
diff changeset
    18
7944
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
    19
def splitnewlines(text):
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
    20
    '''like str.splitlines, but only split on newlines.'''
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
    21
    lines = [l + '\n' for l in text.split('\n')]
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
    22
    if lines:
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
    23
        if lines[-1] == '\n':
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
    24
            lines.pop()
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
    25
        else:
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
    26
            lines[-1] = lines[-1][:-1]
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
    27
    return lines
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    28
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    29
def _normalizeblocks(a, b, blocks):
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    30
    prev = None
14066
14fac6c0536a pure bdiff: don't use a generator
Dan Villiom Podlaski Christiansen <danchr@gmail.com>
parents: 10282
diff changeset
    31
    r = []
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    32
    for curr in blocks:
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    33
        if prev is None:
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    34
            prev = curr
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    35
            continue
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    36
        shift = 0
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    37
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    38
        a1, b1, l1 = prev
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    39
        a1end = a1 + l1
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    40
        b1end = b1 + l1
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    41
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    42
        a2, b2, l2 = curr
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    43
        a2end = a2 + l2
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    44
        b2end = b2 + l2
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    45
        if a1end == a2:
10282
08a0f04b56bd many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents: 10263
diff changeset
    46
            while (a1end + shift < a2end and
08a0f04b56bd many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents: 10263
diff changeset
    47
                   a[a1end + shift] == b[b1end + shift]):
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    48
                shift += 1
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    49
        elif b1end == b2:
10282
08a0f04b56bd many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents: 10263
diff changeset
    50
            while (b1end + shift < b2end and
08a0f04b56bd many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents: 10263
diff changeset
    51
                   a[a1end + shift] == b[b1end + shift]):
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    52
                shift += 1
14066
14fac6c0536a pure bdiff: don't use a generator
Dan Villiom Podlaski Christiansen <danchr@gmail.com>
parents: 10282
diff changeset
    53
        r.append((a1, b1, l1 + shift))
10282
08a0f04b56bd many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents: 10263
diff changeset
    54
        prev = a2 + shift, b2 + shift, l2 - shift
14066
14fac6c0536a pure bdiff: don't use a generator
Dan Villiom Podlaski Christiansen <danchr@gmail.com>
parents: 10282
diff changeset
    55
    r.append(prev)
14fac6c0536a pure bdiff: don't use a generator
Dan Villiom Podlaski Christiansen <danchr@gmail.com>
parents: 10282
diff changeset
    56
    return r
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    57
28389
9ab45fbe045e bdiff: (pure) support array.array arrays (issue5130)
timeless <timeless@mozdev.org>
parents: 27335
diff changeset
    58
def _tostring(c):
9ab45fbe045e bdiff: (pure) support array.array arrays (issue5130)
timeless <timeless@mozdev.org>
parents: 27335
diff changeset
    59
    if type(c) is array.array:
9ab45fbe045e bdiff: (pure) support array.array arrays (issue5130)
timeless <timeless@mozdev.org>
parents: 27335
diff changeset
    60
        # this copy overhead isn't ideal
9ab45fbe045e bdiff: (pure) support array.array arrays (issue5130)
timeless <timeless@mozdev.org>
parents: 27335
diff changeset
    61
        return c.tostring()
9ab45fbe045e bdiff: (pure) support array.array arrays (issue5130)
timeless <timeless@mozdev.org>
parents: 27335
diff changeset
    62
    return str(c)
9ab45fbe045e bdiff: (pure) support array.array arrays (issue5130)
timeless <timeless@mozdev.org>
parents: 27335
diff changeset
    63
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    64
def bdiff(a, b):
28389
9ab45fbe045e bdiff: (pure) support array.array arrays (issue5130)
timeless <timeless@mozdev.org>
parents: 27335
diff changeset
    65
    a = _tostring(a).splitlines(True)
9ab45fbe045e bdiff: (pure) support array.array arrays (issue5130)
timeless <timeless@mozdev.org>
parents: 27335
diff changeset
    66
    b = _tostring(b).splitlines(True)
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    67
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    68
    if not a:
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    69
        s = "".join(b)
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    70
        return s and (struct.pack(">lll", 0, 0, len(s)) + s)
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    71
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    72
    bin = []
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    73
    p = [0]
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    74
    for i in a: p.append(p[-1] + len(i))
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    75
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    76
    d = difflib.SequenceMatcher(None, a, b).get_matching_blocks()
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    77
    d = _normalizeblocks(a, b, d)
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    78
    la = 0
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    79
    lb = 0
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    80
    for am, bm, size in d:
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    81
        s = "".join(b[lb:bm])
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    82
        if am > la or s:
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    83
            bin.append(struct.pack(">lll", p[la], p[am], len(s)) + s)
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    84
        la = am + size
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    85
        lb = bm + size
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    86
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    87
    return "".join(bin)
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    88
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    89
def blocks(a, b):
7944
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
    90
    an = splitnewlines(a)
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
    91
    bn = splitnewlines(b)
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    92
    d = difflib.SequenceMatcher(None, an, bn).get_matching_blocks()
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    93
    d = _normalizeblocks(an, bn, d)
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    94
    return [(i, i + n, j, j + n) for (i, j, n) in d]
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    95
15530
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 14066
diff changeset
    96
def fixws(text, allws):
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 14066
diff changeset
    97
    if allws:
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 14066
diff changeset
    98
        text = re.sub('[ \t\r]+', '', text)
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 14066
diff changeset
    99
    else:
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 14066
diff changeset
   100
        text = re.sub('[ \t\r]+', ' ', text)
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 14066
diff changeset
   101
        text = text.replace(' \n', '\n')
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 14066
diff changeset
   102
    return text
29833
a8933d992a71 bdiff: implement cffi version of blocks
Maciej Fijalkowski <fijall@gmail.com>
parents: 28389
diff changeset
   103
a8933d992a71 bdiff: implement cffi version of blocks
Maciej Fijalkowski <fijall@gmail.com>
parents: 28389
diff changeset
   104
if modulepolicy not in policynocffi:
a8933d992a71 bdiff: implement cffi version of blocks
Maciej Fijalkowski <fijall@gmail.com>
parents: 28389
diff changeset
   105
    try:
a8933d992a71 bdiff: implement cffi version of blocks
Maciej Fijalkowski <fijall@gmail.com>
parents: 28389
diff changeset
   106
        from _bdiff_cffi import ffi, lib
a8933d992a71 bdiff: implement cffi version of blocks
Maciej Fijalkowski <fijall@gmail.com>
parents: 28389
diff changeset
   107
    except ImportError:
a8933d992a71 bdiff: implement cffi version of blocks
Maciej Fijalkowski <fijall@gmail.com>
parents: 28389
diff changeset
   108
        if modulepolicy == 'cffi': # strict cffi import
a8933d992a71 bdiff: implement cffi version of blocks
Maciej Fijalkowski <fijall@gmail.com>
parents: 28389
diff changeset
   109
            raise
a8933d992a71 bdiff: implement cffi version of blocks
Maciej Fijalkowski <fijall@gmail.com>
parents: 28389
diff changeset
   110
    else:
a8933d992a71 bdiff: implement cffi version of blocks
Maciej Fijalkowski <fijall@gmail.com>
parents: 28389
diff changeset
   111
        def blocks(sa, sb):
a8933d992a71 bdiff: implement cffi version of blocks
Maciej Fijalkowski <fijall@gmail.com>
parents: 28389
diff changeset
   112
            a = ffi.new("struct bdiff_line**")
a8933d992a71 bdiff: implement cffi version of blocks
Maciej Fijalkowski <fijall@gmail.com>
parents: 28389
diff changeset
   113
            b = ffi.new("struct bdiff_line**")
30042
d24e03da24b5 lazymanifest: write a more efficient, pypy friendly version of lazymanifest
Maciej Fijalkowski <fijall@gmail.com>
parents: 29834
diff changeset
   114
            ac = ffi.new("char[]", str(sa))
d24e03da24b5 lazymanifest: write a more efficient, pypy friendly version of lazymanifest
Maciej Fijalkowski <fijall@gmail.com>
parents: 29834
diff changeset
   115
            bc = ffi.new("char[]", str(sb))
29834
1ea77b75d266 bdiff: implement cffi version of bdiff
Maciej Fijalkowski <fijall@gmail.com>
parents: 29833
diff changeset
   116
            l = ffi.new("struct bdiff_hunk*")
29833
a8933d992a71 bdiff: implement cffi version of blocks
Maciej Fijalkowski <fijall@gmail.com>
parents: 28389
diff changeset
   117
            try:
a8933d992a71 bdiff: implement cffi version of blocks
Maciej Fijalkowski <fijall@gmail.com>
parents: 28389
diff changeset
   118
                an = lib.bdiff_splitlines(ac, len(sa), a)
a8933d992a71 bdiff: implement cffi version of blocks
Maciej Fijalkowski <fijall@gmail.com>
parents: 28389
diff changeset
   119
                bn = lib.bdiff_splitlines(bc, len(sb), b)
a8933d992a71 bdiff: implement cffi version of blocks
Maciej Fijalkowski <fijall@gmail.com>
parents: 28389
diff changeset
   120
                if not a[0] or not b[0]:
a8933d992a71 bdiff: implement cffi version of blocks
Maciej Fijalkowski <fijall@gmail.com>
parents: 28389
diff changeset
   121
                    raise MemoryError
a8933d992a71 bdiff: implement cffi version of blocks
Maciej Fijalkowski <fijall@gmail.com>
parents: 28389
diff changeset
   122
                count = lib.bdiff_diff(a[0], an, b[0], bn, l)
a8933d992a71 bdiff: implement cffi version of blocks
Maciej Fijalkowski <fijall@gmail.com>
parents: 28389
diff changeset
   123
                if count < 0:
a8933d992a71 bdiff: implement cffi version of blocks
Maciej Fijalkowski <fijall@gmail.com>
parents: 28389
diff changeset
   124
                    raise MemoryError
a8933d992a71 bdiff: implement cffi version of blocks
Maciej Fijalkowski <fijall@gmail.com>
parents: 28389
diff changeset
   125
                rl = [None] * count
a8933d992a71 bdiff: implement cffi version of blocks
Maciej Fijalkowski <fijall@gmail.com>
parents: 28389
diff changeset
   126
                h = l.next
a8933d992a71 bdiff: implement cffi version of blocks
Maciej Fijalkowski <fijall@gmail.com>
parents: 28389
diff changeset
   127
                i = 0
a8933d992a71 bdiff: implement cffi version of blocks
Maciej Fijalkowski <fijall@gmail.com>
parents: 28389
diff changeset
   128
                while h:
a8933d992a71 bdiff: implement cffi version of blocks
Maciej Fijalkowski <fijall@gmail.com>
parents: 28389
diff changeset
   129
                    rl[i] = (h.a1, h.a2, h.b1, h.b2)
a8933d992a71 bdiff: implement cffi version of blocks
Maciej Fijalkowski <fijall@gmail.com>
parents: 28389
diff changeset
   130
                    h = h.next
a8933d992a71 bdiff: implement cffi version of blocks
Maciej Fijalkowski <fijall@gmail.com>
parents: 28389
diff changeset
   131
                    i += 1
a8933d992a71 bdiff: implement cffi version of blocks
Maciej Fijalkowski <fijall@gmail.com>
parents: 28389
diff changeset
   132
            finally:
a8933d992a71 bdiff: implement cffi version of blocks
Maciej Fijalkowski <fijall@gmail.com>
parents: 28389
diff changeset
   133
                lib.free(a[0])
a8933d992a71 bdiff: implement cffi version of blocks
Maciej Fijalkowski <fijall@gmail.com>
parents: 28389
diff changeset
   134
                lib.free(b[0])
a8933d992a71 bdiff: implement cffi version of blocks
Maciej Fijalkowski <fijall@gmail.com>
parents: 28389
diff changeset
   135
                lib.bdiff_freehunks(l.next)
a8933d992a71 bdiff: implement cffi version of blocks
Maciej Fijalkowski <fijall@gmail.com>
parents: 28389
diff changeset
   136
            return rl
29834
1ea77b75d266 bdiff: implement cffi version of bdiff
Maciej Fijalkowski <fijall@gmail.com>
parents: 29833
diff changeset
   137
1ea77b75d266 bdiff: implement cffi version of bdiff
Maciej Fijalkowski <fijall@gmail.com>
parents: 29833
diff changeset
   138
        def bdiff(sa, sb):
1ea77b75d266 bdiff: implement cffi version of bdiff
Maciej Fijalkowski <fijall@gmail.com>
parents: 29833
diff changeset
   139
            a = ffi.new("struct bdiff_line**")
1ea77b75d266 bdiff: implement cffi version of bdiff
Maciej Fijalkowski <fijall@gmail.com>
parents: 29833
diff changeset
   140
            b = ffi.new("struct bdiff_line**")
30042
d24e03da24b5 lazymanifest: write a more efficient, pypy friendly version of lazymanifest
Maciej Fijalkowski <fijall@gmail.com>
parents: 29834
diff changeset
   141
            ac = ffi.new("char[]", str(sa))
d24e03da24b5 lazymanifest: write a more efficient, pypy friendly version of lazymanifest
Maciej Fijalkowski <fijall@gmail.com>
parents: 29834
diff changeset
   142
            bc = ffi.new("char[]", str(sb))
29834
1ea77b75d266 bdiff: implement cffi version of bdiff
Maciej Fijalkowski <fijall@gmail.com>
parents: 29833
diff changeset
   143
            l = ffi.new("struct bdiff_hunk*")
1ea77b75d266 bdiff: implement cffi version of bdiff
Maciej Fijalkowski <fijall@gmail.com>
parents: 29833
diff changeset
   144
            try:
1ea77b75d266 bdiff: implement cffi version of bdiff
Maciej Fijalkowski <fijall@gmail.com>
parents: 29833
diff changeset
   145
                an = lib.bdiff_splitlines(ac, len(sa), a)
1ea77b75d266 bdiff: implement cffi version of bdiff
Maciej Fijalkowski <fijall@gmail.com>
parents: 29833
diff changeset
   146
                bn = lib.bdiff_splitlines(bc, len(sb), b)
1ea77b75d266 bdiff: implement cffi version of bdiff
Maciej Fijalkowski <fijall@gmail.com>
parents: 29833
diff changeset
   147
                if not a[0] or not b[0]:
1ea77b75d266 bdiff: implement cffi version of bdiff
Maciej Fijalkowski <fijall@gmail.com>
parents: 29833
diff changeset
   148
                    raise MemoryError
1ea77b75d266 bdiff: implement cffi version of bdiff
Maciej Fijalkowski <fijall@gmail.com>
parents: 29833
diff changeset
   149
                count = lib.bdiff_diff(a[0], an, b[0], bn, l)
1ea77b75d266 bdiff: implement cffi version of bdiff
Maciej Fijalkowski <fijall@gmail.com>
parents: 29833
diff changeset
   150
                if count < 0:
1ea77b75d266 bdiff: implement cffi version of bdiff
Maciej Fijalkowski <fijall@gmail.com>
parents: 29833
diff changeset
   151
                    raise MemoryError
1ea77b75d266 bdiff: implement cffi version of bdiff
Maciej Fijalkowski <fijall@gmail.com>
parents: 29833
diff changeset
   152
                rl = []
1ea77b75d266 bdiff: implement cffi version of bdiff
Maciej Fijalkowski <fijall@gmail.com>
parents: 29833
diff changeset
   153
                h = l.next
1ea77b75d266 bdiff: implement cffi version of bdiff
Maciej Fijalkowski <fijall@gmail.com>
parents: 29833
diff changeset
   154
                la = lb = 0
1ea77b75d266 bdiff: implement cffi version of bdiff
Maciej Fijalkowski <fijall@gmail.com>
parents: 29833
diff changeset
   155
                while h:
1ea77b75d266 bdiff: implement cffi version of bdiff
Maciej Fijalkowski <fijall@gmail.com>
parents: 29833
diff changeset
   156
                    if h.a1 != la or h.b1 != lb:
1ea77b75d266 bdiff: implement cffi version of bdiff
Maciej Fijalkowski <fijall@gmail.com>
parents: 29833
diff changeset
   157
                        lgt = (b[0] + h.b1).l - (b[0] + lb).l
1ea77b75d266 bdiff: implement cffi version of bdiff
Maciej Fijalkowski <fijall@gmail.com>
parents: 29833
diff changeset
   158
                        rl.append(struct.pack(">lll", (a[0] + la).l - a[0].l,
1ea77b75d266 bdiff: implement cffi version of bdiff
Maciej Fijalkowski <fijall@gmail.com>
parents: 29833
diff changeset
   159
                            (a[0] + h.a1).l - a[0].l, lgt))
1ea77b75d266 bdiff: implement cffi version of bdiff
Maciej Fijalkowski <fijall@gmail.com>
parents: 29833
diff changeset
   160
                        rl.append(str(ffi.buffer((b[0] + lb).l, lgt)))
1ea77b75d266 bdiff: implement cffi version of bdiff
Maciej Fijalkowski <fijall@gmail.com>
parents: 29833
diff changeset
   161
                    la = h.a2
1ea77b75d266 bdiff: implement cffi version of bdiff
Maciej Fijalkowski <fijall@gmail.com>
parents: 29833
diff changeset
   162
                    lb = h.b2
1ea77b75d266 bdiff: implement cffi version of bdiff
Maciej Fijalkowski <fijall@gmail.com>
parents: 29833
diff changeset
   163
                    h = h.next
1ea77b75d266 bdiff: implement cffi version of bdiff
Maciej Fijalkowski <fijall@gmail.com>
parents: 29833
diff changeset
   164
1ea77b75d266 bdiff: implement cffi version of bdiff
Maciej Fijalkowski <fijall@gmail.com>
parents: 29833
diff changeset
   165
            finally:
1ea77b75d266 bdiff: implement cffi version of bdiff
Maciej Fijalkowski <fijall@gmail.com>
parents: 29833
diff changeset
   166
                lib.free(a[0])
1ea77b75d266 bdiff: implement cffi version of bdiff
Maciej Fijalkowski <fijall@gmail.com>
parents: 29833
diff changeset
   167
                lib.free(b[0])
1ea77b75d266 bdiff: implement cffi version of bdiff
Maciej Fijalkowski <fijall@gmail.com>
parents: 29833
diff changeset
   168
                lib.bdiff_freehunks(l.next)
1ea77b75d266 bdiff: implement cffi version of bdiff
Maciej Fijalkowski <fijall@gmail.com>
parents: 29833
diff changeset
   169
            return "".join(rl)