mercurial/pure/bdiff.py
author Martin Geisler <mg@lazybytes.net>
Sun, 27 Sep 2009 10:12:02 +0200
changeset 9485 7d6ac5d7917c
parent 8225 46293a0c7e9f
child 10263 25e572394f5c
permissions -rw-r--r--
test-gendoc: add tests for all languages This ensures that we catch errors in the reST syntax early and for all languages. The only change needed in gendoc.py was to correct the computation of section underlines for Asian languages.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
     1
# bdiff.py - Python implementation of bdiff.c
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
     2
#
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
     3
# Copyright 2009 Matt Mackall <mpm@selenic.com> and others
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
     4
#
8225
46293a0c7e9f updated license to be explicit about GPL version 2
Martin Geisler <mg@lazybytes.net>
parents: 7944
diff changeset
     5
# This software may be used and distributed according to the terms of the
46293a0c7e9f updated license to be explicit about GPL version 2
Martin Geisler <mg@lazybytes.net>
parents: 7944
diff changeset
     6
# GNU General Public License version 2, incorporated herein by reference.
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
     7
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
     8
import struct, difflib
7944
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
     9
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
    10
def splitnewlines(text):
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
    11
    '''like str.splitlines, but only split on newlines.'''
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
    12
    lines = [l + '\n' for l in text.split('\n')]
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
    13
    if lines:
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
    14
        if lines[-1] == '\n':
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
    15
            lines.pop()
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
    16
        else:
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
    17
            lines[-1] = lines[-1][:-1]
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
    18
    return lines
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    19
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    20
def _normalizeblocks(a, b, blocks):
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    21
    prev = None
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    22
    for curr in blocks:
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    23
        if prev is None:
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    24
            prev = curr
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    25
            continue
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    26
        shift = 0
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    27
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    28
        a1, b1, l1 = prev
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    29
        a1end = a1 + l1
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    30
        b1end = b1 + l1
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    31
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    32
        a2, b2, l2 = curr
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    33
        a2end = a2 + l2
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    34
        b2end = b2 + l2
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    35
        if a1end == a2:
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    36
            while a1end+shift < a2end and a[a1end+shift] == b[b1end+shift]:
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    37
                shift += 1
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    38
        elif b1end == b2:
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    39
            while b1end+shift < b2end and a[a1end+shift] == b[b1end+shift]:
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    40
                shift += 1
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    41
        yield a1, b1, l1+shift
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    42
        prev = a2+shift, b2+shift, l2-shift
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    43
    yield prev
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    44
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    45
def bdiff(a, b):
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    46
    a = str(a).splitlines(True)
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    47
    b = str(b).splitlines(True)
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    48
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    49
    if not a:
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    50
        s = "".join(b)
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    51
        return s and (struct.pack(">lll", 0, 0, len(s)) + s)
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    52
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    53
    bin = []
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    54
    p = [0]
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    55
    for i in a: p.append(p[-1] + len(i))
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    56
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    57
    d = difflib.SequenceMatcher(None, a, b).get_matching_blocks()
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    58
    d = _normalizeblocks(a, b, d)
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    59
    la = 0
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    60
    lb = 0
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    61
    for am, bm, size in d:
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    62
        s = "".join(b[lb:bm])
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    63
        if am > la or s:
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    64
            bin.append(struct.pack(">lll", p[la], p[am], len(s)) + s)
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    65
        la = am + size
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    66
        lb = bm + size
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    67
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    68
    return "".join(bin)
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    69
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    70
def blocks(a, b):
7944
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
    71
    an = splitnewlines(a)
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
    72
    bn = splitnewlines(b)
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    73
    d = difflib.SequenceMatcher(None, an, bn).get_matching_blocks()
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    74
    d = _normalizeblocks(an, bn, d)
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    75
    return [(i, i + n, j, j + n) for (i, j, n) in d]
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    76