mercurial/pure/bdiff.py
author Steve Borho <steve@borho.org>
Wed, 28 Jan 2009 20:06:59 -0600
changeset 7752 998fc8f62539
parent 7703 9044d3567f6d
child 7944 e9b48afd0e78
permissions -rw-r--r--
close sockets on httprepository deletion (issue1487) With this destructor in place, I no longer see tracebacks on the server after using win32 clients for pull, push, etc.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
     1
# bdiff.py - Python implementation of bdiff.c
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
     2
#
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
     3
# Copyright 2009 Matt Mackall <mpm@selenic.com> and others
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
     4
#
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
     5
# This software may be used and distributed according to the terms
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
     6
# of the GNU General Public License, incorporated herein by reference.
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
     7
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
     8
import struct, difflib
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
     9
# mdiff import moved to bottom due to import cycle
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    10
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    11
def _normalizeblocks(a, b, blocks):
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    12
    prev = None
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    13
    for curr in blocks:
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    14
        if prev is None:
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    15
            prev = curr
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    16
            continue
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    17
        shift = 0
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    18
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    19
        a1, b1, l1 = prev
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    20
        a1end = a1 + l1
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    21
        b1end = b1 + l1
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    22
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    23
        a2, b2, l2 = curr
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    24
        a2end = a2 + l2
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    25
        b2end = b2 + l2
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    26
        if a1end == a2:
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    27
            while a1end+shift < a2end and a[a1end+shift] == b[b1end+shift]:
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    28
                shift += 1
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    29
        elif b1end == b2:
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    30
            while b1end+shift < b2end and a[a1end+shift] == b[b1end+shift]:
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    31
                shift += 1
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    32
        yield a1, b1, l1+shift
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    33
        prev = a2+shift, b2+shift, l2-shift
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    34
    yield prev
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    35
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    36
def bdiff(a, b):
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    37
    a = str(a).splitlines(True)
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    38
    b = str(b).splitlines(True)
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    39
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    40
    if not a:
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    41
        s = "".join(b)
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    42
        return s and (struct.pack(">lll", 0, 0, len(s)) + s)
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    43
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    44
    bin = []
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    45
    p = [0]
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    46
    for i in a: p.append(p[-1] + len(i))
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    47
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    48
    d = difflib.SequenceMatcher(None, a, b).get_matching_blocks()
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    49
    d = _normalizeblocks(a, b, d)
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    50
    la = 0
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    51
    lb = 0
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    52
    for am, bm, size in d:
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    53
        s = "".join(b[lb:bm])
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    54
        if am > la or s:
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    55
            bin.append(struct.pack(">lll", p[la], p[am], len(s)) + s)
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    56
        la = am + size
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    57
        lb = bm + size
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    58
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    59
    return "".join(bin)
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    60
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    61
def blocks(a, b):
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    62
    an = mdiff.splitnewlines(a)
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    63
    bn = mdiff.splitnewlines(b)
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    64
    d = difflib.SequenceMatcher(None, an, bn).get_matching_blocks()
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    65
    d = _normalizeblocks(an, bn, d)
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    66
    return [(i, i + n, j, j + n) for (i, j, n) in d]
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    67
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    68
# this breaks an import cycle
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    69
import mdiff