mercurial/pure/bdiff.py
author Matt Harbison <matt_harbison@yahoo.com>
Sat, 31 Jan 2015 12:39:44 -0500
changeset 24051 7956d17431bc
parent 15530 eeac5e179243
child 27335 c4e3ff497f89
permissions -rw-r--r--
windows: seek to the end of posixfile when opening in append mode The position is implementation defined when opening in append mode, and it seems like Linux sets it to EOF while Windows keeps it at zero. This has caused problems in the past when a file is opened and tell() is immediately called, such as 48c232873a54 and 6bf93440a717. Since the only caller of osutil.posixfile is this windows module, this seems like a better place to fix the issue than in osutil.c and pure.osutil.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
     1
# bdiff.py - Python implementation of bdiff.c
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
     2
#
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
     3
# Copyright 2009 Matt Mackall <mpm@selenic.com> and others
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
     4
#
8225
46293a0c7e9f updated license to be explicit about GPL version 2
Martin Geisler <mg@lazybytes.net>
parents: 7944
diff changeset
     5
# This software may be used and distributed according to the terms of the
10263
25e572394f5c Update license to GPLv2+
Matt Mackall <mpm@selenic.com>
parents: 8225
diff changeset
     6
# GNU General Public License version 2 or any later version.
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
     7
15530
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 14066
diff changeset
     8
import struct, difflib, re
7944
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
     9
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
    10
def splitnewlines(text):
    '''like str.splitlines, but only split on newlines.

    Every returned line keeps its terminating '\\n'.  A final piece that
    is not newline-terminated is returned as-is, and an empty trailing
    piece (text ending in a newline, or empty text) is dropped.
    '''
    pieces = text.split('\n')
    # split() always yields at least one element; the last one is whatever
    # follows the final newline (possibly the empty string).
    tail = pieces.pop()
    lines = [piece + '\n' for piece in pieces]
    if tail:
        lines.append(tail)
    return lines
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    19
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    20
def _normalizeblocks(a, b, blocks):
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    21
    prev = None
14066
14fac6c0536a pure bdiff: don't use a generator
Dan Villiom Podlaski Christiansen <danchr@gmail.com>
parents: 10282
diff changeset
    22
    r = []
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    23
    for curr in blocks:
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    24
        if prev is None:
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    25
            prev = curr
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    26
            continue
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    27
        shift = 0
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    28
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    29
        a1, b1, l1 = prev
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    30
        a1end = a1 + l1
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    31
        b1end = b1 + l1
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    32
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    33
        a2, b2, l2 = curr
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    34
        a2end = a2 + l2
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    35
        b2end = b2 + l2
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    36
        if a1end == a2:
10282
08a0f04b56bd many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents: 10263
diff changeset
    37
            while (a1end + shift < a2end and
08a0f04b56bd many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents: 10263
diff changeset
    38
                   a[a1end + shift] == b[b1end + shift]):
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    39
                shift += 1
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    40
        elif b1end == b2:
10282
08a0f04b56bd many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents: 10263
diff changeset
    41
            while (b1end + shift < b2end and
08a0f04b56bd many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents: 10263
diff changeset
    42
                   a[a1end + shift] == b[b1end + shift]):
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    43
                shift += 1
14066
14fac6c0536a pure bdiff: don't use a generator
Dan Villiom Podlaski Christiansen <danchr@gmail.com>
parents: 10282
diff changeset
    44
        r.append((a1, b1, l1 + shift))
10282
08a0f04b56bd many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents: 10263
diff changeset
    45
        prev = a2 + shift, b2 + shift, l2 - shift
14066
14fac6c0536a pure bdiff: don't use a generator
Dan Villiom Podlaski Christiansen <danchr@gmail.com>
parents: 10282
diff changeset
    46
    r.append(prev)
14fac6c0536a pure bdiff: don't use a generator
Dan Villiom Podlaski Christiansen <danchr@gmail.com>
parents: 10282
diff changeset
    47
    return r
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    48
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    49
def bdiff(a, b):
    '''Return a binary delta that turns a into b.

    Both arguments are converted with str() and compared line by line.
    The result is the concatenation of hunks, each made of a header
    packed with struct format ">lll" (start offset in a, end offset in
    a, length of the replacement data) followed by the replacement data
    taken from b.  An empty string is returned when no hunk is needed.
    '''
    # NOTE: splitlines(True) also breaks lines on '\r', unlike
    # splitnewlines(); this is kept so the emitted hunks do not change.
    a = str(a).splitlines(True)
    b = str(b).splitlines(True)

    if not a:
        # Nothing to match against: a single hunk inserts all of b at
        # offset 0 (or no hunk at all when b is empty too).
        s = "".join(b)
        if not s:
            return s
        return struct.pack(">lll", 0, 0, len(s)) + s

    # offsets[i] is the position in the original text where line i of a
    # starts; offsets[len(a)] is the total length.
    offsets = [0]
    for line in a:
        offsets.append(offsets[-1] + len(line))

    d = difflib.SequenceMatcher(None, a, b).get_matching_blocks()
    d = _normalizeblocks(a, b, d)

    hunks = []
    la = 0
    lb = 0
    for am, bm, size in d:
        # Lines la..am of a are replaced by lines lb..bm of b.
        s = "".join(b[lb:bm])
        if am > la or s:
            hunks.append(struct.pack(">lll", offsets[la], offsets[am], len(s)) + s)
        la = am + size
        lb = bm + size

    return "".join(hunks)
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    73
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    74
def blocks(a, b):
    '''Return the matching line ranges between texts a and b.

    Both texts are split with splitnewlines().  Each entry of the
    returned list is a tuple (a1, a2, b1, b2) meaning that lines a1:a2
    of a match lines b1:b2 of b.
    '''
    alines = splitnewlines(a)
    blines = splitnewlines(b)
    matcher = difflib.SequenceMatcher(None, alines, blines)
    matches = _normalizeblocks(alines, blines, matcher.get_matching_blocks())
    return [(astart, astart + length, bstart, bstart + length)
            for (astart, bstart, length) in matches]
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    80
15530
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 14066
diff changeset
    81
def fixws(text, allws):
    '''Normalize the whitespace in text and return the result.

    If allws is true, every run of spaces, tabs and carriage returns is
    removed.  Otherwise each such run is collapsed to a single space,
    and a space directly before a newline is dropped.
    '''
    if allws:
        return re.sub('[ \t\r]+', '', text)
    collapsed = re.sub('[ \t\r]+', ' ', text)
    # After collapsing, whitespace before a newline is exactly one space.
    return collapsed.replace(' \n', '\n')