author | Martin Geisler <mg@lazybytes.net> |
Sun, 27 Sep 2009 10:12:02 +0200 | |
changeset 9485 | 7d6ac5d7917c |
parent 8225 | 46293a0c7e9f |
child 10263 | 25e572394f5c |
permissions | -rw-r--r-- |
7703
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
1 |
# bdiff.py - Python implementation of bdiff.c |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
2 |
# |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
3 |
# Copyright 2009 Matt Mackall <mpm@selenic.com> and others |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
4 |
# |
8225
46293a0c7e9f
updated license to be explicit about GPL version 2
Martin Geisler <mg@lazybytes.net>
parents:
7944
diff
changeset
|
5 |
# This software may be used and distributed according to the terms of the |
46293a0c7e9f
updated license to be explicit about GPL version 2
Martin Geisler <mg@lazybytes.net>
parents:
7944
diff
changeset
|
6 |
# GNU General Public License version 2, incorporated herein by reference. |
7703
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
7 |
|
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
8 |
import struct, difflib |
7944
e9b48afd0e78
pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents:
7703
diff
changeset
|
9 |
|
e9b48afd0e78
pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents:
7703
diff
changeset
|
10 |
def splitnewlines(text): |
e9b48afd0e78
pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents:
7703
diff
changeset
|
11 |
'''like str.splitlines, but only split on newlines.''' |
e9b48afd0e78
pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents:
7703
diff
changeset
|
12 |
lines = [l + '\n' for l in text.split('\n')] |
e9b48afd0e78
pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents:
7703
diff
changeset
|
13 |
if lines: |
e9b48afd0e78
pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents:
7703
diff
changeset
|
14 |
if lines[-1] == '\n': |
e9b48afd0e78
pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents:
7703
diff
changeset
|
15 |
lines.pop() |
e9b48afd0e78
pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents:
7703
diff
changeset
|
16 |
else: |
e9b48afd0e78
pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents:
7703
diff
changeset
|
17 |
lines[-1] = lines[-1][:-1] |
e9b48afd0e78
pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents:
7703
diff
changeset
|
18 |
return lines |
7703
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
19 |
|
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
20 |
def _normalizeblocks(a, b, blocks): |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
21 |
prev = None |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
22 |
for curr in blocks: |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
23 |
if prev is None: |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
24 |
prev = curr |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
25 |
continue |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
26 |
shift = 0 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
27 |
|
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
28 |
a1, b1, l1 = prev |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
29 |
a1end = a1 + l1 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
30 |
b1end = b1 + l1 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
31 |
|
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
32 |
a2, b2, l2 = curr |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
33 |
a2end = a2 + l2 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
34 |
b2end = b2 + l2 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
35 |
if a1end == a2: |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
36 |
while a1end+shift < a2end and a[a1end+shift] == b[b1end+shift]: |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
37 |
shift += 1 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
38 |
elif b1end == b2: |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
39 |
while b1end+shift < b2end and a[a1end+shift] == b[b1end+shift]: |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
40 |
shift += 1 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
41 |
yield a1, b1, l1+shift |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
42 |
prev = a2+shift, b2+shift, l2-shift |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
43 |
yield prev |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
44 |
|
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
45 |
def bdiff(a, b): |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
46 |
a = str(a).splitlines(True) |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
47 |
b = str(b).splitlines(True) |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
48 |
|
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
49 |
if not a: |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
50 |
s = "".join(b) |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
51 |
return s and (struct.pack(">lll", 0, 0, len(s)) + s) |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
52 |
|
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
53 |
bin = [] |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
54 |
p = [0] |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
55 |
for i in a: p.append(p[-1] + len(i)) |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
56 |
|
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
57 |
d = difflib.SequenceMatcher(None, a, b).get_matching_blocks() |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
58 |
d = _normalizeblocks(a, b, d) |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
59 |
la = 0 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
60 |
lb = 0 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
61 |
for am, bm, size in d: |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
62 |
s = "".join(b[lb:bm]) |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
63 |
if am > la or s: |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
64 |
bin.append(struct.pack(">lll", p[la], p[am], len(s)) + s) |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
65 |
la = am + size |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
66 |
lb = bm + size |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
67 |
|
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
68 |
return "".join(bin) |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
69 |
|
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
70 |
def blocks(a, b): |
7944
e9b48afd0e78
pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents:
7703
diff
changeset
|
71 |
an = splitnewlines(a) |
e9b48afd0e78
pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents:
7703
diff
changeset
|
72 |
bn = splitnewlines(b) |
7703
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
73 |
d = difflib.SequenceMatcher(None, an, bn).get_matching_blocks() |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
74 |
d = _normalizeblocks(an, bn, d) |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
75 |
return [(i, i + n, j, j + n) for (i, j, n) in d] |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
76 |