Mercurial > hg
annotate mercurial/pure/bdiff.py @ 17525:e327fd6edb14
spelling: transferred
author | timeless@mozdev.org |
---|---|
date | Fri, 17 Aug 2012 13:58:19 -0700 |
parents | eeac5e179243 |
children | c4e3ff497f89 |
rev | line source |
---|---|
7703
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
1 # bdiff.py - Python implementation of bdiff.c |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
2 # |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
4 # |
8225
46293a0c7e9f
updated license to be explicit about GPL version 2
Martin Geisler <mg@lazybytes.net>
parents:
7944
diff
changeset
|
5 # This software may be used and distributed according to the terms of the |
10263 | 6 # GNU General Public License version 2 or any later version. |
7703
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
7 |
15530
eeac5e179243
mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents:
14066
diff
changeset
|
8 import struct, difflib, re |
7944
e9b48afd0e78
pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents:
7703
diff
changeset
|
9 |
e9b48afd0e78
pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents:
7703
diff
changeset
|
def splitnewlines(text):
    '''Split text into lines on '\\n' only, keeping the line endings.

    Unlike str.splitlines, no other character (such as '\\r') is
    treated as a line boundary. Every returned line ends with '\\n'
    except possibly the last one, which is present only when text does
    not end with a newline. An empty text yields an empty list.
    '''
    pieces = text.split('\n')
    # The final fragment is whatever follows the last newline; it is
    # empty when text is empty or already ends with a newline.
    tail = pieces.pop()
    result = [piece + '\n' for piece in pieces]
    if tail:
        result.append(tail)
    return result
7703
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
19 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
20 def _normalizeblocks(a, b, blocks): |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
21 prev = None |
14066
14fac6c0536a
pure bdiff: don't use a generator
Dan Villiom Podlaski Christiansen <danchr@gmail.com>
parents:
10282
diff
changeset
|
22 r = [] |
7703
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
23 for curr in blocks: |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
24 if prev is None: |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
25 prev = curr |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
26 continue |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
27 shift = 0 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
28 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
29 a1, b1, l1 = prev |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
30 a1end = a1 + l1 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
31 b1end = b1 + l1 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
32 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
33 a2, b2, l2 = curr |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
34 a2end = a2 + l2 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
35 b2end = b2 + l2 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
36 if a1end == a2: |
10282
08a0f04b56bd
many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents:
10263
diff
changeset
|
37 while (a1end + shift < a2end and |
08a0f04b56bd
many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents:
10263
diff
changeset
|
38 a[a1end + shift] == b[b1end + shift]): |
7703
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
39 shift += 1 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
40 elif b1end == b2: |
10282
08a0f04b56bd
many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents:
10263
diff
changeset
|
41 while (b1end + shift < b2end and |
08a0f04b56bd
many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents:
10263
diff
changeset
|
42 a[a1end + shift] == b[b1end + shift]): |
7703
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
43 shift += 1 |
14066
14fac6c0536a
pure bdiff: don't use a generator
Dan Villiom Podlaski Christiansen <danchr@gmail.com>
parents:
10282
diff
changeset
|
44 r.append((a1, b1, l1 + shift)) |
10282
08a0f04b56bd
many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents:
10263
diff
changeset
|
45 prev = a2 + shift, b2 + shift, l2 - shift |
14066
14fac6c0536a
pure bdiff: don't use a generator
Dan Villiom Podlaski Christiansen <danchr@gmail.com>
parents:
10282
diff
changeset
|
46 r.append(prev) |
14fac6c0536a
pure bdiff: don't use a generator
Dan Villiom Podlaski Christiansen <danchr@gmail.com>
parents:
10282
diff
changeset
|
47 return r |
7703
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
48 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
def bdiff(a, b):
    '''Return a binary delta that turns text a into text b.

    Both inputs are converted with str() and split into lines (line
    endings kept). The delta is a concatenation of hunks; each hunk is
    three big-endian 32-bit integers (start offset in a, end offset in
    a, length of the replacement data) followed by the replacement
    data itself. An empty string is returned when there is nothing to
    emit.
    '''
    alines = str(a).splitlines(True)
    blines = str(b).splitlines(True)

    if not alines:
        # Nothing to match against: the whole of b is one insertion.
        added = "".join(blines)
        if not added:
            return added
        return struct.pack(">lll", 0, 0, len(added)) + added

    # offsets[i] is the offset in a at which line i starts; the final
    # entry is the total length of a.
    offsets = [0]
    for line in alines:
        offsets.append(offsets[-1] + len(line))

    matcher = difflib.SequenceMatcher(None, alines, blines)
    matches = _normalizeblocks(alines, blines, matcher.get_matching_blocks())

    chunks = []
    aprev = 0
    bprev = 0
    for astart, bstart, length in matches:
        # Lines of b lying between the previous match and this one.
        replacement = "".join(blines[bprev:bstart])
        if astart > aprev or replacement:
            header = struct.pack(">lll", offsets[aprev], offsets[astart],
                                 len(replacement))
            chunks.append(header + replacement)
        aprev = astart + length
        bprev = bstart + length

    return "".join(chunks)
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
73 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
def blocks(a, b):
    '''Return the matching line ranges of texts a and b.

    Both texts are split with splitnewlines, matched with
    difflib.SequenceMatcher and passed through _normalizeblocks. Each
    result is a tuple (a1, a2, b1, b2) built from a matching block
    (i, j, n) as (i, i + n, j, j + n).
    '''
    alines = splitnewlines(a)
    blines = splitnewlines(b)
    matcher = difflib.SequenceMatcher(None, alines, blines)
    matches = _normalizeblocks(alines, blines, matcher.get_matching_blocks())
    ranges = []
    for astart, bstart, length in matches:
        ranges.append((astart, astart + length, bstart, bstart + length))
    return ranges
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
80 |
15530
eeac5e179243
mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents:
14066
diff
changeset
|
def fixws(text, allws):
    '''Normalize runs of spaces, tabs and carriage returns in text.

    When allws is true every such run is removed. Otherwise each run
    is collapsed to a single space, and a space directly before a
    newline is dropped.
    '''
    if allws:
        return re.sub('[ \t\r]+', '', text)
    collapsed = re.sub('[ \t\r]+', ' ', text)
    return collapsed.replace(' \n', '\n')