Mercurial > hg
annotate mercurial/pure/bdiff.py @ 7703:9044d3567f6d
pure Python implementation of bdiff.c
author | Martin Geisler <mg@daimi.au.dk> |
---|---|
date | Sat, 24 Jan 2009 00:12:20 +0100 |
parents | |
children | e9b48afd0e78 |
rev | line source |
---|---|
7703
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
1 # bdiff.py - Python implementation of bdiff.c |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
2 # |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
4 # |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
5 # This software may be used and distributed according to the terms |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
6 # of the GNU General Public License, incorporated herein by reference. |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
7 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
8 import struct, difflib |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
9 # mdiff import moved to bottom due to import cycle |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
10 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
11 def _normalizeblocks(a, b, blocks): |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
12 prev = None |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
13 for curr in blocks: |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
14 if prev is None: |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
15 prev = curr |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
16 continue |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
17 shift = 0 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
18 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
19 a1, b1, l1 = prev |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
20 a1end = a1 + l1 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
21 b1end = b1 + l1 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
22 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
23 a2, b2, l2 = curr |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
24 a2end = a2 + l2 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
25 b2end = b2 + l2 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
26 if a1end == a2: |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
27 while a1end+shift < a2end and a[a1end+shift] == b[b1end+shift]: |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
28 shift += 1 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
29 elif b1end == b2: |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
30 while b1end+shift < b2end and a[a1end+shift] == b[b1end+shift]: |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
31 shift += 1 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
32 yield a1, b1, l1+shift |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
33 prev = a2+shift, b2+shift, l2-shift |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
34 yield prev |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
35 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
36 def bdiff(a, b): |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
37 a = str(a).splitlines(True) |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
38 b = str(b).splitlines(True) |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
39 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
40 if not a: |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
41 s = "".join(b) |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
42 return s and (struct.pack(">lll", 0, 0, len(s)) + s) |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
43 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
44 bin = [] |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
45 p = [0] |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
46 for i in a: p.append(p[-1] + len(i)) |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
47 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
48 d = difflib.SequenceMatcher(None, a, b).get_matching_blocks() |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
49 d = _normalizeblocks(a, b, d) |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
50 la = 0 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
51 lb = 0 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
52 for am, bm, size in d: |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
53 s = "".join(b[lb:bm]) |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
54 if am > la or s: |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
55 bin.append(struct.pack(">lll", p[la], p[am], len(s)) + s) |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
56 la = am + size |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
57 lb = bm + size |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
58 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
59 return "".join(bin) |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
60 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
61 def blocks(a, b): |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
62 an = mdiff.splitnewlines(a) |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
63 bn = mdiff.splitnewlines(b) |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
64 d = difflib.SequenceMatcher(None, an, bn).get_matching_blocks() |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
65 d = _normalizeblocks(an, bn, d) |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
66 return [(i, i + n, j, j + n) for (i, j, n) in d] |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
67 |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
68 # this breaks an import cycle |
9044d3567f6d
pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff
changeset
|
69 import mdiff |