Mercurial > hg
annotate mercurial/mdiff.py @ 184:697f05bfe976
Improved binary diff from Christopher Li
This is more intelligent/efficient by combining neighboring inserts,
replaces and deletes. Passes test of converting kernel repo, but
doesn't appear to substantially affect compression or performance.
author | mpm@selenic.com |
---|---|
date | Fri, 27 May 2005 19:38:34 -0800 |
parents | e6c621a825f2 |
children | 75840796e8e2 afe895fcc0d0 |
rev | line source |
---|---|
0
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
1 #!/usr/bin/python |
127 | 2 import difflib, struct, mmap |
3 from mercurial.mpatch import * | |
0
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
4 |
64 | 5 def unidiff(a, ad, b, bd, fn): |
35 | 6 if not a and not b: return "" |
0
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
7 a = a.splitlines(1) |
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
8 b = b.splitlines(1) |
64 | 9 l = list(difflib.unified_diff(a, b, "a/" + fn, "b/" + fn, ad, bd)) |
170 | 10 |
11 for ln in xrange(len(l)): | |
12 if l[ln][-1] != '\n': | |
13 l[ln] += "\n\ No newline at end of file\n" | |
14 | |
0
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
15 return "".join(l) |
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
16 |
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
17 def textdiff(a, b): |
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
18 return diff(a.splitlines(1), b.splitlines(1)) |
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
19 |
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
20 def sortdiff(a, b): |
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
21 la = lb = 0 |
184 | 22 lena = len(a) |
23 lenb = len(b) | |
0
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
24 while 1: |
184 | 25 am, bm, = la, lb |
26 while lb < lenb and la < len and a[la] == b[lb] : | |
0
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
27 la += 1 |
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
28 lb += 1 |
184 | 29 if la>am: yield (am, bm, la-am) |
30 while lb < lenb and b[lb] < a[la]: lb += 1 | |
31 if lb>=lenb: break | |
32 while la < lena and b[lb] > a[la]: la += 1 | |
33 if la>=lena: break | |
34 yield (lena, lenb, 0) | |
0
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
35 |
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
36 def diff(a, b, sorted=0): |
184 | 37 if not a: |
38 s = "".join(b) | |
39 return s and (struct.pack(">lll", 0, 0, len(s)) + s) | |
40 | |
0
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
41 bin = [] |
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
42 p = [0] |
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
43 for i in a: p.append(p[-1] + len(i)) |
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
44 |
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
45 if sorted: |
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
46 d = sortdiff(a, b) |
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
47 else: |
184 | 48 d = difflib.SequenceMatcher(None, a, b).get_matching_blocks() |
49 la = 0 | |
50 lb = 0 | |
51 for am, bm, size in d: | |
52 s = "".join(b[lb:bm]) | |
53 if am > la or s: | |
54 bin.append(struct.pack(">lll", p[la], p[am], len(s)) + s) | |
55 la = am + size | |
56 lb = bm + size | |
57 | |
0
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
58 return "".join(bin) |
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
59 |
120
bae6f0328f63
Add a function to return the new text from a binary diff
mpm@selenic.com
parents:
75
diff
changeset
|
60 def patchtext(bin): |
bae6f0328f63
Add a function to return the new text from a binary diff
mpm@selenic.com
parents:
75
diff
changeset
|
61 pos = 0 |
bae6f0328f63
Add a function to return the new text from a binary diff
mpm@selenic.com
parents:
75
diff
changeset
|
62 t = [] |
bae6f0328f63
Add a function to return the new text from a binary diff
mpm@selenic.com
parents:
75
diff
changeset
|
63 while pos < len(bin): |
bae6f0328f63
Add a function to return the new text from a binary diff
mpm@selenic.com
parents:
75
diff
changeset
|
64 p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12]) |
bae6f0328f63
Add a function to return the new text from a binary diff
mpm@selenic.com
parents:
75
diff
changeset
|
65 pos += 12 |
bae6f0328f63
Add a function to return the new text from a binary diff
mpm@selenic.com
parents:
75
diff
changeset
|
66 t.append(bin[pos:pos + l]) |
bae6f0328f63
Add a function to return the new text from a binary diff
mpm@selenic.com
parents:
75
diff
changeset
|
67 pos += l |
bae6f0328f63
Add a function to return the new text from a binary diff
mpm@selenic.com
parents:
75
diff
changeset
|
68 return "".join(t) |
bae6f0328f63
Add a function to return the new text from a binary diff
mpm@selenic.com
parents:
75
diff
changeset
|
69 |
0
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
70 def patch(a, bin): |
72 | 71 return patches(a, [bin]) |