Mercurial > hg
view mercurial/mdiff.py @ 184:697f05bfe976
Improved binary diff from Christopher Li
This is more intelligent/efficient by combining neighboring inserts,
replaces and deletes. Passes test of converting kernel repo, but
doesn't appear to substantially affect compression or performance.
author | mpm@selenic.com |
---|---|
date | Fri, 27 May 2005 19:38:34 -0800 |
parents | e6c621a825f2 |
children | 75840796e8e2 afe895fcc0d0 |
line wrap: on
line source
#!/usr/bin/python
"""Text and binary diff helpers.

Binary deltas produced here are a concatenation of hunks.  Each hunk is a
12-byte header of three big-endian signed 32-bit integers (struct format
">lll") followed by the replacement data:

    start  - byte offset in the old text where the replaced span begins
    end    - byte offset in the old text where the replaced span ends
    length - number of bytes of replacement data that follow the header
"""
import difflib
import struct
import mmap
# The star import is deliberate: it supplies patches(), used by patch()
# below, and re-exports the mpatch names through this module.
from mercurial.mpatch import *


def unidiff(a, ad, b, bd, fn):
    """Return a unified diff between the texts a and b as one string.

    a, b   - old and new file contents
    ad, bd - passed to difflib.unified_diff as the from/to file dates
    fn     - file name; shown as "a/<fn>" and "b/<fn>" in the header

    Returns "" when both texts are empty.
    """
    if not a and not b:
        return ""

    a = a.splitlines(1)
    b = b.splitlines(1)
    lines = list(difflib.unified_diff(a, b, "a/" + fn, "b/" + fn, ad, bd))

    # A line lacking its terminator can only be the last line of one of the
    # inputs; terminate it and add the marker so the hunk stays well formed.
    for idx, line in enumerate(lines):
        if not line.endswith("\n"):
            lines[idx] = line + "\n\\ No newline at end of file\n"

    return "".join(lines)


def textdiff(a, b):
    """Return the binary delta that turns text a into text b."""
    return diff(a.splitlines(1), b.splitlines(1))


def sortdiff(a, b):
    """Yield matching blocks between two sequences in a single pass.

    Yields (a_index, b_index, size) triples in the same shape as
    difflib.SequenceMatcher.get_matching_blocks(), ending with the
    (len(a), len(b), 0) sentinel.

    The walk advances whichever side currently holds the smaller element,
    so it only finds every match when both sequences are sorted ascending.
    """
    la = lb = 0
    lena = len(a)
    lenb = len(b)

    while 1:
        am, bm = la, lb

        # Walk over the run of matching elements.
        while la < lena and lb < lenb and a[la] == b[lb]:
            la += 1
            lb += 1

        if la > am:
            yield (am, bm, la - am)

        # Skip elements of b that sort before the current element of a.
        # The la bound matters: the matching run may have consumed all of a.
        while la < lena and lb < lenb and b[lb] < a[la]:
            lb += 1

        if lb >= lenb:
            break

        # Skip elements of a that sort before the current element of b
        # (lb is known to be in range here because of the break above).
        while la < lena and b[lb] > a[la]:
            la += 1

        if la >= lena:
            break

    yield (lena, lenb, 0)


def diff(a, b, sorted=0):
    """Return the binary delta that turns line list a into line list b.

    a, b   - sequences of strings (typically lines, terminators included)
    sorted - when true, find matches with sortdiff() instead of
             difflib.SequenceMatcher

    Returns the concatenated hunks, or "" when there is nothing to emit.
    """
    if not a:
        # Nothing to match against: emit b as a single insert at offset 0,
        # or "" when b is empty as well.
        s = "".join(b)
        return s and (struct.pack(">lll", 0, 0, len(s)) + s)

    # offsets[i] is the byte offset at which a[i] starts; offsets[len(a)]
    # is the total length of a.
    offsets = [0]
    for line in a:
        offsets.append(offsets[-1] + len(line))

    if sorted:
        blocks = sortdiff(a, b)
    else:
        blocks = difflib.SequenceMatcher(None, a, b).get_matching_blocks()

    hunks = []
    la = 0
    lb = 0
    for am, bm, size in blocks:
        # Everything between the previous match and this one is replaced:
        # a[la:am] goes away and b[lb:bm] takes its place.
        s = "".join(b[lb:bm])
        if am > la or s:
            header = struct.pack(">lll", offsets[la], offsets[am], len(s))
            hunks.append(header + s)
        la = am + size
        lb = bm + size

    return "".join(hunks)


def patchtext(bin):
    """Return the replacement data of every hunk in delta bin, concatenated.

    The start/end offsets in each header are ignored; only the data
    following each 12-byte header is collected.
    """
    pos = 0
    pieces = []
    while pos < len(bin):
        _start, _end, length = struct.unpack(">lll", bin[pos:pos + 12])
        pos += 12
        pieces.append(bin[pos:pos + length])
        pos += length
    return "".join(pieces)


def patch(a, bin):
    """Apply the single delta bin to text a via patches()."""
    return patches(a, [bin])