pure Python implementation of bdiff.c
author Martin Geisler <mg@daimi.au.dk>
Sat, 24 Jan 2009 00:12:20 +0100
changeset 7703 9044d3567f6d
parent 7702 f6bb40554e34
child 7704 30d1d313370b
pure Python implementation of bdiff.c
mercurial/pure/bdiff.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/pure/bdiff.py	Sat Jan 24 00:12:20 2009 +0100
@@ -0,0 +1,69 @@
+# bdiff.py - Python implementation of bdiff.c
+#
+# Copyright 2009 Matt Mackall <mpm@selenic.com> and others
+#
+# This software may be used and distributed according to the terms
+# of the GNU General Public License, incorporated herein by reference.
+
+import struct, difflib
+# mdiff import moved to bottom due to import cycle
+
+def _normalizeblocks(a, b, blocks):  # generator of adjusted (a, b, len) blocks
+    prev = None  # block held back until its successor has been examined
+    for curr in blocks:
+        if prev is None:
+            prev = curr
+            continue
+        shift = 0  # elements to move from the front of curr to the end of prev
+        # prev block: starts at a[a1] / b[b1] and is l1 elements long
+        a1, b1, l1 = prev
+        a1end = a1 + l1
+        b1end = b1 + l1
+        # curr block: starts at a[a2] / b[b2] and is l2 elements long
+        a2, b2, l2 = curr
+        a2end = a2 + l2
+        b2end = b2 + l2
+        if a1end == a2:  # prev ends exactly where curr starts in a
+            while a1end+shift < a2end and a[a1end+shift] == b[b1end+shift]:
+                shift += 1  # element just past prev is equal on both sides
+        elif b1end == b2:  # prev ends exactly where curr starts in b
+            while b1end+shift < b2end and a[a1end+shift] == b[b1end+shift]:
+                shift += 1
+        yield a1, b1, l1+shift  # prev, grown by the shifted elements
+        prev = a2+shift, b2+shift, l2-shift  # curr, shrunk from its front
+    yield prev  # last block; this is None if blocks was empty
+
+def bdiff(a, b):  # encode the differences between a and b as packed fragments
+    a = str(a).splitlines(True)  # True keeps the line endings on each line
+    b = str(b).splitlines(True)
+    # a has no lines: return all of b as a single fragment, or "" if b is empty
+    if not a:
+        s = "".join(b)
+        return s and (struct.pack(">lll", 0, 0, len(s)) + s)
+    # bin collects the output fragments; p[i] is the offset where a[i] starts
+    bin = []
+    p = [0]
+    for i in a: p.append(p[-1] + len(i))
+    # d holds matching runs of lines as (index in a, index in b, length)
+    d = difflib.SequenceMatcher(None, a, b).get_matching_blocks()
+    d = _normalizeblocks(a, b, d)
+    la = 0  # index in a just past the previous matching run
+    lb = 0  # index in b just past the previous matching run
+    for am, bm, size in d:
+        s = "".join(b[lb:bm])  # text of b since the previous matching run
+        if am > la or s:  # emit only if lines of a were skipped or s is non-empty
+            bin.append(struct.pack(">lll", p[la], p[am], len(s)) + s)
+        la = am + size
+        lb = bm + size
+    # each fragment is three big-endian 32-bit ints followed by its text
+    return "".join(bin)
+
+def blocks(a, b):  # matching runs of a and b as (a1, a2, b1, b2) tuples
+    an = mdiff.splitnewlines(a)  # presumably a list of lines; confirm in mdiff
+    bn = mdiff.splitnewlines(b)
+    d = difflib.SequenceMatcher(None, an, bn).get_matching_blocks()
+    d = _normalizeblocks(an, bn, d)  # yields (index in an, index in bn, length)
+    return [(i, i + n, j, j + n) for (i, j, n) in d]  # end indexes are exclusive
+
+# this breaks an import cycle
+import mdiff