mdiff: reinstate new algorithm
authormpm@selenic.com
Mon, 13 Jun 2005 13:43:48 -0800
changeset 325 ad87e19854a6
parent 324 ce81bdd91d06
child 326 235443668bea
mdiff: reinstate new algorithm -----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1 mdiff: reinstate new algorithm This unreverts the new algorithm with a fix from Chris (s/len/lena) and adds some comments on what it's doing. manifest hash: 75fc1acee1926e57d495f67a44cd88d9555f2356 -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.0 (GNU/Linux) iD8DBQFCrf4UywK+sNU5EO8RAoRzAKCA2vpUAGNqTkDeba3YHo6XXht7VgCfXQK0 /j5yv5cucnsYezCdclpftOA= =FNMD -----END PGP SIGNATURE-----
mercurial/mdiff.py
--- a/mercurial/mdiff.py	Mon Jun 13 12:04:04 2005 -0800
+++ b/mercurial/mdiff.py	Mon Jun 13 13:43:48 2005 -0800
@@ -43,28 +43,41 @@
 
 def sortdiff(a, b):
     la = lb = 0
-
+    lena = len(a)
+    lenb = len(b)
+    
     while 1:
-        if la >= len(a) or lb >= len(b): break
-        if b[lb] < a[la]:
-            si = lb
-            while lb < len(b) and b[lb] < a[la] : lb += 1
-            yield "insert", la, la, si, lb
-        elif a[la] < b[lb]:
-            si = la
-            while la < len(a) and a[la] < b[lb]: la += 1
-            yield "delete", si, la, lb, lb
-        else:
+        am, bm, = la, lb
+
+        # walk over matching lines
+        while lb < lenb and la < lenb and a[la] == b[lb] :
             la += 1
             lb += 1
 
-    if lb < len(b):
-        yield "insert", la, la, lb, len(b)
+        if la > am:
+            yield (am, bm, la - am) # return a match
+
+        # skip mismatched lines from b
+        while lb < lenb and b[lb] < a[la]:
+            lb += 1
 
-    if la < len(a):
-        yield "delete", la, len(a), lb, lb
+        if lb >= lenb:
+            break
+        
+        # skip mismatched lines from a
+        while la < lena and b[lb] > a[la]:
+            la += 1
+
+        if la >= lena:
+            break
+        
+    yield (lena, lenb, 0)
 
 def diff(a, b, sorted=0):
+    if not a:
+        s = "".join(b)
+        return s and (struct.pack(">lll", 0, 0, len(s)) + s)
+
     bin = []
     p = [0]
     for i in a: p.append(p[-1] + len(i))
@@ -76,13 +89,16 @@
             print a, b
             raise
     else:
-        d = difflib.SequenceMatcher(None, a, b).get_opcodes()
-
-    for o, m, n, s, t in d:
-        if o == 'equal': continue
-        s = "".join(b[s:t])
-        bin.append(struct.pack(">lll", p[m], p[n], len(s)) + s)
-
+        d = difflib.SequenceMatcher(None, a, b).get_matching_blocks()
+    la = 0
+    lb = 0
+    for am, bm, size in d:
+        s = "".join(b[lb:bm])
+        if am > la or s:
+            bin.append(struct.pack(">lll", p[la], p[am], len(s)) + s)
+        la = am + size
+        lb = bm + size
+    
     return "".join(bin)
 
 def patchtext(bin):