bdiff: fix latent normalization bug stable
authorMatt Mackall <mpm@selenic.com>
Thu, 21 Apr 2016 21:53:18 -0500
branchstable
changeset 29012 4bd67ae7d75a
parent 29011 8bcda4c76820
child 29013 9a8363d23419
bdiff: fix latent normalization bug This bug is hidden by the current bias towards matches at the beginning of the file. When this bias is tweaked later to address recursion balancing, the normalization code could cause the next block to shrink to a negative length, thus creating invalid delta chunks. We add checks here to disallow that. This bug requires test cases that are an awkwardly large size for the test suite, but is very rapidly picked up by the included torture tester.
contrib/bdiff-torture.py
mercurial/bdiff.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/contrib/bdiff-torture.py	Thu Apr 21 21:53:18 2016 -0500
@@ -0,0 +1,98 @@
+# Randomized torture test generation for bdiff
+
+from __future__ import absolute_import, print_function
+import random, sys
+from mercurial import (
+    bdiff,
+    mpatch,
+)
+
+def reducetest(a, b):
+    tries = 0
+    reductions = 0
+    print("reducing...")
+    while tries < 1000:
+        a2 = "\n".join(l for l in a.splitlines()
+                       if random.randint(0, 100) > 0) + "\n"
+        b2 = "\n".join(l for l in b.splitlines()
+                       if random.randint(0, 100) > 0) + "\n"
+        if a2 == a and b2 == b:
+            continue
+        if a2 == b2:
+            continue
+        tries += 1
+
+        try:
+            test1(a, b)
+        except Exception as inst:
+            reductions += 1
+            tries = 0
+            a = a2
+            b = b2
+
+    print("reduced:", reductions, len(a) + len(b),
+          repr(a), repr(b))
+    try:
+        test1(a, b)
+    except Exception as inst:
+        print("failed:", inst)
+
+    sys.exit(0)
+
+def test1(a, b):
+    d = bdiff.bdiff(a, b)
+    if not d:
+        raise ValueError("empty")
+    c = mpatch.patches(a, [d])
+    if c != b:
+        raise ValueError("bad")
+
+def testwrap(a, b):
+    try:
+        test1(a, b)
+        return
+    except Exception as inst:
+        pass
+    print("exception:", inst)
+    reducetest(a, b)
+
+def test(a, b):
+    testwrap(a, b)
+    testwrap(b, a)
+
+def rndtest(size, noise):
+    a = []
+    src = "                aaaaaaaabbbbccd"
+    for x in xrange(size):
+        a.append(src[random.randint(0, len(src) - 1)])
+
+    while True:
+        b = [c for c in a if random.randint(0, 99) > noise]
+        b2 = []
+        for c in b:
+            b2.append(c)
+            while random.randint(0, 99) < noise:
+                b2.append(src[random.randint(0, len(src) - 1)])
+        if b2 != a:
+            break
+
+    a = "\n".join(a) + "\n"
+    b = "\n".join(b2) + "\n"
+
+    test(a, b)
+
+maxvol = 10000
+startsize = 2
+while True:
+    size = startsize
+    count = 0
+    while size < maxvol:
+        print(size)
+        volume = 0
+        while volume < maxvol:
+            rndtest(size, 2)
+            volume += size
+            count += 2
+        size *= 2
+    maxvol *= 4
+    startsize *= 4
--- a/mercurial/bdiff.c	Thu Apr 21 21:46:31 2016 -0500
+++ b/mercurial/bdiff.c	Thu Apr 21 21:53:18 2016 -0500
@@ -265,6 +265,8 @@
 
 		if (curr->a2 == next->a1 || curr->b2 == next->b1)
 			while (curr->a2 < an && curr->b2 < bn
+			       && next->a1 < next->a2
+			       && next->b1 < next->b2
 			       && !cmp(a + curr->a2, b + curr->b2)) {
 				curr->a2++;
 				next->a1++;