bdiff: tweaks for large files
author Matt Mackall <mpm@selenic.com>
Thu, 11 Oct 2007 00:46:56 -0500
changeset 5452 82b4ff3abbcd
parent 5451 0a43875677b1
child 5453 9d77f2b47eb7
bdiff: tweaks for large files

- adjust the common line threshold to .1%
  this speeds up a delta of 7M lines of source from 10m to 40s
- adjust the scaling of the hash array down a bit
  as it was raising the peak memory usage significantly
mercurial/bdiff.c
--- a/mercurial/bdiff.c	Thu Oct 11 00:46:54 2007 -0500
+++ b/mercurial/bdiff.c	Thu Oct 11 00:46:56 2007 -0500
@@ -106,19 +106,19 @@
 
 static int equatelines(struct line *a, int an, struct line *b, int bn)
 {
-	int i, j, buckets = 1, t;
-	int scale = 32;
-	struct pos *h;
+	int i, j, buckets = 1, t, scale;
+	struct pos *h = NULL;
 
 	/* build a hash table of the next highest power of 2 */
 	while (buckets < bn + 1)
 		buckets *= 2;
 
 	/* try to allocate a large hash table to avoid collisions */
-	do {
-		scale /= 2;
+	for (scale = 4; scale; scale /= 2) {
 		h = (struct pos *)malloc(scale * buckets * sizeof(struct pos));
-	} while (!h && scale != 1);
+		if (h)
+			break;
+	}
 
 	if (!h)
 		return 0;
@@ -147,7 +147,7 @@
 	}
 
 	/* compute popularity threshold */
-	t = (bn >= 200) ? bn / 100 : bn + 1;
+	t = (bn >= 4000) ? bn / 1000 : bn + 1;
 
 	/* match items in a to their equivalence class in b */
 	for (i = 0; i < an; i++) {