# HG changeset patch # User Christoph Spiel # Date 1190955477 18000 # Node ID 058e93c3d07d07bf9665bf1d3f8439516c0cb314 # Parent f87685355c9c168eabeaacf74ef4f9e7c4a1100d I have spotted the biggest bottleneck in "bdiff.c". Actually it was pretty easy to find after I recompiled the python interpreter and mercurial for profiling. In "bdiff.c" function "equatelines" allocates the minimum hash table size, which can lead to tons of collisions. I introduced an "overcommit" factor of 16, this is, I allocate 16 times more memory than the minimum value. Overcommiting 128 times does not improve the performance over the 16-times case. diff -r f87685355c9c -r 058e93c3d07d mercurial/bdiff.c --- a/mercurial/bdiff.c Wed Sep 26 01:58:45 2007 -0300 +++ b/mercurial/bdiff.c Thu Sep 27 23:57:57 2007 -0500 @@ -117,17 +117,24 @@ static int equatelines(struct line *a, int an, struct line *b, int bn) { int i, j, buckets = 1, t; + int scale = 32; struct pos *h; /* build a hash table of the next highest power of 2 */ while (buckets < bn + 1) buckets *= 2; - h = (struct pos *)malloc(buckets * sizeof(struct pos)); - buckets = buckets - 1; + /* try to allocate a large hash table to avoid collisions */ + do { + scale /= 2; + h = (struct pos *)malloc(scale * buckets * sizeof(struct pos)); + } while (!h && scale != 1); + if (!h) return 0; + buckets = buckets * scale - 1; + /* clear the hash table */ for (i = 0; i <= buckets; i++) { h[i].pos = -1;