bdiff: switch to lyhash
lyhash is a very simple and fast hash function that had the fewest
hash collisions on a 3.9M line text corpus and 190k line binary corpus
and should have significantly fewer collisions than the current hash
function.
--- a/mercurial/bdiff.c Thu Sep 27 23:59:02 2007 -0500
+++ b/mercurial/bdiff.c Thu Sep 27 23:59:18 2007 -0500
@@ -59,14 +59,9 @@
struct hunk *base, *head;
};
-static inline uint32_t rol32(uint32_t word, unsigned int shift)
-{
- return (word << shift) | (word >> (32 - shift));
-}
-
int splitlines(const char *a, int len, struct line **lr)
{
- int g, h, i;
+ int h, i;
const char *p, *b = a;
const char * const plast = a + len - 1;
struct line *l;
@@ -84,24 +79,17 @@
/* build the line array and calculate hashes */
h = 0;
for (p = a; p < a + len; p++) {
- /*
- * a simple hash from GNU diff, with better collision
- * resistance from hashpjw. this slows down common
- * case by 10%, but speeds up worst case by 100x.
- */
- h = *p + rol32(h, 7);
- if ((g = h & 0xf0000000)) {
- h ^= g >> 24;
- h ^= g;
- }
+ /* Leonid Yuriev's hash */
+ h = (h * 1664525) + *p + 1013904223;
+
if (*p == '\n' || p == plast) {
+ l->h = h;
+ h = 0;
l->len = p - b + 1;
- l->h = h * l->len;
l->l = b;
l->n = INT_MAX;
l++;
b = p + 1;
- h = 0;
}
}