Mercurial > hg
comparison mercurial/bdiff.c @ 5452:82b4ff3abbcd
bdiff: tweaks for large files
- adjust the common line threshold to .1%
this speeds up a delta of 7M lines of source from 10m to 40s
- adjust the scaling of the hash array down a bit as it was raising the peak
memory usage significantly
author | Matt Mackall <mpm@selenic.com> |
---|---|
date | Thu, 11 Oct 2007 00:46:56 -0500 |
parents | d0c48891dd4a |
children | f84bb2e1cc3a 5133e9f61700 |
comparison
equal
deleted
inserted
replaced
5451:0a43875677b1 | 5452:82b4ff3abbcd |
---|---|
104 return a->h != b->h || a->len != b->len || memcmp(a->l, b->l, a->len); | 104 return a->h != b->h || a->len != b->len || memcmp(a->l, b->l, a->len); |
105 } | 105 } |
106 | 106 |
107 static int equatelines(struct line *a, int an, struct line *b, int bn) | 107 static int equatelines(struct line *a, int an, struct line *b, int bn) |
108 { | 108 { |
109 int i, j, buckets = 1, t; | 109 int i, j, buckets = 1, t, scale; |
110 int scale = 32; | 110 struct pos *h = NULL; |
111 struct pos *h; | |
112 | 111 |
113 /* build a hash table of the next highest power of 2 */ | 112 /* build a hash table of the next highest power of 2 */ |
114 while (buckets < bn + 1) | 113 while (buckets < bn + 1) |
115 buckets *= 2; | 114 buckets *= 2; |
116 | 115 |
117 /* try to allocate a large hash table to avoid collisions */ | 116 /* try to allocate a large hash table to avoid collisions */ |
118 do { | 117 for (scale = 4; scale; scale /= 2) { |
119 scale /= 2; | |
120 h = (struct pos *)malloc(scale * buckets * sizeof(struct pos)); | 118 h = (struct pos *)malloc(scale * buckets * sizeof(struct pos)); |
121 } while (!h && scale != 1); | 119 if (h) |
120 break; | |
121 } | |
122 | 122 |
123 if (!h) | 123 if (!h) |
124 return 0; | 124 return 0; |
125 | 125 |
126 buckets = buckets * scale - 1; | 126 buckets = buckets * scale - 1; |
145 h[j].pos = i; | 145 h[j].pos = i; |
146 h[j].len++; /* keep track of popularity */ | 146 h[j].len++; /* keep track of popularity */ |
147 } | 147 } |
148 | 148 |
149 /* compute popularity threshold */ | 149 /* compute popularity threshold */ |
150 t = (bn >= 200) ? bn / 100 : bn + 1; | 150 t = (bn >= 4000) ? bn / 1000 : bn + 1; |
151 | 151 |
152 /* match items in a to their equivalence class in b */ | 152 /* match items in a to their equivalence class in b */ |
153 for (i = 0; i < an; i++) { | 153 for (i = 0; i < an; i++) { |
154 /* find the equivalence class */ | 154 /* find the equivalence class */ |
155 for (j = a[i].h & buckets; h[j].pos != INT_MAX; | 155 for (j = a[i].h & buckets; h[j].pos != INT_MAX; |