Mercurial > hg
view mercurial/bdiff.c @ 43257:675c776fbcd1
sidedatacopies: directly fetch copies information from sidedata
When using the sidedata mode, we don't need a complicated and expensive
`context` object. Instead we directly fetch copies information from the
sidedata (through a changelogrevision object). More optimisations coming.
revision: large amount; added files: large amount; rename small amount; c3b14617fbd7 9ba6ab77fd29
filelog: ! wall 3.679613 comb 3.680000 user 3.580000 sys 0.100000 (median of 3)
base: ! wall 8.884369 comb 8.880000 user 8.850000 sys 0.030000 (median of 3)
before: ! wall 4.681985 comb 4.680000 user 4.640000 sys 0.040000 (median of 3)
after: ! wall 3.955894 comb 3.950000 user 3.940000 sys 0.010000 (median of 3)
revision: large amount; added files: small amount; rename small amount; c3b14617fbd7 f650a9b140d2
filelog: ! wall 0.003357 comb 0.010000 user 0.010000 sys 0.000000 (median of 781)
base: ! wall 12.398524 comb 12.400000 user 12.330000 sys 0.070000 (median of 3)
before: ! wall 6.459592 comb 6.470000 user 6.390000 sys 0.080000 (median of 3)
after: ! wall 5.505774 comb 5.500000 user 5.410000 sys 0.090000 (median of 3)
revision: large amount; added files: large amount; rename large amount; 08ea3258278e d9fa043f30c0
filelog: ! wall 2.754687 comb 2.760000 user 2.650000 sys 0.110000 (median of 4)
base: ! wall 1.423166 comb 1.420000 user 1.400000 sys 0.020000 (median of 8)
before: ! wall 0.961048 comb 0.960000 user 0.940000 sys 0.020000 (median of 11)
after: ! wall 0.882950 comb 0.880000 user 0.880000 sys 0.000000 (median of 11)
revision: small amount; added files: large amount; rename large amount; df6f7a526b60 a83dc6a2d56f
filelog: ! wall 1.552293 comb 1.550000 user 1.510000 sys 0.040000 (median of 6)
base: ! wall 0.022662 comb 0.020000 user 0.020000 sys 0.000000 (median of 128)
before: ! wall 0.021649 comb 0.020000 user 0.020000 sys 0.000000 (median of 135)
after: ! wall 0.020951 comb 0.020000 user 0.020000 sys 0.000000 (median of 141)
revision: small amount; added files: large amount; rename small amount; 4aa4e1f8e19a 169138063d63
filelog: ! wall 1.500983 comb 1.500000 user 1.420000 sys 0.080000 (median of 7)
base: ! wall 0.006956 comb 0.010000 user 0.010000 sys 0.000000 (median of 392)
before: ! wall 0.004022 comb 0.000000 user 0.000000 sys 0.000000 (median of 735)
after: ! wall 0.003988 comb 0.000000 user 0.000000 sys 0.000000 (median of 736)
revision: small amount; added files: small amount; rename small amount; 4bc173b045a6 964879152e2e
filelog: ! wall 0.011745 comb 0.020000 user 0.020000 sys 0.000000 (median of 250)
base: ! wall 0.000156 comb 0.000000 user 0.000000 sys 0.000000 (median of 17180)
before: ! wall 0.000118 comb 0.000000 user 0.000000 sys 0.000000 (median of 19170)
after: ! wall 0.000097 comb 0.000000 user 0.000000 sys 0.000000 (median of 27276)
revision: medium amount; added files: large amount; rename medium amount; c95f1ced15f2 2c68e87c3efe
filelog: ! wall 3.228230 comb 3.230000 user 3.110000 sys 0.120000 (median of 4)
base: ! wall 0.997640 comb 1.000000 user 0.980000 sys 0.020000 (median of 10)
before: ! wall 0.679500 comb 0.680000 user 0.680000 sys 0.000000 (median of 15)
after: ! wall 0.596779 comb 0.600000 user 0.600000 sys 0.000000 (median of 17)
revision: medium amount; added files: medium amount; rename small amount; d343da0c55a8 d7746d32bf9d
filelog: ! wall 1.052501 comb 1.060000 user 1.040000 sys 0.020000 (median of 10)
base: ! wall 0.214519 comb 0.220000 user 0.220000 sys 0.000000 (median of 45)
before: ! wall 0.149675 comb 0.150000 user 0.150000 sys 0.000000 (median of 66)
after: ! wall 0.130786 comb 0.130000 user 0.130000 sys 0.000000 (median of 75)
Differential Revision: https://phab.mercurial-scm.org/D7072
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Wed, 02 Oct 2019 17:53:47 -0400 |
parents | 763b45bc4483 |
children | d4ba4d51f85f |
line wrap: on
line source
/* bdiff.c - efficient binary diff extension for Mercurial Copyright 2005, 2006 Matt Mackall <mpm@selenic.com> This software may be used and distributed according to the terms of the GNU General Public License, incorporated herein by reference. Based roughly on Python difflib */ #include <limits.h> #include <stdlib.h> #include <string.h> #include "bdiff.h" #include "bitmanipulation.h" #include "compat.h" /* Hash implementation from diffutils */ #define ROL(v, n) ((v) << (n) | (v) >> (sizeof(v) * CHAR_BIT - (n))) #define HASH(h, c) ((c) + ROL(h, 7)) struct pos { int pos, len; }; int bdiff_splitlines(const char *a, ssize_t len, struct bdiff_line **lr) { unsigned hash; int i; const char *p, *b = a; const char *const plast = a + len - 1; struct bdiff_line *l; /* count the lines */ i = 1; /* extra line for sentinel */ for (p = a; p < plast; p++) { if (*p == '\n') { i++; } } if (p == plast) { i++; } *lr = l = (struct bdiff_line *)calloc(i, sizeof(struct bdiff_line)); if (!l) { return -1; } /* build the line array and calculate hashes */ hash = 0; for (p = a; p < plast; p++) { hash = HASH(hash, *p); if (*p == '\n') { l->hash = hash; hash = 0; l->len = p - b + 1; l->l = b; l->n = INT_MAX; l++; b = p + 1; } } if (p == plast) { hash = HASH(hash, *p); l->hash = hash; l->len = p - b + 1; l->l = b; l->n = INT_MAX; l++; } /* set up a sentinel */ l->hash = 0; l->len = 0; l->l = a + len; return i - 1; } static inline int cmp(struct bdiff_line *a, struct bdiff_line *b) { return a->hash != b->hash || a->len != b->len || memcmp(a->l, b->l, a->len); } static int equatelines(struct bdiff_line *a, int an, struct bdiff_line *b, int bn) { int i, j, buckets = 1, t, scale; struct pos *h = NULL; /* build a hash table of the next highest power of 2 */ while (buckets < bn + 1) { buckets *= 2; } /* try to allocate a large hash table to avoid collisions */ for (scale = 4; scale; scale /= 2) { h = (struct pos *)calloc(buckets, scale * sizeof(struct pos)); if (h) { break; } } if (!h) { return 0; } 
buckets = buckets * scale - 1; /* clear the hash table */ for (i = 0; i <= buckets; i++) { h[i].pos = -1; h[i].len = 0; } /* add lines to the hash table chains */ for (i = 0; i < bn; i++) { /* find the equivalence class */ for (j = b[i].hash & buckets; h[j].pos != -1; j = (j + 1) & buckets) { if (!cmp(b + i, b + h[j].pos)) { break; } } /* add to the head of the equivalence class */ b[i].n = h[j].pos; b[i].e = j; h[j].pos = i; h[j].len++; /* keep track of popularity */ } /* compute popularity threshold */ t = (bn >= 31000) ? bn / 1000 : 1000000 / (bn + 1); /* match items in a to their equivalence class in b */ for (i = 0; i < an; i++) { /* find the equivalence class */ for (j = a[i].hash & buckets; h[j].pos != -1; j = (j + 1) & buckets) { if (!cmp(a + i, b + h[j].pos)) { break; } } a[i].e = j; /* use equivalence class for quick compare */ if (h[j].len <= t) { a[i].n = h[j].pos; /* point to head of match list */ } else { a[i].n = -1; /* too popular */ } } /* discard hash tables */ free(h); return 1; } static int longest_match(struct bdiff_line *a, struct bdiff_line *b, struct pos *pos, int a1, int a2, int b1, int b2, int *omi, int *omj) { int mi = a1, mj = b1, mk = 0, i, j, k, half, bhalf; /* window our search on large regions to better bound worst-case performance. by choosing a window at the end, we reduce skipping overhead on the b chains. */ if (a2 - a1 > 30000) { a1 = a2 - 30000; } half = (a1 + a2 - 1) / 2; bhalf = (b1 + b2 - 1) / 2; for (i = a1; i < a2; i++) { /* skip all lines in b after the current block */ for (j = a[i].n; j >= b2; j = b[j].n) { ; } /* loop through all lines match a[i] in b */ for (; j >= b1; j = b[j].n) { /* does this extend an earlier match? */ for (k = 1; j - k >= b1 && i - k >= a1; k++) { /* reached an earlier match? */ if (pos[j - k].pos == i - k) { k += pos[j - k].len; break; } /* previous line mismatch? */ if (a[i - k].e != b[j - k].e) { break; } } pos[j].pos = i; pos[j].len = k; /* best match so far? 
we prefer matches closer to the middle to balance recursion */ if (k > mk) { /* a longer match */ mi = i; mj = j; mk = k; } else if (k == mk) { if (i > mi && i <= half && j > b1) { /* same match but closer to half */ mi = i; mj = j; } else if (i == mi && (mj > bhalf || i == a1)) { /* same i but best earlier j */ mj = j; } } } } if (mk) { mi = mi - mk + 1; mj = mj - mk + 1; } /* expand match to include subsequent popular lines */ while (mi + mk < a2 && mj + mk < b2 && a[mi + mk].e == b[mj + mk].e) { mk++; } *omi = mi; *omj = mj; return mk; } static struct bdiff_hunk *recurse(struct bdiff_line *a, struct bdiff_line *b, struct pos *pos, int a1, int a2, int b1, int b2, struct bdiff_hunk *l) { int i, j, k; while (1) { /* find the longest match in this chunk */ k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j); if (!k) { return l; } /* and recurse on the remaining chunks on either side */ l = recurse(a, b, pos, a1, i, b1, j, l); if (!l) { return NULL; } l->next = (struct bdiff_hunk *)malloc(sizeof(struct bdiff_hunk)); if (!l->next) { return NULL; } l = l->next; l->a1 = i; l->a2 = i + k; l->b1 = j; l->b2 = j + k; l->next = NULL; /* tail-recursion didn't happen, so do equivalent iteration */ a1 = i + k; b1 = j + k; } } int bdiff_diff(struct bdiff_line *a, int an, struct bdiff_line *b, int bn, struct bdiff_hunk *base) { struct bdiff_hunk *curr; struct pos *pos; int t, count = 0; /* allocate and fill arrays */ t = equatelines(a, an, b, bn); pos = (struct pos *)calloc(bn ? 
bn : 1, sizeof(struct pos)); if (pos && t) { /* generate the matching block list */ curr = recurse(a, b, pos, 0, an, 0, bn, base); if (!curr) { return -1; } /* sentinel end hunk */ curr->next = (struct bdiff_hunk *)malloc(sizeof(struct bdiff_hunk)); if (!curr->next) { return -1; } curr = curr->next; curr->a1 = curr->a2 = an; curr->b1 = curr->b2 = bn; curr->next = NULL; } free(pos); /* normalize the hunk list, try to push each hunk towards the end */ for (curr = base->next; curr; curr = curr->next) { struct bdiff_hunk *next = curr->next; if (!next) { break; } if (curr->a2 == next->a1 || curr->b2 == next->b1) { while (curr->a2 < an && curr->b2 < bn && next->a1 < next->a2 && next->b1 < next->b2 && !cmp(a + curr->a2, b + curr->b2)) { curr->a2++; next->a1++; curr->b2++; next->b1++; } } } for (curr = base->next; curr; curr = curr->next) { count++; } return count; } /* deallocate list of hunks; l may be NULL */ void bdiff_freehunks(struct bdiff_hunk *l) { struct bdiff_hunk *n; for (; l; l = n) { n = l->next; free(l); } }