annotate mercurial/bdiff.c @ 30188:8a864844d5a0

checkcopies: add a sanity check against false-positive copies When grafting a copy backwards through a rename, a copy is wrongly detected, which causes the graft to be applied inappropriately, in a destructive way. Make sure that the old file name really exists in the common ancestor, and bail out if it doesn't. This fixes the aggravated case of bug 5343, although the basic issue (failure to duplicate the copy information) still occurs.
author Gábor Stefanik <gabor.stefanik@nng.com>
date Wed, 12 Oct 2016 21:33:45 +0200
parents 9631ff5ebbeb
children d500ddae7494
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
1 /*
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
2 bdiff.c - efficient binary diff extension for Mercurial
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
3
2859
345bac2bc4ec update copyrights.
Vadim Gelfer <vadim.gelfer@gmail.com>
parents: 2600
diff changeset
4 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
5
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
6 This software may be used and distributed according to the terms of
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
7 the GNU General Public License, incorporated herein by reference.
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
8
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
9 Based roughly on Python difflib
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
10 */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
11
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
12 #include <stdlib.h>
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
13 #include <string.h>
5341
458acf92b49e bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents: 5340
diff changeset
14 #include <limits.h>
867
0cd2ee61b10a Allow Mercurial to build on HP-UX 11
tksoh@users.sourceforge.net
parents: 839
diff changeset
15
29539
666832b9e154 bdiff: use ssize_t in favor of Py_ssize_t in cpython-unaware locations
Maciej Fijalkowski <fijall@gmail.com>
parents: 29444
diff changeset
16 #include "compat.h"
29444
284d742e5611 internals: move the bitmanipulation routines into its own file
Maciej Fijalkowski <fijall@gmail.com>
parents: 29323
diff changeset
17 #include "bitmanipulation.h"
29541
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents: 29540
diff changeset
18 #include "bdiff.h"
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
19
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
/*
 * Small (position, length) pair used for two purposes in this file:
 *  - as a hash table slot in equatelines(), where pos is the index of the
 *    most recently inserted line of b in that slot (-1 when empty) and
 *    len counts how many lines of b landed in the slot;
 *  - as per-line match bookkeeping in longest_match(), where pos is the
 *    index in a that a line of b was last matched against and len is the
 *    length of the match ending there.
 */
struct pos {
	int pos;
	int len;
};
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
23
29541
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents: 29540
diff changeset
24 int bdiff_splitlines(const char *a, ssize_t len, struct bdiff_line **lr)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
25 {
13732
afe9269dccec bdiff.c: rename all variables which hold a hash value to "hash"
Markus F.X.J. Oberhumer <markus@oberhumer.com>
parents: 13731
diff changeset
26 unsigned hash;
13731
5d0cdf4ec338 bdiff.c: use unsigned arithmetic for hash computation
Markus F.X.J. Oberhumer <markus@oberhumer.com>
parents: 13730
diff changeset
27 int i;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
28 const char *p, *b = a;
5340
5737845fd974 bdiff: simple splitlines optimization
Christoph Spiel <cspiel@freenet.de>
parents: 5339
diff changeset
29 const char * const plast = a + len - 1;
29540
4ce1fc91e30a bdiff: rename functions and structs to be amenable for later exporting
Maciej Fijalkowski <fijall@gmail.com>
parents: 29539
diff changeset
30 struct bdiff_line *l;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
31
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
32 /* count the lines */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
33 i = 1; /* extra line for sentinel */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
34 for (p = a; p < a + len; p++)
5340
5737845fd974 bdiff: simple splitlines optimization
Christoph Spiel <cspiel@freenet.de>
parents: 5339
diff changeset
35 if (*p == '\n' || p == plast)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
36 i++;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
37
29540
4ce1fc91e30a bdiff: rename functions and structs to be amenable for later exporting
Maciej Fijalkowski <fijall@gmail.com>
parents: 29539
diff changeset
38 *lr = l = (struct bdiff_line *)malloc(sizeof(struct bdiff_line) * i);
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
39 if (!l)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
40 return -1;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
41
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
42 /* build the line array and calculate hashes */
13732
afe9269dccec bdiff.c: rename all variables which hold a hash value to "hash"
Markus F.X.J. Oberhumer <markus@oberhumer.com>
parents: 13731
diff changeset
43 hash = 0;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
44 for (p = a; p < a + len; p++) {
5342
d0c48891dd4a bdiff: switch to lyhash
Matt Mackall <mpm@selenic.com>
parents: 5341
diff changeset
45 /* Leonid Yuriev's hash */
13732
afe9269dccec bdiff.c: rename all variables which hold a hash value to "hash"
Markus F.X.J. Oberhumer <markus@oberhumer.com>
parents: 13731
diff changeset
46 hash = (hash * 1664525) + (unsigned char)*p + 1013904223;
5342
d0c48891dd4a bdiff: switch to lyhash
Matt Mackall <mpm@selenic.com>
parents: 5341
diff changeset
47
5340
5737845fd974 bdiff: simple splitlines optimization
Christoph Spiel <cspiel@freenet.de>
parents: 5339
diff changeset
48 if (*p == '\n' || p == plast) {
13732
afe9269dccec bdiff.c: rename all variables which hold a hash value to "hash"
Markus F.X.J. Oberhumer <markus@oberhumer.com>
parents: 13731
diff changeset
49 l->hash = hash;
afe9269dccec bdiff.c: rename all variables which hold a hash value to "hash"
Markus F.X.J. Oberhumer <markus@oberhumer.com>
parents: 13731
diff changeset
50 hash = 0;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
51 l->len = p - b + 1;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
52 l->l = b;
5341
458acf92b49e bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents: 5340
diff changeset
53 l->n = INT_MAX;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
54 l++;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
55 b = p + 1;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
56 }
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
57 }
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
58
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
59 /* set up a sentinel */
13732
afe9269dccec bdiff.c: rename all variables which hold a hash value to "hash"
Markus F.X.J. Oberhumer <markus@oberhumer.com>
parents: 13731
diff changeset
60 l->hash = 0;
13731
5d0cdf4ec338 bdiff.c: use unsigned arithmetic for hash computation
Markus F.X.J. Oberhumer <markus@oberhumer.com>
parents: 13730
diff changeset
61 l->len = 0;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
62 l->l = a + len;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
63 return i - 1;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
64 }
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
65
29540
4ce1fc91e30a bdiff: rename functions and structs to be amenable for later exporting
Maciej Fijalkowski <fijall@gmail.com>
parents: 29539
diff changeset
66 static inline int cmp(struct bdiff_line *a, struct bdiff_line *b)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
67 {
13732
afe9269dccec bdiff.c: rename all variables which hold a hash value to "hash"
Markus F.X.J. Oberhumer <markus@oberhumer.com>
parents: 13731
diff changeset
68 return a->hash != b->hash || a->len != b->len || memcmp(a->l, b->l, a->len);
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
69 }
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
70
29540
4ce1fc91e30a bdiff: rename functions and structs to be amenable for later exporting
Maciej Fijalkowski <fijall@gmail.com>
parents: 29539
diff changeset
71 static int equatelines(struct bdiff_line *a, int an, struct bdiff_line *b,
4ce1fc91e30a bdiff: rename functions and structs to be amenable for later exporting
Maciej Fijalkowski <fijall@gmail.com>
parents: 29539
diff changeset
72 int bn)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
73 {
5452
82b4ff3abbcd bdiff: tweaks for large files
Matt Mackall <mpm@selenic.com>
parents: 5342
diff changeset
74 int i, j, buckets = 1, t, scale;
82b4ff3abbcd bdiff: tweaks for large files
Matt Mackall <mpm@selenic.com>
parents: 5342
diff changeset
75 struct pos *h = NULL;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
76
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
77 /* build a hash table of the next highest power of 2 */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
78 while (buckets < bn + 1)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
79 buckets *= 2;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
80
5339
058e93c3d07d I have spotted the biggest bottleneck in "bdiff.c". Actually it was
Christoph Spiel <cspiel@freenet.de>
parents: 4134
diff changeset
81 /* try to allocate a large hash table to avoid collisions */
5452
82b4ff3abbcd bdiff: tweaks for large files
Matt Mackall <mpm@selenic.com>
parents: 5342
diff changeset
82 for (scale = 4; scale; scale /= 2) {
5339
058e93c3d07d I have spotted the biggest bottleneck in "bdiff.c". Actually it was
Christoph Spiel <cspiel@freenet.de>
parents: 4134
diff changeset
83 h = (struct pos *)malloc(scale * buckets * sizeof(struct pos));
5452
82b4ff3abbcd bdiff: tweaks for large files
Matt Mackall <mpm@selenic.com>
parents: 5342
diff changeset
84 if (h)
82b4ff3abbcd bdiff: tweaks for large files
Matt Mackall <mpm@selenic.com>
parents: 5342
diff changeset
85 break;
82b4ff3abbcd bdiff: tweaks for large files
Matt Mackall <mpm@selenic.com>
parents: 5342
diff changeset
86 }
5339
058e93c3d07d I have spotted the biggest bottleneck in "bdiff.c". Actually it was
Christoph Spiel <cspiel@freenet.de>
parents: 4134
diff changeset
87
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
88 if (!h)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
89 return 0;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
90
5339
058e93c3d07d I have spotted the biggest bottleneck in "bdiff.c". Actually it was
Christoph Spiel <cspiel@freenet.de>
parents: 4134
diff changeset
91 buckets = buckets * scale - 1;
058e93c3d07d I have spotted the biggest bottleneck in "bdiff.c". Actually it was
Christoph Spiel <cspiel@freenet.de>
parents: 4134
diff changeset
92
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
93 /* clear the hash table */
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
94 for (i = 0; i <= buckets; i++) {
29013
9a8363d23419 bdiff: deal better with duplicate lines
Matt Mackall <mpm@selenic.com>
parents: 29012
diff changeset
95 h[i].pos = -1;
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
96 h[i].len = 0;
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
97 }
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
98
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
99 /* add lines to the hash table chains */
29013
9a8363d23419 bdiff: deal better with duplicate lines
Matt Mackall <mpm@selenic.com>
parents: 29012
diff changeset
100 for (i = 0; i < bn; i++) {
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
101 /* find the equivalence class */
29013
9a8363d23419 bdiff: deal better with duplicate lines
Matt Mackall <mpm@selenic.com>
parents: 29012
diff changeset
102 for (j = b[i].hash & buckets; h[j].pos != -1;
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
103 j = (j + 1) & buckets)
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
104 if (!cmp(b + i, b + h[j].pos))
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
105 break;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
106
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
107 /* add to the head of the equivalence class */
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
108 b[i].n = h[j].pos;
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
109 b[i].e = j;
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
110 h[j].pos = i;
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
111 h[j].len++; /* keep track of popularity */
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
112 }
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
113
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
114 /* compute popularity threshold */
9534
8e202431d620 bdiff: gradually enable the popularity hack
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 8858
diff changeset
115 t = (bn >= 31000) ? bn / 1000 : 1000000 / (bn + 1);
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
116
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
117 /* match items in a to their equivalence class in b */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
118 for (i = 0; i < an; i++) {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
119 /* find the equivalence class */
29013
9a8363d23419 bdiff: deal better with duplicate lines
Matt Mackall <mpm@selenic.com>
parents: 29012
diff changeset
120 for (j = a[i].hash & buckets; h[j].pos != -1;
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
121 j = (j + 1) & buckets)
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
122 if (!cmp(a + i, b + h[j].pos))
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
123 break;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
124
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
125 a[i].e = j; /* use equivalence class for quick compare */
1542
8e80eefb3de6 made C src formatting more consistent
twaldmann@thinkmo.de
parents: 1397
diff changeset
126 if (h[j].len <= t)
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
127 a[i].n = h[j].pos; /* point to head of match list */
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
128 else
29013
9a8363d23419 bdiff: deal better with duplicate lines
Matt Mackall <mpm@selenic.com>
parents: 29012
diff changeset
129 a[i].n = -1; /* too popular */
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
130 }
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
131
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
132 /* discard hash tables */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
133 free(h);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
134 return 1;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
135 }
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
136
29540
4ce1fc91e30a bdiff: rename functions and structs to be amenable for later exporting
Maciej Fijalkowski <fijall@gmail.com>
parents: 29539
diff changeset
137 static int longest_match(struct bdiff_line *a, struct bdiff_line *b,
4ce1fc91e30a bdiff: rename functions and structs to be amenable for later exporting
Maciej Fijalkowski <fijall@gmail.com>
parents: 29539
diff changeset
138 struct pos *pos,
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
139 int a1, int a2, int b1, int b2, int *omi, int *omj)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
140 {
29323
d29cb5e735e9 bdiff: remove effectively dead code
Matt Mackall <mpm@selenic.com>
parents: 29322
diff changeset
141 int mi = a1, mj = b1, mk = 0, i, j, k, half;
29015
87d4a6c5567e bdiff: further restrain potential quadratic performance
Matt Mackall <mpm@selenic.com>
parents: 29014
diff changeset
142
87d4a6c5567e bdiff: further restrain potential quadratic performance
Matt Mackall <mpm@selenic.com>
parents: 29014
diff changeset
143 /* window our search on large regions to better bound
87d4a6c5567e bdiff: further restrain potential quadratic performance
Matt Mackall <mpm@selenic.com>
parents: 29014
diff changeset
144 worst-case performance. by choosing a window at the end, we
87d4a6c5567e bdiff: further restrain potential quadratic performance
Matt Mackall <mpm@selenic.com>
parents: 29014
diff changeset
145 reduce skipping overhead on the b chains. */
87d4a6c5567e bdiff: further restrain potential quadratic performance
Matt Mackall <mpm@selenic.com>
parents: 29014
diff changeset
146 if (a2 - a1 > 30000)
87d4a6c5567e bdiff: further restrain potential quadratic performance
Matt Mackall <mpm@selenic.com>
parents: 29014
diff changeset
147 a1 = a2 - 30000;
87d4a6c5567e bdiff: further restrain potential quadratic performance
Matt Mackall <mpm@selenic.com>
parents: 29014
diff changeset
148
87d4a6c5567e bdiff: further restrain potential quadratic performance
Matt Mackall <mpm@selenic.com>
parents: 29014
diff changeset
149 half = (a1 + a2) / 2;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
150
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
151 for (i = a1; i < a2; i++) {
29013
9a8363d23419 bdiff: deal better with duplicate lines
Matt Mackall <mpm@selenic.com>
parents: 29012
diff changeset
152 /* skip all lines in b after the current block */
9a8363d23419 bdiff: deal better with duplicate lines
Matt Mackall <mpm@selenic.com>
parents: 29012
diff changeset
153 for (j = a[i].n; j >= b2; j = b[j].n)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
154 ;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
155
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
156 /* loop through all lines match a[i] in b */
29013
9a8363d23419 bdiff: deal better with duplicate lines
Matt Mackall <mpm@selenic.com>
parents: 29012
diff changeset
157 for (; j >= b1; j = b[j].n) {
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
158 /* does this extend an earlier match? */
29322
66dbdd3cc2b9 bdiff: extend matches across popular lines
Matt Mackall <mpm@selenic.com>
parents: 29015
diff changeset
159 for (k = 1; j - k >= b1 && i - k >= a1; k++) {
66dbdd3cc2b9 bdiff: extend matches across popular lines
Matt Mackall <mpm@selenic.com>
parents: 29015
diff changeset
160 /* reached an earlier match? */
66dbdd3cc2b9 bdiff: extend matches across popular lines
Matt Mackall <mpm@selenic.com>
parents: 29015
diff changeset
161 if (pos[j - k].pos == i - k) {
66dbdd3cc2b9 bdiff: extend matches across popular lines
Matt Mackall <mpm@selenic.com>
parents: 29015
diff changeset
162 k += pos[j - k].len;
66dbdd3cc2b9 bdiff: extend matches across popular lines
Matt Mackall <mpm@selenic.com>
parents: 29015
diff changeset
163 break;
66dbdd3cc2b9 bdiff: extend matches across popular lines
Matt Mackall <mpm@selenic.com>
parents: 29015
diff changeset
164 }
66dbdd3cc2b9 bdiff: extend matches across popular lines
Matt Mackall <mpm@selenic.com>
parents: 29015
diff changeset
165 /* previous line mismatch? */
66dbdd3cc2b9 bdiff: extend matches across popular lines
Matt Mackall <mpm@selenic.com>
parents: 29015
diff changeset
166 if (a[i - k].e != b[j - k].e)
66dbdd3cc2b9 bdiff: extend matches across popular lines
Matt Mackall <mpm@selenic.com>
parents: 29015
diff changeset
167 break;
66dbdd3cc2b9 bdiff: extend matches across popular lines
Matt Mackall <mpm@selenic.com>
parents: 29015
diff changeset
168 }
66dbdd3cc2b9 bdiff: extend matches across popular lines
Matt Mackall <mpm@selenic.com>
parents: 29015
diff changeset
169
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
170 pos[j].pos = i;
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
171 pos[j].len = k;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
172
29014
f1ca249696ed bdiff: balance recursion to avoid quadratic behavior (issue4704)
Matt Mackall <mpm@selenic.com>
parents: 29013
diff changeset
173 /* best match so far? we prefer matches closer
f1ca249696ed bdiff: balance recursion to avoid quadratic behavior (issue4704)
Matt Mackall <mpm@selenic.com>
parents: 29013
diff changeset
174 to the middle to balance recursion */
f1ca249696ed bdiff: balance recursion to avoid quadratic behavior (issue4704)
Matt Mackall <mpm@selenic.com>
parents: 29013
diff changeset
175 if (k > mk || (k == mk && (i <= mi || i < half))) {
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
176 mi = i;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
177 mj = j;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
178 mk = k;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
179 }
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
180 }
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
181 }
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
182
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
183 if (mk) {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
184 mi = mi - mk + 1;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
185 mj = mj - mk + 1;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
186 }
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
187
29323
d29cb5e735e9 bdiff: remove effectively dead code
Matt Mackall <mpm@selenic.com>
parents: 29322
diff changeset
188 /* expand match to include subsequent popular lines */
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
189 while (mi + mk < a2 && mj + mk < b2 &&
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
190 a[mi + mk].e == b[mj + mk].e)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
191 mk++;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
192
29323
d29cb5e735e9 bdiff: remove effectively dead code
Matt Mackall <mpm@selenic.com>
parents: 29322
diff changeset
193 *omi = mi;
d29cb5e735e9 bdiff: remove effectively dead code
Matt Mackall <mpm@selenic.com>
parents: 29322
diff changeset
194 *omj = mj;
5341
458acf92b49e bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents: 5340
diff changeset
195
29323
d29cb5e735e9 bdiff: remove effectively dead code
Matt Mackall <mpm@selenic.com>
parents: 29322
diff changeset
196 return mk;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
197 }
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
198
29540
4ce1fc91e30a bdiff: rename functions and structs to be amenable for later exporting
Maciej Fijalkowski <fijall@gmail.com>
parents: 29539
diff changeset
199 static struct bdiff_hunk *recurse(struct bdiff_line *a, struct bdiff_line *b,
4ce1fc91e30a bdiff: rename functions and structs to be amenable for later exporting
Maciej Fijalkowski <fijall@gmail.com>
parents: 29539
diff changeset
200 struct pos *pos,
4ce1fc91e30a bdiff: rename functions and structs to be amenable for later exporting
Maciej Fijalkowski <fijall@gmail.com>
parents: 29539
diff changeset
201 int a1, int a2, int b1, int b2, struct bdiff_hunk *l)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
202 {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
203 int i, j, k;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
204
10500
e96597c8d0ea bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents: 10282
diff changeset
205 while (1) {
e96597c8d0ea bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents: 10282
diff changeset
206 /* find the longest match in this chunk */
e96597c8d0ea bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents: 10282
diff changeset
207 k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);
e96597c8d0ea bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents: 10282
diff changeset
208 if (!k)
13089
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
209 return l;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
210
10500
e96597c8d0ea bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents: 10282
diff changeset
211 /* and recurse on the remaining chunks on either side */
13089
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
212 l = recurse(a, b, pos, a1, i, b1, j, l);
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
213 if (!l)
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
214 return NULL;
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
215
29540
4ce1fc91e30a bdiff: rename functions and structs to be amenable for later exporting
Maciej Fijalkowski <fijall@gmail.com>
parents: 29539
diff changeset
216 l->next = (struct bdiff_hunk *)malloc(sizeof(struct bdiff_hunk));
13089
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
217 if (!l->next)
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
218 return NULL;
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
219
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
220 l = l->next;
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
221 l->a1 = i;
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
222 l->a2 = i + k;
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
223 l->b1 = j;
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
224 l->b2 = j + k;
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
225 l->next = NULL;
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
226
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
227 /* tail-recursion didn't happen, so do equivalent iteration */
10500
e96597c8d0ea bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents: 10282
diff changeset
228 a1 = i + k;
e96597c8d0ea bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents: 10282
diff changeset
229 b1 = j + k;
e96597c8d0ea bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents: 10282
diff changeset
230 }
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
231 }
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
232
29541
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents: 29540
diff changeset
233 int bdiff_diff(struct bdiff_line *a, int an, struct bdiff_line *b,
29540
4ce1fc91e30a bdiff: rename functions and structs to be amenable for later exporting
Maciej Fijalkowski <fijall@gmail.com>
parents: 29539
diff changeset
234 int bn, struct bdiff_hunk *base)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
235 {
29540
4ce1fc91e30a bdiff: rename functions and structs to be amenable for later exporting
Maciej Fijalkowski <fijall@gmail.com>
parents: 29539
diff changeset
236 struct bdiff_hunk *curr;
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
237 struct pos *pos;
13089
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
238 int t, count = 0;
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
239
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
240 /* allocate and fill arrays */
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
241 t = equatelines(a, an, b, bn);
5571
f84bb2e1cc3a fix calloc(0, ...) issue
Jim Hague <jim.hague@acm.org>
parents: 5452
diff changeset
242 pos = (struct pos *)calloc(bn ? bn : 1, sizeof(struct pos));
13089
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
243
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
244 if (pos && t) {
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
245 /* generate the matching block list */
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
246
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
247 curr = recurse(a, b, pos, 0, an, 0, bn, base);
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
248 if (!curr)
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
249 return -1;
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
250
13089
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
251 /* sentinel end hunk */
29540
4ce1fc91e30a bdiff: rename functions and structs to be amenable for later exporting
Maciej Fijalkowski <fijall@gmail.com>
parents: 29539
diff changeset
252 curr->next = (struct bdiff_hunk *)malloc(sizeof(struct bdiff_hunk));
13089
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
253 if (!curr->next)
13090
c73745762f33 bdiff: Fix bogus NULL return
Matt Mackall <mpm@selenic.com>
parents: 13089
diff changeset
254 return -1;
13089
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
255 curr = curr->next;
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
256 curr->a1 = curr->a2 = an;
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
257 curr->b1 = curr->b2 = bn;
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
258 curr->next = NULL;
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
259 }
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
260
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
261 free(pos);
7104
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
262
7625
930a2be7e875 bdiff: add comment about normalization
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7189
diff changeset
263 /* normalize the hunk list, try to push each hunk towards the end */
13089
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
264 for (curr = base->next; curr; curr = curr->next) {
29540
4ce1fc91e30a bdiff: rename functions and structs to be amenable for later exporting
Maciej Fijalkowski <fijall@gmail.com>
parents: 29539
diff changeset
265 struct bdiff_hunk *next = curr->next;
7104
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
266
13089
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
267 if (!next)
7104
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
268 break;
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
269
29010
e868d8ee7c8f bdiff: unify duplicate normalize loops
Matt Mackall <mpm@selenic.com>
parents: 19962
diff changeset
270 if (curr->a2 == next->a1 || curr->b2 == next->b1)
29011
8bcda4c76820 bdiff: fold in shift calculation in normalize
Matt Mackall <mpm@selenic.com>
parents: 29010
diff changeset
271 while (curr->a2 < an && curr->b2 < bn
29012
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents: 29011
diff changeset
272 && next->a1 < next->a2
4bd67ae7d75a bdiff: fix latent normalization bug
Matt Mackall <mpm@selenic.com>
parents: 29011
diff changeset
273 && next->b1 < next->b2
29011
8bcda4c76820 bdiff: fold in shift calculation in normalize
Matt Mackall <mpm@selenic.com>
parents: 29010
diff changeset
274 && !cmp(a + curr->a2, b + curr->b2)) {
8bcda4c76820 bdiff: fold in shift calculation in normalize
Matt Mackall <mpm@selenic.com>
parents: 29010
diff changeset
275 curr->a2++;
8bcda4c76820 bdiff: fold in shift calculation in normalize
Matt Mackall <mpm@selenic.com>
parents: 29010
diff changeset
276 next->a1++;
8bcda4c76820 bdiff: fold in shift calculation in normalize
Matt Mackall <mpm@selenic.com>
parents: 29010
diff changeset
277 curr->b2++;
8bcda4c76820 bdiff: fold in shift calculation in normalize
Matt Mackall <mpm@selenic.com>
parents: 29010
diff changeset
278 next->b1++;
8bcda4c76820 bdiff: fold in shift calculation in normalize
Matt Mackall <mpm@selenic.com>
parents: 29010
diff changeset
279 }
7104
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
280 }
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
281
13089
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
282 for (curr = base->next; curr; curr = curr->next)
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
283 count++;
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
284 return count;
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
285 }
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
286
29541
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents: 29540
diff changeset
287 void bdiff_freehunks(struct bdiff_hunk *l)
13089
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
288 {
29540
4ce1fc91e30a bdiff: rename functions and structs to be amenable for later exporting
Maciej Fijalkowski <fijall@gmail.com>
parents: 29539
diff changeset
289 struct bdiff_hunk *n;
13089
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
290 for (; l; l = n) {
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
291 n = l->next;
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
292 free(l);
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
293 }
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
294 }
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
295
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
296