Mercurial > hg
annotate mercurial/thirdparty/xdiff/xdiffi.c @ 44011:c627f1b2f3c3
rust-index: handle `MixedIndex` in `pyindex_to_graph`
On the long run we will want to implement the Graph trait directly in Rust, but
for now we take the path with the least amount of change to focus on the coming
persistent NodeMap code.
We test this new code through with the lazy ancestors code.
Differential Revision: https://phab.mercurial-scm.org/D7657
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Thu, 12 Dec 2019 18:11:44 +0100 |
parents | d40b9e29c114 |
children |
rev | line source |
---|---|
36671 | 1 /* |
2 * LibXDiff by Davide Libenzi ( File Differential Library ) | |
3 * Copyright (C) 2003 Davide Libenzi | |
4 * | |
5 * This library is free software; you can redistribute it and/or | |
6 * modify it under the terms of the GNU Lesser General Public | |
7 * License as published by the Free Software Foundation; either | |
8 * version 2.1 of the License, or (at your option) any later version. | |
9 * | |
10 * This library is distributed in the hope that it will be useful, | |
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 * Lesser General Public License for more details. | |
14 * | |
15 * You should have received a copy of the GNU Lesser General Public | |
16 * License along with this library; if not, see | |
17 * <http://www.gnu.org/licenses/>. | |
18 * | |
19 * Davide Libenzi <davidel@xmailserver.org> | |
20 * | |
21 */ | |
22 | |
23 #include "xinclude.h" | |
24 | |
25 | |
26 | |
27 #define XDL_MAX_COST_MIN 256 | |
28 #define XDL_HEUR_MIN_COST 256 | |
29 #define XDL_LINE_MAX (long)((1UL << (CHAR_BIT * sizeof(long) - 1)) - 1) | |
30 #define XDL_SNAKE_CNT 20 | |
31 #define XDL_K_HEUR 4 | |
32 | |
36723
1f9bbd1d6b8a
xdiff: fix builds on Windows
Matt Harbison <matt_harbison@yahoo.com>
parents:
36674
diff
changeset
|
/* VC 2008 doesn't know about the inline keyword. */
/* Map C99 `inline` to the MSVC-specific forceinline hint so the file
 * compiles with older Microsoft compilers. */
#if defined(_MSC_VER)
#define inline __forceinline
#endif
36671 | 37 |
38 | |
/*
 * Result of splitting a diff "box" at its middle snake (see xdl_split()).
 * (i1, i2) is the split point; min_lo/min_hi tell the recursion whether the
 * lower/upper sub-box must be solved with the minimal (non-heuristic) path.
 */
typedef struct s_xdpsplit {
	int64_t i1, i2;       /* split coordinates in file 1 and file 2 */
	int min_lo, min_hi;   /* require minimal diff below/above the split */
} xdpsplit_t;
43 | |
44 | |
45 | |
46 | |
36822
882657a9f768
xdiff: replace {unsigned ,}long with {u,}int64_t
Jun Wu <quark@fb.com>
parents:
36820
diff
changeset
|
/* Find a split point of the (off1, off2, lim1, lim2) box; see definition below. */
static int64_t xdl_split(uint64_t const *ha1, int64_t off1, int64_t lim1,
			 uint64_t const *ha2, int64_t off2, int64_t lim2,
			 int64_t *kvdf, int64_t *kvdb, int need_min, xdpsplit_t *spl,
			 xdalgoenv_t *xenv);
/* Prepend a new change record to the script list xscr; see definition below. */
static xdchange_t *xdl_add_change(xdchange_t *xscr, int64_t i1, int64_t i2, int64_t chg1, int64_t chg2);
36671 | 52 |
53 | |
54 | |
55 | |
56 | |
/*
 * See "An O(ND) Difference Algorithm and its Variations", by Eugene Myers.
 * Basically considers a "box" (off1, off2, lim1, lim2) and scans from both
 * the forward diagonal starting from (off1, off2) and the backward diagonal
 * starting from (lim1, lim2). If the K values on the same diagonal cross,
 * it returns the furthest point of reach. We might end up with expensive
 * cases if this algorithm is run in full, so a little bit of heuristic is
 * needed to cut the search and to return a suboptimal point.
 *
 * ha1/ha2 are the per-line hash arrays; kvdf/kvdb are the forward/backward
 * K vectors indexed by diagonal; need_min forces the exact (non-heuristic)
 * search. On return *spl holds the chosen split point and the return value
 * is the edit cost "ec" reached (number of outer iterations).
 */
static int64_t xdl_split(uint64_t const *ha1, int64_t off1, int64_t lim1,
			 uint64_t const *ha2, int64_t off2, int64_t lim2,
			 int64_t *kvdf, int64_t *kvdb, int need_min, xdpsplit_t *spl,
			 xdalgoenv_t *xenv) {
	int64_t dmin = off1 - lim2, dmax = lim1 - off2;
	int64_t fmid = off1 - off2, bmid = lim1 - lim2;
	int64_t odd = (fmid - bmid) & 1;
	int64_t fmin = fmid, fmax = fmid;
	int64_t bmin = bmid, bmax = bmid;
	int64_t ec, d, i1, i2, prev1, best, dd, v, k;

	/*
	 * Set initial diagonal values for both forward and backward path.
	 */
	kvdf[fmid] = off1;
	kvdb[bmid] = lim1;

	for (ec = 1;; ec++) {
		int got_snake = 0;

		/*
		 * We need to extend the diagonal "domain" by one. If the next
		 * value exits the box boundaries we need to change it in the
		 * opposite direction because (max - min) must be a power of two.
		 * Also we initialize the external K value to -1 so that we can
		 * avoid extra condition checks inside the core loop.
		 */
		if (fmin > dmin)
			kvdf[--fmin - 1] = -1;
		else
			++fmin;
		if (fmax < dmax)
			kvdf[++fmax + 1] = -1;
		else
			--fmax;

		/* Forward pass: advance every active diagonal and follow its snake. */
		for (d = fmax; d >= fmin; d -= 2) {
			if (kvdf[d - 1] >= kvdf[d + 1])
				i1 = kvdf[d - 1] + 1;
			else
				i1 = kvdf[d + 1];
			prev1 = i1;
			i2 = i1 - d;
			for (; i1 < lim1 && i2 < lim2 && ha1[i1] == ha2[i2]; i1++, i2++);
			if (i1 - prev1 > xenv->snake_cnt)
				got_snake = 1;
			kvdf[d] = i1;
			/* Paths crossed (odd delta): this is the middle snake. */
			if (odd && bmin <= d && d <= bmax && kvdb[d] <= i1) {
				spl->i1 = i1;
				spl->i2 = i2;
				spl->min_lo = spl->min_hi = 1;
				return ec;
			}
		}

		/*
		 * We need to extend the diagonal "domain" by one. If the next
		 * value exits the box boundaries we need to change it in the
		 * opposite direction because (max - min) must be a power of two.
		 * Also we initialize the external K value to -1 so that we can
		 * avoid extra condition checks inside the core loop.
		 */
		if (bmin > dmin)
			kvdb[--bmin - 1] = XDL_LINE_MAX;
		else
			++bmin;
		if (bmax < dmax)
			kvdb[++bmax + 1] = XDL_LINE_MAX;
		else
			--bmax;

		/* Backward pass: mirror of the forward pass, walking snakes upward. */
		for (d = bmax; d >= bmin; d -= 2) {
			if (kvdb[d - 1] < kvdb[d + 1])
				i1 = kvdb[d - 1];
			else
				i1 = kvdb[d + 1] - 1;
			prev1 = i1;
			i2 = i1 - d;
			for (; i1 > off1 && i2 > off2 && ha1[i1 - 1] == ha2[i2 - 1]; i1--, i2--);
			if (prev1 - i1 > xenv->snake_cnt)
				got_snake = 1;
			kvdb[d] = i1;
			/* Paths crossed (even delta): this is the middle snake. */
			if (!odd && fmin <= d && d <= fmax && i1 <= kvdf[d]) {
				spl->i1 = i1;
				spl->i2 = i2;
				spl->min_lo = spl->min_hi = 1;
				return ec;
			}
		}

		/* Caller demanded a minimal diff: never take the shortcuts below. */
		if (need_min)
			continue;

		/*
		 * If the edit cost is above the heuristic trigger and if
		 * we got a good snake, we sample current diagonals to see
		 * if some of them have reached an "interesting" path. Our
		 * measure is a function of the distance from the diagonal
		 * corner (i1 + i2) penalized with the distance from the
		 * mid diagonal itself. If this value is above the current
		 * edit cost times a magic factor (XDL_K_HEUR) we consider
		 * it interesting.
		 */
		if (got_snake && ec > xenv->heur_min) {
			for (best = 0, d = fmax; d >= fmin; d -= 2) {
				dd = d > fmid ? d - fmid: fmid - d;
				i1 = kvdf[d];
				i2 = i1 - d;
				v = (i1 - off1) + (i2 - off2) - dd;

				if (v > XDL_K_HEUR * ec && v > best &&
				    off1 + xenv->snake_cnt <= i1 && i1 < lim1 &&
				    off2 + xenv->snake_cnt <= i2 && i2 < lim2) {
					/* Only accept if a full snake precedes the point. */
					for (k = 1; ha1[i1 - k] == ha2[i2 - k]; k++)
						if (k == xenv->snake_cnt) {
							best = v;
							spl->i1 = i1;
							spl->i2 = i2;
							break;
						}
				}
			}
			if (best > 0) {
				spl->min_lo = 1;
				spl->min_hi = 0;
				return ec;
			}

			for (best = 0, d = bmax; d >= bmin; d -= 2) {
				dd = d > bmid ? d - bmid: bmid - d;
				i1 = kvdb[d];
				i2 = i1 - d;
				v = (lim1 - i1) + (lim2 - i2) - dd;

				if (v > XDL_K_HEUR * ec && v > best &&
				    off1 < i1 && i1 <= lim1 - xenv->snake_cnt &&
				    off2 < i2 && i2 <= lim2 - xenv->snake_cnt) {
					/* Only accept if a full snake follows the point. */
					for (k = 0; ha1[i1 + k] == ha2[i2 + k]; k++)
						if (k == xenv->snake_cnt - 1) {
							best = v;
							spl->i1 = i1;
							spl->i2 = i2;
							break;
						}
				}
			}
			if (best > 0) {
				spl->min_lo = 0;
				spl->min_hi = 1;
				return ec;
			}
		}

		/*
		 * Enough is enough. We spent too much time here and now we collect
		 * the furthest reaching path using the (i1 + i2) measure.
		 */
		if (ec >= xenv->mxcost) {
			int64_t fbest, fbest1, bbest, bbest1;

			fbest = fbest1 = -1;
			for (d = fmax; d >= fmin; d -= 2) {
				i1 = XDL_MIN(kvdf[d], lim1);
				i2 = i1 - d;
				if (lim2 < i2)
					i1 = lim2 + d, i2 = lim2;
				if (fbest < i1 + i2) {
					fbest = i1 + i2;
					fbest1 = i1;
				}
			}

			bbest = bbest1 = XDL_LINE_MAX;
			for (d = bmax; d >= bmin; d -= 2) {
				i1 = XDL_MAX(off1, kvdb[d]);
				i2 = i1 - d;
				if (i2 < off2)
					i1 = off2 + d, i2 = off2;
				if (i1 + i2 < bbest) {
					bbest = i1 + i2;
					bbest1 = i1;
				}
			}

			/* Pick whichever direction got further into the box. */
			if ((lim1 + lim2) - bbest < fbest - (off1 + off2)) {
				spl->i1 = fbest1;
				spl->i2 = fbest - fbest1;
				spl->min_lo = 1;
				spl->min_hi = 0;
			} else {
				spl->i1 = bbest1;
				spl->i2 = bbest - bbest1;
				spl->min_lo = 0;
				spl->min_hi = 1;
			}
			return ec;
		}
	}
}
265 | |
266 | |
/*
 * Rule: "Divide et Impera". Recursively split the box in sub-boxes by calling
 * the box splitting function. Note that the real job (marking changed lines)
 * is done in the two boundary reaching checks.
 *
 * dd1/dd2 describe the two files (hashes, change flags, record index maps);
 * kvdf/kvdb are shared scratch K vectors sized by the caller; need_min
 * requests a minimal diff. Returns 0 on success, -1 on failure from a
 * recursive xdl_split() call.
 */
int xdl_recs_cmp(diffdata_t *dd1, int64_t off1, int64_t lim1,
		 diffdata_t *dd2, int64_t off2, int64_t lim2,
		 int64_t *kvdf, int64_t *kvdb, int need_min, xdalgoenv_t *xenv) {
	uint64_t const *ha1 = dd1->ha, *ha2 = dd2->ha;

	/*
	 * Shrink the box by walking through each diagonal snake (SW and NE).
	 */
	for (; off1 < lim1 && off2 < lim2 && ha1[off1] == ha2[off2]; off1++, off2++);
	for (; off1 < lim1 && off2 < lim2 && ha1[lim1 - 1] == ha2[lim2 - 1]; lim1--, lim2--);

	/*
	 * If one dimension is empty, then all records on the other one must
	 * be obviously changed.
	 */
	if (off1 == lim1) {
		char *rchg2 = dd2->rchg;
		int64_t *rindex2 = dd2->rindex;

		for (; off2 < lim2; off2++)
			rchg2[rindex2[off2]] = 1;
	} else if (off2 == lim2) {
		char *rchg1 = dd1->rchg;
		int64_t *rindex1 = dd1->rindex;

		for (; off1 < lim1; off1++)
			rchg1[rindex1[off1]] = 1;
	} else {
		xdpsplit_t spl;
		spl.i1 = spl.i2 = 0;

		/*
		 * Divide ...
		 */
		if (xdl_split(ha1, off1, lim1, ha2, off2, lim2, kvdf, kvdb,
			      need_min, &spl, xenv) < 0) {

			return -1;
		}

		/*
		 * ... et Impera.
		 */
		if (xdl_recs_cmp(dd1, off1, spl.i1, dd2, off2, spl.i2,
				 kvdf, kvdb, spl.min_lo, xenv) < 0 ||
		    xdl_recs_cmp(dd1, spl.i1, lim1, dd2, spl.i2, lim2,
				 kvdf, kvdb, spl.min_hi, xenv) < 0) {

			return -1;
		}
	}

	return 0;
}
326 | |
327 | |
/*
 * Top-level diff driver: prepares the environment, allocates the shared
 * K vectors, and runs the recursive comparison over both whole files.
 * On success the change flags live in xe->xdf1/xdf2; returns 0, or -1 on
 * preparation/allocation/comparison failure (xe is freed on error).
 */
int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
		xdfenv_t *xe) {
	int64_t ndiags;
	int64_t *kvd, *kvdf, *kvdb;
	xdalgoenv_t xenv;
	diffdata_t dd1, dd2;

	if (xdl_prepare_env(mf1, mf2, xpp, xe) < 0) {

		return -1;
	}

	/*
	 * Allocate and setup K vectors to be used by the differential algorithm.
	 * One is to store the forward path and one to store the backward path.
	 */
	ndiags = xe->xdf1.nreff + xe->xdf2.nreff + 3;
	if (!(kvd = (int64_t *) xdl_malloc((2 * ndiags + 2) * sizeof(int64_t)))) {

		xdl_free_env(xe);
		return -1;
	}
	kvdf = kvd;
	kvdb = kvdf + ndiags;
	/* Bias the vectors so negative diagonal indices stay in bounds. */
	kvdf += xe->xdf2.nreff + 1;
	kvdb += xe->xdf2.nreff + 1;

	xenv.mxcost = xdl_bogosqrt(ndiags);
	if (xenv.mxcost < XDL_MAX_COST_MIN)
		xenv.mxcost = XDL_MAX_COST_MIN;
	xenv.snake_cnt = XDL_SNAKE_CNT;
	xenv.heur_min = XDL_HEUR_MIN_COST;

	dd1.nrec = xe->xdf1.nreff;
	dd1.ha = xe->xdf1.ha;
	dd1.rchg = xe->xdf1.rchg;
	dd1.rindex = xe->xdf1.rindex;
	dd2.nrec = xe->xdf2.nreff;
	dd2.ha = xe->xdf2.ha;
	dd2.rchg = xe->xdf2.rchg;
	dd2.rindex = xe->xdf2.rindex;

	if (xdl_recs_cmp(&dd1, 0, dd1.nrec, &dd2, 0, dd2.nrec,
			 kvdf, kvdb, (xpp->flags & XDF_NEED_MINIMAL) != 0, &xenv) < 0) {

		xdl_free(kvd);
		xdl_free_env(xe);
		return -1;
	}

	xdl_free(kvd);

	return 0;
}
382 | |
383 | |
36822
882657a9f768
xdiff: replace {unsigned ,}long with {u,}int64_t
Jun Wu <quark@fb.com>
parents:
36820
diff
changeset
|
384 static xdchange_t *xdl_add_change(xdchange_t *xscr, int64_t i1, int64_t i2, int64_t chg1, int64_t chg2) { |
36671 | 385 xdchange_t *xch; |
386 | |
387 if (!(xch = (xdchange_t *) xdl_malloc(sizeof(xdchange_t)))) | |
388 return NULL; | |
389 | |
390 xch->next = xscr; | |
391 xch->i1 = i1; | |
392 xch->i2 = i2; | |
393 xch->chg1 = chg1; | |
394 xch->chg2 = chg2; | |
395 xch->ignore = 0; | |
396 | |
397 return xch; | |
398 } | |
399 | |
400 | |
36823
49fe6249937a
xdiff: remove unused flags parameter
Jun Wu <quark@fb.com>
parents:
36822
diff
changeset
|
401 static int recs_match(xrecord_t *rec1, xrecord_t *rec2) |
36671 | 402 { |
403 return (rec1->ha == rec2->ha && | |
404 xdl_recmatch(rec1->ptr, rec1->size, | |
36823
49fe6249937a
xdiff: remove unused flags parameter
Jun Wu <quark@fb.com>
parents:
36822
diff
changeset
|
405 rec2->ptr, rec2->size)); |
36671 | 406 } |
407 | |
408 /* | |
409 * If a line is indented more than this, get_indent() just returns this value. | |
410 * This avoids having to do absurd amounts of work for data that are not | |
411 * human-readable text, and also ensures that the output of get_indent fits within | |
412 * an int. | |
413 */ | |
414 #define MAX_INDENT 200 | |
415 | |
416 /* | |
417 * Return the amount of indentation of the specified line, treating TAB as 8 | |
418 * columns. Return -1 if line is empty or contains only whitespace. Clamp the | |
419 * output value at MAX_INDENT. | |
420 */ | |
421 static int get_indent(xrecord_t *rec) | |
422 { | |
36822
882657a9f768
xdiff: replace {unsigned ,}long with {u,}int64_t
Jun Wu <quark@fb.com>
parents:
36820
diff
changeset
|
423 int64_t i; |
36671 | 424 int ret = 0; |
425 | |
426 for (i = 0; i < rec->size; i++) { | |
427 char c = rec->ptr[i]; | |
428 | |
429 if (!XDL_ISSPACE(c)) | |
430 return ret; | |
431 else if (c == ' ') | |
432 ret += 1; | |
433 else if (c == '\t') | |
434 ret += 8 - ret % 8; | |
435 /* ignore other whitespace characters */ | |
436 | |
437 if (ret >= MAX_INDENT) | |
438 return MAX_INDENT; | |
439 } | |
440 | |
441 /* The line contains only whitespace. */ | |
442 return -1; | |
443 } | |
444 | |
/*
 * If more than this number of consecutive blank rows are found, just return this
 * value. This avoids requiring O(N^2) work for pathological cases, and also
 * ensures that the output of score_split fits in an int.
 */
#define MAX_BLANKS 20

/* Characteristics measured about a hypothetical split position. */
struct split_measurement {
	/*
	 * Is the split at the end of the file (aside from any blank lines)?
	 */
	int end_of_file;

	/*
	 * How much is the line immediately following the split indented (or -1 if
	 * the line is blank):
	 */
	int indent;

	/*
	 * How many consecutive lines above the split are blank?
	 */
	int pre_blank;

	/*
	 * How much is the nearest non-blank line above the split indented (or -1
	 * if there is no such line)?
	 */
	int pre_indent;

	/*
	 * How many lines after the line following the split are blank?
	 */
	int post_blank;

	/*
	 * How much is the nearest non-blank line after the line following the
	 * split indented (or -1 if there is no such line)?
	 */
	int post_indent;
};

/* Score of a split position; filled in by score_add_split(), compared
 * with score_cmp(). */
struct split_score {
	/* The effective indent of this split (smaller is preferred). */
	int effective_indent;

	/* Penalty for this split (smaller is preferred). */
	int penalty;
};
495 | |
/*
 * Fill m with information about a hypothetical split of xdf above line split.
 * Scans at most MAX_BLANKS blank lines in each direction so the cost stays
 * bounded on pathological input.
 */
static void measure_split(const xdfile_t *xdf, int64_t split,
			  struct split_measurement *m)
{
	int64_t i;

	/* Indentation of the line directly below the split (if any). */
	if (split >= xdf->nrec) {
		m->end_of_file = 1;
		m->indent = -1;
	} else {
		m->end_of_file = 0;
		m->indent = get_indent(xdf->recs[split]);
	}

	/* Walk up: count blanks and find the nearest non-blank indent. */
	m->pre_blank = 0;
	m->pre_indent = -1;
	for (i = split - 1; i >= 0; i--) {
		m->pre_indent = get_indent(xdf->recs[i]);
		if (m->pre_indent != -1)
			break;
		m->pre_blank += 1;
		if (m->pre_blank == MAX_BLANKS) {
			m->pre_indent = 0;
			break;
		}
	}

	/* Walk down: same, below the line following the split. */
	m->post_blank = 0;
	m->post_indent = -1;
	for (i = split + 1; i < xdf->nrec; i++) {
		m->post_indent = get_indent(xdf->recs[i]);
		if (m->post_indent != -1)
			break;
		m->post_blank += 1;
		if (m->post_blank == MAX_BLANKS) {
			m->post_indent = 0;
			break;
		}
	}
}
538 | |
/*
 * The empirically-determined weight factors used by score_split() below.
 * Larger values means that the position is a less favorable place to split.
 *
 * Note that scores are only ever compared against each other, so multiplying
 * all of these weight/penalty values by the same factor wouldn't change the
 * heuristic's behavior. Still, we need to set that arbitrary scale *somehow*.
 * In practice, these numbers are chosen to be large enough that they can be
 * adjusted relative to each other with sufficient precision despite using
 * integer math.
 */

/* Penalty if there are no non-blank lines before the split */
#define START_OF_FILE_PENALTY 1

/* Penalty if there are no non-blank lines after the split */
#define END_OF_FILE_PENALTY 21

/* Multiplier for the number of blank lines around the split */
#define TOTAL_BLANK_WEIGHT (-30)

/* Multiplier for the number of blank lines after the split */
#define POST_BLANK_WEIGHT 6

/*
 * Penalties applied if the line is indented more than its predecessor
 */
#define RELATIVE_INDENT_PENALTY (-4)
#define RELATIVE_INDENT_WITH_BLANK_PENALTY 10

/*
 * Penalties applied if the line is indented less than both its predecessor and
 * its successor
 */
#define RELATIVE_OUTDENT_PENALTY 24
#define RELATIVE_OUTDENT_WITH_BLANK_PENALTY 17

/*
 * Penalties applied if the line is indented less than its predecessor but not
 * less than its successor
 */
#define RELATIVE_DEDENT_PENALTY 23
#define RELATIVE_DEDENT_WITH_BLANK_PENALTY 17

/*
 * We only consider whether the sum of the effective indents for splits are
 * less than (-1), equal to (0), or greater than (+1) each other. The resulting
 * value is multiplied by the following weight and combined with the penalty to
 * determine the better of two scores.
 */
#define INDENT_WEIGHT 60
590 | |
/*
 * Compute a badness score for the hypothetical split whose measurements are
 * stored in m. The weight factors were determined empirically using the tools and
 * corpus described in
 *
 *     https://github.com/mhagger/diff-slider-tools
 *
 * Also see that project if you want to improve the weights based on, for example,
 * a larger or more diverse corpus.
 *
 * NOTE: this *accumulates* into s (+=); the caller is expected to have
 * initialized s before the first call.
 */
static void score_add_split(const struct split_measurement *m, struct split_score *s)
{
	/*
	 * A place to accumulate penalty factors (positive makes this index more
	 * favored):
	 */
	int post_blank, total_blank, indent, any_blanks;

	if (m->pre_indent == -1 && m->pre_blank == 0)
		s->penalty += START_OF_FILE_PENALTY;

	if (m->end_of_file)
		s->penalty += END_OF_FILE_PENALTY;

	/*
	 * Set post_blank to the number of blank lines following the split,
	 * including the line immediately after the split:
	 */
	post_blank = (m->indent == -1) ? 1 + m->post_blank : 0;
	total_blank = m->pre_blank + post_blank;

	/* Penalties based on nearby blank lines: */
	s->penalty += TOTAL_BLANK_WEIGHT * total_blank;
	s->penalty += POST_BLANK_WEIGHT * post_blank;

	/* If the split line is blank, judge indent by the next non-blank line. */
	if (m->indent != -1)
		indent = m->indent;
	else
		indent = m->post_indent;

	any_blanks = (total_blank != 0);

	/* Note that the effective indent is -1 at the end of the file: */
	s->effective_indent += indent;

	if (indent == -1) {
		/* No additional adjustments needed. */
	} else if (m->pre_indent == -1) {
		/* No additional adjustments needed. */
	} else if (indent > m->pre_indent) {
		/*
		 * The line is indented more than its predecessor.
		 */
		s->penalty += any_blanks ?
			RELATIVE_INDENT_WITH_BLANK_PENALTY :
			RELATIVE_INDENT_PENALTY;
	} else if (indent == m->pre_indent) {
		/*
		 * The line has the same indentation level as its predecessor.
		 * No additional adjustments needed.
		 */
	} else {
		/*
		 * The line is indented less than its predecessor. It could be
		 * the block terminator of the previous block, but it could
		 * also be the start of a new block (e.g., an "else" block, or
		 * maybe the previous block didn't have a block terminator).
		 * Try to distinguish those cases based on what comes next:
		 */
		if (m->post_indent != -1 && m->post_indent > indent) {
			/*
			 * The following line is indented more. So it is likely
			 * that this line is the start of a block.
			 */
			s->penalty += any_blanks ?
				RELATIVE_OUTDENT_WITH_BLANK_PENALTY :
				RELATIVE_OUTDENT_PENALTY;
		} else {
			/*
			 * That was probably the end of a block.
			 */
			s->penalty += any_blanks ?
				RELATIVE_DEDENT_WITH_BLANK_PENALTY :
				RELATIVE_DEDENT_PENALTY;
		}
	}
}
678 | |
679 static int score_cmp(struct split_score *s1, struct split_score *s2) | |
680 { | |
681 /* -1 if s1.effective_indent < s2->effective_indent, etc. */ | |
682 int cmp_indents = ((s1->effective_indent > s2->effective_indent) - | |
683 (s1->effective_indent < s2->effective_indent)); | |
684 | |
685 return INDENT_WEIGHT * cmp_indents + (s1->penalty - s2->penalty); | |
686 } | |
687 | |
/*
 * Represent a group of changed lines in an xdfile_t (i.e., a contiguous group
 * of lines that was inserted or deleted from the corresponding version of the
 * file). We consider there to be such a group at the beginning of the file, at
 * the end of the file, and between any two unchanged lines, though most such
 * groups will usually be empty.
 *
 * If the first line in a group is equal to the line following the group, then
 * the group can be slid down. Similarly, if the last line in a group is equal
 * to the line preceding the group, then the group can be slid up. See
 * group_slide_down() and group_slide_up().
 *
 * Note that loops that are testing for changed lines in xdf->rchg do not need
 * index bounding since the array is prepared with a zero at position -1 and N.
 */
struct xdlgroup {
	/*
	 * The index of the first changed line in the group, or the index of
	 * the unchanged line above which the (empty) group is located.
	 */
	int64_t start;

	/*
	 * The index of the first unchanged line after the group. For an empty
	 * group, end is equal to start.
	 */
	int64_t end;
};
716 | |
717 /* | |
718 * Initialize g to point at the first group in xdf. | |
719 */ | |
720 static void group_init(xdfile_t *xdf, struct xdlgroup *g) | |
721 { | |
722 g->start = g->end = 0; | |
723 while (xdf->rchg[g->end]) | |
724 g->end++; | |
725 } | |
726 | |
727 /* | |
728 * Move g to describe the next (possibly empty) group in xdf and return 0. If g | |
729 * is already at the end of the file, do nothing and return -1. | |
730 */ | |
731 static inline int group_next(xdfile_t *xdf, struct xdlgroup *g) | |
732 { | |
733 if (g->end == xdf->nrec) | |
734 return -1; | |
735 | |
736 g->start = g->end + 1; | |
737 for (g->end = g->start; xdf->rchg[g->end]; g->end++) | |
738 ; | |
739 | |
740 return 0; | |
741 } | |
742 | |
743 /* | |
744 * Move g to describe the previous (possibly empty) group in xdf and return 0. | |
745 * If g is already at the beginning of the file, do nothing and return -1. | |
746 */ | |
747 static inline int group_previous(xdfile_t *xdf, struct xdlgroup *g) | |
748 { | |
749 if (g->start == 0) | |
750 return -1; | |
751 | |
752 g->end = g->start - 1; | |
753 for (g->start = g->end; xdf->rchg[g->start - 1]; g->start--) | |
754 ; | |
755 | |
756 return 0; | |
757 } | |
758 | |
759 /* | |
760 * If g can be slid toward the end of the file, do so, and if it bumps into a | |
761 * following group, expand this group to include it. Return 0 on success or -1 | |
762 * if g cannot be slid down. | |
763 */ | |
36823
49fe6249937a
xdiff: remove unused flags parameter
Jun Wu <quark@fb.com>
parents:
36822
diff
changeset
|
764 static int group_slide_down(xdfile_t *xdf, struct xdlgroup *g) |
36671 | 765 { |
766 if (g->end < xdf->nrec && | |
36823
49fe6249937a
xdiff: remove unused flags parameter
Jun Wu <quark@fb.com>
parents:
36822
diff
changeset
|
767 recs_match(xdf->recs[g->start], xdf->recs[g->end])) { |
36671 | 768 xdf->rchg[g->start++] = 0; |
769 xdf->rchg[g->end++] = 1; | |
770 | |
771 while (xdf->rchg[g->end]) | |
772 g->end++; | |
773 | |
774 return 0; | |
775 } else { | |
776 return -1; | |
777 } | |
778 } | |
779 | |
780 /* | |
781 * If g can be slid toward the beginning of the file, do so, and if it bumps | |
782 * into a previous group, expand this group to include it. Return 0 on success | |
783 * or -1 if g cannot be slid up. | |
784 */ | |
36823
49fe6249937a
xdiff: remove unused flags parameter
Jun Wu <quark@fb.com>
parents:
36822
diff
changeset
|
785 static int group_slide_up(xdfile_t *xdf, struct xdlgroup *g) |
36671 | 786 { |
787 if (g->start > 0 && | |
36823
49fe6249937a
xdiff: remove unused flags parameter
Jun Wu <quark@fb.com>
parents:
36822
diff
changeset
|
788 recs_match(xdf->recs[g->start - 1], xdf->recs[g->end - 1])) { |
36671 | 789 xdf->rchg[--g->start] = 1; |
790 xdf->rchg[--g->end] = 0; | |
791 | |
792 while (xdf->rchg[g->start - 1]) | |
793 g->start--; | |
794 | |
795 return 0; | |
796 } else { | |
797 return -1; | |
798 } | |
799 } | |
800 | |
/* Report an internal invariant violation on stderr and abort the process. */
static void xdl_bug(const char *msg)
{
	fprintf(stderr, "BUG: %s\n", msg);
	exit(1);
}
806 | |
/*
 * For indentation heuristic, skip searching for better slide position after
 * checking MAX_BORING lines without finding an improvement. This defends the
 * indentation heuristic logic against pathological cases. The value is not
 * picked scientifically but should be good enough.
 */
#define MAX_BORING 100

c420792217c8
xdiff: reduce indent heuristic overhead
Jun Wu <quark@fb.com>
parents:
36673
diff
changeset
|
815 /* |
36671 | 816 * Move back and forward change groups for a consistent and pretty diff output. |
817 * This also helps in finding joinable change groups and reducing the diff | |
818 * size. | |
819 */ | |
36822
882657a9f768
xdiff: replace {unsigned ,}long with {u,}int64_t
Jun Wu <quark@fb.com>
parents:
36820
diff
changeset
|
820 int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, int64_t flags) { |
36671 | 821 struct xdlgroup g, go; |
36822
882657a9f768
xdiff: replace {unsigned ,}long with {u,}int64_t
Jun Wu <quark@fb.com>
parents:
36820
diff
changeset
|
822 int64_t earliest_end, end_matching_other; |
882657a9f768
xdiff: replace {unsigned ,}long with {u,}int64_t
Jun Wu <quark@fb.com>
parents:
36820
diff
changeset
|
823 int64_t groupsize; |
36671 | 824 |
825 group_init(xdf, &g); | |
826 group_init(xdfo, &go); | |
827 | |
828 while (1) { | |
829 /* If the group is empty in the to-be-compacted file, skip it: */ | |
830 if (g.end == g.start) | |
831 goto next; | |
832 | |
833 /* | |
834 * Now shift the change up and then down as far as possible in | |
835 * each direction. If it bumps into any other changes, merge them. | |
836 */ | |
837 do { | |
838 groupsize = g.end - g.start; | |
839 | |
840 /* | |
841 * Keep track of the last "end" index that causes this | |
842 * group to align with a group of changed lines in the | |
843 * other file. -1 indicates that we haven't found such | |
844 * a match yet: | |
845 */ | |
846 end_matching_other = -1; | |
847 | |
848 /* Shift the group backward as much as possible: */ | |
36823
49fe6249937a
xdiff: remove unused flags parameter
Jun Wu <quark@fb.com>
parents:
36822
diff
changeset
|
849 while (!group_slide_up(xdf, &g)) |
36671 | 850 if (group_previous(xdfo, &go)) |
851 xdl_bug("group sync broken sliding up"); | |
852 | |
853 /* | |
854 * This is this highest that this group can be shifted. | |
855 * Record its end index: | |
856 */ | |
857 earliest_end = g.end; | |
858 | |
859 if (go.end > go.start) | |
860 end_matching_other = g.end; | |
861 | |
862 /* Now shift the group forward as far as possible: */ | |
863 while (1) { | |
36823
49fe6249937a
xdiff: remove unused flags parameter
Jun Wu <quark@fb.com>
parents:
36822
diff
changeset
|
864 if (group_slide_down(xdf, &g)) |
36671 | 865 break; |
866 if (group_next(xdfo, &go)) | |
867 xdl_bug("group sync broken sliding down"); | |
868 | |
869 if (go.end > go.start) | |
870 end_matching_other = g.end; | |
871 } | |
872 } while (groupsize != g.end - g.start); | |
873 | |
874 /* | |
875 * If the group can be shifted, then we can possibly use this | |
876 * freedom to produce a more intuitive diff. | |
877 * | |
878 * The group is currently shifted as far down as possible, so the | |
879 * heuristics below only have to handle upwards shifts. | |
880 */ | |
881 | |
882 if (g.end == earliest_end) { | |
883 /* no shifting was possible */ | |
884 } else if (end_matching_other != -1) { | |
885 /* | |
886 * Move the possibly merged group of changes back to line | |
887 * up with the last group of changes from the other file | |
888 * that it can align with. | |
889 */ | |
890 while (go.end == go.start) { | |
36823
49fe6249937a
xdiff: remove unused flags parameter
Jun Wu <quark@fb.com>
parents:
36822
diff
changeset
|
891 if (group_slide_up(xdf, &g)) |
36671 | 892 xdl_bug("match disappeared"); |
893 if (group_previous(xdfo, &go)) | |
894 xdl_bug("group sync broken sliding to match"); | |
895 } | |
896 } else if (flags & XDF_INDENT_HEURISTIC) { | |
897 /* | |
898 * Indent heuristic: a group of pure add/delete lines | |
899 * implies two splits, one between the end of the "before" | |
900 * context and the start of the group, and another between | |
901 * the end of the group and the beginning of the "after" | |
902 * context. Some splits are aesthetically better and some | |
903 * are worse. We compute a badness "score" for each split, | |
904 * and add the scores for the two splits to define a | |
905 * "score" for each position that the group can be shifted | |
906 * to. Then we pick the shift with the lowest score. | |
907 */ | |
36822
882657a9f768
xdiff: replace {unsigned ,}long with {u,}int64_t
Jun Wu <quark@fb.com>
parents:
36820
diff
changeset
|
908 int64_t shift, best_shift = -1; |
36671 | 909 struct split_score best_score; |
910 | |
36674
c420792217c8
xdiff: reduce indent heuristic overhead
Jun Wu <quark@fb.com>
parents:
36673
diff
changeset
|
911 /* |
c420792217c8
xdiff: reduce indent heuristic overhead
Jun Wu <quark@fb.com>
parents:
36673
diff
changeset
|
912 * This is O(N * MAX_BLANKS) (N = shift-able lines). |
c420792217c8
xdiff: reduce indent heuristic overhead
Jun Wu <quark@fb.com>
parents:
36673
diff
changeset
|
913 * Even with MAX_BLANKS bounded to a small value, a |
c420792217c8
xdiff: reduce indent heuristic overhead
Jun Wu <quark@fb.com>
parents:
36673
diff
changeset
|
914 * large N could still make this loop take several |
c420792217c8
xdiff: reduce indent heuristic overhead
Jun Wu <quark@fb.com>
parents:
36673
diff
changeset
|
915 * times longer than the main diff algorithm. The |
c420792217c8
xdiff: reduce indent heuristic overhead
Jun Wu <quark@fb.com>
parents:
36673
diff
changeset
|
916 * "boring" value is to help cut down N to something |
c420792217c8
xdiff: reduce indent heuristic overhead
Jun Wu <quark@fb.com>
parents:
36673
diff
changeset
|
917 * like (MAX_BORING + groupsize). |
c420792217c8
xdiff: reduce indent heuristic overhead
Jun Wu <quark@fb.com>
parents:
36673
diff
changeset
|
918 * |
c420792217c8
xdiff: reduce indent heuristic overhead
Jun Wu <quark@fb.com>
parents:
36673
diff
changeset
|
919 * Scan from bottom to top. So we can exit the loop |
c420792217c8
xdiff: reduce indent heuristic overhead
Jun Wu <quark@fb.com>
parents:
36673
diff
changeset
|
920 * without compromising the assumption "for a same best |
c420792217c8
xdiff: reduce indent heuristic overhead
Jun Wu <quark@fb.com>
parents:
36673
diff
changeset
|
921 * score, pick the bottommost shift". |
c420792217c8
xdiff: reduce indent heuristic overhead
Jun Wu <quark@fb.com>
parents:
36673
diff
changeset
|
922 */ |
c420792217c8
xdiff: reduce indent heuristic overhead
Jun Wu <quark@fb.com>
parents:
36673
diff
changeset
|
923 int boring = 0; |
c420792217c8
xdiff: reduce indent heuristic overhead
Jun Wu <quark@fb.com>
parents:
36673
diff
changeset
|
924 for (shift = g.end; shift >= earliest_end; shift--) { |
36671 | 925 struct split_measurement m; |
926 struct split_score score = {0, 0}; | |
36674
c420792217c8
xdiff: reduce indent heuristic overhead
Jun Wu <quark@fb.com>
parents:
36673
diff
changeset
|
927 int cmp; |
36671 | 928 |
929 measure_split(xdf, shift, &m); | |
930 score_add_split(&m, &score); | |
931 measure_split(xdf, shift - groupsize, &m); | |
932 score_add_split(&m, &score); | |
36674
c420792217c8
xdiff: reduce indent heuristic overhead
Jun Wu <quark@fb.com>
parents:
36673
diff
changeset
|
933 |
c420792217c8
xdiff: reduce indent heuristic overhead
Jun Wu <quark@fb.com>
parents:
36673
diff
changeset
|
934 if (best_shift == -1) { |
c420792217c8
xdiff: reduce indent heuristic overhead
Jun Wu <quark@fb.com>
parents:
36673
diff
changeset
|
935 cmp = -1; |
c420792217c8
xdiff: reduce indent heuristic overhead
Jun Wu <quark@fb.com>
parents:
36673
diff
changeset
|
936 } else { |
c420792217c8
xdiff: reduce indent heuristic overhead
Jun Wu <quark@fb.com>
parents:
36673
diff
changeset
|
937 cmp = score_cmp(&score, &best_score); |
c420792217c8
xdiff: reduce indent heuristic overhead
Jun Wu <quark@fb.com>
parents:
36673
diff
changeset
|
938 } |
c420792217c8
xdiff: reduce indent heuristic overhead
Jun Wu <quark@fb.com>
parents:
36673
diff
changeset
|
939 if (cmp < 0) { |
c420792217c8
xdiff: reduce indent heuristic overhead
Jun Wu <quark@fb.com>
parents:
36673
diff
changeset
|
940 boring = 0; |
36671 | 941 best_score.effective_indent = score.effective_indent; |
942 best_score.penalty = score.penalty; | |
943 best_shift = shift; | |
36674
c420792217c8
xdiff: reduce indent heuristic overhead
Jun Wu <quark@fb.com>
parents:
36673
diff
changeset
|
944 } else { |
c420792217c8
xdiff: reduce indent heuristic overhead
Jun Wu <quark@fb.com>
parents:
36673
diff
changeset
|
945 boring += 1; |
c420792217c8
xdiff: reduce indent heuristic overhead
Jun Wu <quark@fb.com>
parents:
36673
diff
changeset
|
946 if (boring >= MAX_BORING) |
c420792217c8
xdiff: reduce indent heuristic overhead
Jun Wu <quark@fb.com>
parents:
36673
diff
changeset
|
947 break; |
36671 | 948 } |
949 } | |
950 | |
951 while (g.end > best_shift) { | |
36823
49fe6249937a
xdiff: remove unused flags parameter
Jun Wu <quark@fb.com>
parents:
36822
diff
changeset
|
952 if (group_slide_up(xdf, &g)) |
36671 | 953 xdl_bug("best shift unreached"); |
954 if (group_previous(xdfo, &go)) | |
955 xdl_bug("group sync broken sliding to blank line"); | |
956 } | |
957 } | |
958 | |
959 next: | |
960 /* Move past the just-processed group: */ | |
961 if (group_next(xdf, &g)) | |
962 break; | |
963 if (group_next(xdfo, &go)) | |
964 xdl_bug("group sync broken moving to next group"); | |
965 } | |
966 | |
967 if (!group_next(xdfo, &go)) | |
968 xdl_bug("group sync broken at end of file"); | |
969 | |
970 return 0; | |
971 } | |
972 | |
973 | |
974 int xdl_build_script(xdfenv_t *xe, xdchange_t **xscr) { | |
975 xdchange_t *cscr = NULL, *xch; | |
976 char *rchg1 = xe->xdf1.rchg, *rchg2 = xe->xdf2.rchg; | |
36822
882657a9f768
xdiff: replace {unsigned ,}long with {u,}int64_t
Jun Wu <quark@fb.com>
parents:
36820
diff
changeset
|
977 int64_t i1, i2, l1, l2; |
36671 | 978 |
979 /* | |
980 * Trivial. Collects "groups" of changes and creates an edit script. | |
981 */ | |
982 for (i1 = xe->xdf1.nrec, i2 = xe->xdf2.nrec; i1 >= 0 || i2 >= 0; i1--, i2--) | |
983 if (rchg1[i1 - 1] || rchg2[i2 - 1]) { | |
984 for (l1 = i1; rchg1[i1 - 1]; i1--); | |
985 for (l2 = i2; rchg2[i2 - 1]; i2--); | |
986 | |
987 if (!(xch = xdl_add_change(cscr, i1, i2, l1 - i1, l2 - i2))) { | |
988 xdl_free_script(cscr); | |
989 return -1; | |
990 } | |
991 cscr = xch; | |
992 } | |
993 | |
994 *xscr = cscr; | |
995 | |
996 return 0; | |
997 } | |
998 | |
999 | |
1000 void xdl_free_script(xdchange_t *xscr) { | |
1001 xdchange_t *xch; | |
1002 | |
1003 while ((xch = xscr) != NULL) { | |
1004 xscr = xscr->next; | |
1005 xdl_free(xch); | |
1006 } | |
1007 } | |
1008 | |
36763 | 1009 |
1010 /* | |
1011 * Starting at the passed change atom, find the latest change atom to be included | |
1012 * inside the differential hunk according to the specified configuration. | |
1013 * Also advance xscr if the first changes must be discarded. | |
1014 */ | |
36824
f0d9811dda8e
xdiff: remove unused xpp and xecfg parameters
Jun Wu <quark@fb.com>
parents:
36823
diff
changeset
|
1015 xdchange_t *xdl_get_hunk(xdchange_t **xscr) |
36763 | 1016 { |
1017 xdchange_t *xch, *xchp, *lxch; | |
36822
882657a9f768
xdiff: replace {unsigned ,}long with {u,}int64_t
Jun Wu <quark@fb.com>
parents:
36820
diff
changeset
|
1018 uint64_t ignored = 0; /* number of ignored blank lines */ |
36763 | 1019 |
1020 /* remove ignorable changes that are too far before other changes */ | |
1021 for (xchp = *xscr; xchp && xchp->ignore; xchp = xchp->next) { | |
1022 xch = xchp->next; | |
1023 | |
1024 if (xch == NULL || | |
36826
e5b14f5b8b94
xdiff: resolve signed unsigned comparison warning
Jun Wu <quark@fb.com>
parents:
36824
diff
changeset
|
1025 xch->i1 - (xchp->i1 + xchp->chg1) >= 0) |
36763 | 1026 *xscr = xch; |
1027 } | |
1028 | |
1029 if (*xscr == NULL) | |
1030 return NULL; | |
1031 | |
1032 lxch = *xscr; | |
1033 | |
1034 for (xchp = *xscr, xch = xchp->next; xch; xchp = xch, xch = xch->next) { | |
36822
882657a9f768
xdiff: replace {unsigned ,}long with {u,}int64_t
Jun Wu <quark@fb.com>
parents:
36820
diff
changeset
|
1035 int64_t distance = xch->i1 - (xchp->i1 + xchp->chg1); |
36826
e5b14f5b8b94
xdiff: resolve signed unsigned comparison warning
Jun Wu <quark@fb.com>
parents:
36824
diff
changeset
|
1036 if (distance > 0) |
36763 | 1037 break; |
1038 | |
36826
e5b14f5b8b94
xdiff: resolve signed unsigned comparison warning
Jun Wu <quark@fb.com>
parents:
36824
diff
changeset
|
1039 if (distance < 0 && (!xch->ignore || lxch == xchp)) { |
36763 | 1040 lxch = xch; |
1041 ignored = 0; | |
36826
e5b14f5b8b94
xdiff: resolve signed unsigned comparison warning
Jun Wu <quark@fb.com>
parents:
36824
diff
changeset
|
1042 } else if (distance < 0 && xch->ignore) { |
36763 | 1043 ignored += xch->chg2; |
1044 } else if (lxch != xchp && | |
36826
e5b14f5b8b94
xdiff: resolve signed unsigned comparison warning
Jun Wu <quark@fb.com>
parents:
36824
diff
changeset
|
1045 xch->i1 + ignored - (lxch->i1 + lxch->chg1) > 0) { |
36763 | 1046 break; |
1047 } else if (!xch->ignore) { | |
1048 lxch = xch; | |
1049 ignored = 0; | |
1050 } else { | |
1051 ignored += xch->chg2; | |
1052 } | |
1053 } | |
1054 | |
1055 return lxch; | |
1056 } | |
1057 | |
1058 | |
36671 | 1059 static int xdl_call_hunk_func(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb, |
1060 xdemitconf_t const *xecfg) | |
1061 { | |
36822
882657a9f768
xdiff: replace {unsigned ,}long with {u,}int64_t
Jun Wu <quark@fb.com>
parents:
36820
diff
changeset
|
1062 int64_t p = xe->nprefix, s = xe->nsuffix; |
36671 | 1063 xdchange_t *xch, *xche; |
36763 | 1064 |
1065 if (!xecfg->hunk_func) | |
1066 return -1; | |
1067 | |
36673 | 1068 if ((xecfg->flags & XDL_EMIT_BDIFFHUNK) != 0) { |
36822
882657a9f768
xdiff: replace {unsigned ,}long with {u,}int64_t
Jun Wu <quark@fb.com>
parents:
36820
diff
changeset
|
1069 int64_t i1 = 0, i2 = 0, n1 = xe->xdf1.nrec, n2 = xe->xdf2.nrec; |
36673 | 1070 for (xch = xscr; xch; xch = xche->next) { |
36824
f0d9811dda8e
xdiff: remove unused xpp and xecfg parameters
Jun Wu <quark@fb.com>
parents:
36823
diff
changeset
|
1071 xche = xdl_get_hunk(&xch); |
36673 | 1072 if (!xch) |
1073 break; | |
36820
f33a87cf60cc
xdiff: add a preprocessing step that trims files
Jun Wu <quark@fb.com>
parents:
36763
diff
changeset
|
1074 if (xch != xche) |
f33a87cf60cc
xdiff: add a preprocessing step that trims files
Jun Wu <quark@fb.com>
parents:
36763
diff
changeset
|
1075 xdl_bug("xch != xche"); |
f33a87cf60cc
xdiff: add a preprocessing step that trims files
Jun Wu <quark@fb.com>
parents:
36763
diff
changeset
|
1076 xch->i1 += p; |
f33a87cf60cc
xdiff: add a preprocessing step that trims files
Jun Wu <quark@fb.com>
parents:
36763
diff
changeset
|
1077 xch->i2 += p; |
36673 | 1078 if (xch->i1 > i1 || xch->i2 > i2) { |
1079 if (xecfg->hunk_func(i1, xch->i1, i2, xch->i2, ecb->priv) < 0) | |
1080 return -1; | |
1081 } | |
1082 i1 = xche->i1 + xche->chg1; | |
1083 i2 = xche->i2 + xche->chg2; | |
1084 } | |
36820
f33a87cf60cc
xdiff: add a preprocessing step that trims files
Jun Wu <quark@fb.com>
parents:
36763
diff
changeset
|
1085 if (xecfg->hunk_func(i1, n1 + p + s, i2, n2 + p + s, |
f33a87cf60cc
xdiff: add a preprocessing step that trims files
Jun Wu <quark@fb.com>
parents:
36763
diff
changeset
|
1086 ecb->priv) < 0) |
36671 | 1087 return -1; |
36673 | 1088 } else { |
1089 for (xch = xscr; xch; xch = xche->next) { | |
36824
f0d9811dda8e
xdiff: remove unused xpp and xecfg parameters
Jun Wu <quark@fb.com>
parents:
36823
diff
changeset
|
1090 xche = xdl_get_hunk(&xch); |
36673 | 1091 if (!xch) |
1092 break; | |
36820
f33a87cf60cc
xdiff: add a preprocessing step that trims files
Jun Wu <quark@fb.com>
parents:
36763
diff
changeset
|
1093 if (xecfg->hunk_func(xch->i1 + p, |
f33a87cf60cc
xdiff: add a preprocessing step that trims files
Jun Wu <quark@fb.com>
parents:
36763
diff
changeset
|
1094 xche->i1 + xche->chg1 - xch->i1, |
f33a87cf60cc
xdiff: add a preprocessing step that trims files
Jun Wu <quark@fb.com>
parents:
36763
diff
changeset
|
1095 xch->i2 + p, |
f33a87cf60cc
xdiff: add a preprocessing step that trims files
Jun Wu <quark@fb.com>
parents:
36763
diff
changeset
|
1096 xche->i2 + xche->chg2 - xch->i2, |
36673 | 1097 ecb->priv) < 0) |
1098 return -1; | |
1099 } | |
36671 | 1100 } |
1101 return 0; | |
1102 } | |
1103 | |
1104 int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, | |
1105 xdemitconf_t const *xecfg, xdemitcb_t *ecb) { | |
1106 xdchange_t *xscr; | |
1107 xdfenv_t xe; | |
1108 | |
1109 if (xdl_do_diff(mf1, mf2, xpp, &xe) < 0) { | |
1110 | |
1111 return -1; | |
1112 } | |
1113 if (xdl_change_compact(&xe.xdf1, &xe.xdf2, xpp->flags) < 0 || | |
1114 xdl_change_compact(&xe.xdf2, &xe.xdf1, xpp->flags) < 0 || | |
1115 xdl_build_script(&xe, &xscr) < 0) { | |
1116 | |
1117 xdl_free_env(&xe); | |
1118 return -1; | |
1119 } | |
1120 | |
36763 | 1121 if (xdl_call_hunk_func(&xe, xscr, ecb, xecfg) < 0) { |
36671 | 1122 xdl_free_script(xscr); |
36673 | 1123 xdl_free_env(&xe); |
1124 return -1; | |
36671 | 1125 } |
36673 | 1126 xdl_free_script(xscr); |
36671 | 1127 xdl_free_env(&xe); |
1128 | |
1129 return 0; | |
1130 } |