Mercurial > hg-stable
changeset 36700:34e2ff1f9cd8
xdiff: vendor xdiff library from git
Vendor git's xdiff library from git commit
d7c6c2369d7c6c2369ac21141b7c6cceaebc6414ec3da14ad using GPL2+ license.
There is another recent user report that hg diff generates suboptimal
result. It seems the fix to issue4074 isn't good enough. I crafted some
other interesting cases, and hg diff barely has any advantage compared with
gnu diffutils or git diff.
| testcase | gnu diffutils | hg diff | git diff |
| | lines time | lines time | lines time |
| patience | 6 0.00 | 602 0.08 | 6 0.00 |
| random | 91772 0.90 | 109462 0.70 | 91772 0.24 |
| json | 2 0.03 | 1264814 1.81 | 2 0.29 |
"lines" means the size of the output, i.e. the count of "+/-" lines. "time"
means seconds needed to do the calculation. Both are the smaller the better.
"hg diff" counts Python startup overhead.
Git and GNU diffutils generate optimal results. For the "json" case, git can
have an optimization that does a scan for common prefix and suffix first,
and match them if the length is greater than half of the text. See
https://neil.fraser.name/news/2006/03/12/. That would make git the fastest
for all above cases.
About testcases:
patience:
Aiming for the weakness of the greedy "patience diff" algorithm. Using
git's patience diff option would also get suboptimal result. Generated using
the Python script:
```
open('a', 'w').write('\n'.join(list('a' + 'x' * 300 + 'u' + 'x' * 700 + 'a\n')))
open('b', 'w').write('\n'.join(list('b' + 'x' * 700 + 'u' + 'x' * 300 + 'b\n')))
```
random:
Generated using the script in `test-issue4074.t`. It practically makes the
algorithm suffer. Impressively, git wins in both performance and diff
quality.
json:
The recent user reported case. It's a single line movement near the end of a
very large (800K lines) JSON file.
Test Plan:
Code taken as-is.
Differential Revision: https://phab.mercurial-scm.org/D2572
# no-check-commit for vendored code
author | Jun Wu <quark@fb.com> |
---|---|
date | Sat, 03 Mar 2018 10:39:43 -0800 |
parents | 44048f1bcee5 |
children | 9e7b14caf67f |
files | contrib/clang-format-blacklist mercurial/thirdparty/xdiff/xdiff.h mercurial/thirdparty/xdiff/xdiffi.c mercurial/thirdparty/xdiff/xdiffi.h mercurial/thirdparty/xdiff/xemit.c mercurial/thirdparty/xdiff/xemit.h mercurial/thirdparty/xdiff/xhistogram.c mercurial/thirdparty/xdiff/xinclude.h mercurial/thirdparty/xdiff/xmacros.h mercurial/thirdparty/xdiff/xmerge.c mercurial/thirdparty/xdiff/xpatience.c mercurial/thirdparty/xdiff/xprepare.c mercurial/thirdparty/xdiff/xprepare.h mercurial/thirdparty/xdiff/xtypes.h mercurial/thirdparty/xdiff/xutils.c mercurial/thirdparty/xdiff/xutils.h |
diffstat | 16 files changed, 4218 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- a/contrib/clang-format-blacklist Sat Mar 03 14:30:21 2018 -0800 +++ b/contrib/clang-format-blacklist Sat Mar 03 10:39:43 2018 -0800 @@ -52,3 +52,18 @@ contrib/python-zstandard/zstd/dictBuilder/zdict.h contrib/python-zstandard/zstd/zstd.h hgext/fsmonitor/pywatchman/bser.c +mercurial/thirdparty/xdiff/xdiff.h +mercurial/thirdparty/xdiff/xdiffi.c +mercurial/thirdparty/xdiff/xdiffi.h +mercurial/thirdparty/xdiff/xemit.c +mercurial/thirdparty/xdiff/xemit.h +mercurial/thirdparty/xdiff/xhistogram.c +mercurial/thirdparty/xdiff/xinclude.h +mercurial/thirdparty/xdiff/xmacros.h +mercurial/thirdparty/xdiff/xmerge.c +mercurial/thirdparty/xdiff/xpatience.c +mercurial/thirdparty/xdiff/xprepare.c +mercurial/thirdparty/xdiff/xprepare.h +mercurial/thirdparty/xdiff/xtypes.h +mercurial/thirdparty/xdiff/xutils.c +mercurial/thirdparty/xdiff/xutils.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mercurial/thirdparty/xdiff/xdiff.h Sat Mar 03 10:39:43 2018 -0800 @@ -0,0 +1,150 @@ +/* + * LibXDiff by Davide Libenzi ( File Differential Library ) + * Copyright (C) 2003 Davide Libenzi + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see + * <http://www.gnu.org/licenses/>. + * + * Davide Libenzi <davidel@xmailserver.org> + * + */ + +#if !defined(XDIFF_H) +#define XDIFF_H + +#ifdef __cplusplus +extern "C" { +#endif /* #ifdef __cplusplus */ + +/* xpparm_t.flags */ +#define XDF_NEED_MINIMAL (1 << 0) + +#define XDF_IGNORE_WHITESPACE (1 << 1) +#define XDF_IGNORE_WHITESPACE_CHANGE (1 << 2) +#define XDF_IGNORE_WHITESPACE_AT_EOL (1 << 3) +#define XDF_IGNORE_CR_AT_EOL (1 << 4) +#define XDF_WHITESPACE_FLAGS (XDF_IGNORE_WHITESPACE | \ + XDF_IGNORE_WHITESPACE_CHANGE | \ + XDF_IGNORE_WHITESPACE_AT_EOL | \ + XDF_IGNORE_CR_AT_EOL) + +#define XDF_IGNORE_BLANK_LINES (1 << 7) + +#define XDF_PATIENCE_DIFF (1 << 14) +#define XDF_HISTOGRAM_DIFF (1 << 15) +#define XDF_DIFF_ALGORITHM_MASK (XDF_PATIENCE_DIFF | XDF_HISTOGRAM_DIFF) +#define XDF_DIFF_ALG(x) ((x) & XDF_DIFF_ALGORITHM_MASK) + +#define XDF_INDENT_HEURISTIC (1 << 23) + +/* xdemitconf_t.flags */ +#define XDL_EMIT_FUNCNAMES (1 << 0) +#define XDL_EMIT_FUNCCONTEXT (1 << 2) + +#define XDL_MMB_READONLY (1 << 0) + +#define XDL_MMF_ATOMIC (1 << 0) + +#define XDL_BDOP_INS 1 +#define XDL_BDOP_CPY 2 +#define XDL_BDOP_INSB 3 + +/* merge simplification levels */ +#define XDL_MERGE_MINIMAL 0 +#define XDL_MERGE_EAGER 1 +#define XDL_MERGE_ZEALOUS 2 +#define XDL_MERGE_ZEALOUS_ALNUM 3 + +/* merge favor modes */ +#define XDL_MERGE_FAVOR_OURS 1 +#define XDL_MERGE_FAVOR_THEIRS 2 +#define XDL_MERGE_FAVOR_UNION 3 + +/* merge output styles */ +#define XDL_MERGE_DIFF3 1 + +typedef struct s_mmfile { + char *ptr; + long size; +} mmfile_t; + +typedef struct s_mmbuffer { + char *ptr; + long size; +} mmbuffer_t; + +typedef struct s_xpparam { + unsigned long flags; + + /* See Documentation/diff-options.txt. */ + char **anchors; + size_t anchors_nr; +} xpparam_t; + +typedef struct s_xdemitcb { + void *priv; + int (*outf)(void *, mmbuffer_t *, int); +} xdemitcb_t; + +typedef long (*find_func_t)(const char *line, long line_len, char *buffer, long buffer_size, void *priv); + +typedef int (*xdl_emit_hunk_consume_func_t)(long start_a, long count_a, + long start_b, long count_b, + void *cb_data); + +typedef struct s_xdemitconf { + long ctxlen; + long interhunkctxlen; + unsigned long flags; + find_func_t find_func; + void *find_func_priv; + xdl_emit_hunk_consume_func_t hunk_func; +} xdemitconf_t; + +typedef struct s_bdiffparam { + long bsize; +} bdiffparam_t; + + +#define xdl_malloc(x) malloc(x) +#define xdl_free(ptr) free(ptr) +#define xdl_realloc(ptr,x) realloc(ptr,x) + +void *xdl_mmfile_first(mmfile_t *mmf, long *size); +long xdl_mmfile_size(mmfile_t *mmf); + +int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, + xdemitconf_t const *xecfg, xdemitcb_t *ecb); + +typedef struct s_xmparam { + xpparam_t xpp; + int marker_size; + int level; + int favor; + int style; + const char *ancestor; /* label for orig */ + const char *file1; /* label for mf1 */ + const char *file2; /* label for mf2 */ +} xmparam_t; + +#define DEFAULT_CONFLICT_MARKER_SIZE 7 + +int xdl_merge(mmfile_t *orig, mmfile_t *mf1, mmfile_t *mf2, + xmparam_t const *xmp, mmbuffer_t *result); + +#ifdef __cplusplus +} +#endif /* #ifdef __cplusplus */ + +#endif /* #if !defined(XDIFF_H) */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mercurial/thirdparty/xdiff/xdiffi.c Sat Mar 03 10:39:43 2018 -0800 @@ -0,0 +1,1050 @@ +/* + * LibXDiff by Davide Libenzi ( File Differential Library ) + * Copyright (C) 2003 Davide Libenzi + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see + * <http://www.gnu.org/licenses/>. + * + * Davide Libenzi <davidel@xmailserver.org> + * + */ + +#include "xinclude.h" + + + +#define XDL_MAX_COST_MIN 256 +#define XDL_HEUR_MIN_COST 256 +#define XDL_LINE_MAX (long)((1UL << (CHAR_BIT * sizeof(long) - 1)) - 1) +#define XDL_SNAKE_CNT 20 +#define XDL_K_HEUR 4 + + + +typedef struct s_xdpsplit { + long i1, i2; + int min_lo, min_hi; +} xdpsplit_t; + + + + +static long xdl_split(unsigned long const *ha1, long off1, long lim1, + unsigned long const *ha2, long off2, long lim2, + long *kvdf, long *kvdb, int need_min, xdpsplit_t *spl, + xdalgoenv_t *xenv); +static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1, long chg2); + + + + + +/* + * See "An O(ND) Difference Algorithm and its Variations", by Eugene Myers. + * Basically considers a "box" (off1, off2, lim1, lim2) and scan from both + * the forward diagonal starting from (off1, off2) and the backward diagonal + * starting from (lim1, lim2). If the K values on the same diagonal crosses + * returns the furthest point of reach. We might end up having to expensive + * cases using this algorithm is full, so a little bit of heuristic is needed + * to cut the search and to return a suboptimal point. + */ +static long xdl_split(unsigned long const *ha1, long off1, long lim1, + unsigned long const *ha2, long off2, long lim2, + long *kvdf, long *kvdb, int need_min, xdpsplit_t *spl, + xdalgoenv_t *xenv) { + long dmin = off1 - lim2, dmax = lim1 - off2; + long fmid = off1 - off2, bmid = lim1 - lim2; + long odd = (fmid - bmid) & 1; + long fmin = fmid, fmax = fmid; + long bmin = bmid, bmax = bmid; + long ec, d, i1, i2, prev1, best, dd, v, k; + + /* + * Set initial diagonal values for both forward and backward path. + */ + kvdf[fmid] = off1; + kvdb[bmid] = lim1; + + for (ec = 1;; ec++) { + int got_snake = 0; + + /* + * We need to extent the diagonal "domain" by one. If the next + * values exits the box boundaries we need to change it in the + * opposite direction because (max - min) must be a power of two. + * Also we initialize the external K value to -1 so that we can + * avoid extra conditions check inside the core loop. + */ + if (fmin > dmin) + kvdf[--fmin - 1] = -1; + else + ++fmin; + if (fmax < dmax) + kvdf[++fmax + 1] = -1; + else + --fmax; + + for (d = fmax; d >= fmin; d -= 2) { + if (kvdf[d - 1] >= kvdf[d + 1]) + i1 = kvdf[d - 1] + 1; + else + i1 = kvdf[d + 1]; + prev1 = i1; + i2 = i1 - d; + for (; i1 < lim1 && i2 < lim2 && ha1[i1] == ha2[i2]; i1++, i2++); + if (i1 - prev1 > xenv->snake_cnt) + got_snake = 1; + kvdf[d] = i1; + if (odd && bmin <= d && d <= bmax && kvdb[d] <= i1) { + spl->i1 = i1; + spl->i2 = i2; + spl->min_lo = spl->min_hi = 1; + return ec; + } + } + + /* + * We need to extent the diagonal "domain" by one. If the next + * values exits the box boundaries we need to change it in the + * opposite direction because (max - min) must be a power of two. + * Also we initialize the external K value to -1 so that we can + * avoid extra conditions check inside the core loop. + */ + if (bmin > dmin) + kvdb[--bmin - 1] = XDL_LINE_MAX; + else + ++bmin; + if (bmax < dmax) + kvdb[++bmax + 1] = XDL_LINE_MAX; + else + --bmax; + + for (d = bmax; d >= bmin; d -= 2) { + if (kvdb[d - 1] < kvdb[d + 1]) + i1 = kvdb[d - 1]; + else + i1 = kvdb[d + 1] - 1; + prev1 = i1; + i2 = i1 - d; + for (; i1 > off1 && i2 > off2 && ha1[i1 - 1] == ha2[i2 - 1]; i1--, i2--); + if (prev1 - i1 > xenv->snake_cnt) + got_snake = 1; + kvdb[d] = i1; + if (!odd && fmin <= d && d <= fmax && i1 <= kvdf[d]) { + spl->i1 = i1; + spl->i2 = i2; + spl->min_lo = spl->min_hi = 1; + return ec; + } + } + + if (need_min) + continue; + + /* + * If the edit cost is above the heuristic trigger and if + * we got a good snake, we sample current diagonals to see + * if some of the, have reached an "interesting" path. Our + * measure is a function of the distance from the diagonal + * corner (i1 + i2) penalized with the distance from the + * mid diagonal itself. If this value is above the current + * edit cost times a magic factor (XDL_K_HEUR) we consider + * it interesting. + */ + if (got_snake && ec > xenv->heur_min) { + for (best = 0, d = fmax; d >= fmin; d -= 2) { + dd = d > fmid ? d - fmid: fmid - d; + i1 = kvdf[d]; + i2 = i1 - d; + v = (i1 - off1) + (i2 - off2) - dd; + + if (v > XDL_K_HEUR * ec && v > best && + off1 + xenv->snake_cnt <= i1 && i1 < lim1 && + off2 + xenv->snake_cnt <= i2 && i2 < lim2) { + for (k = 1; ha1[i1 - k] == ha2[i2 - k]; k++) + if (k == xenv->snake_cnt) { + best = v; + spl->i1 = i1; + spl->i2 = i2; + break; + } + } + } + if (best > 0) { + spl->min_lo = 1; + spl->min_hi = 0; + return ec; + } + + for (best = 0, d = bmax; d >= bmin; d -= 2) { + dd = d > bmid ? d - bmid: bmid - d; + i1 = kvdb[d]; + i2 = i1 - d; + v = (lim1 - i1) + (lim2 - i2) - dd; + + if (v > XDL_K_HEUR * ec && v > best && + off1 < i1 && i1 <= lim1 - xenv->snake_cnt && + off2 < i2 && i2 <= lim2 - xenv->snake_cnt) { + for (k = 0; ha1[i1 + k] == ha2[i2 + k]; k++) + if (k == xenv->snake_cnt - 1) { + best = v; + spl->i1 = i1; + spl->i2 = i2; + break; + } + } + } + if (best > 0) { + spl->min_lo = 0; + spl->min_hi = 1; + return ec; + } + } + + /* + * Enough is enough. We spent too much time here and now we collect + * the furthest reaching path using the (i1 + i2) measure. + */ + if (ec >= xenv->mxcost) { + long fbest, fbest1, bbest, bbest1; + + fbest = fbest1 = -1; + for (d = fmax; d >= fmin; d -= 2) { + i1 = XDL_MIN(kvdf[d], lim1); + i2 = i1 - d; + if (lim2 < i2) + i1 = lim2 + d, i2 = lim2; + if (fbest < i1 + i2) { + fbest = i1 + i2; + fbest1 = i1; + } + } + + bbest = bbest1 = XDL_LINE_MAX; + for (d = bmax; d >= bmin; d -= 2) { + i1 = XDL_MAX(off1, kvdb[d]); + i2 = i1 - d; + if (i2 < off2) + i1 = off2 + d, i2 = off2; + if (i1 + i2 < bbest) { + bbest = i1 + i2; + bbest1 = i1; + } + } + + if ((lim1 + lim2) - bbest < fbest - (off1 + off2)) { + spl->i1 = fbest1; + spl->i2 = fbest - fbest1; + spl->min_lo = 1; + spl->min_hi = 0; + } else { + spl->i1 = bbest1; + spl->i2 = bbest - bbest1; + spl->min_lo = 0; + spl->min_hi = 1; + } + return ec; + } + } +} + + +/* + * Rule: "Divide et Impera". Recursively split the box in sub-boxes by calling + * the box splitting function. Note that the real job (marking changed lines) + * is done in the two boundary reaching checks. + */ +int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1, + diffdata_t *dd2, long off2, long lim2, + long *kvdf, long *kvdb, int need_min, xdalgoenv_t *xenv) { + unsigned long const *ha1 = dd1->ha, *ha2 = dd2->ha; + + /* + * Shrink the box by walking through each diagonal snake (SW and NE). + */ + for (; off1 < lim1 && off2 < lim2 && ha1[off1] == ha2[off2]; off1++, off2++); + for (; off1 < lim1 && off2 < lim2 && ha1[lim1 - 1] == ha2[lim2 - 1]; lim1--, lim2--); + + /* + * If one dimension is empty, then all records on the other one must + * be obviously changed. + */ + if (off1 == lim1) { + char *rchg2 = dd2->rchg; + long *rindex2 = dd2->rindex; + + for (; off2 < lim2; off2++) + rchg2[rindex2[off2]] = 1; + } else if (off2 == lim2) { + char *rchg1 = dd1->rchg; + long *rindex1 = dd1->rindex; + + for (; off1 < lim1; off1++) + rchg1[rindex1[off1]] = 1; + } else { + xdpsplit_t spl; + spl.i1 = spl.i2 = 0; + + /* + * Divide ... + */ + if (xdl_split(ha1, off1, lim1, ha2, off2, lim2, kvdf, kvdb, + need_min, &spl, xenv) < 0) { + + return -1; + } + + /* + * ... et Impera. + */ + if (xdl_recs_cmp(dd1, off1, spl.i1, dd2, off2, spl.i2, + kvdf, kvdb, spl.min_lo, xenv) < 0 || + xdl_recs_cmp(dd1, spl.i1, lim1, dd2, spl.i2, lim2, + kvdf, kvdb, spl.min_hi, xenv) < 0) { + + return -1; + } + } + + return 0; +} + + +int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, + xdfenv_t *xe) { + long ndiags; + long *kvd, *kvdf, *kvdb; + xdalgoenv_t xenv; + diffdata_t dd1, dd2; + + if (XDF_DIFF_ALG(xpp->flags) == XDF_PATIENCE_DIFF) + return xdl_do_patience_diff(mf1, mf2, xpp, xe); + + if (XDF_DIFF_ALG(xpp->flags) == XDF_HISTOGRAM_DIFF) + return xdl_do_histogram_diff(mf1, mf2, xpp, xe); + + if (xdl_prepare_env(mf1, mf2, xpp, xe) < 0) { + + return -1; + } + + /* + * Allocate and setup K vectors to be used by the differential algorithm. + * One is to store the forward path and one to store the backward path. + */ + ndiags = xe->xdf1.nreff + xe->xdf2.nreff + 3; + if (!(kvd = (long *) xdl_malloc((2 * ndiags + 2) * sizeof(long)))) { + + xdl_free_env(xe); + return -1; + } + kvdf = kvd; + kvdb = kvdf + ndiags; + kvdf += xe->xdf2.nreff + 1; + kvdb += xe->xdf2.nreff + 1; + + xenv.mxcost = xdl_bogosqrt(ndiags); + if (xenv.mxcost < XDL_MAX_COST_MIN) + xenv.mxcost = XDL_MAX_COST_MIN; + xenv.snake_cnt = XDL_SNAKE_CNT; + xenv.heur_min = XDL_HEUR_MIN_COST; + + dd1.nrec = xe->xdf1.nreff; + dd1.ha = xe->xdf1.ha; + dd1.rchg = xe->xdf1.rchg; + dd1.rindex = xe->xdf1.rindex; + dd2.nrec = xe->xdf2.nreff; + dd2.ha = xe->xdf2.ha; + dd2.rchg = xe->xdf2.rchg; + dd2.rindex = xe->xdf2.rindex; + + if (xdl_recs_cmp(&dd1, 0, dd1.nrec, &dd2, 0, dd2.nrec, + kvdf, kvdb, (xpp->flags & XDF_NEED_MINIMAL) != 0, &xenv) < 0) { + + xdl_free(kvd); + xdl_free_env(xe); + return -1; + } + + xdl_free(kvd); + + return 0; +} + + +static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1, long chg2) { + xdchange_t *xch; + + if (!(xch = (xdchange_t *) xdl_malloc(sizeof(xdchange_t)))) + return NULL; + + xch->next = xscr; + xch->i1 = i1; + xch->i2 = i2; + xch->chg1 = chg1; + xch->chg2 = chg2; + xch->ignore = 0; + + return xch; +} + + +static int recs_match(xrecord_t *rec1, xrecord_t *rec2, long flags) +{ + return (rec1->ha == rec2->ha && + xdl_recmatch(rec1->ptr, rec1->size, + rec2->ptr, rec2->size, + flags)); +} + +/* + * If a line is indented more than this, get_indent() just returns this value. + * This avoids having to do absurd amounts of work for data that are not + * human-readable text, and also ensures that the output of get_indent fits within + * an int. + */ +#define MAX_INDENT 200 + +/* + * Return the amount of indentation of the specified line, treating TAB as 8 + * columns. Return -1 if line is empty or contains only whitespace. Clamp the + * output value at MAX_INDENT. + */ +static int get_indent(xrecord_t *rec) +{ + long i; + int ret = 0; + + for (i = 0; i < rec->size; i++) { + char c = rec->ptr[i]; + + if (!XDL_ISSPACE(c)) + return ret; + else if (c == ' ') + ret += 1; + else if (c == '\t') + ret += 8 - ret % 8; + /* ignore other whitespace characters */ + + if (ret >= MAX_INDENT) + return MAX_INDENT; + } + + /* The line contains only whitespace. */ + return -1; +} + +/* + * If more than this number of consecutive blank rows are found, just return this + * value. This avoids requiring O(N^2) work for pathological cases, and also + * ensures that the output of score_split fits in an int. + */ +#define MAX_BLANKS 20 + +/* Characteristics measured about a hypothetical split position. */ +struct split_measurement { + /* + * Is the split at the end of the file (aside from any blank lines)? + */ + int end_of_file; + + /* + * How much is the line immediately following the split indented (or -1 if + * the line is blank): + */ + int indent; + + /* + * How many consecutive lines above the split are blank? + */ + int pre_blank; + + /* + * How much is the nearest non-blank line above the split indented (or -1 + * if there is no such line)? + */ + int pre_indent; + + /* + * How many lines after the line following the split are blank? + */ + int post_blank; + + /* + * How much is the nearest non-blank line after the line following the + * split indented (or -1 if there is no such line)? + */ + int post_indent; +}; + +struct split_score { + /* The effective indent of this split (smaller is preferred). */ + int effective_indent; + + /* Penalty for this split (smaller is preferred). */ + int penalty; +}; + +/* + * Fill m with information about a hypothetical split of xdf above line split. + */ +static void measure_split(const xdfile_t *xdf, long split, + struct split_measurement *m) +{ + long i; + + if (split >= xdf->nrec) { + m->end_of_file = 1; + m->indent = -1; + } else { + m->end_of_file = 0; + m->indent = get_indent(xdf->recs[split]); + } + + m->pre_blank = 0; + m->pre_indent = -1; + for (i = split - 1; i >= 0; i--) { + m->pre_indent = get_indent(xdf->recs[i]); + if (m->pre_indent != -1) + break; + m->pre_blank += 1; + if (m->pre_blank == MAX_BLANKS) { + m->pre_indent = 0; + break; + } + } + + m->post_blank = 0; + m->post_indent = -1; + for (i = split + 1; i < xdf->nrec; i++) { + m->post_indent = get_indent(xdf->recs[i]); + if (m->post_indent != -1) + break; + m->post_blank += 1; + if (m->post_blank == MAX_BLANKS) { + m->post_indent = 0; + break; + } + } +} + +/* + * The empirically-determined weight factors used by score_split() below. + * Larger values means that the position is a less favorable place to split. + * + * Note that scores are only ever compared against each other, so multiplying + * all of these weight/penalty values by the same factor wouldn't change the + * heuristic's behavior. Still, we need to set that arbitrary scale *somehow*. + * In practice, these numbers are chosen to be large enough that they can be + * adjusted relative to each other with sufficient precision despite using + * integer math. + */ + +/* Penalty if there are no non-blank lines before the split */ +#define START_OF_FILE_PENALTY 1 + +/* Penalty if there are no non-blank lines after the split */ +#define END_OF_FILE_PENALTY 21 + +/* Multiplier for the number of blank lines around the split */ +#define TOTAL_BLANK_WEIGHT (-30) + +/* Multiplier for the number of blank lines after the split */ +#define POST_BLANK_WEIGHT 6 + +/* + * Penalties applied if the line is indented more than its predecessor + */ +#define RELATIVE_INDENT_PENALTY (-4) +#define RELATIVE_INDENT_WITH_BLANK_PENALTY 10 + +/* + * Penalties applied if the line is indented less than both its predecessor and + * its successor + */ +#define RELATIVE_OUTDENT_PENALTY 24 +#define RELATIVE_OUTDENT_WITH_BLANK_PENALTY 17 + +/* + * Penalties applied if the line is indented less than its predecessor but not + * less than its successor + */ +#define RELATIVE_DEDENT_PENALTY 23 +#define RELATIVE_DEDENT_WITH_BLANK_PENALTY 17 + +/* + * We only consider whether the sum of the effective indents for splits are + * less than (-1), equal to (0), or greater than (+1) each other. The resulting + * value is multiplied by the following weight and combined with the penalty to + * determine the better of two scores. + */ +#define INDENT_WEIGHT 60 + +/* + * Compute a badness score for the hypothetical split whose measurements are + * stored in m. The weight factors were determined empirically using the tools and + * corpus described in + * + * https://github.com/mhagger/diff-slider-tools + * + * Also see that project if you want to improve the weights based on, for example, + * a larger or more diverse corpus. + */ +static void score_add_split(const struct split_measurement *m, struct split_score *s) +{ + /* + * A place to accumulate penalty factors (positive makes this index more + * favored): + */ + int post_blank, total_blank, indent, any_blanks; + + if (m->pre_indent == -1 && m->pre_blank == 0) + s->penalty += START_OF_FILE_PENALTY; + + if (m->end_of_file) + s->penalty += END_OF_FILE_PENALTY; + + /* + * Set post_blank to the number of blank lines following the split, + * including the line immediately after the split: + */ + post_blank = (m->indent == -1) ? 1 + m->post_blank : 0; + total_blank = m->pre_blank + post_blank; + + /* Penalties based on nearby blank lines: */ + s->penalty += TOTAL_BLANK_WEIGHT * total_blank; + s->penalty += POST_BLANK_WEIGHT * post_blank; + + if (m->indent != -1) + indent = m->indent; + else + indent = m->post_indent; + + any_blanks = (total_blank != 0); + + /* Note that the effective indent is -1 at the end of the file: */ + s->effective_indent += indent; + + if (indent == -1) { + /* No additional adjustments needed. */ + } else if (m->pre_indent == -1) { + /* No additional adjustments needed. */ + } else if (indent > m->pre_indent) { + /* + * The line is indented more than its predecessor. + */ + s->penalty += any_blanks ? + RELATIVE_INDENT_WITH_BLANK_PENALTY : + RELATIVE_INDENT_PENALTY; + } else if (indent == m->pre_indent) { + /* + * The line has the same indentation level as its predecessor. + * No additional adjustments needed. + */ + } else { + /* + * The line is indented less than its predecessor. It could be + * the block terminator of the previous block, but it could + * also be the start of a new block (e.g., an "else" block, or + * maybe the previous block didn't have a block terminator). + * Try to distinguish those cases based on what comes next: + */ + if (m->post_indent != -1 && m->post_indent > indent) { + /* + * The following line is indented more. So it is likely + * that this line is the start of a block. + */ + s->penalty += any_blanks ? + RELATIVE_OUTDENT_WITH_BLANK_PENALTY : + RELATIVE_OUTDENT_PENALTY; + } else { + /* + * That was probably the end of a block. + */ + s->penalty += any_blanks ? + RELATIVE_DEDENT_WITH_BLANK_PENALTY : + RELATIVE_DEDENT_PENALTY; + } + } +} + +static int score_cmp(struct split_score *s1, struct split_score *s2) +{ + /* -1 if s1.effective_indent < s2->effective_indent, etc. */ + int cmp_indents = ((s1->effective_indent > s2->effective_indent) - + (s1->effective_indent < s2->effective_indent)); + + return INDENT_WEIGHT * cmp_indents + (s1->penalty - s2->penalty); +} + +/* + * Represent a group of changed lines in an xdfile_t (i.e., a contiguous group + * of lines that was inserted or deleted from the corresponding version of the + * file). We consider there to be such a group at the beginning of the file, at + * the end of the file, and between any two unchanged lines, though most such + * groups will usually be empty. + * + * If the first line in a group is equal to the line following the group, then + * the group can be slid down. Similarly, if the last line in a group is equal + * to the line preceding the group, then the group can be slid up. See + * group_slide_down() and group_slide_up(). + * + * Note that loops that are testing for changed lines in xdf->rchg do not need + * index bounding since the array is prepared with a zero at position -1 and N. + */ +struct xdlgroup { + /* + * The index of the first changed line in the group, or the index of + * the unchanged line above which the (empty) group is located. + */ + long start; + + /* + * The index of the first unchanged line after the group. For an empty + * group, end is equal to start. + */ + long end; +}; + +/* + * Initialize g to point at the first group in xdf. + */ +static void group_init(xdfile_t *xdf, struct xdlgroup *g) +{ + g->start = g->end = 0; + while (xdf->rchg[g->end]) + g->end++; +} + +/* + * Move g to describe the next (possibly empty) group in xdf and return 0. If g + * is already at the end of the file, do nothing and return -1. + */ +static inline int group_next(xdfile_t *xdf, struct xdlgroup *g) +{ + if (g->end == xdf->nrec) + return -1; + + g->start = g->end + 1; + for (g->end = g->start; xdf->rchg[g->end]; g->end++) + ; + + return 0; +} + +/* + * Move g to describe the previous (possibly empty) group in xdf and return 0. + * If g is already at the beginning of the file, do nothing and return -1. + */ +static inline int group_previous(xdfile_t *xdf, struct xdlgroup *g) +{ + if (g->start == 0) + return -1; + + g->end = g->start - 1; + for (g->start = g->end; xdf->rchg[g->start - 1]; g->start--) + ; + + return 0; +} + +/* + * If g can be slid toward the end of the file, do so, and if it bumps into a + * following group, expand this group to include it. Return 0 on success or -1 + * if g cannot be slid down. + */ +static int group_slide_down(xdfile_t *xdf, struct xdlgroup *g, long flags) +{ + if (g->end < xdf->nrec && + recs_match(xdf->recs[g->start], xdf->recs[g->end], flags)) { + xdf->rchg[g->start++] = 0; + xdf->rchg[g->end++] = 1; + + while (xdf->rchg[g->end]) + g->end++; + + return 0; + } else { + return -1; + } +} + +/* + * If g can be slid toward the beginning of the file, do so, and if it bumps + * into a previous group, expand this group to include it. Return 0 on success + * or -1 if g cannot be slid up. + */ +static int group_slide_up(xdfile_t *xdf, struct xdlgroup *g, long flags) +{ + if (g->start > 0 && + recs_match(xdf->recs[g->start - 1], xdf->recs[g->end - 1], flags)) { + xdf->rchg[--g->start] = 1; + xdf->rchg[--g->end] = 0; + + while (xdf->rchg[g->start - 1]) + g->start--; + + return 0; + } else { + return -1; + } +} + +static void xdl_bug(const char *msg) +{ + fprintf(stderr, "BUG: %s\n", msg); + exit(1); +} + +/* + * Move back and forward change groups for a consistent and pretty diff output. + * This also helps in finding joinable change groups and reducing the diff + * size. + */ +int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) { + struct xdlgroup g, go; + long earliest_end, end_matching_other; + long groupsize; + + group_init(xdf, &g); + group_init(xdfo, &go); + + while (1) { + /* If the group is empty in the to-be-compacted file, skip it: */ + if (g.end == g.start) + goto next; + + /* + * Now shift the change up and then down as far as possible in + * each direction. If it bumps into any other changes, merge them. + */ + do { + groupsize = g.end - g.start; + + /* + * Keep track of the last "end" index that causes this + * group to align with a group of changed lines in the + * other file. -1 indicates that we haven't found such + * a match yet: + */ + end_matching_other = -1; + + /* Shift the group backward as much as possible: */ + while (!group_slide_up(xdf, &g, flags)) + if (group_previous(xdfo, &go)) + xdl_bug("group sync broken sliding up"); + + /* + * This is this highest that this group can be shifted. + * Record its end index: + */ + earliest_end = g.end; + + if (go.end > go.start) + end_matching_other = g.end; + + /* Now shift the group forward as far as possible: */ + while (1) { + if (group_slide_down(xdf, &g, flags)) + break; + if (group_next(xdfo, &go)) + xdl_bug("group sync broken sliding down"); + + if (go.end > go.start) + end_matching_other = g.end; + } + } while (groupsize != g.end - g.start); + + /* + * If the group can be shifted, then we can possibly use this + * freedom to produce a more intuitive diff. + * + * The group is currently shifted as far down as possible, so the + * heuristics below only have to handle upwards shifts. + */ + + if (g.end == earliest_end) { + /* no shifting was possible */ + } else if (end_matching_other != -1) { + /* + * Move the possibly merged group of changes back to line + * up with the last group of changes from the other file + * that it can align with. + */ + while (go.end == go.start) { + if (group_slide_up(xdf, &g, flags)) + xdl_bug("match disappeared"); + if (group_previous(xdfo, &go)) + xdl_bug("group sync broken sliding to match"); + } + } else if (flags & XDF_INDENT_HEURISTIC) { + /* + * Indent heuristic: a group of pure add/delete lines + * implies two splits, one between the end of the "before" + * context and the start of the group, and another between + * the end of the group and the beginning of the "after" + * context. Some splits are aesthetically better and some + * are worse. We compute a badness "score" for each split, + * and add the scores for the two splits to define a + * "score" for each position that the group can be shifted + * to. Then we pick the shift with the lowest score. + */ + long shift, best_shift = -1; + struct split_score best_score; + + for (shift = earliest_end; shift <= g.end; shift++) { + struct split_measurement m; + struct split_score score = {0, 0}; + + measure_split(xdf, shift, &m); + score_add_split(&m, &score); + measure_split(xdf, shift - groupsize, &m); + score_add_split(&m, &score); + if (best_shift == -1 || + score_cmp(&score, &best_score) <= 0) { + best_score.effective_indent = score.effective_indent; + best_score.penalty = score.penalty; + best_shift = shift; + } + } + + while (g.end > best_shift) { + if (group_slide_up(xdf, &g, flags)) + xdl_bug("best shift unreached"); + if (group_previous(xdfo, &go)) + xdl_bug("group sync broken sliding to blank line"); + } + } + + next: + /* Move past the just-processed group: */ + if (group_next(xdf, &g)) + break; + if (group_next(xdfo, &go)) + xdl_bug("group sync broken moving to next group"); + } + + if (!group_next(xdfo, &go)) + xdl_bug("group sync broken at end of file"); + + return 0; +} + + +int xdl_build_script(xdfenv_t *xe, xdchange_t **xscr) { + xdchange_t *cscr = NULL, *xch; + char *rchg1 = xe->xdf1.rchg, *rchg2 = xe->xdf2.rchg; + long i1, i2, l1, l2; + + /* + * Trivial. Collects "groups" of changes and creates an edit script. + */ + for (i1 = xe->xdf1.nrec, i2 = xe->xdf2.nrec; i1 >= 0 || i2 >= 0; i1--, i2--) + if (rchg1[i1 - 1] || rchg2[i2 - 1]) { + for (l1 = i1; rchg1[i1 - 1]; i1--); + for (l2 = i2; rchg2[i2 - 1]; i2--); + + if (!(xch = xdl_add_change(cscr, i1, i2, l1 - i1, l2 - i2))) { + xdl_free_script(cscr); + return -1; + } + cscr = xch; + } + + *xscr = cscr; + + return 0; +} + + +void xdl_free_script(xdchange_t *xscr) { + xdchange_t *xch; + + while ((xch = xscr) != NULL) { + xscr = xscr->next; + xdl_free(xch); + } +} + +static int xdl_call_hunk_func(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb, + xdemitconf_t const *xecfg) +{ + xdchange_t *xch, *xche; + + for (xch = xscr; xch; xch = xche->next) { + xche = xdl_get_hunk(&xch, xecfg); + if (!xch) + break; + if (xecfg->hunk_func(xch->i1, xche->i1 + xche->chg1 - xch->i1, + xch->i2, xche->i2 + xche->chg2 - xch->i2, + ecb->priv) < 0) + return -1; + } + return 0; +} + +static void xdl_mark_ignorable(xdchange_t *xscr, xdfenv_t *xe, long flags) +{ + xdchange_t *xch; + + for (xch = xscr; xch; xch = xch->next) { + int ignore = 1; + xrecord_t **rec; + long i; + + rec = &xe->xdf1.recs[xch->i1]; + for (i = 0; i < xch->chg1 && ignore; i++) + ignore = xdl_blankline(rec[i]->ptr, rec[i]->size, flags); + + rec = &xe->xdf2.recs[xch->i2]; + for (i = 0; i < xch->chg2 && ignore; i++) + ignore = xdl_blankline(rec[i]->ptr, rec[i]->size, flags); + + xch->ignore = ignore; + } +} + +int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, + xdemitconf_t const *xecfg, xdemitcb_t *ecb) { + xdchange_t *xscr; + xdfenv_t xe; + emit_func_t ef = xecfg->hunk_func ? xdl_call_hunk_func : xdl_emit_diff; + + if (xdl_do_diff(mf1, mf2, xpp, &xe) < 0) { + + return -1; + } + if (xdl_change_compact(&xe.xdf1, &xe.xdf2, xpp->flags) < 0 || + xdl_change_compact(&xe.xdf2, &xe.xdf1, xpp->flags) < 0 || + xdl_build_script(&xe, &xscr) < 0) { + + xdl_free_env(&xe); + return -1; + } + if (xscr) { + if (xpp->flags & XDF_IGNORE_BLANK_LINES) + xdl_mark_ignorable(xscr, &xe, xpp->flags); + + if (ef(&xe, xscr, ecb, xecfg) < 0) { + + xdl_free_script(xscr); + xdl_free_env(&xe); + return -1; + } + xdl_free_script(xscr); + } + xdl_free_env(&xe); + + return 0; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mercurial/thirdparty/xdiff/xdiffi.h Sat Mar 03 10:39:43 2018 -0800 @@ -0,0 +1,64 @@ +/* + * LibXDiff by Davide Libenzi ( File Differential Library ) + * Copyright (C) 2003 Davide Libenzi + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see + * <http://www.gnu.org/licenses/>. + * + * Davide Libenzi <davidel@xmailserver.org> + * + */ + +#if !defined(XDIFFI_H) +#define XDIFFI_H + + +typedef struct s_diffdata { + long nrec; + unsigned long const *ha; + long *rindex; + char *rchg; +} diffdata_t; + +typedef struct s_xdalgoenv { + long mxcost; + long snake_cnt; + long heur_min; +} xdalgoenv_t; + +typedef struct s_xdchange { + struct s_xdchange *next; + long i1, i2; + long chg1, chg2; + int ignore; +} xdchange_t; + + + +int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1, + diffdata_t *dd2, long off2, long lim2, + long *kvdf, long *kvdb, int need_min, xdalgoenv_t *xenv); +int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, + xdfenv_t *xe); +int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags); +int xdl_build_script(xdfenv_t *xe, xdchange_t **xscr); +void xdl_free_script(xdchange_t *xscr); +int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb, + xdemitconf_t const *xecfg); +int xdl_do_patience_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, + xdfenv_t *env); +int xdl_do_histogram_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, + xdfenv_t *env); + +#endif /* #if !defined(XDIFFI_H) */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mercurial/thirdparty/xdiff/xemit.c Sat Mar 03 10:39:43 2018 -0800 @@ -0,0 +1,312 @@ +/* + * LibXDiff by Davide Libenzi ( File Differential Library ) + * Copyright (C) 2003 Davide Libenzi + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see + * <http://www.gnu.org/licenses/>. + * + * Davide Libenzi <davidel@xmailserver.org> + * + */ + +#include "xinclude.h" + +static long xdl_get_rec(xdfile_t *xdf, long ri, char const **rec) { + + *rec = xdf->recs[ri]->ptr; + + return xdf->recs[ri]->size; +} + + +static int xdl_emit_record(xdfile_t *xdf, long ri, char const *pre, xdemitcb_t *ecb) { + long size, psize = strlen(pre); + char const *rec; + + size = xdl_get_rec(xdf, ri, &rec); + if (xdl_emit_diffrec(rec, size, pre, psize, ecb) < 0) { + + return -1; + } + + return 0; +} + + +/* + * Starting at the passed change atom, find the latest change atom to be included + * inside the differential hunk according to the specified configuration. + * Also advance xscr if the first changes must be discarded. + */ +xdchange_t *xdl_get_hunk(xdchange_t **xscr, xdemitconf_t const *xecfg) +{ + xdchange_t *xch, *xchp, *lxch; + long max_common = 2 * xecfg->ctxlen + xecfg->interhunkctxlen; + long max_ignorable = xecfg->ctxlen; + unsigned long ignored = 0; /* number of ignored blank lines */ + + /* remove ignorable changes that are too far before other changes */ + for (xchp = *xscr; xchp && xchp->ignore; xchp = xchp->next) { + xch = xchp->next; + + if (xch == NULL || + xch->i1 - (xchp->i1 + xchp->chg1) >= max_ignorable) + *xscr = xch; + } + + if (*xscr == NULL) + return NULL; + + lxch = *xscr; + + for (xchp = *xscr, xch = xchp->next; xch; xchp = xch, xch = xch->next) { + long distance = xch->i1 - (xchp->i1 + xchp->chg1); + if (distance > max_common) + break; + + if (distance < max_ignorable && (!xch->ignore || lxch == xchp)) { + lxch = xch; + ignored = 0; + } else if (distance < max_ignorable && xch->ignore) { + ignored += xch->chg2; + } else if (lxch != xchp && + xch->i1 + ignored - (lxch->i1 + lxch->chg1) > max_common) { + break; + } else if (!xch->ignore) { + lxch = xch; + ignored = 0; + } else { + ignored += xch->chg2; + } + } + + return lxch; +} + + +static long def_ff(const char *rec, long len, char *buf, long sz, void *priv) +{ + if (len > 0 && + (isalpha((unsigned char)*rec) || /* identifier? */ + *rec == '_' || /* also identifier? */ + *rec == '$')) { /* identifiers from VMS and other esoterico */ + if (len > sz) + len = sz; + while (0 < len && isspace((unsigned char)rec[len - 1])) + len--; + memcpy(buf, rec, len); + return len; + } + return -1; +} + +static long match_func_rec(xdfile_t *xdf, xdemitconf_t const *xecfg, long ri, + char *buf, long sz) +{ + const char *rec; + long len = xdl_get_rec(xdf, ri, &rec); + if (!xecfg->find_func) + return def_ff(rec, len, buf, sz, xecfg->find_func_priv); + return xecfg->find_func(rec, len, buf, sz, xecfg->find_func_priv); +} + +static int is_func_rec(xdfile_t *xdf, xdemitconf_t const *xecfg, long ri) +{ + char dummy[1]; + return match_func_rec(xdf, xecfg, ri, dummy, sizeof(dummy)) >= 0; +} + +struct func_line { + long len; + char buf[80]; +}; + +static long get_func_line(xdfenv_t *xe, xdemitconf_t const *xecfg, + struct func_line *func_line, long start, long limit) +{ + long l, size, step = (start > limit) ? -1 : 1; + char *buf, dummy[1]; + + buf = func_line ? func_line->buf : dummy; + size = func_line ? sizeof(func_line->buf) : sizeof(dummy); + + for (l = start; l != limit && 0 <= l && l < xe->xdf1.nrec; l += step) { + long len = match_func_rec(&xe->xdf1, xecfg, l, buf, size); + if (len >= 0) { + if (func_line) + func_line->len = len; + return l; + } + } + return -1; +} + +static int is_empty_rec(xdfile_t *xdf, long ri) +{ + const char *rec; + long len = xdl_get_rec(xdf, ri, &rec); + + while (len > 0 && XDL_ISSPACE(*rec)) { + rec++; + len--; + } + return !len; +} + +int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb, + xdemitconf_t const *xecfg) { + long s1, s2, e1, e2, lctx; + xdchange_t *xch, *xche; + long funclineprev = -1; + struct func_line func_line = { 0 }; + + for (xch = xscr; xch; xch = xche->next) { + xche = xdl_get_hunk(&xch, xecfg); + if (!xch) + break; + + s1 = XDL_MAX(xch->i1 - xecfg->ctxlen, 0); + s2 = XDL_MAX(xch->i2 - xecfg->ctxlen, 0); + + if (xecfg->flags & XDL_EMIT_FUNCCONTEXT) { + long fs1, i1 = xch->i1; + + /* Appended chunk? */ + if (i1 >= xe->xdf1.nrec) { + long i2 = xch->i2; + + /* + * We don't need additional context if + * a whole function was added. + */ + while (i2 < xe->xdf2.nrec) { + if (is_func_rec(&xe->xdf2, xecfg, i2)) + goto post_context_calculation; + i2++; + } + + /* + * Otherwise get more context from the + * pre-image. + */ + i1 = xe->xdf1.nrec - 1; + } + + fs1 = get_func_line(xe, xecfg, NULL, i1, -1); + while (fs1 > 0 && !is_empty_rec(&xe->xdf1, fs1 - 1) && + !is_func_rec(&xe->xdf1, xecfg, fs1 - 1)) + fs1--; + if (fs1 < 0) + fs1 = 0; + if (fs1 < s1) { + s2 -= s1 - fs1; + s1 = fs1; + } + } + + post_context_calculation: + lctx = xecfg->ctxlen; + lctx = XDL_MIN(lctx, xe->xdf1.nrec - (xche->i1 + xche->chg1)); + lctx = XDL_MIN(lctx, xe->xdf2.nrec - (xche->i2 + xche->chg2)); + + e1 = xche->i1 + xche->chg1 + lctx; + e2 = xche->i2 + xche->chg2 + lctx; + + if (xecfg->flags & XDL_EMIT_FUNCCONTEXT) { + long fe1 = get_func_line(xe, xecfg, NULL, + xche->i1 + xche->chg1, + xe->xdf1.nrec); + while (fe1 > 0 && is_empty_rec(&xe->xdf1, fe1 - 1)) + fe1--; + if (fe1 < 0) + fe1 = xe->xdf1.nrec; + if (fe1 > e1) { + e2 += fe1 - e1; + e1 = fe1; + } + + /* + * Overlap with next change? Then include it + * in the current hunk and start over to find + * its new end. + */ + if (xche->next) { + long l = XDL_MIN(xche->next->i1, + xe->xdf1.nrec - 1); + if (l - xecfg->ctxlen <= e1 || + get_func_line(xe, xecfg, NULL, l, e1) < 0) { + xche = xche->next; + goto post_context_calculation; + } + } + } + + /* + * Emit current hunk header. + */ + + if (xecfg->flags & XDL_EMIT_FUNCNAMES) { + get_func_line(xe, xecfg, &func_line, + s1 - 1, funclineprev); + funclineprev = s1 - 1; + } + if (xdl_emit_hunk_hdr(s1 + 1, e1 - s1, s2 + 1, e2 - s2, + func_line.buf, func_line.len, ecb) < 0) + return -1; + + /* + * Emit pre-context. + */ + for (; s2 < xch->i2; s2++) + if (xdl_emit_record(&xe->xdf2, s2, " ", ecb) < 0) + return -1; + + for (s1 = xch->i1, s2 = xch->i2;; xch = xch->next) { + /* + * Merge previous with current change atom. + */ + for (; s1 < xch->i1 && s2 < xch->i2; s1++, s2++) + if (xdl_emit_record(&xe->xdf2, s2, " ", ecb) < 0) + return -1; + + /* + * Removes lines from the first file. + */ + for (s1 = xch->i1; s1 < xch->i1 + xch->chg1; s1++) + if (xdl_emit_record(&xe->xdf1, s1, "-", ecb) < 0) + return -1; + + /* + * Adds lines from the second file. + */ + for (s2 = xch->i2; s2 < xch->i2 + xch->chg2; s2++) + if (xdl_emit_record(&xe->xdf2, s2, "+", ecb) < 0) + return -1; + + if (xch == xche) + break; + s1 = xch->i1 + xch->chg1; + s2 = xch->i2 + xch->chg2; + } + + /* + * Emit post-context. + */ + for (s2 = xche->i2 + xche->chg2; s2 < e2; s2++) + if (xdl_emit_record(&xe->xdf2, s2, " ", ecb) < 0) + return -1; + } + + return 0; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mercurial/thirdparty/xdiff/xemit.h Sat Mar 03 10:39:43 2018 -0800 @@ -0,0 +1,36 @@ +/* + * LibXDiff by Davide Libenzi ( File Differential Library ) + * Copyright (C) 2003 Davide Libenzi + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see + * <http://www.gnu.org/licenses/>. + * + * Davide Libenzi <davidel@xmailserver.org> + * + */ + +#if !defined(XEMIT_H) +#define XEMIT_H + + +typedef int (*emit_func_t)(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb, + xdemitconf_t const *xecfg); + +xdchange_t *xdl_get_hunk(xdchange_t **xscr, xdemitconf_t const *xecfg); +int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb, + xdemitconf_t const *xecfg); + + + +#endif /* #if !defined(XEMIT_H) */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mercurial/thirdparty/xdiff/xhistogram.c Sat Mar 03 10:39:43 2018 -0800 @@ -0,0 +1,363 @@ +/* + * Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in JGit's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "xinclude.h" +#include "xtypes.h" +#include "xdiff.h" + +#define MAX_PTR UINT_MAX +#define MAX_CNT UINT_MAX + +#define LINE_END(n) (line##n + count##n - 1) +#define LINE_END_PTR(n) (*line##n + *count##n - 1) + +struct histindex { + struct record { + unsigned int ptr, cnt; + struct record *next; + } **records, /* an occurrence */ + **line_map; /* map of line to record chain */ + chastore_t rcha; + unsigned int *next_ptrs; + unsigned int table_bits, + records_size, + line_map_size; + + unsigned int max_chain_length, + key_shift, + ptr_shift; + + unsigned int cnt, + has_common; + + xdfenv_t *env; + xpparam_t const *xpp; +}; + +struct region { + unsigned int begin1, end1; + unsigned int begin2, end2; +}; + +#define LINE_MAP(i, a) (i->line_map[(a) - i->ptr_shift]) + +#define NEXT_PTR(index, ptr) \ + (index->next_ptrs[(ptr) - index->ptr_shift]) + +#define CNT(index, ptr) \ + ((LINE_MAP(index, ptr))->cnt) + +#define REC(env, s, l) \ + (env->xdf##s.recs[l - 1]) + +static int cmp_recs(xpparam_t const *xpp, + xrecord_t *r1, xrecord_t *r2) +{ + return r1->ha == r2->ha && + xdl_recmatch(r1->ptr, r1->size, r2->ptr, r2->size, + xpp->flags); +} + +#define CMP_ENV(xpp, env, s1, l1, s2, l2) \ + (cmp_recs(xpp, REC(env, s1, l1), REC(env, s2, l2))) + +#define CMP(i, s1, l1, s2, l2) \ + (cmp_recs(i->xpp, REC(i->env, s1, l1), REC(i->env, s2, l2))) + +#define TABLE_HASH(index, side, line) \ + XDL_HASHLONG((REC(index->env, side, line))->ha, index->table_bits) + +static int scanA(struct histindex *index, int line1, int count1) +{ + unsigned int ptr, tbl_idx; + unsigned int chain_len; + struct record **rec_chain, *rec; + + for (ptr = LINE_END(1); line1 <= ptr; ptr--) { + tbl_idx = TABLE_HASH(index, 1, ptr); + rec_chain = index->records + tbl_idx; + rec = *rec_chain; + + chain_len = 0; + while (rec) { + if (CMP(index, 1, rec->ptr, 1, ptr)) { + /* + * ptr is identical to another element. Insert + * it onto the front of the existing element + * chain. + */ + NEXT_PTR(index, ptr) = rec->ptr; + rec->ptr = ptr; + /* cap rec->cnt at MAX_CNT */ + rec->cnt = XDL_MIN(MAX_CNT, rec->cnt + 1); + LINE_MAP(index, ptr) = rec; + goto continue_scan; + } + + rec = rec->next; + chain_len++; + } + + if (chain_len == index->max_chain_length) + return -1; + + /* + * This is the first time we have ever seen this particular + * element in the sequence. Construct a new chain for it. + */ + if (!(rec = xdl_cha_alloc(&index->rcha))) + return -1; + rec->ptr = ptr; + rec->cnt = 1; + rec->next = *rec_chain; + *rec_chain = rec; + LINE_MAP(index, ptr) = rec; + +continue_scan: + ; /* no op */ + } + + return 0; +} + +static int try_lcs(struct histindex *index, struct region *lcs, int b_ptr, + int line1, int count1, int line2, int count2) +{ + unsigned int b_next = b_ptr + 1; + struct record *rec = index->records[TABLE_HASH(index, 2, b_ptr)]; + unsigned int as, ae, bs, be, np, rc; + int should_break; + + for (; rec; rec = rec->next) { + if (rec->cnt > index->cnt) { + if (!index->has_common) + index->has_common = CMP(index, 1, rec->ptr, 2, b_ptr); + continue; + } + + as = rec->ptr; + if (!CMP(index, 1, as, 2, b_ptr)) + continue; + + index->has_common = 1; + for (;;) { + should_break = 0; + np = NEXT_PTR(index, as); + bs = b_ptr; + ae = as; + be = bs; + rc = rec->cnt; + + while (line1 < as && line2 < bs + && CMP(index, 1, as - 1, 2, bs - 1)) { + as--; + bs--; + if (1 < rc) + rc = XDL_MIN(rc, CNT(index, as)); + } + while (ae < LINE_END(1) && be < LINE_END(2) + && CMP(index, 1, ae + 1, 2, be + 1)) { + ae++; + be++; + if (1 < rc) + rc = XDL_MIN(rc, CNT(index, ae)); + } + + if (b_next <= be) + b_next = be + 1; + if (lcs->end1 - lcs->begin1 < ae - as || rc < index->cnt) { + lcs->begin1 = as; + lcs->begin2 = bs; + lcs->end1 = ae; + lcs->end2 = be; + index->cnt = rc; + } + + if (np == 0) + break; + + while (np <= ae) { + np = NEXT_PTR(index, np); + if (np == 0) { + should_break = 1; + break; + } + } + + if (should_break) + break; + + as = np; + } + } + return b_next; +} + +static int find_lcs(struct histindex *index, struct region *lcs, + int line1, int count1, int line2, int count2) { + int b_ptr; + + if (scanA(index, line1, count1)) + return -1; + + index->cnt = index->max_chain_length + 1; + + for (b_ptr = line2; b_ptr <= LINE_END(2); ) + b_ptr = try_lcs(index, lcs, b_ptr, line1, count1, line2, count2); + + return index->has_common && index->max_chain_length < index->cnt; +} + +static int fall_back_to_classic_diff(struct histindex *index, + int line1, int count1, int line2, int count2) +{ + xpparam_t xpp; + xpp.flags = index->xpp->flags & ~XDF_DIFF_ALGORITHM_MASK; + + return xdl_fall_back_diff(index->env, &xpp, + line1, count1, line2, count2); +} + +static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env, + int line1, int count1, int line2, int count2) +{ + struct histindex index; + struct region lcs; + int sz; + int result = -1; + + if (count1 <= 0 && count2 <= 0) + return 0; + + if (LINE_END(1) >= MAX_PTR) + return -1; + + if (!count1) { + while(count2--) + env->xdf2.rchg[line2++ - 1] = 1; + return 0; + } else if (!count2) { + while(count1--) + env->xdf1.rchg[line1++ - 1] = 1; + return 0; + } + + memset(&index, 0, sizeof(index)); + + index.env = env; + index.xpp = xpp; + + index.records = NULL; + index.line_map = NULL; + /* in case of early xdl_cha_free() */ + index.rcha.head = NULL; + + index.table_bits = xdl_hashbits(count1); + sz = index.records_size = 1 << index.table_bits; + sz *= sizeof(struct record *); + if (!(index.records = (struct record **) xdl_malloc(sz))) + goto cleanup; + memset(index.records, 0, sz); + + sz = index.line_map_size = count1; + sz *= sizeof(struct record *); + if (!(index.line_map = (struct record **) xdl_malloc(sz))) + goto cleanup; + memset(index.line_map, 0, sz); + + sz = index.line_map_size; + sz *= sizeof(unsigned int); + if (!(index.next_ptrs = (unsigned int *) xdl_malloc(sz))) + goto cleanup; + memset(index.next_ptrs, 0, sz); + + /* lines / 4 + 1 comes from xprepare.c:xdl_prepare_ctx() */ + if (xdl_cha_init(&index.rcha, sizeof(struct record), count1 / 4 + 1) < 0) + goto cleanup; + + index.ptr_shift = line1; + index.max_chain_length = 64; + + memset(&lcs, 0, sizeof(lcs)); + if (find_lcs(&index, &lcs, line1, count1, line2, count2)) + result = fall_back_to_classic_diff(&index, line1, count1, line2, count2); + else { + if (lcs.begin1 == 0 && lcs.begin2 == 0) { + while (count1--) + env->xdf1.rchg[line1++ - 1] = 1; + while (count2--) + env->xdf2.rchg[line2++ - 1] = 1; + result = 0; + } else { + result = histogram_diff(xpp, env, + line1, lcs.begin1 - line1, + line2, lcs.begin2 - line2); + if (result) + goto cleanup; + result = histogram_diff(xpp, env, + lcs.end1 + 1, LINE_END(1) - lcs.end1, + lcs.end2 + 1, LINE_END(2) - lcs.end2); + if (result) + goto cleanup; + } + } + +cleanup: + xdl_free(index.records); + xdl_free(index.line_map); + xdl_free(index.next_ptrs); + xdl_cha_free(&index.rcha); + + return result; +} + +int xdl_do_histogram_diff(mmfile_t *file1, mmfile_t *file2, + xpparam_t const *xpp, xdfenv_t *env) +{ + if (xdl_prepare_env(file1, file2, xpp, env) < 0) + return -1; + + return histogram_diff(xpp, env, + env->xdf1.dstart + 1, env->xdf1.dend - env->xdf1.dstart + 1, + env->xdf2.dstart + 1, env->xdf2.dend - env->xdf2.dstart + 1); +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mercurial/thirdparty/xdiff/xinclude.h Sat Mar 03 10:39:43 2018 -0800 @@ -0,0 +1,42 @@ +/* + * LibXDiff by Davide Libenzi ( File Differential Library ) + * Copyright (C) 2003 Davide Libenzi + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see + * <http://www.gnu.org/licenses/>. + * + * Davide Libenzi <davidel@xmailserver.org> + * + */ + +#if !defined(XINCLUDE_H) +#define XINCLUDE_H + +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <limits.h> + +#include "xmacros.h" +#include "xdiff.h" +#include "xtypes.h" +#include "xutils.h" +#include "xprepare.h" +#include "xdiffi.h" +#include "xemit.h" + + +#endif /* #if !defined(XINCLUDE_H) */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mercurial/thirdparty/xdiff/xmacros.h Sat Mar 03 10:39:43 2018 -0800 @@ -0,0 +1,54 @@ +/* + * LibXDiff by Davide Libenzi ( File Differential Library ) + * Copyright (C) 2003 Davide Libenzi + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see + * <http://www.gnu.org/licenses/>. + * + * Davide Libenzi <davidel@xmailserver.org> + * + */ + +#if !defined(XMACROS_H) +#define XMACROS_H + + + + +#define XDL_MIN(a, b) ((a) < (b) ? (a): (b)) +#define XDL_MAX(a, b) ((a) > (b) ? (a): (b)) +#define XDL_ABS(v) ((v) >= 0 ? (v): -(v)) +#define XDL_ISDIGIT(c) ((c) >= '0' && (c) <= '9') +#define XDL_ISSPACE(c) (isspace((unsigned char)(c))) +#define XDL_ADDBITS(v,b) ((v) + ((v) >> (b))) +#define XDL_MASKBITS(b) ((1UL << (b)) - 1) +#define XDL_HASHLONG(v,b) (XDL_ADDBITS((unsigned long)(v), b) & XDL_MASKBITS(b)) +#define XDL_PTRFREE(p) do { if (p) { xdl_free(p); (p) = NULL; } } while (0) +#define XDL_LE32_PUT(p, v) \ +do { \ + unsigned char *__p = (unsigned char *) (p); \ + *__p++ = (unsigned char) (v); \ + *__p++ = (unsigned char) ((v) >> 8); \ + *__p++ = (unsigned char) ((v) >> 16); \ + *__p = (unsigned char) ((v) >> 24); \ +} while (0) +#define XDL_LE32_GET(p, v) \ +do { \ + unsigned char const *__p = (unsigned char const *) (p); \ + (v) = (unsigned long) __p[0] | ((unsigned long) __p[1]) << 8 | \ + ((unsigned long) __p[2]) << 16 | ((unsigned long) __p[3]) << 24; \ +} while (0) + + +#endif /* #if !defined(XMACROS_H) */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mercurial/thirdparty/xdiff/xmerge.c Sat Mar 03 10:39:43 2018 -0800 @@ -0,0 +1,686 @@ +/* + * LibXDiff by Davide Libenzi ( File Differential Library ) + * Copyright (C) 2003-2006 Davide Libenzi, Johannes E. Schindelin + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see + * <http://www.gnu.org/licenses/>. + * + * Davide Libenzi <davidel@xmailserver.org> + * + */ + +#include "xinclude.h" + +typedef struct s_xdmerge { + struct s_xdmerge *next; + /* + * 0 = conflict, + * 1 = no conflict, take first, + * 2 = no conflict, take second. + * 3 = no conflict, take both. + */ + int mode; + /* + * These point at the respective postimages. E.g. <i1,chg1> is + * how side #1 wants to change the common ancestor; if there is no + * overlap, lines before i1 in the postimage of side #1 appear + * in the merge result as a region touched by neither side. + */ + long i1, i2; + long chg1, chg2; + /* + * These point at the preimage; of course there is just one + * preimage, that is from the shared common ancestor. + */ + long i0; + long chg0; +} xdmerge_t; + +static int xdl_append_merge(xdmerge_t **merge, int mode, + long i0, long chg0, + long i1, long chg1, + long i2, long chg2) +{ + xdmerge_t *m = *merge; + if (m && (i1 <= m->i1 + m->chg1 || i2 <= m->i2 + m->chg2)) { + if (mode != m->mode) + m->mode = 0; + m->chg0 = i0 + chg0 - m->i0; + m->chg1 = i1 + chg1 - m->i1; + m->chg2 = i2 + chg2 - m->i2; + } else { + m = xdl_malloc(sizeof(xdmerge_t)); + if (!m) + return -1; + m->next = NULL; + m->mode = mode; + m->i0 = i0; + m->chg0 = chg0; + m->i1 = i1; + m->chg1 = chg1; + m->i2 = i2; + m->chg2 = chg2; + if (*merge) + (*merge)->next = m; + *merge = m; + } + return 0; +} + +static int xdl_cleanup_merge(xdmerge_t *c) +{ + int count = 0; + xdmerge_t *next_c; + + /* were there conflicts? */ + for (; c; c = next_c) { + if (c->mode == 0) + count++; + next_c = c->next; + free(c); + } + return count; +} + +static int xdl_merge_cmp_lines(xdfenv_t *xe1, int i1, xdfenv_t *xe2, int i2, + int line_count, long flags) +{ + int i; + xrecord_t **rec1 = xe1->xdf2.recs + i1; + xrecord_t **rec2 = xe2->xdf2.recs + i2; + + for (i = 0; i < line_count; i++) { + int result = xdl_recmatch(rec1[i]->ptr, rec1[i]->size, + rec2[i]->ptr, rec2[i]->size, flags); + if (!result) + return -1; + } + return 0; +} + +static int xdl_recs_copy_0(int use_orig, xdfenv_t *xe, int i, int count, int needs_cr, int add_nl, char *dest) +{ + xrecord_t **recs; + int size = 0; + + recs = (use_orig ? xe->xdf1.recs : xe->xdf2.recs) + i; + + if (count < 1) + return 0; + + for (i = 0; i < count; size += recs[i++]->size) + if (dest) + memcpy(dest + size, recs[i]->ptr, recs[i]->size); + if (add_nl) { + i = recs[count - 1]->size; + if (i == 0 || recs[count - 1]->ptr[i - 1] != '\n') { + if (needs_cr) { + if (dest) + dest[size] = '\r'; + size++; + } + + if (dest) + dest[size] = '\n'; + size++; + } + } + return size; +} + +static int xdl_recs_copy(xdfenv_t *xe, int i, int count, int needs_cr, int add_nl, char *dest) +{ + return xdl_recs_copy_0(0, xe, i, count, needs_cr, add_nl, dest); +} + +static int xdl_orig_copy(xdfenv_t *xe, int i, int count, int needs_cr, int add_nl, char *dest) +{ + return xdl_recs_copy_0(1, xe, i, count, needs_cr, add_nl, dest); +} + +/* + * Returns 1 if the i'th line ends in CR/LF (if it is the last line and + * has no eol, the preceding line, if any), 0 if it ends in LF-only, and + * -1 if the line ending cannot be determined. + */ +static int is_eol_crlf(xdfile_t *file, int i) +{ + long size; + + if (i < file->nrec - 1) + /* All lines before the last *must* end in LF */ + return (size = file->recs[i]->size) > 1 && + file->recs[i]->ptr[size - 2] == '\r'; + if (!file->nrec) + /* Cannot determine eol style from empty file */ + return -1; + if ((size = file->recs[i]->size) && + file->recs[i]->ptr[size - 1] == '\n') + /* Last line; ends in LF; Is it CR/LF? */ + return size > 1 && + file->recs[i]->ptr[size - 2] == '\r'; + if (!i) + /* The only line has no eol */ + return -1; + /* Determine eol from second-to-last line */ + return (size = file->recs[i - 1]->size) > 1 && + file->recs[i - 1]->ptr[size - 2] == '\r'; +} + +static int is_cr_needed(xdfenv_t *xe1, xdfenv_t *xe2, xdmerge_t *m) +{ + int needs_cr; + + /* Match post-images' preceding, or first, lines' end-of-line style */ + needs_cr = is_eol_crlf(&xe1->xdf2, m->i1 ? m->i1 - 1 : 0); + if (needs_cr) + needs_cr = is_eol_crlf(&xe2->xdf2, m->i2 ? m->i2 - 1 : 0); + /* Look at pre-image's first line, unless we already settled on LF */ + if (needs_cr) + needs_cr = is_eol_crlf(&xe1->xdf1, 0); + /* If still undecided, use LF-only */ + return needs_cr < 0 ? 0 : needs_cr; +} + +static int fill_conflict_hunk(xdfenv_t *xe1, const char *name1, + xdfenv_t *xe2, const char *name2, + const char *name3, + int size, int i, int style, + xdmerge_t *m, char *dest, int marker_size) +{ + int marker1_size = (name1 ? strlen(name1) + 1 : 0); + int marker2_size = (name2 ? strlen(name2) + 1 : 0); + int marker3_size = (name3 ? strlen(name3) + 1 : 0); + int needs_cr = is_cr_needed(xe1, xe2, m); + + if (marker_size <= 0) + marker_size = DEFAULT_CONFLICT_MARKER_SIZE; + + /* Before conflicting part */ + size += xdl_recs_copy(xe1, i, m->i1 - i, 0, 0, + dest ? dest + size : NULL); + + if (!dest) { + size += marker_size + 1 + needs_cr + marker1_size; + } else { + memset(dest + size, '<', marker_size); + size += marker_size; + if (marker1_size) { + dest[size] = ' '; + memcpy(dest + size + 1, name1, marker1_size - 1); + size += marker1_size; + } + if (needs_cr) + dest[size++] = '\r'; + dest[size++] = '\n'; + } + + /* Postimage from side #1 */ + size += xdl_recs_copy(xe1, m->i1, m->chg1, needs_cr, 1, + dest ? dest + size : NULL); + + if (style == XDL_MERGE_DIFF3) { + /* Shared preimage */ + if (!dest) { + size += marker_size + 1 + needs_cr + marker3_size; + } else { + memset(dest + size, '|', marker_size); + size += marker_size; + if (marker3_size) { + dest[size] = ' '; + memcpy(dest + size + 1, name3, marker3_size - 1); + size += marker3_size; + } + if (needs_cr) + dest[size++] = '\r'; + dest[size++] = '\n'; + } + size += xdl_orig_copy(xe1, m->i0, m->chg0, needs_cr, 1, + dest ? dest + size : NULL); + } + + if (!dest) { + size += marker_size + 1 + needs_cr; + } else { + memset(dest + size, '=', marker_size); + size += marker_size; + if (needs_cr) + dest[size++] = '\r'; + dest[size++] = '\n'; + } + + /* Postimage from side #2 */ + size += xdl_recs_copy(xe2, m->i2, m->chg2, needs_cr, 1, + dest ? dest + size : NULL); + if (!dest) { + size += marker_size + 1 + needs_cr + marker2_size; + } else { + memset(dest + size, '>', marker_size); + size += marker_size; + if (marker2_size) { + dest[size] = ' '; + memcpy(dest + size + 1, name2, marker2_size - 1); + size += marker2_size; + } + if (needs_cr) + dest[size++] = '\r'; + dest[size++] = '\n'; + } + return size; +} + +static int xdl_fill_merge_buffer(xdfenv_t *xe1, const char *name1, + xdfenv_t *xe2, const char *name2, + const char *ancestor_name, + int favor, + xdmerge_t *m, char *dest, int style, + int marker_size) +{ + int size, i; + + for (size = i = 0; m; m = m->next) { + if (favor && !m->mode) + m->mode = favor; + + if (m->mode == 0) + size = fill_conflict_hunk(xe1, name1, xe2, name2, + ancestor_name, + size, i, style, m, dest, + marker_size); + else if (m->mode & 3) { + /* Before conflicting part */ + size += xdl_recs_copy(xe1, i, m->i1 - i, 0, 0, + dest ? dest + size : NULL); + /* Postimage from side #1 */ + if (m->mode & 1) { + int needs_cr = is_cr_needed(xe1, xe2, m); + + size += xdl_recs_copy(xe1, m->i1, m->chg1, needs_cr, (m->mode & 2), + dest ? dest + size : NULL); + } + /* Postimage from side #2 */ + if (m->mode & 2) + size += xdl_recs_copy(xe2, m->i2, m->chg2, 0, 0, + dest ? dest + size : NULL); + } else + continue; + i = m->i1 + m->chg1; + } + size += xdl_recs_copy(xe1, i, xe1->xdf2.nrec - i, 0, 0, + dest ? dest + size : NULL); + return size; +} + +/* + * Sometimes, changes are not quite identical, but differ in only a few + * lines. Try hard to show only these few lines as conflicting. + */ +static int xdl_refine_conflicts(xdfenv_t *xe1, xdfenv_t *xe2, xdmerge_t *m, + xpparam_t const *xpp) +{ + for (; m; m = m->next) { + mmfile_t t1, t2; + xdfenv_t xe; + xdchange_t *xscr, *x; + int i1 = m->i1, i2 = m->i2; + + /* let's handle just the conflicts */ + if (m->mode) + continue; + + /* no sense refining a conflict when one side is empty */ + if (m->chg1 == 0 || m->chg2 == 0) + continue; + + /* + * This probably does not work outside git, since + * we have a very simple mmfile structure. + */ + t1.ptr = (char *)xe1->xdf2.recs[m->i1]->ptr; + t1.size = xe1->xdf2.recs[m->i1 + m->chg1 - 1]->ptr + + xe1->xdf2.recs[m->i1 + m->chg1 - 1]->size - t1.ptr; + t2.ptr = (char *)xe2->xdf2.recs[m->i2]->ptr; + t2.size = xe2->xdf2.recs[m->i2 + m->chg2 - 1]->ptr + + xe2->xdf2.recs[m->i2 + m->chg2 - 1]->size - t2.ptr; + if (xdl_do_diff(&t1, &t2, xpp, &xe) < 0) + return -1; + if (xdl_change_compact(&xe.xdf1, &xe.xdf2, xpp->flags) < 0 || + xdl_change_compact(&xe.xdf2, &xe.xdf1, xpp->flags) < 0 || + xdl_build_script(&xe, &xscr) < 0) { + xdl_free_env(&xe); + return -1; + } + if (!xscr) { + /* If this happens, the changes are identical. */ + xdl_free_env(&xe); + m->mode = 4; + continue; + } + x = xscr; + m->i1 = xscr->i1 + i1; + m->chg1 = xscr->chg1; + m->i2 = xscr->i2 + i2; + m->chg2 = xscr->chg2; + while (xscr->next) { + xdmerge_t *m2 = xdl_malloc(sizeof(xdmerge_t)); + if (!m2) { + xdl_free_env(&xe); + xdl_free_script(x); + return -1; + } + xscr = xscr->next; + m2->next = m->next; + m->next = m2; + m = m2; + m->mode = 0; + m->i1 = xscr->i1 + i1; + m->chg1 = xscr->chg1; + m->i2 = xscr->i2 + i2; + m->chg2 = xscr->chg2; + } + xdl_free_env(&xe); + xdl_free_script(x); + } + return 0; +} + +static int line_contains_alnum(const char *ptr, long size) +{ + while (size--) + if (isalnum((unsigned char)*(ptr++))) + return 1; + return 0; +} + +static int lines_contain_alnum(xdfenv_t *xe, int i, int chg) +{ + for (; chg; chg--, i++) + if (line_contains_alnum(xe->xdf2.recs[i]->ptr, + xe->xdf2.recs[i]->size)) + return 1; + return 0; +} + +/* + * This function merges m and m->next, marking everything between those hunks + * as conflicting, too. + */ +static void xdl_merge_two_conflicts(xdmerge_t *m) +{ + xdmerge_t *next_m = m->next; + m->chg1 = next_m->i1 + next_m->chg1 - m->i1; + m->chg2 = next_m->i2 + next_m->chg2 - m->i2; + m->next = next_m->next; + free(next_m); +} + +/* + * If there are less than 3 non-conflicting lines between conflicts, + * it appears simpler -- because it takes up less (or as many) lines -- + * if the lines are moved into the conflicts. + */ +static int xdl_simplify_non_conflicts(xdfenv_t *xe1, xdmerge_t *m, + int simplify_if_no_alnum) +{ + int result = 0; + + if (!m) + return result; + for (;;) { + xdmerge_t *next_m = m->next; + int begin, end; + + if (!next_m) + return result; + + begin = m->i1 + m->chg1; + end = next_m->i1; + + if (m->mode != 0 || next_m->mode != 0 || + (end - begin > 3 && + (!simplify_if_no_alnum || + lines_contain_alnum(xe1, begin, end - begin)))) { + m = next_m; + } else { + result++; + xdl_merge_two_conflicts(m); + } + } +} + +/* + * level == 0: mark all overlapping changes as conflict + * level == 1: mark overlapping changes as conflict only if not identical + * level == 2: analyze non-identical changes for minimal conflict set + * level == 3: analyze non-identical changes for minimal conflict set, but + * treat hunks not containing any letter or number as conflicting + * + * returns < 0 on error, == 0 for no conflicts, else number of conflicts + */ +static int xdl_do_merge(xdfenv_t *xe1, xdchange_t *xscr1, + xdfenv_t *xe2, xdchange_t *xscr2, + xmparam_t const *xmp, mmbuffer_t *result) +{ + xdmerge_t *changes, *c; + xpparam_t const *xpp = &xmp->xpp; + const char *const ancestor_name = xmp->ancestor; + const char *const name1 = xmp->file1; + const char *const name2 = xmp->file2; + int i0, i1, i2, chg0, chg1, chg2; + int level = xmp->level; + int style = xmp->style; + int favor = xmp->favor; + + if (style == XDL_MERGE_DIFF3) { + /* + * "diff3 -m" output does not make sense for anything + * more aggressive than XDL_MERGE_EAGER. + */ + if (XDL_MERGE_EAGER < level) + level = XDL_MERGE_EAGER; + } + + c = changes = NULL; + + while (xscr1 && xscr2) { + if (!changes) + changes = c; + if (xscr1->i1 + xscr1->chg1 < xscr2->i1) { + i0 = xscr1->i1; + i1 = xscr1->i2; + i2 = xscr2->i2 - xscr2->i1 + xscr1->i1; + chg0 = xscr1->chg1; + chg1 = xscr1->chg2; + chg2 = xscr1->chg1; + if (xdl_append_merge(&c, 1, + i0, chg0, i1, chg1, i2, chg2)) { + xdl_cleanup_merge(changes); + return -1; + } + xscr1 = xscr1->next; + continue; + } + if (xscr2->i1 + xscr2->chg1 < xscr1->i1) { + i0 = xscr2->i1; + i1 = xscr1->i2 - xscr1->i1 + xscr2->i1; + i2 = xscr2->i2; + chg0 = xscr2->chg1; + chg1 = xscr2->chg1; + chg2 = xscr2->chg2; + if (xdl_append_merge(&c, 2, + i0, chg0, i1, chg1, i2, chg2)) { + xdl_cleanup_merge(changes); + return -1; + } + xscr2 = xscr2->next; + continue; + } + if (level == XDL_MERGE_MINIMAL || xscr1->i1 != xscr2->i1 || + xscr1->chg1 != xscr2->chg1 || + xscr1->chg2 != xscr2->chg2 || + xdl_merge_cmp_lines(xe1, xscr1->i2, + xe2, xscr2->i2, + xscr1->chg2, xpp->flags)) { + /* conflict */ + int off = xscr1->i1 - xscr2->i1; + int ffo = off + xscr1->chg1 - xscr2->chg1; + + i0 = xscr1->i1; + i1 = xscr1->i2; + i2 = xscr2->i2; + if (off > 0) { + i0 -= off; + i1 -= off; + } + else + i2 += off; + chg0 = xscr1->i1 + xscr1->chg1 - i0; + chg1 = xscr1->i2 + xscr1->chg2 - i1; + chg2 = xscr2->i2 + xscr2->chg2 - i2; + if (ffo < 0) { + chg0 -= ffo; + chg1 -= ffo; + } else + chg2 += ffo; + if (xdl_append_merge(&c, 0, + i0, chg0, i1, chg1, i2, chg2)) { + xdl_cleanup_merge(changes); + return -1; + } + } + + i1 = xscr1->i1 + xscr1->chg1; + i2 = xscr2->i1 + xscr2->chg1; + + if (i1 >= i2) + xscr2 = xscr2->next; + if (i2 >= i1) + xscr1 = xscr1->next; + } + while (xscr1) { + if (!changes) + changes = c; + i0 = xscr1->i1; + i1 = xscr1->i2; + i2 = xscr1->i1 + xe2->xdf2.nrec - xe2->xdf1.nrec; + chg0 = xscr1->chg1; + chg1 = xscr1->chg2; + chg2 = xscr1->chg1; + if (xdl_append_merge(&c, 1, + i0, chg0, i1, chg1, i2, chg2)) { + xdl_cleanup_merge(changes); + return -1; + } + xscr1 = xscr1->next; + } + while (xscr2) { + if (!changes) + changes = c; + i0 = xscr2->i1; + i1 = xscr2->i1 + xe1->xdf2.nrec - xe1->xdf1.nrec; + i2 = xscr2->i2; + chg0 = xscr2->chg1; + chg1 = xscr2->chg1; + chg2 = xscr2->chg2; + if (xdl_append_merge(&c, 2, + i0, chg0, i1, chg1, i2, chg2)) { + xdl_cleanup_merge(changes); + return -1; + } + xscr2 = xscr2->next; + } + if (!changes) + changes = c; + /* refine conflicts */ + if (XDL_MERGE_ZEALOUS <= level && + (xdl_refine_conflicts(xe1, xe2, changes, xpp) < 0 || + xdl_simplify_non_conflicts(xe1, changes, + XDL_MERGE_ZEALOUS < level) < 0)) { + xdl_cleanup_merge(changes); + return -1; + } + /* output */ + if (result) { + int marker_size = xmp->marker_size; + int size = xdl_fill_merge_buffer(xe1, name1, xe2, name2, + ancestor_name, + favor, changes, NULL, style, + marker_size); + result->ptr = xdl_malloc(size); + if (!result->ptr) { + xdl_cleanup_merge(changes); + return -1; + } + result->size = size; + xdl_fill_merge_buffer(xe1, name1, xe2, name2, + ancestor_name, favor, changes, + result->ptr, style, marker_size); + } + return xdl_cleanup_merge(changes); +} + +int xdl_merge(mmfile_t *orig, mmfile_t *mf1, mmfile_t *mf2, + xmparam_t const *xmp, mmbuffer_t *result) +{ + xdchange_t *xscr1, *xscr2; + xdfenv_t xe1, xe2; + int status; + xpparam_t const *xpp = &xmp->xpp; + + result->ptr = NULL; + result->size = 0; + + if (xdl_do_diff(orig, mf1, xpp, &xe1) < 0) { + return -1; + } + if (xdl_do_diff(orig, mf2, xpp, &xe2) < 0) { + xdl_free_env(&xe1); + return -1; + } + if (xdl_change_compact(&xe1.xdf1, &xe1.xdf2, xpp->flags) < 0 || + xdl_change_compact(&xe1.xdf2, &xe1.xdf1, xpp->flags) < 0 || + xdl_build_script(&xe1, &xscr1) < 0) { + xdl_free_env(&xe1); + return -1; + } + if (xdl_change_compact(&xe2.xdf1, &xe2.xdf2, xpp->flags) < 0 || + xdl_change_compact(&xe2.xdf2, &xe2.xdf1, xpp->flags) < 0 || + xdl_build_script(&xe2, &xscr2) < 0) { + xdl_free_script(xscr1); + xdl_free_env(&xe1); + xdl_free_env(&xe2); + return -1; + } + status = 0; + if (!xscr1) { + result->ptr = xdl_malloc(mf2->size); + memcpy(result->ptr, mf2->ptr, mf2->size); + result->size = mf2->size; + } else if (!xscr2) { + result->ptr = xdl_malloc(mf1->size); + memcpy(result->ptr, mf1->ptr, mf1->size); + result->size = mf1->size; + } else { + status = xdl_do_merge(&xe1, xscr1, + &xe2, xscr2, + xmp, result); + } + xdl_free_script(xscr1); + xdl_free_script(xscr2); + + xdl_free_env(&xe1); + xdl_free_env(&xe2); + + return status; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mercurial/thirdparty/xdiff/xpatience.c Sat Mar 03 10:39:43 2018 -0800 @@ -0,0 +1,390 @@ +/* + * LibXDiff by Davide Libenzi ( File Differential Library ) + * Copyright (C) 2003-2016 Davide Libenzi, Johannes E. Schindelin + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see + * <http://www.gnu.org/licenses/>. + * + * Davide Libenzi <davidel@xmailserver.org> + * + */ +#include "xinclude.h" +#include "xtypes.h" +#include "xdiff.h" + +/* + * The basic idea of patience diff is to find lines that are unique in + * both files. These are intuitively the ones that we want to see as + * common lines. + * + * The maximal ordered sequence of such line pairs (where ordered means + * that the order in the sequence agrees with the order of the lines in + * both files) naturally defines an initial set of common lines. + * + * Now, the algorithm tries to extend the set of common lines by growing + * the line ranges where the files have identical lines. + * + * Between those common lines, the patience diff algorithm is applied + * recursively, until no unique line pairs can be found; these line ranges + * are handled by the well-known Myers algorithm. + */ + +#define NON_UNIQUE ULONG_MAX + +/* + * This is a hash mapping from line hash to line numbers in the first and + * second file. + */ +struct hashmap { + int nr, alloc; + struct entry { + unsigned long hash; + /* + * 0 = unused entry, 1 = first line, 2 = second, etc. + * line2 is NON_UNIQUE if the line is not unique + * in either the first or the second file. + */ + unsigned long line1, line2; + /* + * "next" & "previous" are used for the longest common + * sequence; + * initially, "next" reflects only the order in file1. + */ + struct entry *next, *previous; + + /* + * If 1, this entry can serve as an anchor. See + * Documentation/diff-options.txt for more information. + */ + unsigned anchor : 1; + } *entries, *first, *last; + /* were common records found? */ + unsigned long has_matches; + mmfile_t *file1, *file2; + xdfenv_t *env; + xpparam_t const *xpp; +}; + +static int is_anchor(xpparam_t const *xpp, const char *line) +{ + int i; + for (i = 0; i < xpp->anchors_nr; i++) { + if (!strncmp(line, xpp->anchors[i], strlen(xpp->anchors[i]))) + return 1; + } + return 0; +} + +/* The argument "pass" is 1 for the first file, 2 for the second. */ +static void insert_record(xpparam_t const *xpp, int line, struct hashmap *map, + int pass) +{ + xrecord_t **records = pass == 1 ? + map->env->xdf1.recs : map->env->xdf2.recs; + xrecord_t *record = records[line - 1], *other; + /* + * After xdl_prepare_env() (or more precisely, due to + * xdl_classify_record()), the "ha" member of the records (AKA lines) + * is _not_ the hash anymore, but a linearized version of it. In + * other words, the "ha" member is guaranteed to start with 0 and + * the second record's ha can only be 0 or 1, etc. + * + * So we multiply ha by 2 in the hope that the hashing was + * "unique enough". + */ + int index = (int)((record->ha << 1) % map->alloc); + + while (map->entries[index].line1) { + other = map->env->xdf1.recs[map->entries[index].line1 - 1]; + if (map->entries[index].hash != record->ha || + !xdl_recmatch(record->ptr, record->size, + other->ptr, other->size, + map->xpp->flags)) { + if (++index >= map->alloc) + index = 0; + continue; + } + if (pass == 2) + map->has_matches = 1; + if (pass == 1 || map->entries[index].line2) + map->entries[index].line2 = NON_UNIQUE; + else + map->entries[index].line2 = line; + return; + } + if (pass == 2) + return; + map->entries[index].line1 = line; + map->entries[index].hash = record->ha; + map->entries[index].anchor = is_anchor(xpp, map->env->xdf1.recs[line - 1]->ptr); + if (!map->first) + map->first = map->entries + index; + if (map->last) { + map->last->next = map->entries + index; + map->entries[index].previous = map->last; + } + map->last = map->entries + index; + map->nr++; +} + +/* + * This function has to be called for each recursion into the inter-hunk + * parts, as previously non-unique lines can become unique when being + * restricted to a smaller part of the files. + * + * It is assumed that env has been prepared using xdl_prepare(). + */ +static int fill_hashmap(mmfile_t *file1, mmfile_t *file2, + xpparam_t const *xpp, xdfenv_t *env, + struct hashmap *result, + int line1, int count1, int line2, int count2) +{ + result->file1 = file1; + result->file2 = file2; + result->xpp = xpp; + result->env = env; + + /* We know exactly how large we want the hash map */ + result->alloc = count1 * 2; + result->entries = (struct entry *) + xdl_malloc(result->alloc * sizeof(struct entry)); + if (!result->entries) + return -1; + memset(result->entries, 0, result->alloc * sizeof(struct entry)); + + /* First, fill with entries from the first file */ + while (count1--) + insert_record(xpp, line1++, result, 1); + + /* Then search for matches in the second file */ + while (count2--) + insert_record(xpp, line2++, result, 2); + + return 0; +} + +/* + * Find the longest sequence with a smaller last element (meaning a smaller + * line2, as we construct the sequence with entries ordered by line1). + */ +static int binary_search(struct entry **sequence, int longest, + struct entry *entry) +{ + int left = -1, right = longest; + + while (left + 1 < right) { + int middle = left + (right - left) / 2; + /* by construction, no two entries can be equal */ + if (sequence[middle]->line2 > entry->line2) + right = middle; + else + left = middle; + } + /* return the index in "sequence", _not_ the sequence length */ + return left; +} + +/* + * The idea is to start with the list of common unique lines sorted by + * the order in file1. For each of these pairs, the longest (partial) + * sequence whose last element's line2 is smaller is determined. + * + * For efficiency, the sequences are kept in a list containing exactly one + * item per sequence length: the sequence with the smallest last + * element (in terms of line2). + */ +static struct entry *find_longest_common_sequence(struct hashmap *map) +{ + struct entry **sequence = xdl_malloc(map->nr * sizeof(struct entry *)); + int longest = 0, i; + struct entry *entry; + + /* + * If not -1, this entry in sequence must never be overridden. + * Therefore, overriding entries before this has no effect, so + * do not do that either. + */ + int anchor_i = -1; + + for (entry = map->first; entry; entry = entry->next) { + if (!entry->line2 || entry->line2 == NON_UNIQUE) + continue; + i = binary_search(sequence, longest, entry); + entry->previous = i < 0 ? NULL : sequence[i]; + ++i; + if (i <= anchor_i) + continue; + sequence[i] = entry; + if (entry->anchor) { + anchor_i = i; + longest = anchor_i + 1; + } else if (i == longest) { + longest++; + } + } + + /* No common unique lines were found */ + if (!longest) { + xdl_free(sequence); + return NULL; + } + + /* Iterate starting at the last element, adjusting the "next" members */ + entry = sequence[longest - 1]; + entry->next = NULL; + while (entry->previous) { + entry->previous->next = entry; + entry = entry->previous; + } + xdl_free(sequence); + return entry; +} + +static int match(struct hashmap *map, int line1, int line2) +{ + xrecord_t *record1 = map->env->xdf1.recs[line1 - 1]; + xrecord_t *record2 = map->env->xdf2.recs[line2 - 1]; + return xdl_recmatch(record1->ptr, record1->size, + record2->ptr, record2->size, map->xpp->flags); +} + +static int patience_diff(mmfile_t *file1, mmfile_t *file2, + xpparam_t const *xpp, xdfenv_t *env, + int line1, int count1, int line2, int count2); + +static int walk_common_sequence(struct hashmap *map, struct entry *first, + int line1, int count1, int line2, int count2) +{ + int end1 = line1 + count1, end2 = line2 + count2; + int next1, next2; + + for (;;) { + /* Try to grow the line ranges of common lines */ + if (first) { + next1 = first->line1; + next2 = first->line2; + while (next1 > line1 && next2 > line2 && + match(map, next1 - 1, next2 - 1)) { + next1--; + next2--; + } + } else { + next1 = end1; + next2 = end2; + } + while (line1 < next1 && line2 < next2 && + match(map, line1, line2)) { + line1++; + line2++; + } + + /* Recurse */ + if (next1 > line1 || next2 > line2) { + struct hashmap submap; + + memset(&submap, 0, sizeof(submap)); + if (patience_diff(map->file1, map->file2, + map->xpp, map->env, + line1, next1 - line1, + line2, next2 - line2)) + return -1; + } + + if (!first) + return 0; + + while (first->next && + first->next->line1 == first->line1 + 1 && + first->next->line2 == first->line2 + 1) + first = first->next; + + line1 = first->line1 + 1; + line2 = first->line2 + 1; + + first = first->next; + } +} + +static int fall_back_to_classic_diff(struct hashmap *map, + int line1, int count1, int line2, int count2) +{ + xpparam_t xpp; + xpp.flags = map->xpp->flags & ~XDF_DIFF_ALGORITHM_MASK; + + return xdl_fall_back_diff(map->env, &xpp, + line1, count1, line2, count2); +} + +/* + * Recursively find the longest common sequence of unique lines, + * and if none was found, ask xdl_do_diff() to do the job. + * + * This function assumes that env was prepared with xdl_prepare_env(). + */ +static int patience_diff(mmfile_t *file1, mmfile_t *file2, + xpparam_t const *xpp, xdfenv_t *env, + int line1, int count1, int line2, int count2) +{ + struct hashmap map; + struct entry *first; + int result = 0; + + /* trivial case: one side is empty */ + if (!count1) { + while(count2--) + env->xdf2.rchg[line2++ - 1] = 1; + return 0; + } else if (!count2) { + while(count1--) + env->xdf1.rchg[line1++ - 1] = 1; + return 0; + } + + memset(&map, 0, sizeof(map)); + if (fill_hashmap(file1, file2, xpp, env, &map, + line1, count1, line2, count2)) + return -1; + + /* are there any matching lines at all? */ + if (!map.has_matches) { + while(count1--) + env->xdf1.rchg[line1++ - 1] = 1; + while(count2--) + env->xdf2.rchg[line2++ - 1] = 1; + xdl_free(map.entries); + return 0; + } + + first = find_longest_common_sequence(&map); + if (first) + result = walk_common_sequence(&map, first, + line1, count1, line2, count2); + else + result = fall_back_to_classic_diff(&map, + line1, count1, line2, count2); + + xdl_free(map.entries); + return result; +} + +int xdl_do_patience_diff(mmfile_t *file1, mmfile_t *file2, + xpparam_t const *xpp, xdfenv_t *env) +{ + if (xdl_prepare_env(file1, file2, xpp, env) < 0) + return -1; + + /* environment is cleaned up in xdl_diff() */ + return patience_diff(file1, file2, xpp, env, + 1, env->xdf1.nrec, 1, env->xdf2.nrec); +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mercurial/thirdparty/xdiff/xprepare.c Sat Mar 03 10:39:43 2018 -0800 @@ -0,0 +1,483 @@ +/* + * LibXDiff by Davide Libenzi ( File Differential Library ) + * Copyright (C) 2003 Davide Libenzi + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see + * <http://www.gnu.org/licenses/>. + * + * Davide Libenzi <davidel@xmailserver.org> + * + */ + +#include "xinclude.h" + + +#define XDL_KPDIS_RUN 4 +#define XDL_MAX_EQLIMIT 1024 +#define XDL_SIMSCAN_WINDOW 100 +#define XDL_GUESS_NLINES1 256 +#define XDL_GUESS_NLINES2 20 + + +typedef struct s_xdlclass { + struct s_xdlclass *next; + unsigned long ha; + char const *line; + long size; + long idx; + long len1, len2; +} xdlclass_t; + +typedef struct s_xdlclassifier { + unsigned int hbits; + long hsize; + xdlclass_t **rchash; + chastore_t ncha; + xdlclass_t **rcrecs; + long alloc; + long count; + long flags; +} xdlclassifier_t; + + + + +static int xdl_init_classifier(xdlclassifier_t *cf, long size, long flags); +static void xdl_free_classifier(xdlclassifier_t *cf); +static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t **rhash, + unsigned int hbits, xrecord_t *rec); +static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_t const *xpp, + xdlclassifier_t *cf, xdfile_t *xdf); +static void xdl_free_ctx(xdfile_t *xdf); +static int xdl_clean_mmatch(char const *dis, long i, long s, long e); +static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2); +static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2); +static int xdl_optimize_ctxs(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2); + + + + +static int xdl_init_classifier(xdlclassifier_t *cf, long size, long flags) { + cf->flags = flags; + + cf->hbits = xdl_hashbits((unsigned int) size); + cf->hsize = 1 << cf->hbits; + + if (xdl_cha_init(&cf->ncha, sizeof(xdlclass_t), size / 4 + 1) < 0) { + + return -1; + } + if (!(cf->rchash = (xdlclass_t **) xdl_malloc(cf->hsize * sizeof(xdlclass_t *)))) { + + xdl_cha_free(&cf->ncha); + return -1; + } + memset(cf->rchash, 0, cf->hsize * sizeof(xdlclass_t *)); + + cf->alloc = size; + if (!(cf->rcrecs = (xdlclass_t **) xdl_malloc(cf->alloc * sizeof(xdlclass_t *)))) { + + xdl_free(cf->rchash); + xdl_cha_free(&cf->ncha); + return -1; + } + + cf->count = 0; + + return 0; +} + + +static void xdl_free_classifier(xdlclassifier_t *cf) { + + xdl_free(cf->rcrecs); + xdl_free(cf->rchash); + xdl_cha_free(&cf->ncha); +} + + +static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t **rhash, + unsigned int hbits, xrecord_t *rec) { + long hi; + char const *line; + xdlclass_t *rcrec; + xdlclass_t **rcrecs; + + line = rec->ptr; + hi = (long) XDL_HASHLONG(rec->ha, cf->hbits); + for (rcrec = cf->rchash[hi]; rcrec; rcrec = rcrec->next) + if (rcrec->ha == rec->ha && + xdl_recmatch(rcrec->line, rcrec->size, + rec->ptr, rec->size, cf->flags)) + break; + + if (!rcrec) { + if (!(rcrec = xdl_cha_alloc(&cf->ncha))) { + + return -1; + } + rcrec->idx = cf->count++; + if (cf->count > cf->alloc) { + cf->alloc *= 2; + if (!(rcrecs = (xdlclass_t **) xdl_realloc(cf->rcrecs, cf->alloc * sizeof(xdlclass_t *)))) { + + return -1; + } + cf->rcrecs = rcrecs; + } + cf->rcrecs[rcrec->idx] = rcrec; + rcrec->line = line; + rcrec->size = rec->size; + rcrec->ha = rec->ha; + rcrec->len1 = rcrec->len2 = 0; + rcrec->next = cf->rchash[hi]; + cf->rchash[hi] = rcrec; + } + + (pass == 1) ? rcrec->len1++ : rcrec->len2++; + + rec->ha = (unsigned long) rcrec->idx; + + hi = (long) XDL_HASHLONG(rec->ha, hbits); + rec->next = rhash[hi]; + rhash[hi] = rec; + + return 0; +} + + +static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_t const *xpp, + xdlclassifier_t *cf, xdfile_t *xdf) { + unsigned int hbits; + long nrec, hsize, bsize; + unsigned long hav; + char const *blk, *cur, *top, *prev; + xrecord_t *crec; + xrecord_t **recs, **rrecs; + xrecord_t **rhash; + unsigned long *ha; + char *rchg; + long *rindex; + + ha = NULL; + rindex = NULL; + rchg = NULL; + rhash = NULL; + recs = NULL; + + if (xdl_cha_init(&xdf->rcha, sizeof(xrecord_t), narec / 4 + 1) < 0) + goto abort; + if (!(recs = (xrecord_t **) xdl_malloc(narec * sizeof(xrecord_t *)))) + goto abort; + + if (XDF_DIFF_ALG(xpp->flags) == XDF_HISTOGRAM_DIFF) + hbits = hsize = 0; + else { + hbits = xdl_hashbits((unsigned int) narec); + hsize = 1 << hbits; + if (!(rhash = (xrecord_t **) xdl_malloc(hsize * sizeof(xrecord_t *)))) + goto abort; + memset(rhash, 0, hsize * sizeof(xrecord_t *)); + } + + nrec = 0; + if ((cur = blk = xdl_mmfile_first(mf, &bsize)) != NULL) { + for (top = blk + bsize; cur < top; ) { + prev = cur; + hav = xdl_hash_record(&cur, top, xpp->flags); + if (nrec >= narec) { + narec *= 2; + if (!(rrecs = (xrecord_t **) xdl_realloc(recs, narec * sizeof(xrecord_t *)))) + goto abort; + recs = rrecs; + } + if (!(crec = xdl_cha_alloc(&xdf->rcha))) + goto abort; + crec->ptr = prev; + crec->size = (long) (cur - prev); + crec->ha = hav; + recs[nrec++] = crec; + + if ((XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF) && + xdl_classify_record(pass, cf, rhash, hbits, crec) < 0) + goto abort; + } + } + + if (!(rchg = (char *) xdl_malloc((nrec + 2) * sizeof(char)))) + goto abort; + memset(rchg, 0, (nrec + 2) * sizeof(char)); + + if (!(rindex = (long *) xdl_malloc((nrec + 1) * sizeof(long)))) + goto abort; + if (!(ha = (unsigned long *) xdl_malloc((nrec + 1) * sizeof(unsigned long)))) + goto abort; + + xdf->nrec = nrec; + xdf->recs = recs; + xdf->hbits = hbits; + xdf->rhash = rhash; + xdf->rchg = rchg + 1; + xdf->rindex = rindex; + xdf->nreff = 0; + xdf->ha = ha; + xdf->dstart = 0; + xdf->dend = nrec - 1; + + return 0; + +abort: + xdl_free(ha); + xdl_free(rindex); + xdl_free(rchg); + xdl_free(rhash); + xdl_free(recs); + xdl_cha_free(&xdf->rcha); + return -1; +} + + +static void xdl_free_ctx(xdfile_t *xdf) { + + xdl_free(xdf->rhash); + xdl_free(xdf->rindex); + xdl_free(xdf->rchg - 1); + xdl_free(xdf->ha); + xdl_free(xdf->recs); + xdl_cha_free(&xdf->rcha); +} + + +int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, + xdfenv_t *xe) { + long enl1, enl2, sample; + xdlclassifier_t cf; + + memset(&cf, 0, sizeof(cf)); + + /* + * For histogram diff, we can afford a smaller sample size and + * thus a poorer estimate of the number of lines, as the hash + * table (rhash) won't be filled up/grown. The number of lines + * (nrecs) will be updated correctly anyway by + * xdl_prepare_ctx(). + */ + sample = (XDF_DIFF_ALG(xpp->flags) == XDF_HISTOGRAM_DIFF + ? XDL_GUESS_NLINES2 : XDL_GUESS_NLINES1); + + enl1 = xdl_guess_lines(mf1, sample) + 1; + enl2 = xdl_guess_lines(mf2, sample) + 1; + + if (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF && + xdl_init_classifier(&cf, enl1 + enl2 + 1, xpp->flags) < 0) + return -1; + + if (xdl_prepare_ctx(1, mf1, enl1, xpp, &cf, &xe->xdf1) < 0) { + + xdl_free_classifier(&cf); + return -1; + } + if (xdl_prepare_ctx(2, mf2, enl2, xpp, &cf, &xe->xdf2) < 0) { + + xdl_free_ctx(&xe->xdf1); + xdl_free_classifier(&cf); + return -1; + } + + if ((XDF_DIFF_ALG(xpp->flags) != XDF_PATIENCE_DIFF) && + (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF) && + xdl_optimize_ctxs(&cf, &xe->xdf1, &xe->xdf2) < 0) { + + xdl_free_ctx(&xe->xdf2); + xdl_free_ctx(&xe->xdf1); + xdl_free_classifier(&cf); + return -1; + } + + if (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF) + xdl_free_classifier(&cf); + + return 0; +} + + +void xdl_free_env(xdfenv_t *xe) { + + xdl_free_ctx(&xe->xdf2); + xdl_free_ctx(&xe->xdf1); +} + + +static int xdl_clean_mmatch(char const *dis, long i, long s, long e) { + long r, rdis0, rpdis0, rdis1, rpdis1; + + /* + * Limits the window the is examined during the similar-lines + * scan. The loops below stops when dis[i - r] == 1 (line that + * has no match), but there are corner cases where the loop + * proceed all the way to the extremities by causing huge + * performance penalties in case of big files. + */ + if (i - s > XDL_SIMSCAN_WINDOW) + s = i - XDL_SIMSCAN_WINDOW; + if (e - i > XDL_SIMSCAN_WINDOW) + e = i + XDL_SIMSCAN_WINDOW; + + /* + * Scans the lines before 'i' to find a run of lines that either + * have no match (dis[j] == 0) or have multiple matches (dis[j] > 1). + * Note that we always call this function with dis[i] > 1, so the + * current line (i) is already a multimatch line. + */ + for (r = 1, rdis0 = 0, rpdis0 = 1; (i - r) >= s; r++) { + if (!dis[i - r]) + rdis0++; + else if (dis[i - r] == 2) + rpdis0++; + else + break; + } + /* + * If the run before the line 'i' found only multimatch lines, we + * return 0 and hence we don't make the current line (i) discarded. + * We want to discard multimatch lines only when they appear in the + * middle of runs with nomatch lines (dis[j] == 0). + */ + if (rdis0 == 0) + return 0; + for (r = 1, rdis1 = 0, rpdis1 = 1; (i + r) <= e; r++) { + if (!dis[i + r]) + rdis1++; + else if (dis[i + r] == 2) + rpdis1++; + else + break; + } + /* + * If the run after the line 'i' found only multimatch lines, we + * return 0 and hence we don't make the current line (i) discarded. + */ + if (rdis1 == 0) + return 0; + rdis1 += rdis0; + rpdis1 += rpdis0; + + return rpdis1 * XDL_KPDIS_RUN < (rpdis1 + rdis1); +} + + +/* + * Try to reduce the problem complexity, discard records that have no + * matches on the other file. Also, lines that have multiple matches + * might be potentially discarded if they happear in a run of discardable. + */ +static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2) { + long i, nm, nreff, mlim; + xrecord_t **recs; + xdlclass_t *rcrec; + char *dis, *dis1, *dis2; + + if (!(dis = (char *) xdl_malloc(xdf1->nrec + xdf2->nrec + 2))) { + + return -1; + } + memset(dis, 0, xdf1->nrec + xdf2->nrec + 2); + dis1 = dis; + dis2 = dis1 + xdf1->nrec + 1; + + if ((mlim = xdl_bogosqrt(xdf1->nrec)) > XDL_MAX_EQLIMIT) + mlim = XDL_MAX_EQLIMIT; + for (i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; i <= xdf1->dend; i++, recs++) { + rcrec = cf->rcrecs[(*recs)->ha]; + nm = rcrec ? rcrec->len2 : 0; + dis1[i] = (nm == 0) ? 0: (nm >= mlim) ? 2: 1; + } + + if ((mlim = xdl_bogosqrt(xdf2->nrec)) > XDL_MAX_EQLIMIT) + mlim = XDL_MAX_EQLIMIT; + for (i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; i <= xdf2->dend; i++, recs++) { + rcrec = cf->rcrecs[(*recs)->ha]; + nm = rcrec ? rcrec->len1 : 0; + dis2[i] = (nm == 0) ? 0: (nm >= mlim) ? 2: 1; + } + + for (nreff = 0, i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; + i <= xdf1->dend; i++, recs++) { + if (dis1[i] == 1 || + (dis1[i] == 2 && !xdl_clean_mmatch(dis1, i, xdf1->dstart, xdf1->dend))) { + xdf1->rindex[nreff] = i; + xdf1->ha[nreff] = (*recs)->ha; + nreff++; + } else + xdf1->rchg[i] = 1; + } + xdf1->nreff = nreff; + + for (nreff = 0, i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; + i <= xdf2->dend; i++, recs++) { + if (dis2[i] == 1 || + (dis2[i] == 2 && !xdl_clean_mmatch(dis2, i, xdf2->dstart, xdf2->dend))) { + xdf2->rindex[nreff] = i; + xdf2->ha[nreff] = (*recs)->ha; + nreff++; + } else + xdf2->rchg[i] = 1; + } + xdf2->nreff = nreff; + + xdl_free(dis); + + return 0; +} + + +/* + * Early trim initial and terminal matching records. + */ +static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2) { + long i, lim; + xrecord_t **recs1, **recs2; + + recs1 = xdf1->recs; + recs2 = xdf2->recs; + for (i = 0, lim = XDL_MIN(xdf1->nrec, xdf2->nrec); i < lim; + i++, recs1++, recs2++) + if ((*recs1)->ha != (*recs2)->ha) + break; + + xdf1->dstart = xdf2->dstart = i; + + recs1 = xdf1->recs + xdf1->nrec - 1; + recs2 = xdf2->recs + xdf2->nrec - 1; + for (lim -= i, i = 0; i < lim; i++, recs1--, recs2--) + if ((*recs1)->ha != (*recs2)->ha) + break; + + xdf1->dend = xdf1->nrec - i - 1; + xdf2->dend = xdf2->nrec - i - 1; + + return 0; +} + + +static int xdl_optimize_ctxs(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2) { + + if (xdl_trim_ends(xdf1, xdf2) < 0 || + xdl_cleanup_records(cf, xdf1, xdf2) < 0) { + + return -1; + } + + return 0; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mercurial/thirdparty/xdiff/xprepare.h Sat Mar 03 10:39:43 2018 -0800 @@ -0,0 +1,34 @@ +/* + * LibXDiff by Davide Libenzi ( File Differential Library ) + * Copyright (C) 2003 Davide Libenzi + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see + * <http://www.gnu.org/licenses/>. + * + * Davide Libenzi <davidel@xmailserver.org> + * + */ + +#if !defined(XPREPARE_H) +#define XPREPARE_H + + + +int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, + xdfenv_t *xe); +void xdl_free_env(xdfenv_t *xe); + + + +#endif /* #if !defined(XPREPARE_H) */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mercurial/thirdparty/xdiff/xtypes.h Sat Mar 03 10:39:43 2018 -0800 @@ -0,0 +1,67 @@ +/* + * LibXDiff by Davide Libenzi ( File Differential Library ) + * Copyright (C) 2003 Davide Libenzi + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see + * <http://www.gnu.org/licenses/>. + * + * Davide Libenzi <davidel@xmailserver.org> + * + */ + +#if !defined(XTYPES_H) +#define XTYPES_H + + + +typedef struct s_chanode { + struct s_chanode *next; + long icurr; +} chanode_t; + +typedef struct s_chastore { + chanode_t *head, *tail; + long isize, nsize; + chanode_t *ancur; + chanode_t *sncur; + long scurr; +} chastore_t; + +typedef struct s_xrecord { + struct s_xrecord *next; + char const *ptr; + long size; + unsigned long ha; +} xrecord_t; + +typedef struct s_xdfile { + chastore_t rcha; + long nrec; + unsigned int hbits; + xrecord_t **rhash; + long dstart, dend; + xrecord_t **recs; + char *rchg; + long *rindex; + long nreff; + unsigned long *ha; +} xdfile_t; + +typedef struct s_xdfenv { + xdfile_t xdf1, xdf2; +} xdfenv_t; + + + +#endif /* #if !defined(XTYPES_H) */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mercurial/thirdparty/xdiff/xutils.c Sat Mar 03 10:39:43 2018 -0800 @@ -0,0 +1,425 @@ +/* + * LibXDiff by Davide Libenzi ( File Differential Library ) + * Copyright (C) 2003 Davide Libenzi + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see + * <http://www.gnu.org/licenses/>. + * + * Davide Libenzi <davidel@xmailserver.org> + * + */ + +#include <limits.h> +#include <assert.h> +#include "xinclude.h" + + + + +long xdl_bogosqrt(long n) { + long i; + + /* + * Classical integer square root approximation using shifts. + */ + for (i = 1; n > 0; n >>= 2) + i <<= 1; + + return i; +} + + +int xdl_emit_diffrec(char const *rec, long size, char const *pre, long psize, + xdemitcb_t *ecb) { + int i = 2; + mmbuffer_t mb[3]; + + mb[0].ptr = (char *) pre; + mb[0].size = psize; + mb[1].ptr = (char *) rec; + mb[1].size = size; + if (size > 0 && rec[size - 1] != '\n') { + mb[2].ptr = (char *) "\n\\ No newline at end of file\n"; + mb[2].size = strlen(mb[2].ptr); + i++; + } + if (ecb->outf(ecb->priv, mb, i) < 0) { + + return -1; + } + + return 0; +} + +void *xdl_mmfile_first(mmfile_t *mmf, long *size) +{ + *size = mmf->size; + return mmf->ptr; +} + + +long xdl_mmfile_size(mmfile_t *mmf) +{ + return mmf->size; +} + + +int xdl_cha_init(chastore_t *cha, long isize, long icount) { + + cha->head = cha->tail = NULL; + cha->isize = isize; + cha->nsize = icount * isize; + cha->ancur = cha->sncur = NULL; + cha->scurr = 0; + + return 0; +} + + +void xdl_cha_free(chastore_t *cha) { + chanode_t *cur, *tmp; + + for (cur = cha->head; (tmp = cur) != NULL;) { + cur = cur->next; + xdl_free(tmp); + } +} + + +void *xdl_cha_alloc(chastore_t *cha) { + chanode_t *ancur; + void *data; + + if (!(ancur = cha->ancur) || ancur->icurr == cha->nsize) { + if (!(ancur = (chanode_t *) xdl_malloc(sizeof(chanode_t) + cha->nsize))) { + + return NULL; + } + ancur->icurr = 0; + ancur->next = NULL; + if (cha->tail) + cha->tail->next = ancur; + if (!cha->head) + cha->head = ancur; + cha->tail = ancur; + cha->ancur = ancur; + } + + data = (char *) ancur + sizeof(chanode_t) + ancur->icurr; + ancur->icurr += cha->isize; + + return data; +} + +long xdl_guess_lines(mmfile_t *mf, long sample) { + long nl = 0, size, tsize = 0; + char const *data, *cur, *top; + + if ((cur = data = xdl_mmfile_first(mf, &size)) != NULL) { + for (top = data + size; nl < sample && cur < top; ) { + nl++; + if (!(cur = memchr(cur, '\n', top - cur))) + cur = top; + else + cur++; + } + tsize += (long) (cur - data); + } + + if (nl && tsize) + nl = xdl_mmfile_size(mf) / (tsize / nl); + + return nl + 1; +} + +int xdl_blankline(const char *line, long size, long flags) +{ + long i; + + if (!(flags & XDF_WHITESPACE_FLAGS)) + return (size <= 1); + + for (i = 0; i < size && XDL_ISSPACE(line[i]); i++) + ; + + return (i == size); +} + +/* + * Have we eaten everything on the line, except for an optional + * CR at the very end? + */ +static int ends_with_optional_cr(const char *l, long s, long i) +{ + int complete = s && l[s-1] == '\n'; + + if (complete) + s--; + if (s == i) + return 1; + /* do not ignore CR at the end of an incomplete line */ + if (complete && s == i + 1 && l[i] == '\r') + return 1; + return 0; +} + +int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags) +{ + int i1, i2; + + if (s1 == s2 && !memcmp(l1, l2, s1)) + return 1; + if (!(flags & XDF_WHITESPACE_FLAGS)) + return 0; + + i1 = 0; + i2 = 0; + + /* + * -w matches everything that matches with -b, and -b in turn + * matches everything that matches with --ignore-space-at-eol, + * which in turn matches everything that matches with --ignore-cr-at-eol. + * + * Each flavor of ignoring needs different logic to skip whitespaces + * while we have both sides to compare. + */ + if (flags & XDF_IGNORE_WHITESPACE) { + goto skip_ws; + while (i1 < s1 && i2 < s2) { + if (l1[i1++] != l2[i2++]) + return 0; + skip_ws: + while (i1 < s1 && XDL_ISSPACE(l1[i1])) + i1++; + while (i2 < s2 && XDL_ISSPACE(l2[i2])) + i2++; + } + } else if (flags & XDF_IGNORE_WHITESPACE_CHANGE) { + while (i1 < s1 && i2 < s2) { + if (XDL_ISSPACE(l1[i1]) && XDL_ISSPACE(l2[i2])) { + /* Skip matching spaces and try again */ + while (i1 < s1 && XDL_ISSPACE(l1[i1])) + i1++; + while (i2 < s2 && XDL_ISSPACE(l2[i2])) + i2++; + continue; + } + if (l1[i1++] != l2[i2++]) + return 0; + } + } else if (flags & XDF_IGNORE_WHITESPACE_AT_EOL) { + while (i1 < s1 && i2 < s2 && l1[i1] == l2[i2]) { + i1++; + i2++; + } + } else if (flags & XDF_IGNORE_CR_AT_EOL) { + /* Find the first difference and see how the line ends */ + while (i1 < s1 && i2 < s2 && l1[i1] == l2[i2]) { + i1++; + i2++; + } + return (ends_with_optional_cr(l1, s1, i1) && + ends_with_optional_cr(l2, s2, i2)); + } + + /* + * After running out of one side, the remaining side must have + * nothing but whitespace for the lines to match. Note that + * ignore-whitespace-at-eol case may break out of the loop + * while there still are characters remaining on both lines. + */ + if (i1 < s1) { + while (i1 < s1 && XDL_ISSPACE(l1[i1])) + i1++; + if (s1 != i1) + return 0; + } + if (i2 < s2) { + while (i2 < s2 && XDL_ISSPACE(l2[i2])) + i2++; + return (s2 == i2); + } + return 1; +} + +static unsigned long xdl_hash_record_with_whitespace(char const **data, + char const *top, long flags) { + unsigned long ha = 5381; + char const *ptr = *data; + int cr_at_eol_only = (flags & XDF_WHITESPACE_FLAGS) == XDF_IGNORE_CR_AT_EOL; + + for (; ptr < top && *ptr != '\n'; ptr++) { + if (cr_at_eol_only) { + /* do not ignore CR at the end of an incomplete line */ + if (*ptr == '\r' && + (ptr + 1 < top && ptr[1] == '\n')) + continue; + } + else if (XDL_ISSPACE(*ptr)) { + const char *ptr2 = ptr; + int at_eol; + while (ptr + 1 < top && XDL_ISSPACE(ptr[1]) + && ptr[1] != '\n') + ptr++; + at_eol = (top <= ptr + 1 || ptr[1] == '\n'); + if (flags & XDF_IGNORE_WHITESPACE) + ; /* already handled */ + else if (flags & XDF_IGNORE_WHITESPACE_CHANGE + && !at_eol) { + ha += (ha << 5); + ha ^= (unsigned long) ' '; + } + else if (flags & XDF_IGNORE_WHITESPACE_AT_EOL + && !at_eol) { + while (ptr2 != ptr + 1) { + ha += (ha << 5); + ha ^= (unsigned long) *ptr2; + ptr2++; + } + } + continue; + } + ha += (ha << 5); + ha ^= (unsigned long) *ptr; + } + *data = ptr < top ? ptr + 1: ptr; + + return ha; +} + +unsigned long xdl_hash_record(char const **data, char const *top, long flags) { + unsigned long ha = 5381; + char const *ptr = *data; + + if (flags & XDF_WHITESPACE_FLAGS) + return xdl_hash_record_with_whitespace(data, top, flags); + + for (; ptr < top && *ptr != '\n'; ptr++) { + ha += (ha << 5); + ha ^= (unsigned long) *ptr; + } + *data = ptr < top ? ptr + 1: ptr; + + return ha; +} + +unsigned int xdl_hashbits(unsigned int size) { + unsigned int val = 1, bits = 0; + + for (; val < size && bits < CHAR_BIT * sizeof(unsigned int); val <<= 1, bits++); + return bits ? bits: 1; +} + + +int xdl_num_out(char *out, long val) { + char *ptr, *str = out; + char buf[32]; + + ptr = buf + sizeof(buf) - 1; + *ptr = '\0'; + if (val < 0) { + *--ptr = '-'; + val = -val; + } + for (; val && ptr > buf; val /= 10) + *--ptr = "0123456789"[val % 10]; + if (*ptr) + for (; *ptr; ptr++, str++) + *str = *ptr; + else + *str++ = '0'; + *str = '\0'; + + return str - out; +} + +int xdl_emit_hunk_hdr(long s1, long c1, long s2, long c2, + const char *func, long funclen, xdemitcb_t *ecb) { + int nb = 0; + mmbuffer_t mb; + char buf[128]; + + memcpy(buf, "@@ -", 4); + nb += 4; + + nb += xdl_num_out(buf + nb, c1 ? s1: s1 - 1); + + if (c1 != 1) { + memcpy(buf + nb, ",", 1); + nb += 1; + + nb += xdl_num_out(buf + nb, c1); + } + + memcpy(buf + nb, " +", 2); + nb += 2; + + nb += xdl_num_out(buf + nb, c2 ? s2: s2 - 1); + + if (c2 != 1) { + memcpy(buf + nb, ",", 1); + nb += 1; + + nb += xdl_num_out(buf + nb, c2); + } + + memcpy(buf + nb, " @@", 3); + nb += 3; + if (func && funclen) { + buf[nb++] = ' '; + if (funclen > sizeof(buf) - nb - 1) + funclen = sizeof(buf) - nb - 1; + memcpy(buf + nb, func, funclen); + nb += funclen; + } + buf[nb++] = '\n'; + + mb.ptr = buf; + mb.size = nb; + if (ecb->outf(ecb->priv, &mb, 1) < 0) + return -1; + + return 0; +} + +int xdl_fall_back_diff(xdfenv_t *diff_env, xpparam_t const *xpp, + int line1, int count1, int line2, int count2) +{ + /* + * This probably does not work outside Git, since + * we have a very simple mmfile structure. + * + * Note: ideally, we would reuse the prepared environment, but + * the libxdiff interface does not (yet) allow for diffing only + * ranges of lines instead of the whole files. + */ + mmfile_t subfile1, subfile2; + xdfenv_t env; + + subfile1.ptr = (char *)diff_env->xdf1.recs[line1 - 1]->ptr; + subfile1.size = diff_env->xdf1.recs[line1 + count1 - 2]->ptr + + diff_env->xdf1.recs[line1 + count1 - 2]->size - subfile1.ptr; + subfile2.ptr = (char *)diff_env->xdf2.recs[line2 - 1]->ptr; + subfile2.size = diff_env->xdf2.recs[line2 + count2 - 2]->ptr + + diff_env->xdf2.recs[line2 + count2 - 2]->size - subfile2.ptr; + if (xdl_do_diff(&subfile1, &subfile2, xpp, &env) < 0) + return -1; + + memcpy(diff_env->xdf1.rchg + line1 - 1, env.xdf1.rchg, count1); + memcpy(diff_env->xdf2.rchg + line2 - 1, env.xdf2.rchg, count2); + + xdl_free_env(&env); + + return 0; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mercurial/thirdparty/xdiff/xutils.h Sat Mar 03 10:39:43 2018 -0800 @@ -0,0 +1,47 @@ +/* + * LibXDiff by Davide Libenzi ( File Differential Library ) + * Copyright (C) 2003 Davide Libenzi + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see + * <http://www.gnu.org/licenses/>. + * + * Davide Libenzi <davidel@xmailserver.org> + * + */ + +#if !defined(XUTILS_H) +#define XUTILS_H + + + +long xdl_bogosqrt(long n); +int xdl_emit_diffrec(char const *rec, long size, char const *pre, long psize, + xdemitcb_t *ecb); +int xdl_cha_init(chastore_t *cha, long isize, long icount); +void xdl_cha_free(chastore_t *cha); +void *xdl_cha_alloc(chastore_t *cha); +long xdl_guess_lines(mmfile_t *mf, long sample); +int xdl_blankline(const char *line, long size, long flags); +int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags); +unsigned long xdl_hash_record(char const **data, char const *top, long flags); +unsigned int xdl_hashbits(unsigned int size); +int xdl_num_out(char *out, long val); +int xdl_emit_hunk_hdr(long s1, long c1, long s2, long c2, + const char *func, long funclen, xdemitcb_t *ecb); +int xdl_fall_back_diff(xdfenv_t *diff_env, xpparam_t const *xpp, + int line1, int count1, int line2, int count2); + + + +#endif /* #if !defined(XUTILS_H) */