29 #define XDL_GUESS_NLINES1 256 |
29 #define XDL_GUESS_NLINES1 256 |
30 |
30 |
31 |
31 |
32 typedef struct s_xdlclass { |
32 typedef struct s_xdlclass { |
33 struct s_xdlclass *next; |
33 struct s_xdlclass *next; |
34 unsigned long ha; |
34 uint64_t ha; |
35 char const *line; |
35 char const *line; |
36 long size; |
36 int64_t size; |
37 long idx; |
37 int64_t idx; |
38 long len1, len2; |
38 int64_t len1, len2; |
39 } xdlclass_t; |
39 } xdlclass_t; |
40 |
40 |
41 typedef struct s_xdlclassifier { |
41 typedef struct s_xdlclassifier { |
42 unsigned int hbits; |
42 unsigned int hbits; |
43 long hsize; |
43 int64_t hsize; |
44 xdlclass_t **rchash; |
44 xdlclass_t **rchash; |
45 chastore_t ncha; |
45 chastore_t ncha; |
46 xdlclass_t **rcrecs; |
46 xdlclass_t **rcrecs; |
47 long alloc; |
47 int64_t alloc; |
48 long count; |
48 int64_t count; |
49 long flags; |
49 int64_t flags; |
50 } xdlclassifier_t; |
50 } xdlclassifier_t; |
51 |
51 |
52 |
52 |
53 |
53 |
54 |
54 |
55 static int xdl_init_classifier(xdlclassifier_t *cf, long size, long flags); |
55 static int xdl_init_classifier(xdlclassifier_t *cf, int64_t size, int64_t flags); |
56 static void xdl_free_classifier(xdlclassifier_t *cf); |
56 static void xdl_free_classifier(xdlclassifier_t *cf); |
57 static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t **rhash, |
57 static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t **rhash, |
58 unsigned int hbits, xrecord_t *rec); |
58 unsigned int hbits, xrecord_t *rec); |
59 static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_t const *xpp, |
59 static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, int64_t narec, xpparam_t const *xpp, |
60 xdlclassifier_t *cf, xdfile_t *xdf); |
60 xdlclassifier_t *cf, xdfile_t *xdf); |
61 static void xdl_free_ctx(xdfile_t *xdf); |
61 static void xdl_free_ctx(xdfile_t *xdf); |
62 static int xdl_clean_mmatch(char const *dis, long i, long s, long e); |
62 static int xdl_clean_mmatch(char const *dis, int64_t i, int64_t s, int64_t e); |
63 static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2); |
63 static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2); |
64 static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2); |
64 static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2); |
65 static int xdl_optimize_ctxs(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2); |
65 static int xdl_optimize_ctxs(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2); |
66 |
66 |
67 |
67 |
68 |
68 |
69 |
69 |
70 static int xdl_init_classifier(xdlclassifier_t *cf, long size, long flags) { |
70 static int xdl_init_classifier(xdlclassifier_t *cf, int64_t size, int64_t flags) { |
71 cf->flags = flags; |
71 cf->flags = flags; |
72 |
72 |
73 cf->hbits = xdl_hashbits((unsigned int) size); |
73 cf->hbits = xdl_hashbits((unsigned int) size); |
74 cf->hsize = 1 << cf->hbits; |
74 cf->hsize = 1 << cf->hbits; |
75 |
75 |
161 * |
161 * |
162 * Note: trimming could affect hunk shifting. But the performance benefit |
162 * Note: trimming could affect hunk shifting. But the performance benefit |
163 * outweighs the shift change. A diff result with suboptimal shifting is still |
163 * outweighs the shift change. A diff result with suboptimal shifting is still |
164 * valid. |
164 * valid. |
165 */ |
165 */ |
166 static void xdl_trim_files(mmfile_t *mf1, mmfile_t *mf2, long reserved, |
166 static void xdl_trim_files(mmfile_t *mf1, mmfile_t *mf2, int64_t reserved, |
167 xdfenv_t *xe, mmfile_t *out_mf1, mmfile_t *out_mf2) { |
167 xdfenv_t *xe, mmfile_t *out_mf1, mmfile_t *out_mf2) { |
168 mmfile_t msmall, mlarge; |
168 mmfile_t msmall, mlarge; |
169 /* prefix lines, prefix bytes, suffix lines, suffix bytes */ |
169 /* prefix lines, prefix bytes, suffix lines, suffix bytes */ |
170 long plines = 0, pbytes = 0, slines = 0, sbytes = 0, i; |
170 int64_t plines = 0, pbytes = 0, slines = 0, sbytes = 0, i; |
171 /* prefix char pointer for msmall and mlarge */ |
171 /* prefix char pointer for msmall and mlarge */ |
172 const char *pp1, *pp2; |
172 const char *pp1, *pp2; |
173 /* suffix char pointer for msmall and mlarge */ |
173 /* suffix char pointer for msmall and mlarge */ |
174 const char *ps1, *ps2; |
174 const char *ps1, *ps2; |
175 |
175 |
235 out_mf2->ptr = mf2->ptr + pbytes; |
235 out_mf2->ptr = mf2->ptr + pbytes; |
236 out_mf2->size = mf2->size - pbytes - sbytes; |
236 out_mf2->size = mf2->size - pbytes - sbytes; |
237 } |
237 } |
238 |
238 |
239 |
239 |
240 static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_t const *xpp, |
240 static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, int64_t narec, xpparam_t const *xpp, |
241 xdlclassifier_t *cf, xdfile_t *xdf) { |
241 xdlclassifier_t *cf, xdfile_t *xdf) { |
242 unsigned int hbits; |
242 unsigned int hbits; |
243 long nrec, hsize, bsize; |
243 int64_t nrec, hsize, bsize; |
244 unsigned long hav; |
244 uint64_t hav; |
245 char const *blk, *cur, *top, *prev; |
245 char const *blk, *cur, *top, *prev; |
246 xrecord_t *crec; |
246 xrecord_t *crec; |
247 xrecord_t **recs, **rrecs; |
247 xrecord_t **recs, **rrecs; |
248 xrecord_t **rhash; |
248 xrecord_t **rhash; |
249 unsigned long *ha; |
249 uint64_t *ha; |
250 char *rchg; |
250 char *rchg; |
251 long *rindex; |
251 int64_t *rindex; |
252 |
252 |
253 ha = NULL; |
253 ha = NULL; |
254 rindex = NULL; |
254 rindex = NULL; |
255 rchg = NULL; |
255 rchg = NULL; |
256 rhash = NULL; |
256 rhash = NULL; |
338 /* Reserved lines for trimming, to leave room for shifting */ |
338 /* Reserved lines for trimming, to leave room for shifting */ |
339 #define TRIM_RESERVED_LINES 100 |
339 #define TRIM_RESERVED_LINES 100 |
340 |
340 |
341 int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, |
341 int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, |
342 xdfenv_t *xe) { |
342 xdfenv_t *xe) { |
343 long enl1, enl2, sample; |
343 int64_t enl1, enl2, sample; |
344 mmfile_t tmf1, tmf2; |
344 mmfile_t tmf1, tmf2; |
345 xdlclassifier_t cf; |
345 xdlclassifier_t cf; |
346 |
346 |
347 memset(&cf, 0, sizeof(cf)); |
347 memset(&cf, 0, sizeof(cf)); |
348 |
348 |
386 xdl_free_ctx(&xe->xdf2); |
386 xdl_free_ctx(&xe->xdf2); |
387 xdl_free_ctx(&xe->xdf1); |
387 xdl_free_ctx(&xe->xdf1); |
388 } |
388 } |
389 |
389 |
390 |
390 |
391 static int xdl_clean_mmatch(char const *dis, long i, long s, long e) { |
391 static int xdl_clean_mmatch(char const *dis, int64_t i, int64_t s, int64_t e) { |
392 long r, rdis0, rpdis0, rdis1, rpdis1; |
392 int64_t r, rdis0, rpdis0, rdis1, rpdis1; |
393 |
393 |
394 /* |
394 /* |
395 * Limits the window the is examined during the similar-lines |
395 * Limits the window the is examined during the similar-lines |
396 * scan. The loops below stops when dis[i - r] == 1 (line that |
396 * scan. The loops below stops when dis[i - r] == 1 (line that |
397 * has no match), but there are corner cases where the loop |
397 * has no match), but there are corner cases where the loop |
450 * Try to reduce the problem complexity, discard records that have no |
450 * Try to reduce the problem complexity, discard records that have no |
451 * matches on the other file. Also, lines that have multiple matches |
451 * matches on the other file. Also, lines that have multiple matches |
452 * might be potentially discarded if they happear in a run of discardable. |
452 * might be potentially discarded if they happear in a run of discardable. |
453 */ |
453 */ |
454 static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2) { |
454 static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2) { |
455 long i, nm, nreff, mlim; |
455 int64_t i, nm, nreff, mlim; |
456 xrecord_t **recs; |
456 xrecord_t **recs; |
457 xdlclass_t *rcrec; |
457 xdlclass_t *rcrec; |
458 char *dis, *dis1, *dis2; |
458 char *dis, *dis1, *dis2; |
459 |
459 |
460 if (!(dis = (char *) xdl_malloc(xdf1->nrec + xdf2->nrec + 2))) { |
460 if (!(dis = (char *) xdl_malloc(xdf1->nrec + xdf2->nrec + 2))) { |