mercurial/thirdparty/xdiff/xprepare.c
changeset 36822 882657a9f768
parent 36820 f33a87cf60cc
child 36823 49fe6249937a
equal deleted inserted replaced
36821:0c7350656f93 36822:882657a9f768
    29 #define XDL_GUESS_NLINES1 256
    29 #define XDL_GUESS_NLINES1 256
    30 
    30 
    31 
    31 
    32 typedef struct s_xdlclass {
    32 typedef struct s_xdlclass {
    33 	struct s_xdlclass *next;
    33 	struct s_xdlclass *next;
    34 	unsigned long ha;
    34 	uint64_t ha;
    35 	char const *line;
    35 	char const *line;
    36 	long size;
    36 	int64_t size;
    37 	long idx;
    37 	int64_t idx;
    38 	long len1, len2;
    38 	int64_t len1, len2;
    39 } xdlclass_t;
    39 } xdlclass_t;
    40 
    40 
    41 typedef struct s_xdlclassifier {
    41 typedef struct s_xdlclassifier {
    42 	unsigned int hbits;
    42 	unsigned int hbits;
    43 	long hsize;
    43 	int64_t hsize;
    44 	xdlclass_t **rchash;
    44 	xdlclass_t **rchash;
    45 	chastore_t ncha;
    45 	chastore_t ncha;
    46 	xdlclass_t **rcrecs;
    46 	xdlclass_t **rcrecs;
    47 	long alloc;
    47 	int64_t alloc;
    48 	long count;
    48 	int64_t count;
    49 	long flags;
    49 	int64_t flags;
    50 } xdlclassifier_t;
    50 } xdlclassifier_t;
    51 
    51 
    52 
    52 
    53 
    53 
    54 
    54 
    55 static int xdl_init_classifier(xdlclassifier_t *cf, long size, long flags);
    55 static int xdl_init_classifier(xdlclassifier_t *cf, int64_t size, int64_t flags);
    56 static void xdl_free_classifier(xdlclassifier_t *cf);
    56 static void xdl_free_classifier(xdlclassifier_t *cf);
    57 static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t **rhash,
    57 static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t **rhash,
    58 			       unsigned int hbits, xrecord_t *rec);
    58 			       unsigned int hbits, xrecord_t *rec);
    59 static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_t const *xpp,
    59 static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, int64_t narec, xpparam_t const *xpp,
    60 			   xdlclassifier_t *cf, xdfile_t *xdf);
    60 			   xdlclassifier_t *cf, xdfile_t *xdf);
    61 static void xdl_free_ctx(xdfile_t *xdf);
    61 static void xdl_free_ctx(xdfile_t *xdf);
    62 static int xdl_clean_mmatch(char const *dis, long i, long s, long e);
    62 static int xdl_clean_mmatch(char const *dis, int64_t i, int64_t s, int64_t e);
    63 static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2);
    63 static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2);
    64 static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2);
    64 static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2);
    65 static int xdl_optimize_ctxs(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2);
    65 static int xdl_optimize_ctxs(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2);
    66 
    66 
    67 
    67 
    68 
    68 
    69 
    69 
    70 static int xdl_init_classifier(xdlclassifier_t *cf, long size, long flags) {
    70 static int xdl_init_classifier(xdlclassifier_t *cf, int64_t size, int64_t flags) {
    71 	cf->flags = flags;
    71 	cf->flags = flags;
    72 
    72 
    73 	cf->hbits = xdl_hashbits((unsigned int) size);
    73 	cf->hbits = xdl_hashbits((unsigned int) size);
    74 	cf->hsize = 1 << cf->hbits;
    74 	cf->hsize = 1 << cf->hbits;
    75 
    75 
   106 }
   106 }
   107 
   107 
   108 
   108 
   109 static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t **rhash,
   109 static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t **rhash,
   110 			       unsigned int hbits, xrecord_t *rec) {
   110 			       unsigned int hbits, xrecord_t *rec) {
   111 	long hi;
   111 	int64_t hi;
   112 	char const *line;
   112 	char const *line;
   113 	xdlclass_t *rcrec;
   113 	xdlclass_t *rcrec;
   114 	xdlclass_t **rcrecs;
   114 	xdlclass_t **rcrecs;
   115 
   115 
   116 	line = rec->ptr;
   116 	line = rec->ptr;
   161  *
   161  *
   162  * Note: trimming could affect hunk shifting. But the performance benefit
   162  * Note: trimming could affect hunk shifting. But the performance benefit
   163  * outweighs the shift change. A diff result with suboptimal shifting is still
   163  * outweighs the shift change. A diff result with suboptimal shifting is still
   164  * valid.
   164  * valid.
   165  */
   165  */
   166 static void xdl_trim_files(mmfile_t *mf1, mmfile_t *mf2, long reserved,
   166 static void xdl_trim_files(mmfile_t *mf1, mmfile_t *mf2, int64_t reserved,
   167 		xdfenv_t *xe, mmfile_t *out_mf1, mmfile_t *out_mf2) {
   167 		xdfenv_t *xe, mmfile_t *out_mf1, mmfile_t *out_mf2) {
   168 	mmfile_t msmall, mlarge;
   168 	mmfile_t msmall, mlarge;
   169 	/* prefix lines, prefix bytes, suffix lines, suffix bytes */
   169 	/* prefix lines, prefix bytes, suffix lines, suffix bytes */
   170 	long plines = 0, pbytes = 0, slines = 0, sbytes = 0, i;
   170 	int64_t plines = 0, pbytes = 0, slines = 0, sbytes = 0, i;
   171 	/* prefix char pointer for msmall and mlarge */
   171 	/* prefix char pointer for msmall and mlarge */
   172 	const char *pp1, *pp2;
   172 	const char *pp1, *pp2;
   173 	/* suffix char pointer for msmall and mlarge */
   173 	/* suffix char pointer for msmall and mlarge */
   174 	const char *ps1, *ps2;
   174 	const char *ps1, *ps2;
   175 
   175 
   235 	out_mf2->ptr = mf2->ptr + pbytes;
   235 	out_mf2->ptr = mf2->ptr + pbytes;
   236 	out_mf2->size = mf2->size - pbytes - sbytes;
   236 	out_mf2->size = mf2->size - pbytes - sbytes;
   237 }
   237 }
   238 
   238 
   239 
   239 
   240 static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_t const *xpp,
   240 static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, int64_t narec, xpparam_t const *xpp,
   241 			   xdlclassifier_t *cf, xdfile_t *xdf) {
   241 			   xdlclassifier_t *cf, xdfile_t *xdf) {
   242 	unsigned int hbits;
   242 	unsigned int hbits;
   243 	long nrec, hsize, bsize;
   243 	int64_t nrec, hsize, bsize;
   244 	unsigned long hav;
   244 	uint64_t hav;
   245 	char const *blk, *cur, *top, *prev;
   245 	char const *blk, *cur, *top, *prev;
   246 	xrecord_t *crec;
   246 	xrecord_t *crec;
   247 	xrecord_t **recs, **rrecs;
   247 	xrecord_t **recs, **rrecs;
   248 	xrecord_t **rhash;
   248 	xrecord_t **rhash;
   249 	unsigned long *ha;
   249 	uint64_t *ha;
   250 	char *rchg;
   250 	char *rchg;
   251 	long *rindex;
   251 	int64_t *rindex;
   252 
   252 
   253 	ha = NULL;
   253 	ha = NULL;
   254 	rindex = NULL;
   254 	rindex = NULL;
   255 	rchg = NULL;
   255 	rchg = NULL;
   256 	rhash = NULL;
   256 	rhash = NULL;
   294 
   294 
   295 	if (!(rchg = (char *) xdl_malloc((nrec + 2) * sizeof(char))))
   295 	if (!(rchg = (char *) xdl_malloc((nrec + 2) * sizeof(char))))
   296 		goto abort;
   296 		goto abort;
   297 	memset(rchg, 0, (nrec + 2) * sizeof(char));
   297 	memset(rchg, 0, (nrec + 2) * sizeof(char));
   298 
   298 
   299 	if (!(rindex = (long *) xdl_malloc((nrec + 1) * sizeof(long))))
   299 	if (!(rindex = (int64_t *) xdl_malloc((nrec + 1) * sizeof(long))))
   300 		goto abort;
   300 		goto abort;
   301 	if (!(ha = (unsigned long *) xdl_malloc((nrec + 1) * sizeof(unsigned long))))
   301 	if (!(ha = (uint64_t *) xdl_malloc((nrec + 1) * sizeof(unsigned long))))
   302 		goto abort;
   302 		goto abort;
   303 
   303 
   304 	xdf->nrec = nrec;
   304 	xdf->nrec = nrec;
   305 	xdf->recs = recs;
   305 	xdf->recs = recs;
   306 	xdf->hbits = hbits;
   306 	xdf->hbits = hbits;
   338 /* Reserved lines for trimming, to leave room for shifting */
   338 /* Reserved lines for trimming, to leave room for shifting */
   339 #define TRIM_RESERVED_LINES 100
   339 #define TRIM_RESERVED_LINES 100
   340 
   340 
   341 int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
   341 int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
   342 		    xdfenv_t *xe) {
   342 		    xdfenv_t *xe) {
   343 	long enl1, enl2, sample;
   343 	int64_t enl1, enl2, sample;
   344 	mmfile_t tmf1, tmf2;
   344 	mmfile_t tmf1, tmf2;
   345 	xdlclassifier_t cf;
   345 	xdlclassifier_t cf;
   346 
   346 
   347 	memset(&cf, 0, sizeof(cf));
   347 	memset(&cf, 0, sizeof(cf));
   348 
   348 
   386 	xdl_free_ctx(&xe->xdf2);
   386 	xdl_free_ctx(&xe->xdf2);
   387 	xdl_free_ctx(&xe->xdf1);
   387 	xdl_free_ctx(&xe->xdf1);
   388 }
   388 }
   389 
   389 
   390 
   390 
   391 static int xdl_clean_mmatch(char const *dis, long i, long s, long e) {
   391 static int xdl_clean_mmatch(char const *dis, int64_t i, int64_t s, int64_t e) {
   392 	long r, rdis0, rpdis0, rdis1, rpdis1;
   392 	int64_t r, rdis0, rpdis0, rdis1, rpdis1;
   393 
   393 
   394 	/*
   394 	/*
   395 	 * Limits the window the is examined during the similar-lines
   395 	 * Limits the window the is examined during the similar-lines
   396 	 * scan. The loops below stops when dis[i - r] == 1 (line that
   396 	 * scan. The loops below stops when dis[i - r] == 1 (line that
   397 	 * has no match), but there are corner cases where the loop
   397 	 * has no match), but there are corner cases where the loop
   450  * Try to reduce the problem complexity, discard records that have no
   450  * Try to reduce the problem complexity, discard records that have no
   451  * matches on the other file. Also, lines that have multiple matches
   451  * matches on the other file. Also, lines that have multiple matches
   452  * might be potentially discarded if they happear in a run of discardable.
   452  * might be potentially discarded if they happear in a run of discardable.
   453  */
   453  */
   454 static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2) {
   454 static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2) {
   455 	long i, nm, nreff, mlim;
   455 	int64_t i, nm, nreff, mlim;
   456 	xrecord_t **recs;
   456 	xrecord_t **recs;
   457 	xdlclass_t *rcrec;
   457 	xdlclass_t *rcrec;
   458 	char *dis, *dis1, *dis2;
   458 	char *dis, *dis1, *dis2;
   459 
   459 
   460 	if (!(dis = (char *) xdl_malloc(xdf1->nrec + xdf2->nrec + 2))) {
   460 	if (!(dis = (char *) xdl_malloc(xdf1->nrec + xdf2->nrec + 2))) {
   513 
   513 
   514 /*
   514 /*
   515  * Early trim initial and terminal matching records.
   515  * Early trim initial and terminal matching records.
   516  */
   516  */
   517 static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2) {
   517 static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2) {
   518 	long i, lim;
   518 	int64_t i, lim;
   519 	xrecord_t **recs1, **recs2;
   519 	xrecord_t **recs1, **recs2;
   520 
   520 
   521 	recs1 = xdf1->recs;
   521 	recs1 = xdf1->recs;
   522 	recs2 = xdf2->recs;
   522 	recs2 = xdf2->recs;
   523 	for (i = 0, lim = XDL_MIN(xdf1->nrec, xdf2->nrec); i < lim;
   523 	for (i = 0, lim = XDL_MIN(xdf1->nrec, xdf2->nrec); i < lim;