mercurial/bdiff.c
author Pierre-Yves David <pierre-yves.david@octobus.net>
Wed, 11 Sep 2024 12:02:38 +0200
branchstable
changeset 51840 1bb71046f5e0
parent 46819 d4ba4d51f85f
permissions -rw-r--r--
profiling: improve 3.12 error message for calling lsprof twice Python 3.12 prevent lsprof to be enabled if it is already enabled. This break the use of lsprof in `hg serve` as both the initial `serve` command and the request serving want to profile. The "stat" profiler (the default) does not have this problem, so we focus on improving the error message for now.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
     1
/*
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
     2
 bdiff.c - efficient binary diff extension for Mercurial
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
     3
46819
d4ba4d51f85f contributor: change mentions of mpm to olivia
Raphaël Gomès <rgomes@octobus.net>
parents: 41336
diff changeset
     4
 Copyright 2005, 2006 Olivia Mackall <olivia@selenic.com>
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
     5
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
     6
 This software may be used and distributed according to the terms of
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
     7
 the GNU General Public License, incorporated herein by reference.
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
     8
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
     9
 Based roughly on Python difflib
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    10
*/
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    11
34626
ff4c9c6263de bdiff: sort includes using clang-format
Augie Fackler <augie@google.com>
parents: 30461
diff changeset
    12
#include <limits.h>
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    13
#include <stdlib.h>
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    14
#include <string.h>
867
0cd2ee61b10a Allow Mercurial to build on HP-UX 11
tksoh@users.sourceforge.net
parents: 839
diff changeset
    15
34626
ff4c9c6263de bdiff: sort includes using clang-format
Augie Fackler <augie@google.com>
parents: 30461
diff changeset
    16
#include "bdiff.h"
ff4c9c6263de bdiff: sort includes using clang-format
Augie Fackler <augie@google.com>
parents: 30461
diff changeset
    17
#include "bitmanipulation.h"
29539
666832b9e154 bdiff: use ssize_t in favor of Py_ssize_t in cpython-unaware locations
Maciej Fijalkowski <fijall@gmail.com>
parents: 29444
diff changeset
    18
#include "compat.h"
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    19
30318
e1d6aa0e4c3a bdiff: replace hash algorithm
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30308
diff changeset
    20
/* Hash implementation from diffutils */
e1d6aa0e4c3a bdiff: replace hash algorithm
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30308
diff changeset
    21
#define ROL(v, n) ((v) << (n) | (v) >> (sizeof(v) * CHAR_BIT - (n)))
34628
72b24ac81d5d bdiff: fix misplaced comma in macro definition with clang-format
Augie Fackler <augie@google.com>
parents: 34626
diff changeset
    22
#define HASH(h, c) ((c) + ROL(h, 7))
30318
e1d6aa0e4c3a bdiff: replace hash algorithm
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30308
diff changeset
    23
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
    24
struct pos {
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
    25
	int pos, len;
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
    26
};
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
    27
29541
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents: 29540
diff changeset
    28
int bdiff_splitlines(const char *a, ssize_t len, struct bdiff_line **lr)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    29
{
13732
afe9269dccec bdiff.c: rename all variables which hold a hash value to "hash"
Markus F.X.J. Oberhumer <markus@oberhumer.com>
parents: 13731
diff changeset
    30
	unsigned hash;
13731
5d0cdf4ec338 bdiff.c: use unsigned arithmetic for hash computation
Markus F.X.J. Oberhumer <markus@oberhumer.com>
parents: 13730
diff changeset
    31
	int i;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    32
	const char *p, *b = a;
34629
2c79a5a0c7f3 bdiff: remove extra space after * per clang-format
Augie Fackler <augie@google.com>
parents: 34628
diff changeset
    33
	const char *const plast = a + len - 1;
29540
4ce1fc91e30a bdiff: rename functions and structs to be amenable for later exporting
Maciej Fijalkowski <fijall@gmail.com>
parents: 29539
diff changeset
    34
	struct bdiff_line *l;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    35
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    36
	/* count the lines */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    37
	i = 1; /* extra line for sentinel */
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
    38
	for (p = a; p < plast; p++) {
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
    39
		if (*p == '\n') {
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    40
			i++;
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
    41
		}
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
    42
	}
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
    43
	if (p == plast) {
30308
d500ddae7494 bdiff: don't check border condition in loop
Gregory Szorc <gregory.szorc@gmail.com>
parents: 29541
diff changeset
    44
		i++;
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
    45
	}
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    46
35677
cf2e2a7399bc bdiff: handle the possibility of an integer overflow when allocating
Alex Gaynor <agaynor@mozilla.com>
parents: 34632
diff changeset
    47
	*lr = l = (struct bdiff_line *)calloc(i, sizeof(struct bdiff_line));
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
    48
	if (!l) {
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    49
		return -1;
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
    50
	}
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    51
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    52
	/* build the line array and calculate hashes */
13732
afe9269dccec bdiff.c: rename all variables which hold a hash value to "hash"
Markus F.X.J. Oberhumer <markus@oberhumer.com>
parents: 13731
diff changeset
    53
	hash = 0;
30461
d195fa651b51 bdiff: don't check border condition in loop
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30433
diff changeset
    54
	for (p = a; p < plast; p++) {
30318
e1d6aa0e4c3a bdiff: replace hash algorithm
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30308
diff changeset
    55
		hash = HASH(hash, *p);
5342
d0c48891dd4a bdiff: switch to lyhash
Matt Mackall <mpm@selenic.com>
parents: 5341
diff changeset
    56
30461
d195fa651b51 bdiff: don't check border condition in loop
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30433
diff changeset
    57
		if (*p == '\n') {
13732
afe9269dccec bdiff.c: rename all variables which hold a hash value to "hash"
Markus F.X.J. Oberhumer <markus@oberhumer.com>
parents: 13731
diff changeset
    58
			l->hash = hash;
afe9269dccec bdiff.c: rename all variables which hold a hash value to "hash"
Markus F.X.J. Oberhumer <markus@oberhumer.com>
parents: 13731
diff changeset
    59
			hash = 0;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    60
			l->len = p - b + 1;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    61
			l->l = b;
5341
458acf92b49e bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents: 5340
diff changeset
    62
			l->n = INT_MAX;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    63
			l++;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    64
			b = p + 1;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    65
		}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    66
	}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    67
30461
d195fa651b51 bdiff: don't check border condition in loop
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30433
diff changeset
    68
	if (p == plast) {
d195fa651b51 bdiff: don't check border condition in loop
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30433
diff changeset
    69
		hash = HASH(hash, *p);
d195fa651b51 bdiff: don't check border condition in loop
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30433
diff changeset
    70
		l->hash = hash;
d195fa651b51 bdiff: don't check border condition in loop
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30433
diff changeset
    71
		l->len = p - b + 1;
d195fa651b51 bdiff: don't check border condition in loop
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30433
diff changeset
    72
		l->l = b;
d195fa651b51 bdiff: don't check border condition in loop
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30433
diff changeset
    73
		l->n = INT_MAX;
d195fa651b51 bdiff: don't check border condition in loop
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30433
diff changeset
    74
		l++;
d195fa651b51 bdiff: don't check border condition in loop
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30433
diff changeset
    75
	}
d195fa651b51 bdiff: don't check border condition in loop
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30433
diff changeset
    76
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    77
	/* set up a sentinel */
13732
afe9269dccec bdiff.c: rename all variables which hold a hash value to "hash"
Markus F.X.J. Oberhumer <markus@oberhumer.com>
parents: 13731
diff changeset
    78
	l->hash = 0;
13731
5d0cdf4ec338 bdiff.c: use unsigned arithmetic for hash computation
Markus F.X.J. Oberhumer <markus@oberhumer.com>
parents: 13730
diff changeset
    79
	l->len = 0;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    80
	l->l = a + len;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    81
	return i - 1;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    82
}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    83
29540
4ce1fc91e30a bdiff: rename functions and structs to be amenable for later exporting
Maciej Fijalkowski <fijall@gmail.com>
parents: 29539
diff changeset
    84
static inline int cmp(struct bdiff_line *a, struct bdiff_line *b)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    85
{
34630
38463bd96d21 bdiff: re-wrap lines per clang-format
Augie Fackler <augie@google.com>
parents: 34629
diff changeset
    86
	return a->hash != b->hash || a->len != b->len ||
38463bd96d21 bdiff: re-wrap lines per clang-format
Augie Fackler <augie@google.com>
parents: 34629
diff changeset
    87
	       memcmp(a->l, b->l, a->len);
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    88
}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    89
29540
4ce1fc91e30a bdiff: rename functions and structs to be amenable for later exporting
Maciej Fijalkowski <fijall@gmail.com>
parents: 29539
diff changeset
    90
static int equatelines(struct bdiff_line *a, int an, struct bdiff_line *b,
34631
4dea82ee7945 bdiff: rewrap function prototypes per clang-format
Augie Fackler <augie@google.com>
parents: 34630
diff changeset
    91
                       int bn)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    92
{
5452
82b4ff3abbcd bdiff: tweaks for large files
Matt Mackall <mpm@selenic.com>
parents: 5342
diff changeset
    93
	int i, j, buckets = 1, t, scale;
82b4ff3abbcd bdiff: tweaks for large files
Matt Mackall <mpm@selenic.com>
parents: 5342
diff changeset
    94
	struct pos *h = NULL;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    95
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    96
	/* build a hash table of the next highest power of 2 */
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
    97
	while (buckets < bn + 1) {
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    98
		buckets *= 2;
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
    99
	}
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   100
5339
058e93c3d07d I have spotted the biggest bottleneck in "bdiff.c". Actually it was
Christoph Spiel <cspiel@freenet.de>
parents: 4134
diff changeset
   101
	/* try to allocate a large hash table to avoid collisions */
5452
82b4ff3abbcd bdiff: tweaks for large files
Matt Mackall <mpm@selenic.com>
parents: 5342
diff changeset
   102
	for (scale = 4; scale; scale /= 2) {
35723
50868145a8de bdiff: handle the possibility of overflow when computing allocation size
Alex Gaynor <agaynor@mozilla.com>
parents: 35677
diff changeset
   103
		h = (struct pos *)calloc(buckets, scale * sizeof(struct pos));
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
   104
		if (h) {
5452
82b4ff3abbcd bdiff: tweaks for large files
Matt Mackall <mpm@selenic.com>
parents: 5342
diff changeset
   105
			break;
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
   106
		}
5452
82b4ff3abbcd bdiff: tweaks for large files
Matt Mackall <mpm@selenic.com>
parents: 5342
diff changeset
   107
	}
5339
058e93c3d07d I have spotted the biggest bottleneck in "bdiff.c". Actually it was
Christoph Spiel <cspiel@freenet.de>
parents: 4134
diff changeset
   108
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
   109
	if (!h) {
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   110
		return 0;
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
   111
	}
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   112
5339
058e93c3d07d I have spotted the biggest bottleneck in "bdiff.c". Actually it was
Christoph Spiel <cspiel@freenet.de>
parents: 4134
diff changeset
   113
	buckets = buckets * scale - 1;
058e93c3d07d I have spotted the biggest bottleneck in "bdiff.c". Actually it was
Christoph Spiel <cspiel@freenet.de>
parents: 4134
diff changeset
   114
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   115
	/* clear the hash table */
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   116
	for (i = 0; i <= buckets; i++) {
29013
9a8363d23419 bdiff: deal better with duplicate lines
Matt Mackall <mpm@selenic.com>
parents: 29012
diff changeset
   117
		h[i].pos = -1;
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   118
		h[i].len = 0;
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   119
	}
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   120
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   121
	/* add lines to the hash table chains */
29013
9a8363d23419 bdiff: deal better with duplicate lines
Matt Mackall <mpm@selenic.com>
parents: 29012
diff changeset
   122
	for (i = 0; i < bn; i++) {
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   123
		/* find the equivalence class */
29013
9a8363d23419 bdiff: deal better with duplicate lines
Matt Mackall <mpm@selenic.com>
parents: 29012
diff changeset
   124
		for (j = b[i].hash & buckets; h[j].pos != -1;
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
   125
		     j = (j + 1) & buckets) {
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
   126
			if (!cmp(b + i, b + h[j].pos)) {
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   127
				break;
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
   128
			}
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
   129
		}
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   130
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   131
		/* add to the head of the equivalence class */
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   132
		b[i].n = h[j].pos;
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   133
		b[i].e = j;
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   134
		h[j].pos = i;
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   135
		h[j].len++; /* keep track of popularity */
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   136
	}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   137
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   138
	/* compute popularity threshold */
9534
8e202431d620 bdiff: gradually enable the popularity hack
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 8858
diff changeset
   139
	t = (bn >= 31000) ? bn / 1000 : 1000000 / (bn + 1);
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   140
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   141
	/* match items in a to their equivalence class in b */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   142
	for (i = 0; i < an; i++) {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   143
		/* find the equivalence class */
29013
9a8363d23419 bdiff: deal better with duplicate lines
Matt Mackall <mpm@selenic.com>
parents: 29012
diff changeset
   144
		for (j = a[i].hash & buckets; h[j].pos != -1;
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
   145
		     j = (j + 1) & buckets) {
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
   146
			if (!cmp(a + i, b + h[j].pos)) {
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   147
				break;
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
   148
			}
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
   149
		}
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   150
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   151
		a[i].e = j; /* use equivalence class for quick compare */
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
   152
		if (h[j].len <= t) {
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   153
			a[i].n = h[j].pos; /* point to head of match list */
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
   154
		} else {
29013
9a8363d23419 bdiff: deal better with duplicate lines
Matt Mackall <mpm@selenic.com>
parents: 29012
diff changeset
   155
			a[i].n = -1; /* too popular */
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
   156
		}
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   157
	}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   158
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   159
	/* discard hash tables */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   160
	free(h);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   161
	return 1;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   162
}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   163
29540
4ce1fc91e30a bdiff: rename functions and structs to be amenable for later exporting
Maciej Fijalkowski <fijall@gmail.com>
parents: 29539
diff changeset
   164
static int longest_match(struct bdiff_line *a, struct bdiff_line *b,
34631
4dea82ee7945 bdiff: rewrap function prototypes per clang-format
Augie Fackler <augie@google.com>
parents: 34630
diff changeset
   165
                         struct pos *pos, int a1, int a2, int b1, int b2,
4dea82ee7945 bdiff: rewrap function prototypes per clang-format
Augie Fackler <augie@google.com>
parents: 34630
diff changeset
   166
                         int *omi, int *omj)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   167
{
30431
8c0c75aa3ff4 bdiff: give slight preference to longest matches in the middle of the B side
Mads Kiilerich <madski@unity3d.com>
parents: 30430
diff changeset
   168
	int mi = a1, mj = b1, mk = 0, i, j, k, half, bhalf;
29015
87d4a6c5567e bdiff: further restrain potential quadratic performance
Matt Mackall <mpm@selenic.com>
parents: 29014
diff changeset
   169
87d4a6c5567e bdiff: further restrain potential quadratic performance
Matt Mackall <mpm@selenic.com>
parents: 29014
diff changeset
   170
	/* window our search on large regions to better bound
87d4a6c5567e bdiff: further restrain potential quadratic performance
Matt Mackall <mpm@selenic.com>
parents: 29014
diff changeset
   171
	   worst-case performance. by choosing a window at the end, we
87d4a6c5567e bdiff: further restrain potential quadratic performance
Matt Mackall <mpm@selenic.com>
parents: 29014
diff changeset
   172
	   reduce skipping overhead on the b chains. */
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
   173
	if (a2 - a1 > 30000) {
29015
87d4a6c5567e bdiff: further restrain potential quadratic performance
Matt Mackall <mpm@selenic.com>
parents: 29014
diff changeset
   174
		a1 = a2 - 30000;
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
   175
	}
29015
87d4a6c5567e bdiff: further restrain potential quadratic performance
Matt Mackall <mpm@selenic.com>
parents: 29014
diff changeset
   176
30429
38ed54888617 bdiff: adjust criteria for getting optimal longest match in the A side middle
Mads Kiilerich <madski@unity3d.com>
parents: 30318
diff changeset
   177
	half = (a1 + a2 - 1) / 2;
30431
8c0c75aa3ff4 bdiff: give slight preference to longest matches in the middle of the B side
Mads Kiilerich <madski@unity3d.com>
parents: 30430
diff changeset
   178
	bhalf = (b1 + b2 - 1) / 2;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   179
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   180
	for (i = a1; i < a2; i++) {
29013
9a8363d23419 bdiff: deal better with duplicate lines
Matt Mackall <mpm@selenic.com>
parents: 29012
diff changeset
   181
		/* skip all lines in b after the current block */
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
   182
		for (j = a[i].n; j >= b2; j = b[j].n) {
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   183
			;
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
   184
		}
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   185
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   186
		/* loop through all lines match a[i] in b */
29013
9a8363d23419 bdiff: deal better with duplicate lines
Matt Mackall <mpm@selenic.com>
parents: 29012
diff changeset
   187
		for (; j >= b1; j = b[j].n) {
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   188
			/* does this extend an earlier match? */
29322
66dbdd3cc2b9 bdiff: extend matches across popular lines
Matt Mackall <mpm@selenic.com>
parents: 29015
diff changeset
   189
			for (k = 1; j - k >= b1 && i - k >= a1; k++) {
66dbdd3cc2b9 bdiff: extend matches across popular lines
Matt Mackall <mpm@selenic.com>
parents: 29015
diff changeset
   190
				/* reached an earlier match? */
66dbdd3cc2b9 bdiff: extend matches across popular lines
Matt Mackall <mpm@selenic.com>
parents: 29015
diff changeset
   191
				if (pos[j - k].pos == i - k) {
66dbdd3cc2b9 bdiff: extend matches across popular lines
Matt Mackall <mpm@selenic.com>
parents: 29015
diff changeset
   192
					k += pos[j - k].len;
66dbdd3cc2b9 bdiff: extend matches across popular lines
Matt Mackall <mpm@selenic.com>
parents: 29015
diff changeset
   193
					break;
66dbdd3cc2b9 bdiff: extend matches across popular lines
Matt Mackall <mpm@selenic.com>
parents: 29015
diff changeset
   194
				}
66dbdd3cc2b9 bdiff: extend matches across popular lines
Matt Mackall <mpm@selenic.com>
parents: 29015
diff changeset
   195
				/* previous line mismatch? */
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
   196
				if (a[i - k].e != b[j - k].e) {
29322
66dbdd3cc2b9 bdiff: extend matches across popular lines
Matt Mackall <mpm@selenic.com>
parents: 29015
diff changeset
   197
					break;
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
   198
				}
29322
66dbdd3cc2b9 bdiff: extend matches across popular lines
Matt Mackall <mpm@selenic.com>
parents: 29015
diff changeset
   199
			}
66dbdd3cc2b9 bdiff: extend matches across popular lines
Matt Mackall <mpm@selenic.com>
parents: 29015
diff changeset
   200
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   201
			pos[j].pos = i;
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   202
			pos[j].len = k;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   203
29014
f1ca249696ed bdiff: balance recursion to avoid quadratic behavior (issue4704)
Matt Mackall <mpm@selenic.com>
parents: 29013
diff changeset
   204
			/* best match so far? we prefer matches closer
f1ca249696ed bdiff: balance recursion to avoid quadratic behavior (issue4704)
Matt Mackall <mpm@selenic.com>
parents: 29013
diff changeset
   205
			   to the middle to balance recursion */
30430
5c4e2636c1a9 bdiff: rearrange the "better longest match" code
Mads Kiilerich <madski@unity3d.com>
parents: 30429
diff changeset
   206
			if (k > mk) {
5c4e2636c1a9 bdiff: rearrange the "better longest match" code
Mads Kiilerich <madski@unity3d.com>
parents: 30429
diff changeset
   207
				/* a longer match */
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   208
				mi = i;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   209
				mj = j;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   210
				mk = k;
30430
5c4e2636c1a9 bdiff: rearrange the "better longest match" code
Mads Kiilerich <madski@unity3d.com>
parents: 30429
diff changeset
   211
			} else if (k == mk) {
30433
96f2f50d923f bdiff: give slight preference to removing trailing lines
Mads Kiilerich <madski@unity3d.com>
parents: 30432
diff changeset
   212
				if (i > mi && i <= half && j > b1) {
30430
5c4e2636c1a9 bdiff: rearrange the "better longest match" code
Mads Kiilerich <madski@unity3d.com>
parents: 30429
diff changeset
   213
					/* same match but closer to half */
5c4e2636c1a9 bdiff: rearrange the "better longest match" code
Mads Kiilerich <madski@unity3d.com>
parents: 30429
diff changeset
   214
					mi = i;
5c4e2636c1a9 bdiff: rearrange the "better longest match" code
Mads Kiilerich <madski@unity3d.com>
parents: 30429
diff changeset
   215
					mj = j;
30432
3633403888ae bdiff: give slight preference to appending lines
Mads Kiilerich <madski@unity3d.com>
parents: 30431
diff changeset
   216
				} else if (i == mi && (mj > bhalf || i == a1)) {
30431
8c0c75aa3ff4 bdiff: give slight preference to longest matches in the middle of the B side
Mads Kiilerich <madski@unity3d.com>
parents: 30430
diff changeset
   217
					/* same i but best earlier j */
30430
5c4e2636c1a9 bdiff: rearrange the "better longest match" code
Mads Kiilerich <madski@unity3d.com>
parents: 30429
diff changeset
   218
					mj = j;
5c4e2636c1a9 bdiff: rearrange the "better longest match" code
Mads Kiilerich <madski@unity3d.com>
parents: 30429
diff changeset
   219
				}
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   220
			}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   221
		}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   222
	}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   223
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   224
	if (mk) {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   225
		mi = mi - mk + 1;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   226
		mj = mj - mk + 1;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   227
	}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   228
29323
d29cb5e735e9 bdiff: remove effectively dead code
Matt Mackall <mpm@selenic.com>
parents: 29322
diff changeset
   229
	/* expand match to include subsequent popular lines */
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
   230
	while (mi + mk < a2 && mj + mk < b2 && a[mi + mk].e == b[mj + mk].e) {
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   231
		mk++;
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
   232
	}
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   233
29323
d29cb5e735e9 bdiff: remove effectively dead code
Matt Mackall <mpm@selenic.com>
parents: 29322
diff changeset
   234
	*omi = mi;
d29cb5e735e9 bdiff: remove effectively dead code
Matt Mackall <mpm@selenic.com>
parents: 29322
diff changeset
   235
	*omj = mj;
5341
458acf92b49e bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents: 5340
diff changeset
   236
29323
d29cb5e735e9 bdiff: remove effectively dead code
Matt Mackall <mpm@selenic.com>
parents: 29322
diff changeset
   237
	return mk;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   238
}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   239
29540
4ce1fc91e30a bdiff: rename functions and structs to be amenable for later exporting
Maciej Fijalkowski <fijall@gmail.com>
parents: 29539
diff changeset
   240
static struct bdiff_hunk *recurse(struct bdiff_line *a, struct bdiff_line *b,
34631
4dea82ee7945 bdiff: rewrap function prototypes per clang-format
Augie Fackler <augie@google.com>
parents: 34630
diff changeset
   241
                                  struct pos *pos, int a1, int a2, int b1,
4dea82ee7945 bdiff: rewrap function prototypes per clang-format
Augie Fackler <augie@google.com>
parents: 34630
diff changeset
   242
                                  int b2, struct bdiff_hunk *l)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   243
{
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   244
	int i, j, k;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   245
10500
e96597c8d0ea bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents: 10282
diff changeset
   246
	while (1) {
e96597c8d0ea bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents: 10282
diff changeset
   247
		/* find the longest match in this chunk */
e96597c8d0ea bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents: 10282
diff changeset
   248
		k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
   249
		if (!k) {
13089
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   250
			return l;
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
   251
		}
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   252
10500
e96597c8d0ea bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents: 10282
diff changeset
   253
		/* and recurse on the remaining chunks on either side */
13089
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   254
		l = recurse(a, b, pos, a1, i, b1, j, l);
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
   255
		if (!l) {
13089
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   256
			return NULL;
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
   257
		}
13089
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   258
34630
38463bd96d21 bdiff: re-wrap lines per clang-format
Augie Fackler <augie@google.com>
parents: 34629
diff changeset
   259
		l->next =
38463bd96d21 bdiff: re-wrap lines per clang-format
Augie Fackler <augie@google.com>
parents: 34629
diff changeset
   260
		    (struct bdiff_hunk *)malloc(sizeof(struct bdiff_hunk));
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
   261
		if (!l->next) {
13089
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   262
			return NULL;
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
   263
		}
13089
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   264
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   265
		l = l->next;
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   266
		l->a1 = i;
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   267
		l->a2 = i + k;
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   268
		l->b1 = j;
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   269
		l->b2 = j + k;
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   270
		l->next = NULL;
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   271
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   272
		/* tail-recursion didn't happen, so do equivalent iteration */
10500
e96597c8d0ea bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents: 10282
diff changeset
   273
		a1 = i + k;
e96597c8d0ea bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents: 10282
diff changeset
   274
		b1 = j + k;
e96597c8d0ea bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents: 10282
diff changeset
   275
	}
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   276
}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   277
34631
4dea82ee7945 bdiff: rewrap function prototypes per clang-format
Augie Fackler <augie@google.com>
parents: 34630
diff changeset
   278
int bdiff_diff(struct bdiff_line *a, int an, struct bdiff_line *b, int bn,
4dea82ee7945 bdiff: rewrap function prototypes per clang-format
Augie Fackler <augie@google.com>
parents: 34630
diff changeset
   279
               struct bdiff_hunk *base)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   280
{
29540
4ce1fc91e30a bdiff: rename functions and structs to be amenable for later exporting
Maciej Fijalkowski <fijall@gmail.com>
parents: 29539
diff changeset
   281
	struct bdiff_hunk *curr;
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   282
	struct pos *pos;
13089
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   283
	int t, count = 0;
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   284
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   285
	/* allocate and fill arrays */
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   286
	t = equatelines(a, an, b, bn);
5571
f84bb2e1cc3a fix calloc(0, ...) issue
Jim Hague <jim.hague@acm.org>
parents: 5452
diff changeset
   287
	pos = (struct pos *)calloc(bn ? bn : 1, sizeof(struct pos));
13089
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   288
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   289
	if (pos && t) {
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   290
		/* generate the matching block list */
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   291
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   292
		curr = recurse(a, b, pos, 0, an, 0, bn, base);
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
   293
		if (!curr) {
13089
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   294
			return -1;
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
   295
		}
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   296
13089
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   297
		/* sentinel end hunk */
34630
38463bd96d21 bdiff: re-wrap lines per clang-format
Augie Fackler <augie@google.com>
parents: 34629
diff changeset
   298
		curr->next =
38463bd96d21 bdiff: re-wrap lines per clang-format
Augie Fackler <augie@google.com>
parents: 34629
diff changeset
   299
		    (struct bdiff_hunk *)malloc(sizeof(struct bdiff_hunk));
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
   300
		if (!curr->next) {
13090
c73745762f33 bdiff: Fix bogus NULL return
Matt Mackall <mpm@selenic.com>
parents: 13089
diff changeset
   301
			return -1;
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
   302
		}
13089
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   303
		curr = curr->next;
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   304
		curr->a1 = curr->a2 = an;
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   305
		curr->b1 = curr->b2 = bn;
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   306
		curr->next = NULL;
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   307
	}
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   308
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   309
	free(pos);
7104
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
   310
7625
930a2be7e875 bdiff: add comment about normalization
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7189
diff changeset
   311
	/* normalize the hunk list, try to push each hunk towards the end */
13089
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   312
	for (curr = base->next; curr; curr = curr->next) {
29540
4ce1fc91e30a bdiff: rename functions and structs to be amenable for later exporting
Maciej Fijalkowski <fijall@gmail.com>
parents: 29539
diff changeset
   313
		struct bdiff_hunk *next = curr->next;
7104
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
   314
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
   315
		if (!next) {
7104
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
   316
			break;
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
   317
		}
7104
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
   318
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
   319
		if (curr->a2 == next->a1 || curr->b2 == next->b1) {
34630
38463bd96d21 bdiff: re-wrap lines per clang-format
Augie Fackler <augie@google.com>
parents: 34629
diff changeset
   320
			while (curr->a2 < an && curr->b2 < bn &&
38463bd96d21 bdiff: re-wrap lines per clang-format
Augie Fackler <augie@google.com>
parents: 34629
diff changeset
   321
			       next->a1 < next->a2 && next->b1 < next->b2 &&
38463bd96d21 bdiff: re-wrap lines per clang-format
Augie Fackler <augie@google.com>
parents: 34629
diff changeset
   322
			       !cmp(a + curr->a2, b + curr->b2)) {
29011
8bcda4c76820 bdiff: fold in shift calculation in normalize
Matt Mackall <mpm@selenic.com>
parents: 29010
diff changeset
   323
				curr->a2++;
8bcda4c76820 bdiff: fold in shift calculation in normalize
Matt Mackall <mpm@selenic.com>
parents: 29010
diff changeset
   324
				next->a1++;
8bcda4c76820 bdiff: fold in shift calculation in normalize
Matt Mackall <mpm@selenic.com>
parents: 29010
diff changeset
   325
				curr->b2++;
8bcda4c76820 bdiff: fold in shift calculation in normalize
Matt Mackall <mpm@selenic.com>
parents: 29010
diff changeset
   326
				next->b1++;
8bcda4c76820 bdiff: fold in shift calculation in normalize
Matt Mackall <mpm@selenic.com>
parents: 29010
diff changeset
   327
			}
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
   328
		}
7104
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
   329
	}
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
   330
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
   331
	for (curr = base->next; curr; curr = curr->next) {
13089
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   332
		count++;
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 38308
diff changeset
   333
	}
13089
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   334
	return count;
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   335
}
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   336
38308
068e774ae29e bdiff: document that bdiff_freehunks() accepts NULL
Yuya Nishihara <yuya@tcha.org>
parents: 35723
diff changeset
   337
/* deallocate list of hunks; l may be NULL */
29541
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents: 29540
diff changeset
   338
void bdiff_freehunks(struct bdiff_hunk *l)
13089
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   339
{
29540
4ce1fc91e30a bdiff: rename functions and structs to be amenable for later exporting
Maciej Fijalkowski <fijall@gmail.com>
parents: 29539
diff changeset
   340
	struct bdiff_hunk *n;
13089
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   341
	for (; l; l = n) {
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   342
		n = l->next;
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   343
		free(l);
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   344
	}
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   345
}