mercurial/bdiff.c
author Nicolas Dumazet <nicdumz.commits@gmail.com>
Thu, 11 Nov 2010 02:10:37 +0900
branchstable
changeset 12972 7916a84c0758
parent 11364 0044193a1c45
child 13089 faee0ffbc24b
permissions -rw-r--r--
log: fix log -rREV FILE when REV isnt the last filerev (issue2492) Regression from 99cafcae25d9. That previous commit is not supposed to affect log calls without --follow, so we step out of this codepath if follow is not True, and it's enough to fix the regression. When --follow is given, we fix the issue by taking into account changesets that have a rev > maxrev to build the filegraph: even if those files are not included in the final result, it's still needed to walk correctly the graph from the end of the filelog to minrev, to track accurately renames.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
     1
/*
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
     2
 bdiff.c - efficient binary diff extension for Mercurial
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
     3
2859
345bac2bc4ec update copyrights.
Vadim Gelfer <vadim.gelfer@gmail.com>
parents: 2600
diff changeset
     4
 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
     5
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
     6
 This software may be used and distributed according to the terms of
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
     7
 the GNU General Public License, incorporated herein by reference.
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
     8
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
     9
 Based roughly on Python difflib
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    10
*/
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    11
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    12
#include <Python.h>
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    13
#include <stdlib.h>
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    14
#include <string.h>
5341
458acf92b49e bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents: 5340
diff changeset
    15
#include <limits.h>
867
0cd2ee61b10a Allow Mercurial to build on HP-UX 11
tksoh@users.sourceforge.net
parents: 839
diff changeset
    16
3561
8c617d48564a add AIX to the list of compilers that don't have inline keyword
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 3369
diff changeset
    17
#if defined __hpux || defined __SUNPRO_C || defined _AIX
10282
08a0f04b56bd many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents: 9534
diff changeset
    18
#define inline
2600
c4325f0a9b91 clean up trailing white space.
Vadim Gelfer <vadim.gelfer@gmail.com>
parents: 2577
diff changeset
    19
#endif
1759
5afd459db177 Sunpro compiler patch
Fabian Otto <sigsegv@alchiba.ni.cs.tu-berlin.de>
parents: 1542
diff changeset
    20
8858
16f6c13706df bdiff: fix compile with GCC -ansi (issue1690)
Matt Mackall <mpm@selenic.com>
parents: 7625
diff changeset
    21
#ifdef __linux
10282
08a0f04b56bd many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents: 9534
diff changeset
    22
#define inline __inline
8858
16f6c13706df bdiff: fix compile with GCC -ansi (issue1690)
Matt Mackall <mpm@selenic.com>
parents: 7625
diff changeset
    23
#endif
16f6c13706df bdiff: fix compile with GCC -ansi (issue1690)
Matt Mackall <mpm@selenic.com>
parents: 7625
diff changeset
    24
411
9e9f7ab43ce2 Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents: 400
diff changeset
    25
#ifdef _WIN32
551
b460a2fd8bb7 [PATCH] bdiff/mpatch under MSVC
mpm@selenic.com
parents: 510
diff changeset
    26
#ifdef _MSC_VER
b460a2fd8bb7 [PATCH] bdiff/mpatch under MSVC
mpm@selenic.com
parents: 510
diff changeset
    27
#define inline __inline
b460a2fd8bb7 [PATCH] bdiff/mpatch under MSVC
mpm@selenic.com
parents: 510
diff changeset
    28
typedef unsigned long uint32_t;
b460a2fd8bb7 [PATCH] bdiff/mpatch under MSVC
mpm@selenic.com
parents: 510
diff changeset
    29
#else
510
7f3fc8fd427e More fiddling with uint32_t includes for extensions
mpm@selenic.com
parents: 495
diff changeset
    30
#include <stdint.h>
551
b460a2fd8bb7 [PATCH] bdiff/mpatch under MSVC
mpm@selenic.com
parents: 510
diff changeset
    31
#endif
411
9e9f7ab43ce2 Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents: 400
diff changeset
    32
static uint32_t htonl(uint32_t x)
9e9f7ab43ce2 Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents: 400
diff changeset
    33
{
9e9f7ab43ce2 Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents: 400
diff changeset
    34
	return ((x & 0x000000ffUL) << 24) |
9e9f7ab43ce2 Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents: 400
diff changeset
    35
		((x & 0x0000ff00UL) <<  8) |
9e9f7ab43ce2 Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents: 400
diff changeset
    36
		((x & 0x00ff0000UL) >>  8) |
9e9f7ab43ce2 Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents: 400
diff changeset
    37
		((x & 0xff000000UL) >> 24);
9e9f7ab43ce2 Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents: 400
diff changeset
    38
}
9e9f7ab43ce2 Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents: 400
diff changeset
    39
#else
510
7f3fc8fd427e More fiddling with uint32_t includes for extensions
mpm@selenic.com
parents: 495
diff changeset
    40
#include <sys/types.h>
7036
bfad9865b1dc allow Mercurial to compile on Haiku
Scott McCreary <scottmc2@gmail.com>
parents: 5620
diff changeset
    41
#if defined __BEOS__ && !defined __HAIKU__
4073
95ffa36d1d2a BeOS compatibility support
Andrew Bachmann <andrewbachmann@gmail.com>
parents: 3561
diff changeset
    42
#include <ByteOrder.h>
95ffa36d1d2a BeOS compatibility support
Andrew Bachmann <andrewbachmann@gmail.com>
parents: 3561
diff changeset
    43
#else
597
e530637ea060 [PATCH] use <arpa/inet.h> instead of <netinet/in.h> for ntohl/htonl
mpm@selenic.com
parents: 553
diff changeset
    44
#include <arpa/inet.h>
4073
95ffa36d1d2a BeOS compatibility support
Andrew Bachmann <andrewbachmann@gmail.com>
parents: 3561
diff changeset
    45
#endif
2543
860e9c83fc59 Include inttypes.h instead of stdint.h (fixes issue299)
Thomas Arendsen Hein <thomas@intevation.de>
parents: 2483
diff changeset
    46
#include <inttypes.h>
411
9e9f7ab43ce2 Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents: 400
diff changeset
    47
#endif
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    48
11364
0044193a1c45 bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10500
diff changeset
    49
#include "util.h"
0044193a1c45 bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10500
diff changeset
    50
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    51
struct line {
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
    52
	int h, len, n, e;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    53
	const char *l;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    54
};
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    55
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
    56
struct pos {
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
    57
	int pos, len;
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
    58
};
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
    59
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    60
struct hunk {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    61
	int a1, a2, b1, b2;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    62
};
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    63
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    64
struct hunklist {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    65
	struct hunk *base, *head;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    66
};
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    67
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    68
int splitlines(const char *a, int len, struct line **lr)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    69
{
5342
d0c48891dd4a bdiff: switch to lyhash
Matt Mackall <mpm@selenic.com>
parents: 5341
diff changeset
    70
	int h, i;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    71
	const char *p, *b = a;
5340
5737845fd974 bdiff: simple splitlines optimization
Christoph Spiel <cspiel@freenet.de>
parents: 5339
diff changeset
    72
	const char * const plast = a + len - 1;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    73
	struct line *l;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    74
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    75
	/* count the lines */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    76
	i = 1; /* extra line for sentinel */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    77
	for (p = a; p < a + len; p++)
5340
5737845fd974 bdiff: simple splitlines optimization
Christoph Spiel <cspiel@freenet.de>
parents: 5339
diff changeset
    78
		if (*p == '\n' || p == plast)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    79
			i++;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    80
1978
10606ee61107 do proper typecasting on malloc() and calloc() calls
TK Soh <teekaysoh@yahoo.com>
parents: 1759
diff changeset
    81
	*lr = l = (struct line *)malloc(sizeof(struct line) * i);
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    82
	if (!l)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    83
		return -1;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    84
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    85
	/* build the line array and calculate hashes */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    86
	h = 0;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    87
	for (p = a; p < a + len; p++) {
5342
d0c48891dd4a bdiff: switch to lyhash
Matt Mackall <mpm@selenic.com>
parents: 5341
diff changeset
    88
		/* Leonid Yuriev's hash */
7187
a79d3f535809 spaces->tabs in one line of a C extension for consistency
Thomas Arendsen Hein <thomas@intevation.de>
parents: 7036
diff changeset
    89
		h = (h * 1664525) + *p + 1013904223;
5342
d0c48891dd4a bdiff: switch to lyhash
Matt Mackall <mpm@selenic.com>
parents: 5341
diff changeset
    90
5340
5737845fd974 bdiff: simple splitlines optimization
Christoph Spiel <cspiel@freenet.de>
parents: 5339
diff changeset
    91
		if (*p == '\n' || p == plast) {
5342
d0c48891dd4a bdiff: switch to lyhash
Matt Mackall <mpm@selenic.com>
parents: 5341
diff changeset
    92
			l->h = h;
d0c48891dd4a bdiff: switch to lyhash
Matt Mackall <mpm@selenic.com>
parents: 5341
diff changeset
    93
			h = 0;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    94
			l->len = p - b + 1;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    95
			l->l = b;
5341
458acf92b49e bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents: 5340
diff changeset
    96
			l->n = INT_MAX;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    97
			l++;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    98
			b = p + 1;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    99
		}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   100
	}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   101
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   102
	/* set up a sentinel */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   103
	l->h = l->len = 0;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   104
	l->l = a + len;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   105
	return i - 1;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   106
}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   107
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   108
int inline cmp(struct line *a, struct line *b)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   109
{
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   110
	return a->h != b->h || a->len != b->len || memcmp(a->l, b->l, a->len);
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   111
}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   112
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   113
static int equatelines(struct line *a, int an, struct line *b, int bn)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   114
{
5452
82b4ff3abbcd bdiff: tweaks for large files
Matt Mackall <mpm@selenic.com>
parents: 5342
diff changeset
   115
	int i, j, buckets = 1, t, scale;
82b4ff3abbcd bdiff: tweaks for large files
Matt Mackall <mpm@selenic.com>
parents: 5342
diff changeset
   116
	struct pos *h = NULL;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   117
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   118
	/* build a hash table of the next highest power of 2 */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   119
	while (buckets < bn + 1)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   120
		buckets *= 2;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   121
5339
058e93c3d07d I have spotted the biggest bottleneck in "bdiff.c". Actually it was
Christoph Spiel <cspiel@freenet.de>
parents: 4134
diff changeset
   122
	/* try to allocate a large hash table to avoid collisions */
5452
82b4ff3abbcd bdiff: tweaks for large files
Matt Mackall <mpm@selenic.com>
parents: 5342
diff changeset
   123
	for (scale = 4; scale; scale /= 2) {
5339
058e93c3d07d I have spotted the biggest bottleneck in "bdiff.c". Actually it was
Christoph Spiel <cspiel@freenet.de>
parents: 4134
diff changeset
   124
		h = (struct pos *)malloc(scale * buckets * sizeof(struct pos));
5452
82b4ff3abbcd bdiff: tweaks for large files
Matt Mackall <mpm@selenic.com>
parents: 5342
diff changeset
   125
		if (h)
82b4ff3abbcd bdiff: tweaks for large files
Matt Mackall <mpm@selenic.com>
parents: 5342
diff changeset
   126
			break;
82b4ff3abbcd bdiff: tweaks for large files
Matt Mackall <mpm@selenic.com>
parents: 5342
diff changeset
   127
	}
5339
058e93c3d07d I have spotted the biggest bottleneck in "bdiff.c". Actually it was
Christoph Spiel <cspiel@freenet.de>
parents: 4134
diff changeset
   128
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   129
	if (!h)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   130
		return 0;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   131
5339
058e93c3d07d I have spotted the biggest bottleneck in "bdiff.c". Actually it was
Christoph Spiel <cspiel@freenet.de>
parents: 4134
diff changeset
   132
	buckets = buckets * scale - 1;
058e93c3d07d I have spotted the biggest bottleneck in "bdiff.c". Actually it was
Christoph Spiel <cspiel@freenet.de>
parents: 4134
diff changeset
   133
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   134
	/* clear the hash table */
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   135
	for (i = 0; i <= buckets; i++) {
5341
458acf92b49e bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents: 5340
diff changeset
   136
		h[i].pos = INT_MAX;
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   137
		h[i].len = 0;
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   138
	}
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   139
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   140
	/* add lines to the hash table chains */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   141
	for (i = bn - 1; i >= 0; i--) {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   142
		/* find the equivalence class */
5341
458acf92b49e bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents: 5340
diff changeset
   143
		for (j = b[i].h & buckets; h[j].pos != INT_MAX;
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   144
		     j = (j + 1) & buckets)
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   145
			if (!cmp(b + i, b + h[j].pos))
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   146
				break;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   147
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   148
		/* add to the head of the equivalence class */
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   149
		b[i].n = h[j].pos;
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   150
		b[i].e = j;
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   151
		h[j].pos = i;
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   152
		h[j].len++; /* keep track of popularity */
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   153
	}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   154
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   155
	/* compute popularity threshold */
9534
8e202431d620 bdiff: gradually enable the popularity hack
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 8858
diff changeset
   156
	t = (bn >= 31000) ? bn / 1000 : 1000000 / (bn + 1);
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   157
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   158
	/* match items in a to their equivalence class in b */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   159
	for (i = 0; i < an; i++) {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   160
		/* find the equivalence class */
5341
458acf92b49e bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents: 5340
diff changeset
   161
		for (j = a[i].h & buckets; h[j].pos != INT_MAX;
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   162
		     j = (j + 1) & buckets)
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   163
			if (!cmp(a + i, b + h[j].pos))
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   164
				break;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   165
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   166
		a[i].e = j; /* use equivalence class for quick compare */
1542
8e80eefb3de6 made C src formatting more consistent
twaldmann@thinkmo.de
parents: 1397
diff changeset
   167
		if (h[j].len <= t)
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   168
			a[i].n = h[j].pos; /* point to head of match list */
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   169
		else
5341
458acf92b49e bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents: 5340
diff changeset
   170
			a[i].n = INT_MAX; /* too popular */
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   171
	}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   172
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   173
	/* discard hash tables */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   174
	free(h);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   175
	return 1;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   176
}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   177
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   178
static int longest_match(struct line *a, struct line *b, struct pos *pos,
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   179
			 int a1, int a2, int b1, int b2, int *omi, int *omj)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   180
{
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   181
	int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   182
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   183
	for (i = a1; i < a2; i++) {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   184
		/* skip things before the current block */
5341
458acf92b49e bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents: 5340
diff changeset
   185
		for (j = a[i].n; j < b1; j = b[j].n)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   186
			;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   187
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   188
		/* loop through all lines match a[i] in b */
5341
458acf92b49e bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents: 5340
diff changeset
   189
		for (; j < b2; j = b[j].n) {
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   190
			/* does this extend an earlier match? */
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   191
			if (i > a1 && j > b1 && pos[j - 1].pos == i - 1)
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   192
				k = pos[j - 1].len + 1;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   193
			else
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   194
				k = 1;
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   195
			pos[j].pos = i;
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   196
			pos[j].len = k;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   197
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   198
			/* best match so far? */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   199
			if (k > mk) {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   200
				mi = i;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   201
				mj = j;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   202
				mk = k;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   203
			}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   204
		}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   205
	}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   206
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   207
	if (mk) {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   208
		mi = mi - mk + 1;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   209
		mj = mj - mk + 1;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   210
	}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   211
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   212
	/* expand match to include neighboring popular lines */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   213
	while (mi - mb > a1 && mj - mb > b1 &&
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   214
	       a[mi - mb - 1].e == b[mj - mb - 1].e)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   215
		mb++;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   216
	while (mi + mk < a2 && mj + mk < b2 &&
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   217
	       a[mi + mk].e == b[mj + mk].e)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   218
		mk++;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   219
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   220
	*omi = mi - mb;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   221
	*omj = mj - mb;
5341
458acf92b49e bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents: 5340
diff changeset
   222
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   223
	return mk + mb;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   224
}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   225
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   226
static void recurse(struct line *a, struct line *b, struct pos *pos,
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   227
		    int a1, int a2, int b1, int b2, struct hunklist *l)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   228
{
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   229
	int i, j, k;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   230
10500
e96597c8d0ea bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents: 10282
diff changeset
   231
	while (1) {
e96597c8d0ea bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents: 10282
diff changeset
   232
		/* find the longest match in this chunk */
e96597c8d0ea bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents: 10282
diff changeset
   233
		k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);
e96597c8d0ea bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents: 10282
diff changeset
   234
		if (!k)
e96597c8d0ea bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents: 10282
diff changeset
   235
			return;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   236
10500
e96597c8d0ea bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents: 10282
diff changeset
   237
		/* and recurse on the remaining chunks on either side */
e96597c8d0ea bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents: 10282
diff changeset
   238
		recurse(a, b, pos, a1, i, b1, j, l);
e96597c8d0ea bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents: 10282
diff changeset
   239
		l->head->a1 = i;
e96597c8d0ea bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents: 10282
diff changeset
   240
		l->head->a2 = i + k;
e96597c8d0ea bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents: 10282
diff changeset
   241
		l->head->b1 = j;
e96597c8d0ea bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents: 10282
diff changeset
   242
		l->head->b2 = j + k;
e96597c8d0ea bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents: 10282
diff changeset
   243
		l->head++;
e96597c8d0ea bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents: 10282
diff changeset
   244
		/* tail-recursion didn't happen, so doing equivalent iteration */
e96597c8d0ea bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents: 10282
diff changeset
   245
		a1 = i + k;
e96597c8d0ea bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents: 10282
diff changeset
   246
		b1 = j + k;
e96597c8d0ea bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents: 10282
diff changeset
   247
	}
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   248
}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   249
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   250
static struct hunklist diff(struct line *a, int an, struct line *b, int bn)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   251
{
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   252
	struct hunklist l;
7104
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
   253
	struct hunk *curr;
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   254
	struct pos *pos;
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   255
	int t;
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   256
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   257
	/* allocate and fill arrays */
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   258
	t = equatelines(a, an, b, bn);
5571
f84bb2e1cc3a fix calloc(0, ...) issue
Jim Hague <jim.hague@acm.org>
parents: 5452
diff changeset
   259
	pos = (struct pos *)calloc(bn ? bn : 1, sizeof(struct pos));
827
a61728b58dc0 Fix array overflow bug in bdiff
"Wallace, Eric S" <eric.s.wallace@intel.com>
parents: 597
diff changeset
   260
	/* we can't have more matches than lines in the shorter file */
1978
10606ee61107 do proper typecasting on malloc() and calloc() calls
TK Soh <teekaysoh@yahoo.com>
parents: 1759
diff changeset
   261
	l.head = l.base = (struct hunk *)malloc(sizeof(struct hunk) *
10606ee61107 do proper typecasting on malloc() and calloc() calls
TK Soh <teekaysoh@yahoo.com>
parents: 1759
diff changeset
   262
	                                        ((an<bn ? an:bn) + 1));
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   263
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   264
	if (pos && l.base && t) {
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   265
		/* generate the matching block list */
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   266
		recurse(a, b, pos, 0, an, 0, bn, &l);
4131
1ca664c964e0 don't return uninitialized memory from bdiff.blocks()
Erling Ellingsen <erlingalf@gmail.com>
parents: 3561
diff changeset
   267
		l.head->a1 = l.head->a2 = an;
1ca664c964e0 don't return uninitialized memory from bdiff.blocks()
Erling Ellingsen <erlingalf@gmail.com>
parents: 3561
diff changeset
   268
		l.head->b1 = l.head->b2 = bn;
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   269
		l.head++;
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   270
	}
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   271
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   272
	free(pos);
7104
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
   273
7625
930a2be7e875 bdiff: add comment about normalization
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7189
diff changeset
   274
	/* normalize the hunk list, try to push each hunk towards the end */
7104
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
   275
	for (curr = l.base; curr != l.head; curr++) {
10282
08a0f04b56bd many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents: 9534
diff changeset
   276
		struct hunk *next = curr + 1;
7104
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
   277
		int shift = 0;
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
   278
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
   279
		if (next == l.head)
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
   280
			break;
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
   281
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
   282
		if (curr->a2 == next->a1)
10282
08a0f04b56bd many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents: 9534
diff changeset
   283
			while (curr->a2 + shift < an && curr->b2 + shift < bn
08a0f04b56bd many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents: 9534
diff changeset
   284
			       && !cmp(a + curr->a2 + shift,
08a0f04b56bd many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents: 9534
diff changeset
   285
				       b + curr->b2 + shift))
7104
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
   286
				shift++;
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
   287
		else if (curr->b2 == next->b1)
10282
08a0f04b56bd many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents: 9534
diff changeset
   288
			while (curr->b2 + shift < bn && curr->a2 + shift < an
08a0f04b56bd many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents: 9534
diff changeset
   289
			       && !cmp(b + curr->b2 + shift,
08a0f04b56bd many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents: 9534
diff changeset
   290
				       a + curr->a2 + shift))
7104
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
   291
				shift++;
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
   292
		if (!shift)
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
   293
			continue;
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
   294
		curr->b2 += shift;
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
   295
		next->b1 += shift;
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
   296
		curr->a2 += shift;
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
   297
		next->a1 += shift;
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
   298
	}
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
   299
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   300
	return l;
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   301
}
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   302
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   303
static PyObject *blocks(PyObject *self, PyObject *args)
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   304
{
435
e731d25ddab2 Fix a compile warning for bdiff
mpm@selenic.com
parents: 433
diff changeset
   305
	PyObject *sa, *sb, *rl = NULL, *m;
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   306
	struct line *a, *b;
970
bf375e93073b Fix possible unitialized variable warnings
mpm@selenic.com
parents: 896
diff changeset
   307
	struct hunklist l = {NULL, NULL};
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   308
	struct hunk *h;
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   309
	int an, bn, pos = 0;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   310
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   311
	if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   312
		return NULL;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   313
11364
0044193a1c45 bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10500
diff changeset
   314
	an = splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a);
0044193a1c45 bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10500
diff changeset
   315
	bn = splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b);
0044193a1c45 bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10500
diff changeset
   316
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   317
	if (!a || !b)
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   318
		goto nomem;
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   319
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   320
	l = diff(a, an, b, bn);
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   321
	rl = PyList_New(l.head - l.base);
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   322
	if (!l.head || !rl)
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   323
		goto nomem;
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   324
1542
8e80eefb3de6 made C src formatting more consistent
twaldmann@thinkmo.de
parents: 1397
diff changeset
   325
	for (h = l.base; h != l.head; h++) {
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   326
		m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   327
		PyList_SetItem(rl, pos, m);
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   328
		pos++;
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   329
	}
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   330
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   331
nomem:
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   332
	free(a);
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   333
	free(b);
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   334
	free(l.base);
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   335
	return rl ? rl : PyErr_NoMemory();
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   336
}
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   337
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   338
static PyObject *bdiff(PyObject *self, PyObject *args)
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   339
{
3335
9061613c1593 Teach bdiff to support buffer objects
Brendan Cully <brendan@kublai.com>
parents: 2859
diff changeset
   340
	char *sa, *sb;
9061613c1593 Teach bdiff to support buffer objects
Brendan Cully <brendan@kublai.com>
parents: 2859
diff changeset
   341
	PyObject *result = NULL;
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   342
	struct line *al, *bl;
970
bf375e93073b Fix possible unitialized variable warnings
mpm@selenic.com
parents: 896
diff changeset
   343
	struct hunklist l = {NULL, NULL};
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   344
	struct hunk *h;
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   345
	char encode[12], *rb;
3335
9061613c1593 Teach bdiff to support buffer objects
Brendan Cully <brendan@kublai.com>
parents: 2859
diff changeset
   346
	int an, bn, len = 0, la, lb;
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   347
3369
4bad632913d8 python2.5 PyArg_ParseTuple fix
Alexis S. L. Carvalho <alexis@cecm.usp.br>
parents: 3335
diff changeset
   348
	if (!PyArg_ParseTuple(args, "s#s#:bdiff", &sa, &la, &sb, &lb))
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   349
		return NULL;
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   350
3335
9061613c1593 Teach bdiff to support buffer objects
Brendan Cully <brendan@kublai.com>
parents: 2859
diff changeset
   351
	an = splitlines(sa, la, &al);
9061613c1593 Teach bdiff to support buffer objects
Brendan Cully <brendan@kublai.com>
parents: 2859
diff changeset
   352
	bn = splitlines(sb, lb, &bl);
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   353
	if (!al || !bl)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   354
		goto nomem;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   355
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   356
	l = diff(al, an, bl, bn);
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   357
	if (!l.head)
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   358
		goto nomem;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   359
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   360
	/* calculate length of output */
3335
9061613c1593 Teach bdiff to support buffer objects
Brendan Cully <brendan@kublai.com>
parents: 2859
diff changeset
   361
	la = lb = 0;
1542
8e80eefb3de6 made C src formatting more consistent
twaldmann@thinkmo.de
parents: 1397
diff changeset
   362
	for (h = l.base; h != l.head; h++) {
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   363
		if (h->a1 != la || h->b1 != lb)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   364
			len += 12 + bl[h->b1].l - bl[lb].l;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   365
		la = h->a2;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   366
		lb = h->b2;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   367
	}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   368
11364
0044193a1c45 bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10500
diff changeset
   369
	result = PyBytes_FromStringAndSize(NULL, len);
0044193a1c45 bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10500
diff changeset
   370
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   371
	if (!result)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   372
		goto nomem;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   373
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   374
	/* build binary patch */
11364
0044193a1c45 bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10500
diff changeset
   375
	rb = PyBytes_AsString(result);
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   376
	la = lb = 0;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   377
1542
8e80eefb3de6 made C src formatting more consistent
twaldmann@thinkmo.de
parents: 1397
diff changeset
   378
	for (h = l.base; h != l.head; h++) {
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   379
		if (h->a1 != la || h->b1 != lb) {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   380
			len = bl[h->b1].l - bl[lb].l;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   381
			*(uint32_t *)(encode)     = htonl(al[la].l - al->l);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   382
			*(uint32_t *)(encode + 4) = htonl(al[h->a1].l - al->l);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   383
			*(uint32_t *)(encode + 8) = htonl(len);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   384
			memcpy(rb, encode, 12);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   385
			memcpy(rb + 12, bl[lb].l, len);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   386
			rb += 12 + len;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   387
		}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   388
		la = h->a2;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   389
		lb = h->b2;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   390
	}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   391
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   392
nomem:
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   393
	free(al);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   394
	free(bl);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   395
	free(l.base);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   396
	return result ? result : PyErr_NoMemory();
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   397
}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   398
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   399
static char mdiff_doc[] = "Efficient binary diff.";
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   401
static PyMethodDef methods[] = {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   402
	{"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   403
	{"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   404
	{NULL, NULL}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   405
};
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   406
11364
0044193a1c45 bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10500
diff changeset
   407
#ifdef IS_PY3K
0044193a1c45 bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10500
diff changeset
   408
static struct PyModuleDef bdiff_module = {
0044193a1c45 bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10500
diff changeset
   409
	PyModuleDef_HEAD_INIT,
0044193a1c45 bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10500
diff changeset
   410
	"bdiff",
0044193a1c45 bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10500
diff changeset
   411
	mdiff_doc,
0044193a1c45 bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10500
diff changeset
   412
	-1,
0044193a1c45 bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10500
diff changeset
   413
	methods
0044193a1c45 bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10500
diff changeset
   414
};
0044193a1c45 bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10500
diff changeset
   415
0044193a1c45 bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10500
diff changeset
   416
PyMODINIT_FUNC PyInit_bdiff(void)
0044193a1c45 bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10500
diff changeset
   417
{
0044193a1c45 bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10500
diff changeset
   418
	return PyModule_Create(&bdiff_module);
0044193a1c45 bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10500
diff changeset
   419
}
0044193a1c45 bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10500
diff changeset
   420
#else
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   421
PyMODINIT_FUNC initbdiff(void)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   422
{
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   423
	Py_InitModule3("bdiff", methods, mdiff_doc);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   424
}
11364
0044193a1c45 bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10500
diff changeset
   425
#endif
1542
8e80eefb3de6 made C src formatting more consistent
twaldmann@thinkmo.de
parents: 1397
diff changeset
   426