annotate mercurial/bdiff.c @ 9485:7d6ac5d7917c

test-gendoc: add tests for all languages This ensures that we catch errors in the reST syntax early and for all languages. The only change needed in gendoc.py was to correct the computation of section underlines for Asian languages.
author Martin Geisler <mg@lazybytes.net>
date Sun, 27 Sep 2009 10:12:02 +0200
parents 16f6c13706df
children 8e202431d620
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
1 /*
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
2 bdiff.c - efficient binary diff extension for Mercurial
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
3
2859
345bac2bc4ec update copyrights.
Vadim Gelfer <vadim.gelfer@gmail.com>
parents: 2600
diff changeset
4 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
5
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
6 This software may be used and distributed according to the terms of
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
7 the GNU General Public License, incorporated herein by reference.
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
8
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
9 Based roughly on Python difflib
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
10 */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
11
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
12 #include <Python.h>
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
13 #include <stdlib.h>
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
14 #include <string.h>
5341
458acf92b49e bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents: 5340
diff changeset
15 #include <limits.h>
867
0cd2ee61b10a Allow Mercurial to build on HP-UX 11
tksoh@users.sourceforge.net
parents: 839
diff changeset
16
3561
8c617d48564a add AIX to the list of compilers that don't have inline keyword
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 3369
diff changeset
17 #if defined __hpux || defined __SUNPRO_C || defined _AIX
1759
5afd459db177 Sunpro compiler patch
Fabian Otto <sigsegv@alchiba.ni.cs.tu-berlin.de>
parents: 1542
diff changeset
18 # define inline
2600
c4325f0a9b91 clean up trailing white space.
Vadim Gelfer <vadim.gelfer@gmail.com>
parents: 2577
diff changeset
19 #endif
1759
5afd459db177 Sunpro compiler patch
Fabian Otto <sigsegv@alchiba.ni.cs.tu-berlin.de>
parents: 1542
diff changeset
20
8858
16f6c13706df bdiff: fix compile with GCC -ansi (issue1690)
Matt Mackall <mpm@selenic.com>
parents: 7625
diff changeset
21 #ifdef __linux
16f6c13706df bdiff: fix compile with GCC -ansi (issue1690)
Matt Mackall <mpm@selenic.com>
parents: 7625
diff changeset
22 # define inline __inline
16f6c13706df bdiff: fix compile with GCC -ansi (issue1690)
Matt Mackall <mpm@selenic.com>
parents: 7625
diff changeset
23 #endif
16f6c13706df bdiff: fix compile with GCC -ansi (issue1690)
Matt Mackall <mpm@selenic.com>
parents: 7625
diff changeset
24
411
9e9f7ab43ce2 Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents: 400
diff changeset
25 #ifdef _WIN32
551
b460a2fd8bb7 [PATCH] bdiff/mpatch under MSVC
mpm@selenic.com
parents: 510
diff changeset
26 #ifdef _MSC_VER
b460a2fd8bb7 [PATCH] bdiff/mpatch under MSVC
mpm@selenic.com
parents: 510
diff changeset
27 #define inline __inline
b460a2fd8bb7 [PATCH] bdiff/mpatch under MSVC
mpm@selenic.com
parents: 510
diff changeset
28 typedef unsigned long uint32_t;
b460a2fd8bb7 [PATCH] bdiff/mpatch under MSVC
mpm@selenic.com
parents: 510
diff changeset
29 #else
510
7f3fc8fd427e More fiddling with uint32_t includes for extensions
mpm@selenic.com
parents: 495
diff changeset
30 #include <stdint.h>
551
b460a2fd8bb7 [PATCH] bdiff/mpatch under MSVC
mpm@selenic.com
parents: 510
diff changeset
31 #endif
411
9e9f7ab43ce2 Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents: 400
diff changeset
32 static uint32_t htonl(uint32_t x)
9e9f7ab43ce2 Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents: 400
diff changeset
33 {
9e9f7ab43ce2 Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents: 400
diff changeset
34 return ((x & 0x000000ffUL) << 24) |
9e9f7ab43ce2 Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents: 400
diff changeset
35 ((x & 0x0000ff00UL) << 8) |
9e9f7ab43ce2 Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents: 400
diff changeset
36 ((x & 0x00ff0000UL) >> 8) |
9e9f7ab43ce2 Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents: 400
diff changeset
37 ((x & 0xff000000UL) >> 24);
9e9f7ab43ce2 Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents: 400
diff changeset
38 }
9e9f7ab43ce2 Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents: 400
diff changeset
39 #else
510
7f3fc8fd427e More fiddling with uint32_t includes for extensions
mpm@selenic.com
parents: 495
diff changeset
40 #include <sys/types.h>
7036
bfad9865b1dc allow Mercurial to compile on Haiku
Scott McCreary <scottmc2@gmail.com>
parents: 5620
diff changeset
41 #if defined __BEOS__ && !defined __HAIKU__
4073
95ffa36d1d2a BeOS compatibility support
Andrew Bachmann <andrewbachmann@gmail.com>
parents: 3561
diff changeset
42 #include <ByteOrder.h>
95ffa36d1d2a BeOS compatibility support
Andrew Bachmann <andrewbachmann@gmail.com>
parents: 3561
diff changeset
43 #else
597
e530637ea060 [PATCH] use <arpa/inet.h> instead of <netinet/in.h> for ntohl/htonl
mpm@selenic.com
parents: 553
diff changeset
44 #include <arpa/inet.h>
4073
95ffa36d1d2a BeOS compatibility support
Andrew Bachmann <andrewbachmann@gmail.com>
parents: 3561
diff changeset
45 #endif
2543
860e9c83fc59 Include inttypes.h instead of stdint.h (fixes issue299)
Thomas Arendsen Hein <thomas@intevation.de>
parents: 2483
diff changeset
46 #include <inttypes.h>
411
9e9f7ab43ce2 Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents: 400
diff changeset
47 #endif
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
48
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
49 struct line {
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
50 int h, len, n, e;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
51 const char *l;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
52 };
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
53
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
54 struct pos {
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
55 int pos, len;
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
56 };
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
57
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
58 struct hunk {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
59 int a1, a2, b1, b2;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
60 };
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
61
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
62 struct hunklist {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
63 struct hunk *base, *head;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
64 };
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
65
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
66 int splitlines(const char *a, int len, struct line **lr)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
67 {
5342
d0c48891dd4a bdiff: switch to lyhash
Matt Mackall <mpm@selenic.com>
parents: 5341
diff changeset
68 int h, i;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
69 const char *p, *b = a;
5340
5737845fd974 bdiff: simple splitlines optimization
Christoph Spiel <cspiel@freenet.de>
parents: 5339
diff changeset
70 const char * const plast = a + len - 1;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
71 struct line *l;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
72
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
73 /* count the lines */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
74 i = 1; /* extra line for sentinel */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
75 for (p = a; p < a + len; p++)
5340
5737845fd974 bdiff: simple splitlines optimization
Christoph Spiel <cspiel@freenet.de>
parents: 5339
diff changeset
76 if (*p == '\n' || p == plast)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
77 i++;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
78
1978
10606ee61107 do proper typecasting on malloc() and calloc() calls
TK Soh <teekaysoh@yahoo.com>
parents: 1759
diff changeset
79 *lr = l = (struct line *)malloc(sizeof(struct line) * i);
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
80 if (!l)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
81 return -1;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
82
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
83 /* build the line array and calculate hashes */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
84 h = 0;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
85 for (p = a; p < a + len; p++) {
5342
d0c48891dd4a bdiff: switch to lyhash
Matt Mackall <mpm@selenic.com>
parents: 5341
diff changeset
86 /* Leonid Yuriev's hash */
7187
a79d3f535809 spaces->tabs in one line of a C extension for consistency
Thomas Arendsen Hein <thomas@intevation.de>
parents: 7036
diff changeset
87 h = (h * 1664525) + *p + 1013904223;
5342
d0c48891dd4a bdiff: switch to lyhash
Matt Mackall <mpm@selenic.com>
parents: 5341
diff changeset
88
5340
5737845fd974 bdiff: simple splitlines optimization
Christoph Spiel <cspiel@freenet.de>
parents: 5339
diff changeset
89 if (*p == '\n' || p == plast) {
5342
d0c48891dd4a bdiff: switch to lyhash
Matt Mackall <mpm@selenic.com>
parents: 5341
diff changeset
90 l->h = h;
d0c48891dd4a bdiff: switch to lyhash
Matt Mackall <mpm@selenic.com>
parents: 5341
diff changeset
91 h = 0;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
92 l->len = p - b + 1;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
93 l->l = b;
5341
458acf92b49e bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents: 5340
diff changeset
94 l->n = INT_MAX;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
95 l++;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
96 b = p + 1;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
97 }
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
98 }
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
99
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
100 /* set up a sentinel */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
101 l->h = l->len = 0;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
102 l->l = a + len;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
103 return i - 1;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
104 }
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
105
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
106 int inline cmp(struct line *a, struct line *b)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
107 {
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
108 return a->h != b->h || a->len != b->len || memcmp(a->l, b->l, a->len);
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
109 }
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
110
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
111 static int equatelines(struct line *a, int an, struct line *b, int bn)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
112 {
5452
82b4ff3abbcd bdiff: tweaks for large files
Matt Mackall <mpm@selenic.com>
parents: 5342
diff changeset
113 int i, j, buckets = 1, t, scale;
82b4ff3abbcd bdiff: tweaks for large files
Matt Mackall <mpm@selenic.com>
parents: 5342
diff changeset
114 struct pos *h = NULL;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
115
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
116 /* build a hash table of the next highest power of 2 */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
117 while (buckets < bn + 1)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
118 buckets *= 2;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
119
5339
058e93c3d07d I have spotted the biggest bottleneck in "bdiff.c". Actually it was
Christoph Spiel <cspiel@freenet.de>
parents: 4134
diff changeset
120 /* try to allocate a large hash table to avoid collisions */
5452
82b4ff3abbcd bdiff: tweaks for large files
Matt Mackall <mpm@selenic.com>
parents: 5342
diff changeset
121 for (scale = 4; scale; scale /= 2) {
5339
058e93c3d07d I have spotted the biggest bottleneck in "bdiff.c". Actually it was
Christoph Spiel <cspiel@freenet.de>
parents: 4134
diff changeset
122 h = (struct pos *)malloc(scale * buckets * sizeof(struct pos));
5452
82b4ff3abbcd bdiff: tweaks for large files
Matt Mackall <mpm@selenic.com>
parents: 5342
diff changeset
123 if (h)
82b4ff3abbcd bdiff: tweaks for large files
Matt Mackall <mpm@selenic.com>
parents: 5342
diff changeset
124 break;
82b4ff3abbcd bdiff: tweaks for large files
Matt Mackall <mpm@selenic.com>
parents: 5342
diff changeset
125 }
5339
058e93c3d07d I have spotted the biggest bottleneck in "bdiff.c". Actually it was
Christoph Spiel <cspiel@freenet.de>
parents: 4134
diff changeset
126
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
127 if (!h)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
128 return 0;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
129
5339
058e93c3d07d I have spotted the biggest bottleneck in "bdiff.c". Actually it was
Christoph Spiel <cspiel@freenet.de>
parents: 4134
diff changeset
130 buckets = buckets * scale - 1;
058e93c3d07d I have spotted the biggest bottleneck in "bdiff.c". Actually it was
Christoph Spiel <cspiel@freenet.de>
parents: 4134
diff changeset
131
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
132 /* clear the hash table */
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
133 for (i = 0; i <= buckets; i++) {
5341
458acf92b49e bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents: 5340
diff changeset
134 h[i].pos = INT_MAX;
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
135 h[i].len = 0;
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
136 }
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
137
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
138 /* add lines to the hash table chains */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
139 for (i = bn - 1; i >= 0; i--) {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
140 /* find the equivalence class */
5341
458acf92b49e bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents: 5340
diff changeset
141 for (j = b[i].h & buckets; h[j].pos != INT_MAX;
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
142 j = (j + 1) & buckets)
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
143 if (!cmp(b + i, b + h[j].pos))
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
144 break;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
145
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
146 /* add to the head of the equivalence class */
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
147 b[i].n = h[j].pos;
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
148 b[i].e = j;
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
149 h[j].pos = i;
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
150 h[j].len++; /* keep track of popularity */
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
151 }
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
152
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
153 /* compute popularity threshold */
5452
82b4ff3abbcd bdiff: tweaks for large files
Matt Mackall <mpm@selenic.com>
parents: 5342
diff changeset
154 t = (bn >= 4000) ? bn / 1000 : bn + 1;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
155
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
156 /* match items in a to their equivalence class in b */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
157 for (i = 0; i < an; i++) {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
158 /* find the equivalence class */
5341
458acf92b49e bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents: 5340
diff changeset
159 for (j = a[i].h & buckets; h[j].pos != INT_MAX;
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
160 j = (j + 1) & buckets)
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
161 if (!cmp(a + i, b + h[j].pos))
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
162 break;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
163
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
164 a[i].e = j; /* use equivalence class for quick compare */
1542
8e80eefb3de6 made C src formatting more consistent
twaldmann@thinkmo.de
parents: 1397
diff changeset
165 if (h[j].len <= t)
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
166 a[i].n = h[j].pos; /* point to head of match list */
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
167 else
5341
458acf92b49e bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents: 5340
diff changeset
168 a[i].n = INT_MAX; /* too popular */
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
169 }
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
170
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
171 /* discard hash tables */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
172 free(h);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
173 return 1;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
174 }
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
175
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
176 static int longest_match(struct line *a, struct line *b, struct pos *pos,
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
177 int a1, int a2, int b1, int b2, int *omi, int *omj)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
178 {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
179 int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
180
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
181 for (i = a1; i < a2; i++) {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
182 /* skip things before the current block */
5341
458acf92b49e bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents: 5340
diff changeset
183 for (j = a[i].n; j < b1; j = b[j].n)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
184 ;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
185
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
186 /* loop through all lines match a[i] in b */
5341
458acf92b49e bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents: 5340
diff changeset
187 for (; j < b2; j = b[j].n) {
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
188 /* does this extend an earlier match? */
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
189 if (i > a1 && j > b1 && pos[j - 1].pos == i - 1)
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
190 k = pos[j - 1].len + 1;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
191 else
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
192 k = 1;
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
193 pos[j].pos = i;
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
194 pos[j].len = k;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
195
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
196 /* best match so far? */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
197 if (k > mk) {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
198 mi = i;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
199 mj = j;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
200 mk = k;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
201 }
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
202 }
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
203 }
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
204
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
205 if (mk) {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
206 mi = mi - mk + 1;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
207 mj = mj - mk + 1;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
208 }
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
209
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
210 /* expand match to include neighboring popular lines */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
211 while (mi - mb > a1 && mj - mb > b1 &&
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
212 a[mi - mb - 1].e == b[mj - mb - 1].e)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
213 mb++;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
214 while (mi + mk < a2 && mj + mk < b2 &&
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
215 a[mi + mk].e == b[mj + mk].e)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
216 mk++;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
217
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
218 *omi = mi - mb;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
219 *omj = mj - mb;
5341
458acf92b49e bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents: 5340
diff changeset
220
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
221 return mk + mb;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
222 }
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
223
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
224 static void recurse(struct line *a, struct line *b, struct pos *pos,
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
225 int a1, int a2, int b1, int b2, struct hunklist *l)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
226 {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
227 int i, j, k;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
228
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
229 /* find the longest match in this chunk */
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
230 k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
231 if (!k)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
232 return;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
233
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
234 /* and recurse on the remaining chunks on either side */
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
235 recurse(a, b, pos, a1, i, b1, j, l);
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
236 l->head->a1 = i;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
237 l->head->a2 = i + k;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
238 l->head->b1 = j;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
239 l->head->b2 = j + k;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
240 l->head++;
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
241 recurse(a, b, pos, i + k, a2, j + k, b2, l);
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
242 }
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
243
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
244 static struct hunklist diff(struct line *a, int an, struct line *b, int bn)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
245 {
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
246 struct hunklist l;
7104
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
247 struct hunk *curr;
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
248 struct pos *pos;
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
249 int t;
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
250
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
251 /* allocate and fill arrays */
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
252 t = equatelines(a, an, b, bn);
5571
f84bb2e1cc3a fix calloc(0, ...) issue
Jim Hague <jim.hague@acm.org>
parents: 5452
diff changeset
253 pos = (struct pos *)calloc(bn ? bn : 1, sizeof(struct pos));
827
a61728b58dc0 Fix array overflow bug in bdiff
"Wallace, Eric S" <eric.s.wallace@intel.com>
parents: 597
diff changeset
254 /* we can't have more matches than lines in the shorter file */
1978
10606ee61107 do proper typecasting on malloc() and calloc() calls
TK Soh <teekaysoh@yahoo.com>
parents: 1759
diff changeset
255 l.head = l.base = (struct hunk *)malloc(sizeof(struct hunk) *
10606ee61107 do proper typecasting on malloc() and calloc() calls
TK Soh <teekaysoh@yahoo.com>
parents: 1759
diff changeset
256 ((an<bn ? an:bn) + 1));
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
257
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
258 if (pos && l.base && t) {
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
259 /* generate the matching block list */
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
260 recurse(a, b, pos, 0, an, 0, bn, &l);
4131
1ca664c964e0 don't return uninitialized memory from bdiff.blocks()
Erling Ellingsen <erlingalf@gmail.com>
parents: 3561
diff changeset
261 l.head->a1 = l.head->a2 = an;
1ca664c964e0 don't return uninitialized memory from bdiff.blocks()
Erling Ellingsen <erlingalf@gmail.com>
parents: 3561
diff changeset
262 l.head->b1 = l.head->b2 = bn;
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
263 l.head++;
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
264 }
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
265
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
266 free(pos);
7104
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
267
7625
930a2be7e875 bdiff: add comment about normalization
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7189
diff changeset
268 /* normalize the hunk list, try to push each hunk towards the end */
7104
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
269 for (curr = l.base; curr != l.head; curr++) {
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
270 struct hunk *next = curr+1;
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
271 int shift = 0;
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
272
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
273 if (next == l.head)
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
274 break;
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
275
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
276 if (curr->a2 == next->a1)
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
277 while (curr->a2+shift < an && curr->b2+shift < bn
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
278 && !cmp(a+curr->a2+shift, b+curr->b2+shift))
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
279 shift++;
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
280 else if (curr->b2 == next->b1)
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
281 while (curr->b2+shift < bn && curr->a2+shift < an
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
282 && !cmp(b+curr->b2+shift, a+curr->a2+shift))
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
283 shift++;
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
284 if (!shift)
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
285 continue;
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
286 curr->b2 += shift;
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
287 next->b1 += shift;
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
288 curr->a2 += shift;
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
289 next->a1 += shift;
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
290 }
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
291
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
292 return l;
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
293 }
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
294
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
295 static PyObject *blocks(PyObject *self, PyObject *args)
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
296 {
435
e731d25ddab2 Fix a compile warning for bdiff
mpm@selenic.com
parents: 433
diff changeset
297 PyObject *sa, *sb, *rl = NULL, *m;
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
298 struct line *a, *b;
970
bf375e93073b Fix possible unitialized variable warnings
mpm@selenic.com
parents: 896
diff changeset
299 struct hunklist l = {NULL, NULL};
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
300 struct hunk *h;
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
301 int an, bn, pos = 0;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
302
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
303 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
304 return NULL;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
305
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
306 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &a);
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
307 bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &b);
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
308 if (!a || !b)
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
309 goto nomem;
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
310
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
311 l = diff(a, an, b, bn);
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
312 rl = PyList_New(l.head - l.base);
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
313 if (!l.head || !rl)
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
314 goto nomem;
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
315
1542
8e80eefb3de6 made C src formatting more consistent
twaldmann@thinkmo.de
parents: 1397
diff changeset
316 for (h = l.base; h != l.head; h++) {
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
317 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
318 PyList_SetItem(rl, pos, m);
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
319 pos++;
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
320 }
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
321
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
322 nomem:
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
323 free(a);
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
324 free(b);
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
325 free(l.base);
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
326 return rl ? rl : PyErr_NoMemory();
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
327 }
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
328
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
329 static PyObject *bdiff(PyObject *self, PyObject *args)
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
330 {
3335
9061613c1593 Teach bdiff to support buffer objects
Brendan Cully <brendan@kublai.com>
parents: 2859
diff changeset
331 char *sa, *sb;
9061613c1593 Teach bdiff to support buffer objects
Brendan Cully <brendan@kublai.com>
parents: 2859
diff changeset
332 PyObject *result = NULL;
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
333 struct line *al, *bl;
970
bf375e93073b Fix possible unitialized variable warnings
mpm@selenic.com
parents: 896
diff changeset
334 struct hunklist l = {NULL, NULL};
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
335 struct hunk *h;
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
336 char encode[12], *rb;
3335
9061613c1593 Teach bdiff to support buffer objects
Brendan Cully <brendan@kublai.com>
parents: 2859
diff changeset
337 int an, bn, len = 0, la, lb;
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
338
3369
4bad632913d8 python2.5 PyArg_ParseTuple fix
Alexis S. L. Carvalho <alexis@cecm.usp.br>
parents: 3335
diff changeset
339 if (!PyArg_ParseTuple(args, "s#s#:bdiff", &sa, &la, &sb, &lb))
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
340 return NULL;
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
341
3335
9061613c1593 Teach bdiff to support buffer objects
Brendan Cully <brendan@kublai.com>
parents: 2859
diff changeset
342 an = splitlines(sa, la, &al);
9061613c1593 Teach bdiff to support buffer objects
Brendan Cully <brendan@kublai.com>
parents: 2859
diff changeset
343 bn = splitlines(sb, lb, &bl);
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
344 if (!al || !bl)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
345 goto nomem;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
346
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
347 l = diff(al, an, bl, bn);
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
348 if (!l.head)
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
349 goto nomem;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
350
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
351 /* calculate length of output */
3335
9061613c1593 Teach bdiff to support buffer objects
Brendan Cully <brendan@kublai.com>
parents: 2859
diff changeset
352 la = lb = 0;
1542
8e80eefb3de6 made C src formatting more consistent
twaldmann@thinkmo.de
parents: 1397
diff changeset
353 for (h = l.base; h != l.head; h++) {
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
354 if (h->a1 != la || h->b1 != lb)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
355 len += 12 + bl[h->b1].l - bl[lb].l;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
356 la = h->a2;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
357 lb = h->b2;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
358 }
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
359
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
360 result = PyString_FromStringAndSize(NULL, len);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
361 if (!result)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
362 goto nomem;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
363
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
364 /* build binary patch */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
365 rb = PyString_AsString(result);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
366 la = lb = 0;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
367
1542
8e80eefb3de6 made C src formatting more consistent
twaldmann@thinkmo.de
parents: 1397
diff changeset
368 for (h = l.base; h != l.head; h++) {
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
369 if (h->a1 != la || h->b1 != lb) {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
370 len = bl[h->b1].l - bl[lb].l;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
371 *(uint32_t *)(encode) = htonl(al[la].l - al->l);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
372 *(uint32_t *)(encode + 4) = htonl(al[h->a1].l - al->l);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
373 *(uint32_t *)(encode + 8) = htonl(len);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
374 memcpy(rb, encode, 12);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
375 memcpy(rb + 12, bl[lb].l, len);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
376 rb += 12 + len;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
377 }
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
378 la = h->a2;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
379 lb = h->b2;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
380 }
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
381
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
382 nomem:
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
383 free(al);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
384 free(bl);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
385 free(l.base);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
386 return result ? result : PyErr_NoMemory();
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
387 }
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
388
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
389 static char mdiff_doc[] = "Efficient binary diff.";
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
390
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
391 static PyMethodDef methods[] = {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
392 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
393 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
394 {NULL, NULL}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
395 };
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
396
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
397 PyMODINIT_FUNC initbdiff(void)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
398 {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
399 Py_InitModule3("bdiff", methods, mdiff_doc);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
400 }
1542
8e80eefb3de6 made C src formatting more consistent
twaldmann@thinkmo.de
parents: 1397
diff changeset
401