Mercurial > hg
annotate mercurial/bdiff.c @ 12866:eddc20306ab6 stable
encoding: default ambiguous character to narrow
The current implementation of colwidth was treating 'A'mbiguous
characters as wide, which was incorrect in a non-East Asian context.
As per http://unicode.org/reports/tr11/#Recommendations, we should
instead default to 'narrow' if we don't know better. As character
width is dependent on the particular font used and we have no idea
what fonts are in use, this recommendation applies.
This introduces HGENCODINGAMBIGUOUS to get the old behavior back.
author | Matt Mackall <mpm@selenic.com> |
---|---|
date | Wed, 27 Oct 2010 15:35:21 -0500 |
parents | 0044193a1c45 |
children | faee0ffbc24b |
rev | line source |
---|---|
400
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
1 /* |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
2 bdiff.c - efficient binary diff extension for Mercurial |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
3 |
2859 | 4 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com> |
400
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
5 |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
6 This software may be used and distributed according to the terms of |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
7 the GNU General Public License, incorporated herein by reference. |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
8 |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
9 Based roughly on Python difflib |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
10 */ |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
11 |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
12 #include <Python.h> |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
13 #include <stdlib.h> |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
14 #include <string.h> |
5341
458acf92b49e
bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents:
5340
diff
changeset
|
15 #include <limits.h> |
867
0cd2ee61b10a
Allow Mercurial to build on HP-UX 11
tksoh@users.sourceforge.net
parents:
839
diff
changeset
|
16 |
3561
8c617d48564a
add AIX to the list of compilers that don't have inline keyword
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
3369
diff
changeset
|
17 #if defined __hpux || defined __SUNPRO_C || defined _AIX |
10282
08a0f04b56bd
many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents:
9534
diff
changeset
|
18 #define inline |
2600
c4325f0a9b91
clean up trailing white space.
Vadim Gelfer <vadim.gelfer@gmail.com>
parents:
2577
diff
changeset
|
19 #endif |
1759
5afd459db177
Sunpro compiler patch
Fabian Otto <sigsegv@alchiba.ni.cs.tu-berlin.de>
parents:
1542
diff
changeset
|
20 |
8858
16f6c13706df
bdiff: fix compile with GCC -ansi (issue1690)
Matt Mackall <mpm@selenic.com>
parents:
7625
diff
changeset
|
21 #ifdef __linux |
10282
08a0f04b56bd
many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents:
9534
diff
changeset
|
22 #define inline __inline |
8858
16f6c13706df
bdiff: fix compile with GCC -ansi (issue1690)
Matt Mackall <mpm@selenic.com>
parents:
7625
diff
changeset
|
23 #endif |
16f6c13706df
bdiff: fix compile with GCC -ansi (issue1690)
Matt Mackall <mpm@selenic.com>
parents:
7625
diff
changeset
|
24 |
411
9e9f7ab43ce2
Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents:
400
diff
changeset
|
25 #ifdef _WIN32 |
551 | 26 #ifdef _MSC_VER |
27 #define inline __inline | |
28 typedef unsigned long uint32_t; | |
29 #else | |
510
7f3fc8fd427e
More fiddling with uint32_t includes for extensions
mpm@selenic.com
parents:
495
diff
changeset
|
30 #include <stdint.h> |
551 | 31 #endif |
411
9e9f7ab43ce2
Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents:
400
diff
changeset
|
32 static uint32_t htonl(uint32_t x) |
9e9f7ab43ce2
Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents:
400
diff
changeset
|
33 { |
9e9f7ab43ce2
Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents:
400
diff
changeset
|
34 return ((x & 0x000000ffUL) << 24) | |
9e9f7ab43ce2
Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents:
400
diff
changeset
|
35 ((x & 0x0000ff00UL) << 8) | |
9e9f7ab43ce2
Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents:
400
diff
changeset
|
36 ((x & 0x00ff0000UL) >> 8) | |
9e9f7ab43ce2
Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents:
400
diff
changeset
|
37 ((x & 0xff000000UL) >> 24); |
9e9f7ab43ce2
Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents:
400
diff
changeset
|
38 } |
9e9f7ab43ce2
Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents:
400
diff
changeset
|
39 #else |
510
7f3fc8fd427e
More fiddling with uint32_t includes for extensions
mpm@selenic.com
parents:
495
diff
changeset
|
40 #include <sys/types.h> |
7036
bfad9865b1dc
allow Mercurial to compile on Haiku
Scott McCreary <scottmc2@gmail.com>
parents:
5620
diff
changeset
|
41 #if defined __BEOS__ && !defined __HAIKU__ |
4073
95ffa36d1d2a
BeOS compatibility support
Andrew Bachmann <andrewbachmann@gmail.com>
parents:
3561
diff
changeset
|
42 #include <ByteOrder.h> |
95ffa36d1d2a
BeOS compatibility support
Andrew Bachmann <andrewbachmann@gmail.com>
parents:
3561
diff
changeset
|
43 #else |
597
e530637ea060
[PATCH] use <arpa/inet.h> instead of <netinet/in.h> for ntohl/htonl
mpm@selenic.com
parents:
553
diff
changeset
|
44 #include <arpa/inet.h> |
4073
95ffa36d1d2a
BeOS compatibility support
Andrew Bachmann <andrewbachmann@gmail.com>
parents:
3561
diff
changeset
|
45 #endif |
2543
860e9c83fc59
Include inttypes.h instead of stdint.h (fixes issue299)
Thomas Arendsen Hein <thomas@intevation.de>
parents:
2483
diff
changeset
|
46 #include <inttypes.h> |
411
9e9f7ab43ce2
Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents:
400
diff
changeset
|
47 #endif |
400
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
48 |
11364
0044193a1c45
bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10500
diff
changeset
|
49 #include "util.h" |
0044193a1c45
bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10500
diff
changeset
|
50 |
400
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
51 struct line { |
433
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
52 int h, len, n, e; |
400
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
53 const char *l; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
54 }; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
55 |
474 | 56 struct pos { |
57 int pos, len; | |
58 }; | |
59 | |
400
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
60 struct hunk { |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
61 int a1, a2, b1, b2; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
62 }; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
63 |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
64 struct hunklist { |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
65 struct hunk *base, *head; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
66 }; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
67 |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
68 int splitlines(const char *a, int len, struct line **lr) |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
69 { |
5342 | 70 int h, i; |
400
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
71 const char *p, *b = a; |
5340
5737845fd974
bdiff: simple splitlines optimization
Christoph Spiel <cspiel@freenet.de>
parents:
5339
diff
changeset
|
72 const char * const plast = a + len - 1; |
400
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
73 struct line *l; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
74 |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
75 /* count the lines */ |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
76 i = 1; /* extra line for sentinel */ |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
77 for (p = a; p < a + len; p++) |
5340
5737845fd974
bdiff: simple splitlines optimization
Christoph Spiel <cspiel@freenet.de>
parents:
5339
diff
changeset
|
78 if (*p == '\n' || p == plast) |
400
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
79 i++; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
80 |
1978
10606ee61107
do proper typecasting on malloc() and calloc() calls
TK Soh <teekaysoh@yahoo.com>
parents:
1759
diff
changeset
|
81 *lr = l = (struct line *)malloc(sizeof(struct line) * i); |
400
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
82 if (!l) |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
83 return -1; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
84 |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
85 /* build the line array and calculate hashes */ |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
86 h = 0; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
87 for (p = a; p < a + len; p++) { |
5342 | 88 /* Leonid Yuriev's hash */ |
7187
a79d3f535809
spaces->tabs in one line of a C extension for consistency
Thomas Arendsen Hein <thomas@intevation.de>
parents:
7036
diff
changeset
|
89 h = (h * 1664525) + *p + 1013904223; |
5342 | 90 |
5340
5737845fd974
bdiff: simple splitlines optimization
Christoph Spiel <cspiel@freenet.de>
parents:
5339
diff
changeset
|
91 if (*p == '\n' || p == plast) { |
5342 | 92 l->h = h; |
93 h = 0; | |
400
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
94 l->len = p - b + 1; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
95 l->l = b; |
5341
458acf92b49e
bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents:
5340
diff
changeset
|
96 l->n = INT_MAX; |
400
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
97 l++; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
98 b = p + 1; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
99 } |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
100 } |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
101 |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
102 /* set up a sentinel */ |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
103 l->h = l->len = 0; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
104 l->l = a + len; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
105 return i - 1; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
106 } |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
107 |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
108 int inline cmp(struct line *a, struct line *b) |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
109 { |
433
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
110 return a->h != b->h || a->len != b->len || memcmp(a->l, b->l, a->len); |
400
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
111 } |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
112 |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
113 static int equatelines(struct line *a, int an, struct line *b, int bn) |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
114 { |
5452
82b4ff3abbcd
bdiff: tweaks for large files
Matt Mackall <mpm@selenic.com>
parents:
5342
diff
changeset
|
115 int i, j, buckets = 1, t, scale; |
82b4ff3abbcd
bdiff: tweaks for large files
Matt Mackall <mpm@selenic.com>
parents:
5342
diff
changeset
|
116 struct pos *h = NULL; |
400
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
117 |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
118 /* build a hash table of the next highest power of 2 */ |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
119 while (buckets < bn + 1) |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
120 buckets *= 2; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
121 |
5339
058e93c3d07d
I have spotted the biggest bottleneck in "bdiff.c". Actually it was
Christoph Spiel <cspiel@freenet.de>
parents:
4134
diff
changeset
|
122 /* try to allocate a large hash table to avoid collisions */ |
5452
82b4ff3abbcd
bdiff: tweaks for large files
Matt Mackall <mpm@selenic.com>
parents:
5342
diff
changeset
|
123 for (scale = 4; scale; scale /= 2) { |
5339
058e93c3d07d
I have spotted the biggest bottleneck in "bdiff.c". Actually it was
Christoph Spiel <cspiel@freenet.de>
parents:
4134
diff
changeset
|
124 h = (struct pos *)malloc(scale * buckets * sizeof(struct pos)); |
5452
82b4ff3abbcd
bdiff: tweaks for large files
Matt Mackall <mpm@selenic.com>
parents:
5342
diff
changeset
|
125 if (h) |
82b4ff3abbcd
bdiff: tweaks for large files
Matt Mackall <mpm@selenic.com>
parents:
5342
diff
changeset
|
126 break; |
82b4ff3abbcd
bdiff: tweaks for large files
Matt Mackall <mpm@selenic.com>
parents:
5342
diff
changeset
|
127 } |
5339
058e93c3d07d
I have spotted the biggest bottleneck in "bdiff.c". Actually it was
Christoph Spiel <cspiel@freenet.de>
parents:
4134
diff
changeset
|
128 |
474 | 129 if (!h) |
400
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
130 return 0; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
131 |
5339
058e93c3d07d
I have spotted the biggest bottleneck in "bdiff.c". Actually it was
Christoph Spiel <cspiel@freenet.de>
parents:
4134
diff
changeset
|
132 buckets = buckets * scale - 1; |
058e93c3d07d
I have spotted the biggest bottleneck in "bdiff.c". Actually it was
Christoph Spiel <cspiel@freenet.de>
parents:
4134
diff
changeset
|
133 |
400
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
134 /* clear the hash table */ |
474 | 135 for (i = 0; i <= buckets; i++) { |
5341
458acf92b49e
bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents:
5340
diff
changeset
|
136 h[i].pos = INT_MAX; |
474 | 137 h[i].len = 0; |
138 } | |
400
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
139 |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
140 /* add lines to the hash table chains */ |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
141 for (i = bn - 1; i >= 0; i--) { |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
142 /* find the equivalence class */ |
5341
458acf92b49e
bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents:
5340
diff
changeset
|
143 for (j = b[i].h & buckets; h[j].pos != INT_MAX; |
474 | 144 j = (j + 1) & buckets) |
145 if (!cmp(b + i, b + h[j].pos)) | |
400
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
146 break; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
147 |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
148 /* add to the head of the equivalence class */ |
474 | 149 b[i].n = h[j].pos; |
433
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
150 b[i].e = j; |
474 | 151 h[j].pos = i; |
152 h[j].len++; /* keep track of popularity */ | |
400
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
153 } |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
154 |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
155 /* compute popularity threshold */ |
9534
8e202431d620
bdiff: gradually enable the popularity hack
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
8858
diff
changeset
|
156 t = (bn >= 31000) ? bn / 1000 : 1000000 / (bn + 1); |
400
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
157 |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
158 /* match items in a to their equivalence class in b */ |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
159 for (i = 0; i < an; i++) { |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
160 /* find the equivalence class */ |
5341
458acf92b49e
bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents:
5340
diff
changeset
|
161 for (j = a[i].h & buckets; h[j].pos != INT_MAX; |
474 | 162 j = (j + 1) & buckets) |
163 if (!cmp(a + i, b + h[j].pos)) | |
400
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
164 break; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
165 |
433
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
166 a[i].e = j; /* use equivalence class for quick compare */ |
1542
8e80eefb3de6
made C src formatting more consistent
twaldmann@thinkmo.de
parents:
1397
diff
changeset
|
167 if (h[j].len <= t) |
474 | 168 a[i].n = h[j].pos; /* point to head of match list */ |
400
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
169 else |
5341
458acf92b49e
bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents:
5340
diff
changeset
|
170 a[i].n = INT_MAX; /* too popular */ |
400
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
171 } |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
172 |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
173 /* discard hash tables */ |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
174 free(h); |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
175 return 1; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
176 } |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
177 |
474 | 178 static int longest_match(struct line *a, struct line *b, struct pos *pos, |
400
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
179 int a1, int a2, int b1, int b2, int *omi, int *omj) |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
180 { |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
181 int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
182 |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
183 for (i = a1; i < a2; i++) { |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
184 /* skip things before the current block */ |
5341
458acf92b49e
bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents:
5340
diff
changeset
|
185 for (j = a[i].n; j < b1; j = b[j].n) |
400
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
186 ; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
187 |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
188 /* loop through all lines match a[i] in b */ |
5341
458acf92b49e
bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents:
5340
diff
changeset
|
189 for (; j < b2; j = b[j].n) { |
400
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
190 /* does this extend an earlier match? */ |
474 | 191 if (i > a1 && j > b1 && pos[j - 1].pos == i - 1) |
192 k = pos[j - 1].len + 1; | |
400
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
193 else |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
194 k = 1; |
474 | 195 pos[j].pos = i; |
196 pos[j].len = k; | |
400
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
197 |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
198 /* best match so far? */ |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
199 if (k > mk) { |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
200 mi = i; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
201 mj = j; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
202 mk = k; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
203 } |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
204 } |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
205 } |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
206 |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
207 if (mk) { |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
208 mi = mi - mk + 1; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
209 mj = mj - mk + 1; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
210 } |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
211 |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
212 /* expand match to include neighboring popular lines */ |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
213 while (mi - mb > a1 && mj - mb > b1 && |
433
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
214 a[mi - mb - 1].e == b[mj - mb - 1].e) |
400
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
215 mb++; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
216 while (mi + mk < a2 && mj + mk < b2 && |
433
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
217 a[mi + mk].e == b[mj + mk].e) |
400
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
218 mk++; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
219 |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
220 *omi = mi - mb; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
221 *omj = mj - mb; |
5341
458acf92b49e
bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents:
5340
diff
changeset
|
222 |
400
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
223 return mk + mb; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
224 } |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
225 |
474 | 226 static void recurse(struct line *a, struct line *b, struct pos *pos, |
400
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
227 int a1, int a2, int b1, int b2, struct hunklist *l) |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
228 { |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
229 int i, j, k; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
230 |
10500
e96597c8d0ea
bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents:
10282
diff
changeset
|
231 while (1) { |
e96597c8d0ea
bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents:
10282
diff
changeset
|
232 /* find the longest match in this chunk */ |
e96597c8d0ea
bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents:
10282
diff
changeset
|
233 k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j); |
e96597c8d0ea
bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents:
10282
diff
changeset
|
234 if (!k) |
e96597c8d0ea
bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents:
10282
diff
changeset
|
235 return; |
400
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
236 |
10500
e96597c8d0ea
bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents:
10282
diff
changeset
|
237 /* and recurse on the remaining chunks on either side */ |
e96597c8d0ea
bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents:
10282
diff
changeset
|
238 recurse(a, b, pos, a1, i, b1, j, l); |
e96597c8d0ea
bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents:
10282
diff
changeset
|
239 l->head->a1 = i; |
e96597c8d0ea
bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents:
10282
diff
changeset
|
240 l->head->a2 = i + k; |
e96597c8d0ea
bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents:
10282
diff
changeset
|
241 l->head->b1 = j; |
e96597c8d0ea
bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents:
10282
diff
changeset
|
242 l->head->b2 = j + k; |
e96597c8d0ea
bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents:
10282
diff
changeset
|
243 l->head++; |
e96597c8d0ea
bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents:
10282
diff
changeset
|
244 /* tail-recursion didn't happen, so doing equivalent iteration */ |
e96597c8d0ea
bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents:
10282
diff
changeset
|
245 a1 = i + k; |
e96597c8d0ea
bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents:
10282
diff
changeset
|
246 b1 = j + k; |
e96597c8d0ea
bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents:
10282
diff
changeset
|
247 } |
400
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
248 } |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
249 |
433
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
250 static struct hunklist diff(struct line *a, int an, struct line *b, int bn) |
400
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
251 { |
433
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
252 struct hunklist l; |
7104
9514cbb6e4f6
bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
7036
diff
changeset
|
253 struct hunk *curr; |
474 | 254 struct pos *pos; |
255 int t; | |
433
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
256 |
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
257 /* allocate and fill arrays */ |
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
258 t = equatelines(a, an, b, bn); |
5571 | 259 pos = (struct pos *)calloc(bn ? bn : 1, sizeof(struct pos)); |
827
a61728b58dc0
Fix array overflow bug in bdiff
"Wallace, Eric S" <eric.s.wallace@intel.com>
parents:
597
diff
changeset
|
260 /* we can't have more matches than lines in the shorter file */ |
1978
10606ee61107
do proper typecasting on malloc() and calloc() calls
TK Soh <teekaysoh@yahoo.com>
parents:
1759
diff
changeset
|
261 l.head = l.base = (struct hunk *)malloc(sizeof(struct hunk) * |
10606ee61107
do proper typecasting on malloc() and calloc() calls
TK Soh <teekaysoh@yahoo.com>
parents:
1759
diff
changeset
|
262 ((an<bn ? an:bn) + 1)); |
433
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
263 |
474 | 264 if (pos && l.base && t) { |
433
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
265 /* generate the matching block list */ |
474 | 266 recurse(a, b, pos, 0, an, 0, bn, &l); |
4131
1ca664c964e0
don't return uninitialized memory from bdiff.blocks()
Erling Ellingsen <erlingalf@gmail.com>
parents:
3561
diff
changeset
|
267 l.head->a1 = l.head->a2 = an; |
1ca664c964e0
don't return uninitialized memory from bdiff.blocks()
Erling Ellingsen <erlingalf@gmail.com>
parents:
3561
diff
changeset
|
268 l.head->b1 = l.head->b2 = bn; |
433
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
269 l.head++; |
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
270 } |
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
271 |
474 | 272 free(pos); |
7104
9514cbb6e4f6
bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
7036
diff
changeset
|
273 |
7625
930a2be7e875
bdiff: add comment about normalization
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
7189
diff
changeset
|
274 /* normalize the hunk list, try to push each hunk towards the end */ |
7104
9514cbb6e4f6
bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
7036
diff
changeset
|
275 for (curr = l.base; curr != l.head; curr++) { |
10282
08a0f04b56bd
many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents:
9534
diff
changeset
|
276 struct hunk *next = curr + 1; |
7104
9514cbb6e4f6
bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
7036
diff
changeset
|
277 int shift = 0; |
9514cbb6e4f6
bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
7036
diff
changeset
|
278 |
9514cbb6e4f6
bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
7036
diff
changeset
|
279 if (next == l.head) |
9514cbb6e4f6
bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
7036
diff
changeset
|
280 break; |
9514cbb6e4f6
bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
7036
diff
changeset
|
281 |
9514cbb6e4f6
bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
7036
diff
changeset
|
282 if (curr->a2 == next->a1) |
10282
08a0f04b56bd
many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents:
9534
diff
changeset
|
283 while (curr->a2 + shift < an && curr->b2 + shift < bn |
08a0f04b56bd
many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents:
9534
diff
changeset
|
284 && !cmp(a + curr->a2 + shift, |
08a0f04b56bd
many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents:
9534
diff
changeset
|
285 b + curr->b2 + shift)) |
7104
9514cbb6e4f6
bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
7036
diff
changeset
|
286 shift++; |
9514cbb6e4f6
bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
7036
diff
changeset
|
287 else if (curr->b2 == next->b1) |
10282
08a0f04b56bd
many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents:
9534
diff
changeset
|
288 while (curr->b2 + shift < bn && curr->a2 + shift < an |
08a0f04b56bd
many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents:
9534
diff
changeset
|
289 && !cmp(b + curr->b2 + shift, |
08a0f04b56bd
many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents:
9534
diff
changeset
|
290 a + curr->a2 + shift)) |
7104
9514cbb6e4f6
bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
7036
diff
changeset
|
291 shift++; |
9514cbb6e4f6
bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
7036
diff
changeset
|
292 if (!shift) |
9514cbb6e4f6
bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
7036
diff
changeset
|
293 continue; |
9514cbb6e4f6
bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
7036
diff
changeset
|
294 curr->b2 += shift; |
9514cbb6e4f6
bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
7036
diff
changeset
|
295 next->b1 += shift; |
9514cbb6e4f6
bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
7036
diff
changeset
|
296 curr->a2 += shift; |
9514cbb6e4f6
bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
7036
diff
changeset
|
297 next->a1 += shift; |
9514cbb6e4f6
bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
7036
diff
changeset
|
298 } |
9514cbb6e4f6
bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
7036
diff
changeset
|
299 |
433
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
300 return l; |
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
301 } |
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
302 |
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
303 static PyObject *blocks(PyObject *self, PyObject *args) |
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
304 { |
435 | 305 PyObject *sa, *sb, *rl = NULL, *m; |
433
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
306 struct line *a, *b; |
970 | 307 struct hunklist l = {NULL, NULL}; |
400
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
308 struct hunk *h; |
433
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
309 int an, bn, pos = 0; |
400
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
310 |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
311 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb)) |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
312 return NULL; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
313 |
11364
0044193a1c45
bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10500
diff
changeset
|
314 an = splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a); |
0044193a1c45
bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10500
diff
changeset
|
315 bn = splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b); |
0044193a1c45
bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10500
diff
changeset
|
316 |
433
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
317 if (!a || !b) |
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
318 goto nomem; |
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
319 |
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
320 l = diff(a, an, b, bn); |
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
321 rl = PyList_New(l.head - l.base); |
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
322 if (!l.head || !rl) |
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
323 goto nomem; |
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
324 |
1542
8e80eefb3de6
made C src formatting more consistent
twaldmann@thinkmo.de
parents:
1397
diff
changeset
|
325 for (h = l.base; h != l.head; h++) { |
433
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
326 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2); |
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
327 PyList_SetItem(rl, pos, m); |
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
328 pos++; |
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
329 } |
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
330 |
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
331 nomem: |
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
332 free(a); |
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
333 free(b); |
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
334 free(l.base); |
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
335 return rl ? rl : PyErr_NoMemory(); |
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
336 } |
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
337 |
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
338 static PyObject *bdiff(PyObject *self, PyObject *args) |
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
339 { |
3335
9061613c1593
Teach bdiff to support buffer objects
Brendan Cully <brendan@kublai.com>
parents:
2859
diff
changeset
|
340 char *sa, *sb; |
9061613c1593
Teach bdiff to support buffer objects
Brendan Cully <brendan@kublai.com>
parents:
2859
diff
changeset
|
341 PyObject *result = NULL; |
433
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
342 struct line *al, *bl; |
970 | 343 struct hunklist l = {NULL, NULL}; |
433
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
344 struct hunk *h; |
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
345 char encode[12], *rb; |
3335
9061613c1593
Teach bdiff to support buffer objects
Brendan Cully <brendan@kublai.com>
parents:
2859
diff
changeset
|
346 int an, bn, len = 0, la, lb; |
433
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
347 |
3369
4bad632913d8
python2.5 PyArg_ParseTuple fix
Alexis S. L. Carvalho <alexis@cecm.usp.br>
parents:
3335
diff
changeset
|
348 if (!PyArg_ParseTuple(args, "s#s#:bdiff", &sa, &la, &sb, &lb)) |
433
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
349 return NULL; |
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
350 |
3335
9061613c1593
Teach bdiff to support buffer objects
Brendan Cully <brendan@kublai.com>
parents:
2859
diff
changeset
|
351 an = splitlines(sa, la, &al); |
9061613c1593
Teach bdiff to support buffer objects
Brendan Cully <brendan@kublai.com>
parents:
2859
diff
changeset
|
352 bn = splitlines(sb, lb, &bl); |
433
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
353 if (!al || !bl) |
400
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
354 goto nomem; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
355 |
433
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
356 l = diff(al, an, bl, bn); |
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
357 if (!l.head) |
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
358 goto nomem; |
400
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
359 |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
360 /* calculate length of output */ |
3335
9061613c1593
Teach bdiff to support buffer objects
Brendan Cully <brendan@kublai.com>
parents:
2859
diff
changeset
|
361 la = lb = 0; |
1542
8e80eefb3de6
made C src formatting more consistent
twaldmann@thinkmo.de
parents:
1397
diff
changeset
|
362 for (h = l.base; h != l.head; h++) { |
400
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
363 if (h->a1 != la || h->b1 != lb) |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
364 len += 12 + bl[h->b1].l - bl[lb].l; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
365 la = h->a2; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
366 lb = h->b2; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
367 } |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
368 |
11364
0044193a1c45
bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10500
diff
changeset
|
369 result = PyBytes_FromStringAndSize(NULL, len); |
0044193a1c45
bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10500
diff
changeset
|
370 |
400
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
371 if (!result) |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
372 goto nomem; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
373 |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
374 /* build binary patch */ |
11364
0044193a1c45
bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10500
diff
changeset
|
375 rb = PyBytes_AsString(result); |
400
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
376 la = lb = 0; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
377 |
1542
8e80eefb3de6
made C src formatting more consistent
twaldmann@thinkmo.de
parents:
1397
diff
changeset
|
378 for (h = l.base; h != l.head; h++) { |
400
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
379 if (h->a1 != la || h->b1 != lb) { |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
380 len = bl[h->b1].l - bl[lb].l; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
381 *(uint32_t *)(encode) = htonl(al[la].l - al->l); |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
382 *(uint32_t *)(encode + 4) = htonl(al[h->a1].l - al->l); |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
383 *(uint32_t *)(encode + 8) = htonl(len); |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
384 memcpy(rb, encode, 12); |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
385 memcpy(rb + 12, bl[lb].l, len); |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
386 rb += 12 + len; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
387 } |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
388 la = h->a2; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
389 lb = h->b2; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
390 } |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
391 |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
392 nomem: |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
393 free(al); |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
394 free(bl); |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
395 free(l.base); |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
396 return result ? result : PyErr_NoMemory(); |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
397 } |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
398 |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
399 static char mdiff_doc[] = "Efficient binary diff."; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
400 |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
401 static PyMethodDef methods[] = { |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
402 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"}, |
433
79c694462294
Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents:
411
diff
changeset
|
403 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"}, |
400
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
404 {NULL, NULL} |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
405 }; |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
406 |
11364
0044193a1c45
bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10500
diff
changeset
|
407 #ifdef IS_PY3K |
0044193a1c45
bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10500
diff
changeset
|
408 static struct PyModuleDef bdiff_module = { |
0044193a1c45
bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10500
diff
changeset
|
409 PyModuleDef_HEAD_INIT, |
0044193a1c45
bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10500
diff
changeset
|
410 "bdiff", |
0044193a1c45
bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10500
diff
changeset
|
411 mdiff_doc, |
0044193a1c45
bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10500
diff
changeset
|
412 -1, |
0044193a1c45
bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10500
diff
changeset
|
413 methods |
0044193a1c45
bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10500
diff
changeset
|
414 }; |
0044193a1c45
bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10500
diff
changeset
|
415 |
0044193a1c45
bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10500
diff
changeset
|
416 PyMODINIT_FUNC PyInit_bdiff(void) |
0044193a1c45
bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10500
diff
changeset
|
417 { |
0044193a1c45
bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10500
diff
changeset
|
418 return PyModule_Create(&bdiff_module); |
0044193a1c45
bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10500
diff
changeset
|
419 } |
0044193a1c45
bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10500
diff
changeset
|
420 #else |
400
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
421 PyMODINIT_FUNC initbdiff(void) |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
422 { |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
423 Py_InitModule3("bdiff", methods, mdiff_doc); |
8b067bde6679
Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff
changeset
|
424 } |
11364
0044193a1c45
bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10500
diff
changeset
|
425 #endif |
1542
8e80eefb3de6
made C src formatting more consistent
twaldmann@thinkmo.de
parents:
1397
diff
changeset
|
426 |