Mercurial > hg
annotate mercurial/mpatch.c @ 1794:98b6c1cad58b
only write the dirstate when something changed
author | Benoit Boissinot <benoit.boissinot@ens-lyon.org> |
---|---|
date | Thu, 23 Feb 2006 02:17:08 +0100 |
parents | 299c3e26ee45 |
children | 10606ee61107 |
rev | line source |
---|---|
72 | 1 /* |
2 mpatch.c - efficient binary patching for Mercurial | |
3 | |
4 This implements a patch algorithm that's O(m + nlog n) where m is the | |
5 size of the output and n is the number of patches. | |
6 | |
7 Given a list of binary patches, it unpacks each into a hunk list, | |
8 then combines the hunk lists with a treewise recursion to form a | |
9 single hunk list. This hunk list is then applied to the original | |
10 text. | |
11 | |
12 The text (or binary) fragments are copied directly from their source | |
13 Python objects into a preallocated output string to avoid the | |
14 allocation of intermediate Python objects. Working memory is about 2x | |
15 the total number of hunks. | |
16 | |
17 Copyright 2005 Matt Mackall <mpm@selenic.com> | |
18 | |
19 This software may be used and distributed according to the terms | |
20 of the GNU General Public License, incorporated herein by reference. | |
21 */ | |
22 | |
23 #include <Python.h> | |
24 #include <stdlib.h> | |
25 #include <string.h> | |
410
7c678976df3e
Make mpatch.c compilable under the other `OS'
mpm@selenic.com
parents:
384
diff
changeset
|
26 #ifdef _WIN32 |
551 | 27 #ifdef _MSC_VER |
28 #define inline __inline | |
29 typedef unsigned long uint32_t; | |
30 #else | |
510
7f3fc8fd427e
More fiddling with uint32_t includes for extensions
mpm@selenic.com
parents:
495
diff
changeset
|
31 #include <stdint.h> |
551 | 32 #endif |
411
9e9f7ab43ce2
Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents:
410
diff
changeset
|
33 static uint32_t ntohl(uint32_t x) |
9e9f7ab43ce2
Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents:
410
diff
changeset
|
34 { |
9e9f7ab43ce2
Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents:
410
diff
changeset
|
35 return ((x & 0x000000ffUL) << 24) | |
9e9f7ab43ce2
Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents:
410
diff
changeset
|
36 ((x & 0x0000ff00UL) << 8) | |
9e9f7ab43ce2
Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents:
410
diff
changeset
|
37 ((x & 0x00ff0000UL) >> 8) | |
9e9f7ab43ce2
Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents:
410
diff
changeset
|
38 ((x & 0xff000000UL) >> 24); |
410
7c678976df3e
Make mpatch.c compilable under the other `OS'
mpm@selenic.com
parents:
384
diff
changeset
|
39 } |
7c678976df3e
Make mpatch.c compilable under the other `OS'
mpm@selenic.com
parents:
384
diff
changeset
|
40 #else |
510
7f3fc8fd427e
More fiddling with uint32_t includes for extensions
mpm@selenic.com
parents:
495
diff
changeset
|
41 #include <sys/types.h> |
597
e530637ea060
[PATCH] use <arpa/inet.h> instead of <netinet/in.h> for ntohl/htonl
mpm@selenic.com
parents:
553
diff
changeset
|
42 #include <arpa/inet.h> |
410
7c678976df3e
Make mpatch.c compilable under the other `OS'
mpm@selenic.com
parents:
384
diff
changeset
|
43 #endif |
72 | 44 |
45 static char mpatch_doc[] = "Efficient binary patching."; | |
1722
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
46 static PyObject *mpatch_Error; |
72 | 47 |
48 struct frag { | |
49 int start, end, len; | |
50 char *data; | |
51 }; | |
52 | |
53 struct flist { | |
54 struct frag *base, *head, *tail; | |
55 }; | |
56 | |
57 static struct flist *lalloc(int size) | |
58 { | |
128 | 59 struct flist *a = NULL; |
72 | 60 |
61 a = malloc(sizeof(struct flist)); | |
128 | 62 if (a) { |
63 a->base = malloc(sizeof(struct frag) * size); | |
282 | 64 if (!a->base) { |
128 | 65 free(a); |
282 | 66 a = NULL; |
67 } else | |
128 | 68 a->head = a->tail = a->base; |
1746
299c3e26ee45
Fixed misleading indentation in mpatch.c
Thomas Arendsen Hein <thomas@intevation.de>
parents:
1722
diff
changeset
|
69 return a; |
128 | 70 } |
1722
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
71 if (!PyErr_Occurred()) |
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
72 PyErr_NoMemory(); |
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
73 return NULL; |
72 | 74 } |
75 | |
76 static void lfree(struct flist *a) | |
77 { | |
128 | 78 if (a) { |
79 free(a->base); | |
80 free(a); | |
81 } | |
72 | 82 } |
83 | |
84 static int lsize(struct flist *a) | |
85 { | |
86 return a->tail - a->head; | |
87 } | |
88 | |
89 /* move hunks in source that are less cut to dest, compensating | |
90 for changes in offset. the last hunk may be split if necessary. | |
91 */ | |
92 static int gather(struct flist *dest, struct flist *src, int cut, int offset) | |
93 { | |
94 struct frag *d = dest->tail, *s = src->head; | |
95 int postend, c, l; | |
96 | |
97 while (s != src->tail) { | |
98 if (s->start + offset >= cut) | |
82 | 99 break; /* we've gone far enough */ |
72 | 100 |
101 postend = offset + s->start + s->len; | |
102 if (postend <= cut) { | |
103 /* save this hunk */ | |
104 offset += s->start + s->len - s->end; | |
105 *d++ = *s++; | |
106 } | |
107 else { | |
108 /* break up this hunk */ | |
109 c = cut - offset; | |
110 if (s->end < c) | |
111 c = s->end; | |
112 l = cut - offset - s->start; | |
113 if (s->len < l) | |
114 l = s->len; | |
115 | |
116 offset += s->start + l - c; | |
117 | |
118 d->start = s->start; | |
119 d->end = c; | |
120 d->len = l; | |
121 d->data = s->data; | |
122 d++; | |
123 s->start = c; | |
124 s->len = s->len - l; | |
125 s->data = s->data + l; | |
126 | |
82 | 127 break; |
72 | 128 } |
129 } | |
130 | |
131 dest->tail = d; | |
132 src->head = s; | |
133 return offset; | |
134 } | |
135 | |
136 /* like gather, but with no output list */ | |
137 static int discard(struct flist *src, int cut, int offset) | |
138 { | |
139 struct frag *s = src->head; | |
140 int postend, c, l; | |
141 | |
142 while (s != src->tail) { | |
143 if (s->start + offset >= cut) | |
82 | 144 break; |
72 | 145 |
146 postend = offset + s->start + s->len; | |
147 if (postend <= cut) { | |
148 offset += s->start + s->len - s->end; | |
149 s++; | |
150 } | |
151 else { | |
152 c = cut - offset; | |
153 if (s->end < c) | |
154 c = s->end; | |
155 l = cut - offset - s->start; | |
156 if (s->len < l) | |
157 l = s->len; | |
158 | |
159 offset += s->start + l - c; | |
160 s->start = c; | |
161 s->len = s->len - l; | |
162 s->data = s->data + l; | |
163 | |
82 | 164 break; |
72 | 165 } |
166 } | |
167 | |
168 src->head = s; | |
169 return offset; | |
170 } | |
171 | |
172 /* combine hunk lists a and b, while adjusting b for offset changes in a/ | |
173 this deletes a and b and returns the resultant list. */ | |
174 static struct flist *combine(struct flist *a, struct flist *b) | |
175 { | |
128 | 176 struct flist *c = NULL; |
177 struct frag *bh, *ct; | |
72 | 178 int offset = 0, post; |
179 | |
128 | 180 if (a && b) |
181 c = lalloc((lsize(a) + lsize(b)) * 2); | |
182 | |
183 if (c) { | |
72 | 184 |
128 | 185 for (bh = b->head; bh != b->tail; bh++) { |
186 /* save old hunks */ | |
187 offset = gather(c, a, bh->start, offset); | |
72 | 188 |
128 | 189 /* discard replaced hunks */ |
190 post = discard(a, bh->end, offset); | |
72 | 191 |
128 | 192 /* insert new hunk */ |
193 ct = c->tail; | |
194 ct->start = bh->start - offset; | |
195 ct->end = bh->end - post; | |
196 ct->len = bh->len; | |
197 ct->data = bh->data; | |
198 c->tail++; | |
199 offset = post; | |
200 } | |
201 | |
202 /* hold on to tail from a */ | |
203 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a)); | |
204 c->tail += lsize(a); | |
72 | 205 } |
206 | |
207 lfree(a); | |
208 lfree(b); | |
209 return c; | |
210 } | |
211 | |
212 /* decode a binary patch into a hunk list */ | |
213 static struct flist *decode(char *bin, int len) | |
214 { | |
215 struct flist *l; | |
216 struct frag *lt; | |
217 char *end = bin + len; | |
384
a29decbf7475
mpatch: attempt to handle unpack alignment issues on Solaris
mpm@selenic.com
parents:
282
diff
changeset
|
218 char decode[12]; /* for dealing with alignment issues */ |
72 | 219 |
220 /* assume worst case size, we won't have many of these lists */ | |
221 l = lalloc(len / 12); | |
1722
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
222 if (!l) |
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
223 return NULL; |
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
224 |
72 | 225 lt = l->tail; |
226 | |
227 while (bin < end) { | |
384
a29decbf7475
mpatch: attempt to handle unpack alignment issues on Solaris
mpm@selenic.com
parents:
282
diff
changeset
|
228 memcpy(decode, bin, 12); |
a29decbf7475
mpatch: attempt to handle unpack alignment issues on Solaris
mpm@selenic.com
parents:
282
diff
changeset
|
229 lt->start = ntohl(*(uint32_t *)decode); |
a29decbf7475
mpatch: attempt to handle unpack alignment issues on Solaris
mpm@selenic.com
parents:
282
diff
changeset
|
230 lt->end = ntohl(*(uint32_t *)(decode + 4)); |
a29decbf7475
mpatch: attempt to handle unpack alignment issues on Solaris
mpm@selenic.com
parents:
282
diff
changeset
|
231 lt->len = ntohl(*(uint32_t *)(decode + 8)); |
72 | 232 lt->data = bin + 12; |
233 bin += 12 + lt->len; | |
234 lt++; | |
235 } | |
236 | |
1722
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
237 if (bin != end) { |
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
238 if (!PyErr_Occurred()) |
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
239 PyErr_SetString(mpatch_Error, "patch cannot be decoded"); |
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
240 lfree(l); |
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
241 return NULL; |
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
242 } |
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
243 |
72 | 244 l->tail = lt; |
245 return l; | |
246 } | |
247 | |
248 /* calculate the size of resultant text */ | |
249 static int calcsize(int len, struct flist *l) | |
250 { | |
251 int outlen = 0, last = 0; | |
252 struct frag *f = l->head; | |
253 | |
254 while (f != l->tail) { | |
1722
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
255 if (f->start < last || f->end > len) { |
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
256 if (!PyErr_Occurred()) |
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
257 PyErr_SetString(mpatch_Error, |
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
258 "invalid patch"); |
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
259 return -1; |
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
260 } |
72 | 261 outlen += f->start - last; |
262 last = f->end; | |
263 outlen += f->len; | |
264 f++; | |
265 } | |
266 | |
267 outlen += len - last; | |
268 return outlen; | |
269 } | |
270 | |
1722
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
271 static int apply(char *buf, char *orig, int len, struct flist *l) |
72 | 272 { |
273 struct frag *f = l->head; | |
274 int last = 0; | |
275 char *p = buf; | |
276 | |
277 while (f != l->tail) { | |
1722
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
278 if (f->start < last || f->end > len) { |
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
279 if (!PyErr_Occurred()) |
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
280 PyErr_SetString(mpatch_Error, |
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
281 "invalid patch"); |
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
282 return 0; |
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
283 } |
72 | 284 memcpy(p, orig + last, f->start - last); |
285 p += f->start - last; | |
286 memcpy(p, f->data, f->len); | |
287 last = f->end; | |
288 p += f->len; | |
289 f++; | |
290 } | |
291 memcpy(p, orig + last, len - last); | |
1722
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
292 return 1; |
72 | 293 } |
294 | |
295 /* recursively generate a patch of all bins between start and end */ | |
296 static struct flist *fold(PyObject *bins, int start, int end) | |
297 { | |
298 int len; | |
299 | |
300 if (start + 1 == end) { | |
301 /* trivial case, output a decoded list */ | |
302 PyObject *tmp = PyList_GetItem(bins, start); | |
128 | 303 if (!tmp) |
304 return NULL; | |
72 | 305 return decode(PyString_AsString(tmp), PyString_Size(tmp)); |
306 } | |
307 | |
308 /* divide and conquer, memory management is elsewhere */ | |
309 len = (end - start) / 2; | |
310 return combine(fold(bins, start, start + len), | |
311 fold(bins, start + len, end)); | |
312 } | |
313 | |
314 static PyObject * | |
315 patches(PyObject *self, PyObject *args) | |
316 { | |
317 PyObject *text, *bins, *result; | |
318 struct flist *patch; | |
319 char *in, *out; | |
320 int len, outlen; | |
321 | |
128 | 322 if (!PyArg_ParseTuple(args, "SO:mpatch", &text, &bins)) |
72 | 323 return NULL; |
324 | |
325 len = PyList_Size(bins); | |
326 if (!len) { | |
327 /* nothing to do */ | |
328 Py_INCREF(text); | |
329 return text; | |
330 } | |
331 | |
332 patch = fold(bins, 0, len); | |
128 | 333 if (!patch) |
1722
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
334 return NULL; |
128 | 335 |
72 | 336 outlen = calcsize(PyString_Size(text), patch); |
1722
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
337 if (outlen < 0) { |
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
338 result = NULL; |
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
339 goto cleanup; |
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
340 } |
72 | 341 result = PyString_FromStringAndSize(NULL, outlen); |
1722
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
342 if (!result) { |
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
343 result = NULL; |
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
344 goto cleanup; |
128 | 345 } |
1722
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
346 in = PyString_AsString(text); |
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
347 out = PyString_AsString(result); |
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
348 if (!apply(out, in, PyString_Size(text), patch)) { |
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
349 Py_DECREF(result); |
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
350 result = NULL; |
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
351 } |
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
352 cleanup: |
72 | 353 lfree(patch); |
354 return result; | |
355 } | |
356 | |
357 static PyMethodDef methods[] = { | |
358 {"patches", patches, METH_VARARGS, "apply a series of patches\n"}, | |
359 {NULL, NULL} | |
360 }; | |
361 | |
362 PyMODINIT_FUNC | |
363 initmpatch(void) | |
364 { | |
365 Py_InitModule3("mpatch", methods, mpatch_doc); | |
1722
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
366 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL); |
72 | 367 } |
368 |