Mercurial > hg
annotate mercurial/mpatch.c @ 464:50da4bb9cab6
Merge from http://moffetthome.net:8012/
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1
Merge from http://moffetthome.net:8012/
manifest hash: 3a67864af6b0276eabe640274633ca2625dbd4a5
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.0 (GNU/Linux)
iD8DBQFCvQKkywK+sNU5EO8RArUHAKCSGtpEfJwYGoAIpj4mBDvcPted9wCgnzML
xr8WQ8DdPkJ9DVsLjvsbhJU=
=O7Fd
-----END PGP SIGNATURE-----
author | mpm@selenic.com |
---|---|
date | Fri, 24 Jun 2005 23:07:16 -0800 |
parents | 688d03d6997a ea93402b81b9 |
children | aa3d592df9b9 |
rev | line source |
---|---|
72 | 1 /* |
2 mpatch.c - efficient binary patching for Mercurial | |
3 | |
4 This implements a patch algorithm that's O(m + n log n) where m is the | |
5 size of the output and n is the number of patches. | |
6 | |
7 Given a list of binary patches, it unpacks each into a hunk list, | |
8 then combines the hunk lists with a treewise recursion to form a | |
9 single hunk list. This hunk list is then applied to the original | |
10 text. | |
11 | |
12 The text (or binary) fragments are copied directly from their source | |
13 Python objects into a preallocated output string to avoid the | |
14 allocation of intermediate Python objects. Working memory is about 2x | |
15 the total number of hunks. | |
16 | |
17 Copyright 2005 Matt Mackall <mpm@selenic.com> | |
18 | |
19 This software may be used and distributed according to the terms | |
20 of the GNU General Public License, incorporated herein by reference. | |
21 */ | |
22 | |
23 #include <Python.h> | |
24 #include <stdlib.h> | |
25 #include <string.h> | |
410
7c678976df3e
Make mpatch.c compilable under the other `OS'
mpm@selenic.com
parents:
384
diff
changeset
|
26 #ifdef _WIN32 |
7c678976df3e
Make mpatch.c compilable under the other `OS'
mpm@selenic.com
parents:
384
diff
changeset
|
27 |
7c678976df3e
Make mpatch.c compilable under the other `OS'
mpm@selenic.com
parents:
384
diff
changeset
|
28 typedef unsigned long uint32_t; |
7c678976df3e
Make mpatch.c compilable under the other `OS'
mpm@selenic.com
parents:
384
diff
changeset
|
29 |
411
9e9f7ab43ce2
Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents:
410
diff
changeset
|
/* Fallback ntohl for the _WIN32 branch, where <netinet/in.h> is not
   included: unconditionally reverses the order of the four bytes of x. */
static uint32_t ntohl(uint32_t x)
{
	uint32_t lowest = x & 0x000000ffUL;
	uint32_t low = x & 0x0000ff00UL;
	uint32_t high = x & 0x00ff0000UL;
	uint32_t highest = x & 0xff000000UL;

	return (lowest << 24) | (low << 8) | (high >> 8) | (highest >> 24);
}
7c678976df3e
Make mpatch.c compilable under the other `OS'
mpm@selenic.com
parents:
384
diff
changeset
|
37 |
7c678976df3e
Make mpatch.c compilable under the other `OS'
mpm@selenic.com
parents:
384
diff
changeset
|
38 #else |
7c678976df3e
Make mpatch.c compilable under the other `OS'
mpm@selenic.com
parents:
384
diff
changeset
|
39 #include <netinet/in.h> |
7c678976df3e
Make mpatch.c compilable under the other `OS'
mpm@selenic.com
parents:
384
diff
changeset
|
40 #include <sys/types.h> |
463
ea93402b81b9
Added stdint.h include to fix build on Mac OS X Tiger [v10.4]
kyle@zeus.moffetthome.net
parents:
429
diff
changeset
|
41 #include <stdint.h> |
410
7c678976df3e
Make mpatch.c compilable under the other `OS'
mpm@selenic.com
parents:
384
diff
changeset
|
42 #endif |
72 | 43 |
/* module docstring handed to Py_InitModule3 */
static char mpatch_doc[] =
	"Efficient binary patching.";
45 | |
/* one hunk: replace bytes [start, end) of the source text with the
   len bytes found at data */
struct frag {
	int start;	/* first source offset replaced */
	int end;	/* source offset just past the replaced range */
	int len;	/* number of replacement bytes at data */
	char *data;	/* replacement bytes (not owned by the frag) */
};
50 | |
/* a list of hunks stored in one malloc'd array */
struct flist {
	struct frag *base;	/* start of the allocation (what gets freed) */
	struct frag *head;	/* first live hunk */
	struct frag *tail;	/* one past the last live hunk */
};
54 | |
55 static struct flist *lalloc(int size) | |
56 { | |
128 | 57 struct flist *a = NULL; |
72 | 58 |
59 a = malloc(sizeof(struct flist)); | |
128 | 60 if (a) { |
61 a->base = malloc(sizeof(struct frag) * size); | |
282 | 62 if (!a->base) { |
128 | 63 free(a); |
282 | 64 a = NULL; |
65 } else | |
128 | 66 a->head = a->tail = a->base; |
67 } | |
72 | 68 return a; |
69 } | |
70 | |
71 static void lfree(struct flist *a) | |
72 { | |
128 | 73 if (a) { |
74 free(a->base); | |
75 free(a); | |
76 } | |
72 | 77 } |
78 | |
79 static int lsize(struct flist *a) | |
80 { | |
81 return a->tail - a->head; | |
82 } | |
83 | |
84 /* move hunks in source that are less cut to dest, compensating | |
85 for changes in offset. the last hunk may be split if necessary. | |
86 */ | |
87 static int gather(struct flist *dest, struct flist *src, int cut, int offset) | |
88 { | |
89 struct frag *d = dest->tail, *s = src->head; | |
90 int postend, c, l; | |
91 | |
92 while (s != src->tail) { | |
93 if (s->start + offset >= cut) | |
82 | 94 break; /* we've gone far enough */ |
72 | 95 |
96 postend = offset + s->start + s->len; | |
97 if (postend <= cut) { | |
98 /* save this hunk */ | |
99 offset += s->start + s->len - s->end; | |
100 *d++ = *s++; | |
101 } | |
102 else { | |
103 /* break up this hunk */ | |
104 c = cut - offset; | |
105 if (s->end < c) | |
106 c = s->end; | |
107 l = cut - offset - s->start; | |
108 if (s->len < l) | |
109 l = s->len; | |
110 | |
111 offset += s->start + l - c; | |
112 | |
113 d->start = s->start; | |
114 d->end = c; | |
115 d->len = l; | |
116 d->data = s->data; | |
117 d++; | |
118 s->start = c; | |
119 s->len = s->len - l; | |
120 s->data = s->data + l; | |
121 | |
82 | 122 break; |
72 | 123 } |
124 } | |
125 | |
126 dest->tail = d; | |
127 src->head = s; | |
128 return offset; | |
129 } | |
130 | |
131 /* like gather, but with no output list */ | |
132 static int discard(struct flist *src, int cut, int offset) | |
133 { | |
134 struct frag *s = src->head; | |
135 int postend, c, l; | |
136 | |
137 while (s != src->tail) { | |
138 if (s->start + offset >= cut) | |
82 | 139 break; |
72 | 140 |
141 postend = offset + s->start + s->len; | |
142 if (postend <= cut) { | |
143 offset += s->start + s->len - s->end; | |
144 s++; | |
145 } | |
146 else { | |
147 c = cut - offset; | |
148 if (s->end < c) | |
149 c = s->end; | |
150 l = cut - offset - s->start; | |
151 if (s->len < l) | |
152 l = s->len; | |
153 | |
154 offset += s->start + l - c; | |
155 s->start = c; | |
156 s->len = s->len - l; | |
157 s->data = s->data + l; | |
158 | |
82 | 159 break; |
72 | 160 } |
161 } | |
162 | |
163 src->head = s; | |
164 return offset; | |
165 } | |
166 | |
167 /* combine hunk lists a and b, while adjusting b for offset changes in a/ | |
168 this deletes a and b and returns the resultant list. */ | |
169 static struct flist *combine(struct flist *a, struct flist *b) | |
170 { | |
128 | 171 struct flist *c = NULL; |
172 struct frag *bh, *ct; | |
72 | 173 int offset = 0, post; |
174 | |
128 | 175 if (a && b) |
176 c = lalloc((lsize(a) + lsize(b)) * 2); | |
177 | |
178 if (c) { | |
72 | 179 |
128 | 180 for (bh = b->head; bh != b->tail; bh++) { |
181 /* save old hunks */ | |
182 offset = gather(c, a, bh->start, offset); | |
72 | 183 |
128 | 184 /* discard replaced hunks */ |
185 post = discard(a, bh->end, offset); | |
72 | 186 |
128 | 187 /* insert new hunk */ |
188 ct = c->tail; | |
189 ct->start = bh->start - offset; | |
190 ct->end = bh->end - post; | |
191 ct->len = bh->len; | |
192 ct->data = bh->data; | |
193 c->tail++; | |
194 offset = post; | |
195 } | |
196 | |
197 /* hold on to tail from a */ | |
198 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a)); | |
199 c->tail += lsize(a); | |
72 | 200 } |
201 | |
202 lfree(a); | |
203 lfree(b); | |
204 return c; | |
205 } | |
206 | |
207 /* decode a binary patch into a hunk list */ | |
208 static struct flist *decode(char *bin, int len) | |
209 { | |
210 struct flist *l; | |
211 struct frag *lt; | |
212 char *end = bin + len; | |
384
a29decbf7475
mpatch: attempt to handle unpack alignment issues on Solaris
mpm@selenic.com
parents:
282
diff
changeset
|
213 char decode[12]; /* for dealing with alignment issues */ |
72 | 214 |
215 /* assume worst case size, we won't have many of these lists */ | |
216 l = lalloc(len / 12); | |
217 lt = l->tail; | |
218 | |
219 while (bin < end) { | |
384
a29decbf7475
mpatch: attempt to handle unpack alignment issues on Solaris
mpm@selenic.com
parents:
282
diff
changeset
|
220 memcpy(decode, bin, 12); |
a29decbf7475
mpatch: attempt to handle unpack alignment issues on Solaris
mpm@selenic.com
parents:
282
diff
changeset
|
221 lt->start = ntohl(*(uint32_t *)decode); |
a29decbf7475
mpatch: attempt to handle unpack alignment issues on Solaris
mpm@selenic.com
parents:
282
diff
changeset
|
222 lt->end = ntohl(*(uint32_t *)(decode + 4)); |
a29decbf7475
mpatch: attempt to handle unpack alignment issues on Solaris
mpm@selenic.com
parents:
282
diff
changeset
|
223 lt->len = ntohl(*(uint32_t *)(decode + 8)); |
72 | 224 lt->data = bin + 12; |
225 bin += 12 + lt->len; | |
226 lt++; | |
227 } | |
228 | |
229 l->tail = lt; | |
230 return l; | |
231 } | |
232 | |
233 /* calculate the size of resultant text */ | |
234 static int calcsize(int len, struct flist *l) | |
235 { | |
236 int outlen = 0, last = 0; | |
237 struct frag *f = l->head; | |
238 | |
239 while (f != l->tail) { | |
240 outlen += f->start - last; | |
241 last = f->end; | |
242 outlen += f->len; | |
243 f++; | |
244 } | |
245 | |
246 outlen += len - last; | |
247 return outlen; | |
248 } | |
249 | |
250 static void apply(char *buf, char *orig, int len, struct flist *l) | |
251 { | |
252 struct frag *f = l->head; | |
253 int last = 0; | |
254 char *p = buf; | |
255 | |
256 while (f != l->tail) { | |
257 memcpy(p, orig + last, f->start - last); | |
258 p += f->start - last; | |
259 memcpy(p, f->data, f->len); | |
260 last = f->end; | |
261 p += f->len; | |
262 f++; | |
263 } | |
264 memcpy(p, orig + last, len - last); | |
265 } | |
266 | |
267 /* recursively generate a patch of all bins between start and end */ | |
268 static struct flist *fold(PyObject *bins, int start, int end) | |
269 { | |
270 int len; | |
271 | |
272 if (start + 1 == end) { | |
273 /* trivial case, output a decoded list */ | |
274 PyObject *tmp = PyList_GetItem(bins, start); | |
128 | 275 if (!tmp) |
276 return NULL; | |
72 | 277 return decode(PyString_AsString(tmp), PyString_Size(tmp)); |
278 } | |
279 | |
280 /* divide and conquer, memory management is elsewhere */ | |
281 len = (end - start) / 2; | |
282 return combine(fold(bins, start, start + len), | |
283 fold(bins, start + len, end)); | |
284 } | |
285 | |
286 static PyObject * | |
287 patches(PyObject *self, PyObject *args) | |
288 { | |
289 PyObject *text, *bins, *result; | |
290 struct flist *patch; | |
291 char *in, *out; | |
292 int len, outlen; | |
293 | |
128 | 294 if (!PyArg_ParseTuple(args, "SO:mpatch", &text, &bins)) |
72 | 295 return NULL; |
296 | |
297 len = PyList_Size(bins); | |
298 if (!len) { | |
299 /* nothing to do */ | |
300 Py_INCREF(text); | |
301 return text; | |
302 } | |
303 | |
304 patch = fold(bins, 0, len); | |
128 | 305 if (!patch) |
306 return PyErr_NoMemory(); | |
307 | |
72 | 308 outlen = calcsize(PyString_Size(text), patch); |
309 result = PyString_FromStringAndSize(NULL, outlen); | |
128 | 310 if (result) { |
311 in = PyString_AsString(text); | |
312 out = PyString_AsString(result); | |
313 apply(out, in, PyString_Size(text), patch); | |
314 } | |
315 | |
72 | 316 lfree(patch); |
317 return result; | |
318 } | |
319 | |
320 static PyMethodDef methods[] = { | |
321 {"patches", patches, METH_VARARGS, "apply a series of patches\n"}, | |
322 {NULL, NULL} | |
323 }; | |
324 | |
325 PyMODINIT_FUNC | |
326 initmpatch(void) | |
327 { | |
328 Py_InitModule3("mpatch", methods, mpatch_doc); | |
329 } | |
330 |