Mercurial > hg-stable
annotate mercurial/mpatch.c @ 766:b444a7e053f1
Get addremove to use new walk code.
It is now more verbose than it used to be. If given file names, it
prints nothing, as before. But if given patterns or nothing, it prints
the names of the files it is operating on, to remove that air of mystery.
It also now operates at or below the current directory.
author | Bryan O'Sullivan <bos@serpentine.com> |
---|---|
date | Fri, 22 Jul 2005 19:45:48 -0800 |
parents | e530637ea060 |
children | 681c5c211b92 |
rev | line source |
---|---|
72 | 1 /* |
2 mpatch.c - efficient binary patching for Mercurial | |
3 | |
4 This implements a patch algorithm that's O(m + nlog n) where m is the | |
5 size of the output and n is the number of patches. | |
6 | |
7 Given a list of binary patches, it unpacks each into a hunk list, | |
8 then combines the hunk lists with a treewise recursion to form a | |
9 single hunk list. This hunk list is then applied to the original | |
10 text. | |
11 | |
12 The text (or binary) fragments are copied directly from their source | |
13 Python objects into a preallocated output string to avoid the | |
14 allocation of intermediate Python objects. Working memory is about 2x | |
15 the total number of hunks. | |
16 | |
17 Copyright 2005 Matt Mackall <mpm@selenic.com> | |
18 | |
19 This software may be used and distributed according to the terms | |
20 of the GNU General Public License, incorporated herein by reference. | |
21 */ | |
22 | |
23 #include <Python.h> | |
24 #include <stdlib.h> | |
25 #include <string.h> | |
410
7c678976df3e
Make mpatch.c compilable under the other `OS'
mpm@selenic.com
parents:
384
diff
changeset
|
26 #ifdef _WIN32 |
551 | 27 #ifdef _MSC_VER |
28 #define inline __inline | |
29 typedef unsigned long uint32_t; | |
30 #else | |
510
7f3fc8fd427e
More fiddling with uint32_t includes for extensions
mpm@selenic.com
parents:
495
diff
changeset
|
31 #include <stdint.h> |
551 | 32 #endif |
411
9e9f7ab43ce2
Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents:
410
diff
changeset
|
33 static uint32_t ntohl(uint32_t x) |
9e9f7ab43ce2
Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents:
410
diff
changeset
|
34 { |
9e9f7ab43ce2
Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents:
410
diff
changeset
|
35 return ((x & 0x000000ffUL) << 24) | |
9e9f7ab43ce2
Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents:
410
diff
changeset
|
36 ((x & 0x0000ff00UL) << 8) | |
9e9f7ab43ce2
Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents:
410
diff
changeset
|
37 ((x & 0x00ff0000UL) >> 8) | |
9e9f7ab43ce2
Add 'other OS' bits to bdiff.c / style cleanups
mpm@selenic.com
parents:
410
diff
changeset
|
38 ((x & 0xff000000UL) >> 24); |
410
7c678976df3e
Make mpatch.c compilable under the other `OS'
mpm@selenic.com
parents:
384
diff
changeset
|
39 } |
7c678976df3e
Make mpatch.c compilable under the other `OS'
mpm@selenic.com
parents:
384
diff
changeset
|
40 #else |
510
7f3fc8fd427e
More fiddling with uint32_t includes for extensions
mpm@selenic.com
parents:
495
diff
changeset
|
41 #include <sys/types.h> |
597
e530637ea060
[PATCH] use <arpa/inet.h> instead of <netinet/in.h> for ntohl/htonl
mpm@selenic.com
parents:
553
diff
changeset
|
42 #include <arpa/inet.h> |
410
7c678976df3e
Make mpatch.c compilable under the other `OS'
mpm@selenic.com
parents:
384
diff
changeset
|
43 #endif |
72 | 44 |
45 static char mpatch_doc[] = "Efficient binary patching."; | |
46 | |
47 struct frag { | |
48 int start, end, len; | |
49 char *data; | |
50 }; | |
51 | |
52 struct flist { | |
53 struct frag *base, *head, *tail; | |
54 }; | |
55 | |
56 static struct flist *lalloc(int size) | |
57 { | |
128 | 58 struct flist *a = NULL; |
72 | 59 |
60 a = malloc(sizeof(struct flist)); | |
128 | 61 if (a) { |
62 a->base = malloc(sizeof(struct frag) * size); | |
282 | 63 if (!a->base) { |
128 | 64 free(a); |
282 | 65 a = NULL; |
66 } else | |
128 | 67 a->head = a->tail = a->base; |
68 } | |
72 | 69 return a; |
70 } | |
71 | |
72 static void lfree(struct flist *a) | |
73 { | |
128 | 74 if (a) { |
75 free(a->base); | |
76 free(a); | |
77 } | |
72 | 78 } |
79 | |
80 static int lsize(struct flist *a) | |
81 { | |
82 return a->tail - a->head; | |
83 } | |
84 | |
85 /* move hunks in source that are less cut to dest, compensating | |
86 for changes in offset. the last hunk may be split if necessary. | |
87 */ | |
88 static int gather(struct flist *dest, struct flist *src, int cut, int offset) | |
89 { | |
90 struct frag *d = dest->tail, *s = src->head; | |
91 int postend, c, l; | |
92 | |
93 while (s != src->tail) { | |
94 if (s->start + offset >= cut) | |
82 | 95 break; /* we've gone far enough */ |
72 | 96 |
97 postend = offset + s->start + s->len; | |
98 if (postend <= cut) { | |
99 /* save this hunk */ | |
100 offset += s->start + s->len - s->end; | |
101 *d++ = *s++; | |
102 } | |
103 else { | |
104 /* break up this hunk */ | |
105 c = cut - offset; | |
106 if (s->end < c) | |
107 c = s->end; | |
108 l = cut - offset - s->start; | |
109 if (s->len < l) | |
110 l = s->len; | |
111 | |
112 offset += s->start + l - c; | |
113 | |
114 d->start = s->start; | |
115 d->end = c; | |
116 d->len = l; | |
117 d->data = s->data; | |
118 d++; | |
119 s->start = c; | |
120 s->len = s->len - l; | |
121 s->data = s->data + l; | |
122 | |
82 | 123 break; |
72 | 124 } |
125 } | |
126 | |
127 dest->tail = d; | |
128 src->head = s; | |
129 return offset; | |
130 } | |
131 | |
132 /* like gather, but with no output list */ | |
133 static int discard(struct flist *src, int cut, int offset) | |
134 { | |
135 struct frag *s = src->head; | |
136 int postend, c, l; | |
137 | |
138 while (s != src->tail) { | |
139 if (s->start + offset >= cut) | |
82 | 140 break; |
72 | 141 |
142 postend = offset + s->start + s->len; | |
143 if (postend <= cut) { | |
144 offset += s->start + s->len - s->end; | |
145 s++; | |
146 } | |
147 else { | |
148 c = cut - offset; | |
149 if (s->end < c) | |
150 c = s->end; | |
151 l = cut - offset - s->start; | |
152 if (s->len < l) | |
153 l = s->len; | |
154 | |
155 offset += s->start + l - c; | |
156 s->start = c; | |
157 s->len = s->len - l; | |
158 s->data = s->data + l; | |
159 | |
82 | 160 break; |
72 | 161 } |
162 } | |
163 | |
164 src->head = s; | |
165 return offset; | |
166 } | |
167 | |
168 /* combine hunk lists a and b, while adjusting b for offset changes in a/ | |
169 this deletes a and b and returns the resultant list. */ | |
170 static struct flist *combine(struct flist *a, struct flist *b) | |
171 { | |
128 | 172 struct flist *c = NULL; |
173 struct frag *bh, *ct; | |
72 | 174 int offset = 0, post; |
175 | |
128 | 176 if (a && b) |
177 c = lalloc((lsize(a) + lsize(b)) * 2); | |
178 | |
179 if (c) { | |
72 | 180 |
128 | 181 for (bh = b->head; bh != b->tail; bh++) { |
182 /* save old hunks */ | |
183 offset = gather(c, a, bh->start, offset); | |
72 | 184 |
128 | 185 /* discard replaced hunks */ |
186 post = discard(a, bh->end, offset); | |
72 | 187 |
128 | 188 /* insert new hunk */ |
189 ct = c->tail; | |
190 ct->start = bh->start - offset; | |
191 ct->end = bh->end - post; | |
192 ct->len = bh->len; | |
193 ct->data = bh->data; | |
194 c->tail++; | |
195 offset = post; | |
196 } | |
197 | |
198 /* hold on to tail from a */ | |
199 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a)); | |
200 c->tail += lsize(a); | |
72 | 201 } |
202 | |
203 lfree(a); | |
204 lfree(b); | |
205 return c; | |
206 } | |
207 | |
208 /* decode a binary patch into a hunk list */ | |
209 static struct flist *decode(char *bin, int len) | |
210 { | |
211 struct flist *l; | |
212 struct frag *lt; | |
213 char *end = bin + len; | |
384
a29decbf7475
mpatch: attempt to handle unpack alignment issues on Solaris
mpm@selenic.com
parents:
282
diff
changeset
|
214 char decode[12]; /* for dealing with alignment issues */ |
72 | 215 |
216 /* assume worst case size, we won't have many of these lists */ | |
217 l = lalloc(len / 12); | |
218 lt = l->tail; | |
219 | |
220 while (bin < end) { | |
384
a29decbf7475
mpatch: attempt to handle unpack alignment issues on Solaris
mpm@selenic.com
parents:
282
diff
changeset
|
221 memcpy(decode, bin, 12); |
a29decbf7475
mpatch: attempt to handle unpack alignment issues on Solaris
mpm@selenic.com
parents:
282
diff
changeset
|
222 lt->start = ntohl(*(uint32_t *)decode); |
a29decbf7475
mpatch: attempt to handle unpack alignment issues on Solaris
mpm@selenic.com
parents:
282
diff
changeset
|
223 lt->end = ntohl(*(uint32_t *)(decode + 4)); |
a29decbf7475
mpatch: attempt to handle unpack alignment issues on Solaris
mpm@selenic.com
parents:
282
diff
changeset
|
224 lt->len = ntohl(*(uint32_t *)(decode + 8)); |
72 | 225 lt->data = bin + 12; |
226 bin += 12 + lt->len; | |
227 lt++; | |
228 } | |
229 | |
230 l->tail = lt; | |
231 return l; | |
232 } | |
233 | |
234 /* calculate the size of resultant text */ | |
235 static int calcsize(int len, struct flist *l) | |
236 { | |
237 int outlen = 0, last = 0; | |
238 struct frag *f = l->head; | |
239 | |
240 while (f != l->tail) { | |
241 outlen += f->start - last; | |
242 last = f->end; | |
243 outlen += f->len; | |
244 f++; | |
245 } | |
246 | |
247 outlen += len - last; | |
248 return outlen; | |
249 } | |
250 | |
251 static void apply(char *buf, char *orig, int len, struct flist *l) | |
252 { | |
253 struct frag *f = l->head; | |
254 int last = 0; | |
255 char *p = buf; | |
256 | |
257 while (f != l->tail) { | |
258 memcpy(p, orig + last, f->start - last); | |
259 p += f->start - last; | |
260 memcpy(p, f->data, f->len); | |
261 last = f->end; | |
262 p += f->len; | |
263 f++; | |
264 } | |
265 memcpy(p, orig + last, len - last); | |
266 } | |
267 | |
268 /* recursively generate a patch of all bins between start and end */ | |
269 static struct flist *fold(PyObject *bins, int start, int end) | |
270 { | |
271 int len; | |
272 | |
273 if (start + 1 == end) { | |
274 /* trivial case, output a decoded list */ | |
275 PyObject *tmp = PyList_GetItem(bins, start); | |
128 | 276 if (!tmp) |
277 return NULL; | |
72 | 278 return decode(PyString_AsString(tmp), PyString_Size(tmp)); |
279 } | |
280 | |
281 /* divide and conquer, memory management is elsewhere */ | |
282 len = (end - start) / 2; | |
283 return combine(fold(bins, start, start + len), | |
284 fold(bins, start + len, end)); | |
285 } | |
286 | |
287 static PyObject * | |
288 patches(PyObject *self, PyObject *args) | |
289 { | |
290 PyObject *text, *bins, *result; | |
291 struct flist *patch; | |
292 char *in, *out; | |
293 int len, outlen; | |
294 | |
128 | 295 if (!PyArg_ParseTuple(args, "SO:mpatch", &text, &bins)) |
72 | 296 return NULL; |
297 | |
298 len = PyList_Size(bins); | |
299 if (!len) { | |
300 /* nothing to do */ | |
301 Py_INCREF(text); | |
302 return text; | |
303 } | |
304 | |
305 patch = fold(bins, 0, len); | |
128 | 306 if (!patch) |
307 return PyErr_NoMemory(); | |
308 | |
72 | 309 outlen = calcsize(PyString_Size(text), patch); |
310 result = PyString_FromStringAndSize(NULL, outlen); | |
128 | 311 if (result) { |
312 in = PyString_AsString(text); | |
313 out = PyString_AsString(result); | |
314 apply(out, in, PyString_Size(text), patch); | |
315 } | |
316 | |
72 | 317 lfree(patch); |
318 return result; | |
319 } | |
320 | |
321 static PyMethodDef methods[] = { | |
322 {"patches", patches, METH_VARARGS, "apply a series of patches\n"}, | |
323 {NULL, NULL} | |
324 }; | |
325 | |
326 PyMODINIT_FUNC | |
327 initmpatch(void) | |
328 { | |
329 Py_InitModule3("mpatch", methods, mpatch_doc); | |
330 } | |
331 |