72
|
/*
 mpatch.c - efficient binary patching for Mercurial

 This implements a patch algorithm that's O(m + n log n) where m is the
 size of the output and n is the number of patches.

 Given a list of binary patches, it unpacks each into a hunk list,
 then combines the hunk lists with a treewise recursion to form a
 single hunk list. This hunk list is then applied to the original
 text.

 The text (or binary) fragments are copied directly from their source
 Python objects into a preallocated output string to avoid the
 allocation of intermediate Python objects. Working memory is about 2x
 the total number of hunks.

 Copyright 2005 Matt Mackall <mpm@selenic.com>

 This software may be used and distributed according to the terms
 of the GNU General Public License, incorporated herein by reference.
*/
|
|
22
|
|
23 #include <Python.h>
|
|
24 #include <stdlib.h>
|
|
25 #include <string.h>
|
|
26 #include <netinet/in.h>
|
|
27 #include <sys/types.h>
|
|
28
|
|
/* module docstring handed to Py_InitModule3() */
static char mpatch_doc[] =
	"Efficient binary patching.";
|
|
30
|
|
/*
 * One patch hunk: bytes [start, end) of the text being patched are
 * replaced by the len bytes found at data.
 */
struct frag {
	int start;	/* offset in the old text where the replaced span begins */
	int end;	/* offset in the old text where copying resumes */
	int len;	/* number of replacement bytes at data */
	char *data;	/* replacement bytes; points into a patch buffer, not owned */
};
|
|
35
|
|
/*
 * A list of hunks stored in one malloc'd array.  The live hunks are
 * the ones in [head, tail).
 */
struct flist {
	struct frag *base;	/* start of the allocation; what gets freed */
	struct frag *head;	/* first live hunk */
	struct frag *tail;	/* one past the last live hunk */
};
|
|
39
|
|
40 static struct flist *lalloc(int size)
|
|
41 {
|
128
|
42 struct flist *a = NULL;
|
72
|
43
|
|
44 a = malloc(sizeof(struct flist));
|
128
|
45 if (a) {
|
|
46 a->base = malloc(sizeof(struct frag) * size);
|
282
|
47 if (!a->base) {
|
128
|
48 free(a);
|
282
|
49 a = NULL;
|
|
50 } else
|
128
|
51 a->head = a->tail = a->base;
|
|
52 }
|
72
|
53 return a;
|
|
54 }
|
|
55
|
|
56 static void lfree(struct flist *a)
|
|
57 {
|
128
|
58 if (a) {
|
|
59 free(a->base);
|
|
60 free(a);
|
|
61 }
|
72
|
62 }
|
|
63
|
|
64 static int lsize(struct flist *a)
|
|
65 {
|
|
66 return a->tail - a->head;
|
|
67 }
|
|
68
|
|
69 /* move hunks in source that are less cut to dest, compensating
|
|
70 for changes in offset. the last hunk may be split if necessary.
|
|
71 */
|
|
72 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
|
|
73 {
|
|
74 struct frag *d = dest->tail, *s = src->head;
|
|
75 int postend, c, l;
|
|
76
|
|
77 while (s != src->tail) {
|
|
78 if (s->start + offset >= cut)
|
82
|
79 break; /* we've gone far enough */
|
72
|
80
|
|
81 postend = offset + s->start + s->len;
|
|
82 if (postend <= cut) {
|
|
83 /* save this hunk */
|
|
84 offset += s->start + s->len - s->end;
|
|
85 *d++ = *s++;
|
|
86 }
|
|
87 else {
|
|
88 /* break up this hunk */
|
|
89 c = cut - offset;
|
|
90 if (s->end < c)
|
|
91 c = s->end;
|
|
92 l = cut - offset - s->start;
|
|
93 if (s->len < l)
|
|
94 l = s->len;
|
|
95
|
|
96 offset += s->start + l - c;
|
|
97
|
|
98 d->start = s->start;
|
|
99 d->end = c;
|
|
100 d->len = l;
|
|
101 d->data = s->data;
|
|
102 d++;
|
|
103 s->start = c;
|
|
104 s->len = s->len - l;
|
|
105 s->data = s->data + l;
|
|
106
|
82
|
107 break;
|
72
|
108 }
|
|
109 }
|
|
110
|
|
111 dest->tail = d;
|
|
112 src->head = s;
|
|
113 return offset;
|
|
114 }
|
|
115
|
|
116 /* like gather, but with no output list */
|
|
117 static int discard(struct flist *src, int cut, int offset)
|
|
118 {
|
|
119 struct frag *s = src->head;
|
|
120 int postend, c, l;
|
|
121
|
|
122 while (s != src->tail) {
|
|
123 if (s->start + offset >= cut)
|
82
|
124 break;
|
72
|
125
|
|
126 postend = offset + s->start + s->len;
|
|
127 if (postend <= cut) {
|
|
128 offset += s->start + s->len - s->end;
|
|
129 s++;
|
|
130 }
|
|
131 else {
|
|
132 c = cut - offset;
|
|
133 if (s->end < c)
|
|
134 c = s->end;
|
|
135 l = cut - offset - s->start;
|
|
136 if (s->len < l)
|
|
137 l = s->len;
|
|
138
|
|
139 offset += s->start + l - c;
|
|
140 s->start = c;
|
|
141 s->len = s->len - l;
|
|
142 s->data = s->data + l;
|
|
143
|
82
|
144 break;
|
72
|
145 }
|
|
146 }
|
|
147
|
|
148 src->head = s;
|
|
149 return offset;
|
|
150 }
|
|
151
|
|
152 /* combine hunk lists a and b, while adjusting b for offset changes in a/
|
|
153 this deletes a and b and returns the resultant list. */
|
|
154 static struct flist *combine(struct flist *a, struct flist *b)
|
|
155 {
|
128
|
156 struct flist *c = NULL;
|
|
157 struct frag *bh, *ct;
|
72
|
158 int offset = 0, post;
|
|
159
|
128
|
160 if (a && b)
|
|
161 c = lalloc((lsize(a) + lsize(b)) * 2);
|
|
162
|
|
163 if (c) {
|
72
|
164
|
128
|
165 for (bh = b->head; bh != b->tail; bh++) {
|
|
166 /* save old hunks */
|
|
167 offset = gather(c, a, bh->start, offset);
|
72
|
168
|
128
|
169 /* discard replaced hunks */
|
|
170 post = discard(a, bh->end, offset);
|
72
|
171
|
128
|
172 /* insert new hunk */
|
|
173 ct = c->tail;
|
|
174 ct->start = bh->start - offset;
|
|
175 ct->end = bh->end - post;
|
|
176 ct->len = bh->len;
|
|
177 ct->data = bh->data;
|
|
178 c->tail++;
|
|
179 offset = post;
|
|
180 }
|
|
181
|
|
182 /* hold on to tail from a */
|
|
183 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
|
|
184 c->tail += lsize(a);
|
72
|
185 }
|
|
186
|
|
187 lfree(a);
|
|
188 lfree(b);
|
|
189 return c;
|
|
190 }
|
|
191
|
|
192 /* decode a binary patch into a hunk list */
|
|
193 static struct flist *decode(char *bin, int len)
|
|
194 {
|
|
195 struct flist *l;
|
|
196 struct frag *lt;
|
|
197 char *end = bin + len;
|
|
198
|
|
199 /* assume worst case size, we won't have many of these lists */
|
|
200 l = lalloc(len / 12);
|
|
201 lt = l->tail;
|
|
202
|
|
203 while (bin < end) {
|
|
204 lt->start = ntohl(*(uint32_t *)bin);
|
|
205 lt->end = ntohl(*(uint32_t *)(bin + 4));
|
|
206 lt->len = ntohl(*(uint32_t *)(bin + 8));
|
|
207 lt->data = bin + 12;
|
|
208 bin += 12 + lt->len;
|
|
209 lt++;
|
|
210 }
|
|
211
|
|
212 l->tail = lt;
|
|
213 return l;
|
|
214 }
|
|
215
|
|
216 /* calculate the size of resultant text */
|
|
217 static int calcsize(int len, struct flist *l)
|
|
218 {
|
|
219 int outlen = 0, last = 0;
|
|
220 struct frag *f = l->head;
|
|
221
|
|
222 while (f != l->tail) {
|
|
223 outlen += f->start - last;
|
|
224 last = f->end;
|
|
225 outlen += f->len;
|
|
226 f++;
|
|
227 }
|
|
228
|
|
229 outlen += len - last;
|
|
230 return outlen;
|
|
231 }
|
|
232
|
|
233 static void apply(char *buf, char *orig, int len, struct flist *l)
|
|
234 {
|
|
235 struct frag *f = l->head;
|
|
236 int last = 0;
|
|
237 char *p = buf;
|
|
238
|
|
239 while (f != l->tail) {
|
|
240 memcpy(p, orig + last, f->start - last);
|
|
241 p += f->start - last;
|
|
242 memcpy(p, f->data, f->len);
|
|
243 last = f->end;
|
|
244 p += f->len;
|
|
245 f++;
|
|
246 }
|
|
247 memcpy(p, orig + last, len - last);
|
|
248 }
|
|
249
|
|
250 /* recursively generate a patch of all bins between start and end */
|
|
251 static struct flist *fold(PyObject *bins, int start, int end)
|
|
252 {
|
|
253 int len;
|
|
254
|
|
255 if (start + 1 == end) {
|
|
256 /* trivial case, output a decoded list */
|
|
257 PyObject *tmp = PyList_GetItem(bins, start);
|
128
|
258 if (!tmp)
|
|
259 return NULL;
|
72
|
260 return decode(PyString_AsString(tmp), PyString_Size(tmp));
|
|
261 }
|
|
262
|
|
263 /* divide and conquer, memory management is elsewhere */
|
|
264 len = (end - start) / 2;
|
|
265 return combine(fold(bins, start, start + len),
|
|
266 fold(bins, start + len, end));
|
|
267 }
|
|
268
|
|
269 static PyObject *
|
|
270 patches(PyObject *self, PyObject *args)
|
|
271 {
|
|
272 PyObject *text, *bins, *result;
|
|
273 struct flist *patch;
|
|
274 char *in, *out;
|
|
275 int len, outlen;
|
|
276
|
128
|
277 if (!PyArg_ParseTuple(args, "SO:mpatch", &text, &bins))
|
72
|
278 return NULL;
|
|
279
|
|
280 len = PyList_Size(bins);
|
|
281 if (!len) {
|
|
282 /* nothing to do */
|
|
283 Py_INCREF(text);
|
|
284 return text;
|
|
285 }
|
|
286
|
|
287 patch = fold(bins, 0, len);
|
128
|
288 if (!patch)
|
|
289 return PyErr_NoMemory();
|
|
290
|
72
|
291 outlen = calcsize(PyString_Size(text), patch);
|
|
292 result = PyString_FromStringAndSize(NULL, outlen);
|
128
|
293 if (result) {
|
|
294 in = PyString_AsString(text);
|
|
295 out = PyString_AsString(result);
|
|
296 apply(out, in, PyString_Size(text), patch);
|
|
297 }
|
|
298
|
72
|
299 lfree(patch);
|
|
300 return result;
|
|
301 }
|
|
302
|
|
303 static PyMethodDef methods[] = {
|
|
304 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
|
|
305 {NULL, NULL}
|
|
306 };
|
|
307
|
|
308 PyMODINIT_FUNC
|
|
309 initmpatch(void)
|
|
310 {
|
|
311 Py_InitModule3("mpatch", methods, mpatch_doc);
|
|
312 }
|
|
313
|