Mercurial > hg
comparison mercurial/mpatch.c @ 72:4a6ab4d80dc4
Add an O(m + nlog n) patching extension
author | mpm@selenic.com |
---|---|
date | Mon, 16 May 2005 22:08:33 -0800 |
parents | |
children | 7ed96baa7caa |
comparison
equal
deleted
inserted
replaced
71:47c9a869adee | 72:4a6ab4d80dc4 |
---|---|
1 /* | |
2 mpatch.c - efficient binary patching for Mercurial | |
3 | |
4 This implements a patch algorithm that's O(m + nlog n) where m is the | |
5 size of the output and n is the number of patches. | |
6 | |
7 Given a list of binary patches, it unpacks each into a hunk list, | |
8 then combines the hunk lists with a treewise recursion to form a | |
9 single hunk list. This hunk list is then applied to the original | |
10 text. | |
11 | |
12 The text (or binary) fragments are copied directly from their source | |
13 Python objects into a preallocated output string to avoid the | |
14 allocation of intermediate Python objects. Working memory is about 2x | |
15 the total number of hunks. | |
16 | |
17 Copyright 2005 Matt Mackall <mpm@selenic.com> | |
18 | |
19 This software may be used and distributed according to the terms | |
20 of the GNU General Public License, incorporated herein by reference. | |
21 */ | |
22 | |
23 #include <Python.h> | |
24 #include <stdlib.h> | |
25 #include <string.h> | |
26 #include <netinet/in.h> | |
27 #include <sys/types.h> | |
28 | |
29 static char mpatch_doc[] = "Efficient binary patching."; | |
30 | |
31 struct frag { | |
32 int start, end, len; | |
33 char *data; | |
34 }; | |
35 | |
36 struct flist { | |
37 struct frag *base, *head, *tail; | |
38 }; | |
39 | |
40 static struct flist *lalloc(int size) | |
41 { | |
42 struct flist *a; | |
43 | |
44 a = malloc(sizeof(struct flist)); | |
45 a->head = a->tail = a->base = malloc(sizeof(struct frag) * size); | |
46 return a; | |
47 } | |
48 | |
49 static void lfree(struct flist *a) | |
50 { | |
51 free(a->base); | |
52 free(a); | |
53 } | |
54 | |
55 static int lsize(struct flist *a) | |
56 { | |
57 return a->tail - a->head; | |
58 } | |
59 | |
60 /* move hunks in source that are less cut to dest, compensating | |
61 for changes in offset. the last hunk may be split if necessary. | |
62 */ | |
63 static int gather(struct flist *dest, struct flist *src, int cut, int offset) | |
64 { | |
65 struct frag *d = dest->tail, *s = src->head; | |
66 int postend, c, l; | |
67 | |
68 while (s != src->tail) { | |
69 if (s->start + offset >= cut) | |
70 goto exit; /* we've gone far enough */ | |
71 | |
72 postend = offset + s->start + s->len; | |
73 if (postend <= cut) { | |
74 /* save this hunk */ | |
75 offset += s->start + s->len - s->end; | |
76 *d++ = *s++; | |
77 } | |
78 else { | |
79 /* break up this hunk */ | |
80 c = cut - offset; | |
81 if (s->end < c) | |
82 c = s->end; | |
83 l = cut - offset - s->start; | |
84 if (s->len < l) | |
85 l = s->len; | |
86 | |
87 offset += s->start + l - c; | |
88 | |
89 d->start = s->start; | |
90 d->end = c; | |
91 d->len = l; | |
92 d->data = s->data; | |
93 d++; | |
94 s->start = c; | |
95 s->len = s->len - l; | |
96 s->data = s->data + l; | |
97 | |
98 goto exit; | |
99 } | |
100 } | |
101 | |
102 exit: | |
103 dest->tail = d; | |
104 src->head = s; | |
105 return offset; | |
106 } | |
107 | |
108 /* like gather, but with no output list */ | |
109 static int discard(struct flist *src, int cut, int offset) | |
110 { | |
111 struct frag *s = src->head; | |
112 int postend, c, l; | |
113 | |
114 while (s != src->tail) { | |
115 if (s->start + offset >= cut) | |
116 goto exit; | |
117 | |
118 postend = offset + s->start + s->len; | |
119 if (postend <= cut) { | |
120 offset += s->start + s->len - s->end; | |
121 s++; | |
122 } | |
123 else { | |
124 c = cut - offset; | |
125 if (s->end < c) | |
126 c = s->end; | |
127 l = cut - offset - s->start; | |
128 if (s->len < l) | |
129 l = s->len; | |
130 | |
131 offset += s->start + l - c; | |
132 s->start = c; | |
133 s->len = s->len - l; | |
134 s->data = s->data + l; | |
135 | |
136 goto exit; | |
137 } | |
138 } | |
139 | |
140 exit: | |
141 src->head = s; | |
142 return offset; | |
143 } | |
144 | |
145 /* combine hunk lists a and b, while adjusting b for offset changes in a/ | |
146 this deletes a and b and returns the resultant list. */ | |
147 static struct flist *combine(struct flist *a, struct flist *b) | |
148 { | |
149 struct flist *c; | |
150 struct frag *bh = b->head, *ct; | |
151 int offset = 0, post; | |
152 | |
153 c = lalloc((lsize(a) + lsize(b)) * 2); | |
154 | |
155 while (bh != b->tail) { | |
156 /* save old hunks */ | |
157 offset = gather(c, a, bh->start, offset); | |
158 | |
159 /* discard replaced hunks */ | |
160 post = discard(a, bh->end, offset); | |
161 | |
162 /* insert new hunk */ | |
163 ct = c->tail; | |
164 ct->start = bh->start - offset; | |
165 ct->end = bh->end - post; | |
166 ct->len = bh->len; | |
167 ct->data = bh->data; | |
168 c->tail++; | |
169 bh++; | |
170 offset = post; | |
171 } | |
172 | |
173 /* hold on to tail from a */ | |
174 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a)); | |
175 c->tail += lsize(a); | |
176 lfree(a); | |
177 lfree(b); | |
178 return c; | |
179 } | |
180 | |
181 /* decode a binary patch into a hunk list */ | |
182 static struct flist *decode(char *bin, int len) | |
183 { | |
184 struct flist *l; | |
185 struct frag *lt; | |
186 char *end = bin + len; | |
187 | |
188 /* assume worst case size, we won't have many of these lists */ | |
189 l = lalloc(len / 12); | |
190 lt = l->tail; | |
191 | |
192 while (bin < end) { | |
193 lt->start = ntohl(*(uint32_t *)bin); | |
194 lt->end = ntohl(*(uint32_t *)(bin + 4)); | |
195 lt->len = ntohl(*(uint32_t *)(bin + 8)); | |
196 lt->data = bin + 12; | |
197 bin += 12 + lt->len; | |
198 lt++; | |
199 } | |
200 | |
201 l->tail = lt; | |
202 return l; | |
203 } | |
204 | |
205 /* calculate the size of resultant text */ | |
206 static int calcsize(int len, struct flist *l) | |
207 { | |
208 int outlen = 0, last = 0; | |
209 struct frag *f = l->head; | |
210 | |
211 while (f != l->tail) { | |
212 outlen += f->start - last; | |
213 last = f->end; | |
214 outlen += f->len; | |
215 f++; | |
216 } | |
217 | |
218 outlen += len - last; | |
219 return outlen; | |
220 } | |
221 | |
222 static void apply(char *buf, char *orig, int len, struct flist *l) | |
223 { | |
224 struct frag *f = l->head; | |
225 int last = 0; | |
226 char *p = buf; | |
227 | |
228 while (f != l->tail) { | |
229 memcpy(p, orig + last, f->start - last); | |
230 p += f->start - last; | |
231 memcpy(p, f->data, f->len); | |
232 last = f->end; | |
233 p += f->len; | |
234 f++; | |
235 } | |
236 memcpy(p, orig + last, len - last); | |
237 } | |
238 | |
239 /* recursively generate a patch of all bins between start and end */ | |
240 static struct flist *fold(PyObject *bins, int start, int end) | |
241 { | |
242 int len; | |
243 | |
244 if (start + 1 == end) { | |
245 /* trivial case, output a decoded list */ | |
246 PyObject *tmp = PyList_GetItem(bins, start); | |
247 return decode(PyString_AsString(tmp), PyString_Size(tmp)); | |
248 } | |
249 | |
250 /* divide and conquer, memory management is elsewhere */ | |
251 len = (end - start) / 2; | |
252 return combine(fold(bins, start, start + len), | |
253 fold(bins, start + len, end)); | |
254 } | |
255 | |
256 static PyObject * | |
257 patches(PyObject *self, PyObject *args) | |
258 { | |
259 PyObject *text, *bins, *result; | |
260 struct flist *patch; | |
261 char *in, *out; | |
262 int len, outlen; | |
263 | |
264 if (!PyArg_ParseTuple(args, "OO:mpatch", &text, &bins)) | |
265 return NULL; | |
266 | |
267 len = PyList_Size(bins); | |
268 if (!len) { | |
269 /* nothing to do */ | |
270 Py_INCREF(text); | |
271 return text; | |
272 } | |
273 | |
274 patch = fold(bins, 0, len); | |
275 outlen = calcsize(PyString_Size(text), patch); | |
276 result = PyString_FromStringAndSize(NULL, outlen); | |
277 in = PyString_AsString(text); | |
278 out = PyString_AsString(result); | |
279 apply(out, in, PyString_Size(text), patch); | |
280 lfree(patch); | |
281 | |
282 return result; | |
283 } | |
284 | |
285 static PyMethodDef methods[] = { | |
286 {"patches", patches, METH_VARARGS, "apply a series of patches\n"}, | |
287 {NULL, NULL} | |
288 }; | |
289 | |
290 PyMODINIT_FUNC | |
291 initmpatch(void) | |
292 { | |
293 Py_InitModule3("mpatch", methods, mpatch_doc); | |
294 } | |
295 |