Mercurial > hg
annotate mercurial/mpatch.c @ 33901:f488223a87ab
context: add `decodeddata()` to basefilectx
This will be used as an abstraction by simplemerge to get the data it used to
read off the filesystem.
Differential Revision: https://phab.mercurial-scm.org/D434
author | Phil Cohen <phillco@fb.com> |
---|---|
date | Thu, 24 Aug 2017 21:26:40 -0700 |
parents | 155f0cc3f813 |
children | 347c0f4232e1 |
rev | line source |
---|---|
72 | 1 /* |
2 mpatch.c - efficient binary patching for Mercurial | |
3 | |
4 This implements a patch algorithm that's O(m + nlog n) where m is the | |
5 size of the output and n is the number of patches. | |
6 | |
7 Given a list of binary patches, it unpacks each into a hunk list, | |
8 then combines the hunk lists with a treewise recursion to form a | |
9 single hunk list. This hunk list is then applied to the original | |
10 text. | |
11 | |
12 The text (or binary) fragments are copied directly from their source | |
13 Python objects into a preallocated output string to avoid the | |
14 allocation of intermediate Python objects. Working memory is about 2x | |
15 the total number of hunks. | |
16 | |
2859 | 17 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com> |
72 | 18 |
19 This software may be used and distributed according to the terms | |
20 of the GNU General Public License, incorporated herein by reference. | |
21 */ | |
22 | |
23 #include <stdlib.h> | |
24 #include <string.h> | |
2468
1ac0574f1768
mac os x: fixes for 10.2 from chris monson <monpublic@gmail.com>
Vadim Gelfer <vadim.gelfer@gmail.com>
parents:
2083
diff
changeset
|
25 |
29444
284d742e5611
internals: move the bitmanipulation routines into its own file
Maciej Fijalkowski <fijall@gmail.com>
parents:
28782
diff
changeset
|
26 #include "bitmanipulation.h" |
29691
e9a0bcc9314d
mpatch: change Py_ssize_t to ssize_t in places that will be later copied
Maciej Fijalkowski <fijall@gmail.com>
parents:
29444
diff
changeset
|
27 #include "compat.h" |
29694
55dd12204b8e
mpatch: remove dependency on Python.h in mpatch.c
Maciej Fijalkowski <fijall@gmail.com>
parents:
29693
diff
changeset
|
28 #include "mpatch.h" |
72 | 29 |
29741
9a1685c70db4
mpatch: change lalloc() to local function
Yuya Nishihara <yuya@tcha.org>
parents:
29740
diff
changeset
|
30 static struct mpatch_flist *lalloc(ssize_t size) |
72 | 31 { |
29692
6b3a8d034b69
mpatch: provide things that will be exported later with a prefixed name
Maciej Fijalkowski <fijall@gmail.com>
parents:
29691
diff
changeset
|
32 struct mpatch_flist *a = NULL; |
72 | 33 |
3138
cc856c4d91ca
mpatch: Fix for malloc corner case on AIX
Matt Mackall <mpm@selenic.com>
parents:
2859
diff
changeset
|
34 if (size < 1) |
cc856c4d91ca
mpatch: Fix for malloc corner case on AIX
Matt Mackall <mpm@selenic.com>
parents:
2859
diff
changeset
|
35 size = 1; |
cc856c4d91ca
mpatch: Fix for malloc corner case on AIX
Matt Mackall <mpm@selenic.com>
parents:
2859
diff
changeset
|
36 |
29692
6b3a8d034b69
mpatch: provide things that will be exported later with a prefixed name
Maciej Fijalkowski <fijall@gmail.com>
parents:
29691
diff
changeset
|
37 a = (struct mpatch_flist *)malloc(sizeof(struct mpatch_flist)); |
128 | 38 if (a) { |
29692
6b3a8d034b69
mpatch: provide things that will be exported later with a prefixed name
Maciej Fijalkowski <fijall@gmail.com>
parents:
29691
diff
changeset
|
39 a->base = (struct mpatch_frag *)malloc(sizeof(struct mpatch_frag) * size); |
2048
8f9660c568b8
Set correct exception for another possible malloc error in mpatch.c
Thomas Arendsen Hein <thomas@intevation.de>
parents:
1978
diff
changeset
|
40 if (a->base) { |
128 | 41 a->head = a->tail = a->base; |
2048
8f9660c568b8
Set correct exception for another possible malloc error in mpatch.c
Thomas Arendsen Hein <thomas@intevation.de>
parents:
1978
diff
changeset
|
42 return a; |
8f9660c568b8
Set correct exception for another possible malloc error in mpatch.c
Thomas Arendsen Hein <thomas@intevation.de>
parents:
1978
diff
changeset
|
43 } |
8f9660c568b8
Set correct exception for another possible malloc error in mpatch.c
Thomas Arendsen Hein <thomas@intevation.de>
parents:
1978
diff
changeset
|
44 free(a); |
128 | 45 } |
1722
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
46 return NULL; |
72 | 47 } |
48 | |
29693
b9b9f9a92481
mpatch: split mpatch into two files
Maciej Fijalkowski <fijall@gmail.com>
parents:
29692
diff
changeset
|
49 void mpatch_lfree(struct mpatch_flist *a) |
72 | 50 { |
128 | 51 if (a) { |
52 free(a->base); | |
53 free(a); | |
54 } | |
72 | 55 } |
56 | |
29692
6b3a8d034b69
mpatch: provide things that will be exported later with a prefixed name
Maciej Fijalkowski <fijall@gmail.com>
parents:
29691
diff
changeset
|
57 static ssize_t lsize(struct mpatch_flist *a) |
72 | 58 { |
59 return a->tail - a->head; | |
60 } | |
61 | |
62 /* move hunks in source that are less cut to dest, compensating | |
63 for changes in offset. the last hunk may be split if necessary. | |
64 */ | |
29692
6b3a8d034b69
mpatch: provide things that will be exported later with a prefixed name
Maciej Fijalkowski <fijall@gmail.com>
parents:
29691
diff
changeset
|
65 static int gather(struct mpatch_flist *dest, struct mpatch_flist *src, int cut, |
6b3a8d034b69
mpatch: provide things that will be exported later with a prefixed name
Maciej Fijalkowski <fijall@gmail.com>
parents:
29691
diff
changeset
|
66 int offset) |
72 | 67 { |
29692
6b3a8d034b69
mpatch: provide things that will be exported later with a prefixed name
Maciej Fijalkowski <fijall@gmail.com>
parents:
29691
diff
changeset
|
68 struct mpatch_frag *d = dest->tail, *s = src->head; |
72 | 69 int postend, c, l; |
70 | |
71 while (s != src->tail) { | |
72 if (s->start + offset >= cut) | |
82 | 73 break; /* we've gone far enough */ |
72 | 74 |
75 postend = offset + s->start + s->len; | |
76 if (postend <= cut) { | |
77 /* save this hunk */ | |
78 offset += s->start + s->len - s->end; | |
79 *d++ = *s++; | |
80 } | |
81 else { | |
82 /* break up this hunk */ | |
83 c = cut - offset; | |
84 if (s->end < c) | |
85 c = s->end; | |
86 l = cut - offset - s->start; | |
87 if (s->len < l) | |
88 l = s->len; | |
89 | |
90 offset += s->start + l - c; | |
91 | |
92 d->start = s->start; | |
93 d->end = c; | |
94 d->len = l; | |
95 d->data = s->data; | |
96 d++; | |
97 s->start = c; | |
98 s->len = s->len - l; | |
99 s->data = s->data + l; | |
100 | |
82 | 101 break; |
72 | 102 } |
103 } | |
104 | |
105 dest->tail = d; | |
106 src->head = s; | |
107 return offset; | |
108 } | |
109 | |
110 /* like gather, but with no output list */ | |
29692
6b3a8d034b69
mpatch: provide things that will be exported later with a prefixed name
Maciej Fijalkowski <fijall@gmail.com>
parents:
29691
diff
changeset
|
111 static int discard(struct mpatch_flist *src, int cut, int offset) |
72 | 112 { |
29692
6b3a8d034b69
mpatch: provide things that will be exported later with a prefixed name
Maciej Fijalkowski <fijall@gmail.com>
parents:
29691
diff
changeset
|
113 struct mpatch_frag *s = src->head; |
72 | 114 int postend, c, l; |
115 | |
116 while (s != src->tail) { | |
117 if (s->start + offset >= cut) | |
82 | 118 break; |
72 | 119 |
120 postend = offset + s->start + s->len; | |
121 if (postend <= cut) { | |
122 offset += s->start + s->len - s->end; | |
123 s++; | |
124 } | |
125 else { | |
126 c = cut - offset; | |
127 if (s->end < c) | |
128 c = s->end; | |
129 l = cut - offset - s->start; | |
130 if (s->len < l) | |
131 l = s->len; | |
132 | |
133 offset += s->start + l - c; | |
134 s->start = c; | |
135 s->len = s->len - l; | |
136 s->data = s->data + l; | |
137 | |
82 | 138 break; |
72 | 139 } |
140 } | |
141 | |
142 src->head = s; | |
143 return offset; | |
144 } | |
145 | |
146 /* combine hunk lists a and b, while adjusting b for offset changes in a/ | |
147 this deletes a and b and returns the resultant list. */ | |
29692
6b3a8d034b69
mpatch: provide things that will be exported later with a prefixed name
Maciej Fijalkowski <fijall@gmail.com>
parents:
29691
diff
changeset
|
148 static struct mpatch_flist *combine(struct mpatch_flist *a, |
6b3a8d034b69
mpatch: provide things that will be exported later with a prefixed name
Maciej Fijalkowski <fijall@gmail.com>
parents:
29691
diff
changeset
|
149 struct mpatch_flist *b) |
72 | 150 { |
29692
6b3a8d034b69
mpatch: provide things that will be exported later with a prefixed name
Maciej Fijalkowski <fijall@gmail.com>
parents:
29691
diff
changeset
|
151 struct mpatch_flist *c = NULL; |
6b3a8d034b69
mpatch: provide things that will be exported later with a prefixed name
Maciej Fijalkowski <fijall@gmail.com>
parents:
29691
diff
changeset
|
152 struct mpatch_frag *bh, *ct; |
72 | 153 int offset = 0, post; |
154 | |
128 | 155 if (a && b) |
156 c = lalloc((lsize(a) + lsize(b)) * 2); | |
157 | |
158 if (c) { | |
72 | 159 |
128 | 160 for (bh = b->head; bh != b->tail; bh++) { |
161 /* save old hunks */ | |
162 offset = gather(c, a, bh->start, offset); | |
72 | 163 |
128 | 164 /* discard replaced hunks */ |
165 post = discard(a, bh->end, offset); | |
72 | 166 |
128 | 167 /* insert new hunk */ |
168 ct = c->tail; | |
169 ct->start = bh->start - offset; | |
170 ct->end = bh->end - post; | |
171 ct->len = bh->len; | |
172 ct->data = bh->data; | |
173 c->tail++; | |
174 offset = post; | |
175 } | |
176 | |
177 /* hold on to tail from a */ | |
29692
6b3a8d034b69
mpatch: provide things that will be exported later with a prefixed name
Maciej Fijalkowski <fijall@gmail.com>
parents:
29691
diff
changeset
|
178 memcpy(c->tail, a->head, sizeof(struct mpatch_frag) * lsize(a)); |
128 | 179 c->tail += lsize(a); |
72 | 180 } |
181 | |
29692
6b3a8d034b69
mpatch: provide things that will be exported later with a prefixed name
Maciej Fijalkowski <fijall@gmail.com>
parents:
29691
diff
changeset
|
182 mpatch_lfree(a); |
6b3a8d034b69
mpatch: provide things that will be exported later with a prefixed name
Maciej Fijalkowski <fijall@gmail.com>
parents:
29691
diff
changeset
|
183 mpatch_lfree(b); |
72 | 184 return c; |
185 } | |
186 | |
187 /* decode a binary patch into a hunk list */ | |
29694
55dd12204b8e
mpatch: remove dependency on Python.h in mpatch.c
Maciej Fijalkowski <fijall@gmail.com>
parents:
29693
diff
changeset
|
188 int mpatch_decode(const char *bin, ssize_t len, struct mpatch_flist **res) |
72 | 189 { |
29692
6b3a8d034b69
mpatch: provide things that will be exported later with a prefixed name
Maciej Fijalkowski <fijall@gmail.com>
parents:
29691
diff
changeset
|
190 struct mpatch_flist *l; |
6b3a8d034b69
mpatch: provide things that will be exported later with a prefixed name
Maciej Fijalkowski <fijall@gmail.com>
parents:
29691
diff
changeset
|
191 struct mpatch_frag *lt; |
20167
09e41ac6289d
mpatch: rewrite pointer overflow checks
Matt Mackall <mpm@selenic.com>
parents:
16758
diff
changeset
|
192 int pos = 0; |
72 | 193 |
194 /* assume worst case size, we won't have many of these lists */ | |
28656
b6ed2505d6cf
parsers: fix list sizing rounding error (SEC)
Matt Mackall <mpm@selenic.com>
parents:
20167
diff
changeset
|
195 l = lalloc(len / 12 + 1); |
1722
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
196 if (!l) |
29694
55dd12204b8e
mpatch: remove dependency on Python.h in mpatch.c
Maciej Fijalkowski <fijall@gmail.com>
parents:
29693
diff
changeset
|
197 return MPATCH_ERR_NO_MEM; |
1722
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
198 |
72 | 199 lt = l->tail; |
200 | |
20167
09e41ac6289d
mpatch: rewrite pointer overflow checks
Matt Mackall <mpm@selenic.com>
parents:
16758
diff
changeset
|
201 while (pos >= 0 && pos < len) { |
09e41ac6289d
mpatch: rewrite pointer overflow checks
Matt Mackall <mpm@selenic.com>
parents:
16758
diff
changeset
|
202 lt->start = getbe32(bin + pos); |
09e41ac6289d
mpatch: rewrite pointer overflow checks
Matt Mackall <mpm@selenic.com>
parents:
16758
diff
changeset
|
203 lt->end = getbe32(bin + pos + 4); |
09e41ac6289d
mpatch: rewrite pointer overflow checks
Matt Mackall <mpm@selenic.com>
parents:
16758
diff
changeset
|
204 lt->len = getbe32(bin + pos + 8); |
09e41ac6289d
mpatch: rewrite pointer overflow checks
Matt Mackall <mpm@selenic.com>
parents:
16758
diff
changeset
|
205 lt->data = bin + pos + 12; |
09e41ac6289d
mpatch: rewrite pointer overflow checks
Matt Mackall <mpm@selenic.com>
parents:
16758
diff
changeset
|
206 pos += 12 + lt->len; |
28657
b9714d958e89
parsers: detect short records (SEC)
Matt Mackall <mpm@selenic.com>
parents:
28656
diff
changeset
|
207 if (lt->start > lt->end || lt->len < 0) |
b9714d958e89
parsers: detect short records (SEC)
Matt Mackall <mpm@selenic.com>
parents:
28656
diff
changeset
|
208 break; /* sanity check */ |
72 | 209 lt++; |
210 } | |
211 | |
20167
09e41ac6289d
mpatch: rewrite pointer overflow checks
Matt Mackall <mpm@selenic.com>
parents:
16758
diff
changeset
|
212 if (pos != len) { |
29692
6b3a8d034b69
mpatch: provide things that will be exported later with a prefixed name
Maciej Fijalkowski <fijall@gmail.com>
parents:
29691
diff
changeset
|
213 mpatch_lfree(l); |
29694
55dd12204b8e
mpatch: remove dependency on Python.h in mpatch.c
Maciej Fijalkowski <fijall@gmail.com>
parents:
29693
diff
changeset
|
214 return MPATCH_ERR_CANNOT_BE_DECODED; |
1722
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
215 } |
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
216 |
72 | 217 l->tail = lt; |
29694
55dd12204b8e
mpatch: remove dependency on Python.h in mpatch.c
Maciej Fijalkowski <fijall@gmail.com>
parents:
29693
diff
changeset
|
218 *res = l; |
55dd12204b8e
mpatch: remove dependency on Python.h in mpatch.c
Maciej Fijalkowski <fijall@gmail.com>
parents:
29693
diff
changeset
|
219 return 0; |
72 | 220 } |
221 | |
222 /* calculate the size of resultant text */ | |
29693
b9b9f9a92481
mpatch: split mpatch into two files
Maciej Fijalkowski <fijall@gmail.com>
parents:
29692
diff
changeset
|
223 ssize_t mpatch_calcsize(ssize_t len, struct mpatch_flist *l) |
72 | 224 { |
29691
e9a0bcc9314d
mpatch: change Py_ssize_t to ssize_t in places that will be later copied
Maciej Fijalkowski <fijall@gmail.com>
parents:
29444
diff
changeset
|
225 ssize_t outlen = 0, last = 0; |
29692
6b3a8d034b69
mpatch: provide things that will be exported later with a prefixed name
Maciej Fijalkowski <fijall@gmail.com>
parents:
29691
diff
changeset
|
226 struct mpatch_frag *f = l->head; |
72 | 227 |
228 while (f != l->tail) { | |
1722
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
229 if (f->start < last || f->end > len) { |
29694
55dd12204b8e
mpatch: remove dependency on Python.h in mpatch.c
Maciej Fijalkowski <fijall@gmail.com>
parents:
29693
diff
changeset
|
230 return MPATCH_ERR_INVALID_PATCH; |
1722
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
231 } |
72 | 232 outlen += f->start - last; |
233 last = f->end; | |
234 outlen += f->len; | |
235 f++; | |
236 } | |
237 | |
238 outlen += len - last; | |
239 return outlen; | |
240 } | |
241 | |
29693
b9b9f9a92481
mpatch: split mpatch into two files
Maciej Fijalkowski <fijall@gmail.com>
parents:
29692
diff
changeset
|
242 int mpatch_apply(char *buf, const char *orig, ssize_t len, |
29692
6b3a8d034b69
mpatch: provide things that will be exported later with a prefixed name
Maciej Fijalkowski <fijall@gmail.com>
parents:
29691
diff
changeset
|
243 struct mpatch_flist *l) |
72 | 244 { |
29692
6b3a8d034b69
mpatch: provide things that will be exported later with a prefixed name
Maciej Fijalkowski <fijall@gmail.com>
parents:
29691
diff
changeset
|
245 struct mpatch_frag *f = l->head; |
72 | 246 int last = 0; |
247 char *p = buf; | |
248 | |
249 while (f != l->tail) { | |
1722
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
250 if (f->start < last || f->end > len) { |
29694
55dd12204b8e
mpatch: remove dependency on Python.h in mpatch.c
Maciej Fijalkowski <fijall@gmail.com>
parents:
29693
diff
changeset
|
251 return MPATCH_ERR_INVALID_PATCH; |
1722
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
252 } |
72 | 253 memcpy(p, orig + last, f->start - last); |
254 p += f->start - last; | |
255 memcpy(p, f->data, f->len); | |
256 last = f->end; | |
257 p += f->len; | |
258 f++; | |
259 } | |
260 memcpy(p, orig + last, len - last); | |
29694
55dd12204b8e
mpatch: remove dependency on Python.h in mpatch.c
Maciej Fijalkowski <fijall@gmail.com>
parents:
29693
diff
changeset
|
261 return 0; |
72 | 262 } |
263 | |
264 /* recursively generate a patch of all bins between start and end */ | |
29694
55dd12204b8e
mpatch: remove dependency on Python.h in mpatch.c
Maciej Fijalkowski <fijall@gmail.com>
parents:
29693
diff
changeset
|
265 struct mpatch_flist *mpatch_fold(void *bins, |
55dd12204b8e
mpatch: remove dependency on Python.h in mpatch.c
Maciej Fijalkowski <fijall@gmail.com>
parents:
29693
diff
changeset
|
266 struct mpatch_flist* (*get_next_item)(void*, ssize_t), |
55dd12204b8e
mpatch: remove dependency on Python.h in mpatch.c
Maciej Fijalkowski <fijall@gmail.com>
parents:
29693
diff
changeset
|
267 ssize_t start, ssize_t end) |
72 | 268 { |
29694
55dd12204b8e
mpatch: remove dependency on Python.h in mpatch.c
Maciej Fijalkowski <fijall@gmail.com>
parents:
29693
diff
changeset
|
269 ssize_t len; |
72 | 270 |
271 if (start + 1 == end) { | |
272 /* trivial case, output a decoded list */ | |
29694
55dd12204b8e
mpatch: remove dependency on Python.h in mpatch.c
Maciej Fijalkowski <fijall@gmail.com>
parents:
29693
diff
changeset
|
273 return get_next_item(bins, start); |
72 | 274 } |
275 | |
276 /* divide and conquer, memory management is elsewhere */ | |
277 len = (end - start) / 2; | |
29694
55dd12204b8e
mpatch: remove dependency on Python.h in mpatch.c
Maciej Fijalkowski <fijall@gmail.com>
parents:
29693
diff
changeset
|
278 return combine(mpatch_fold(bins, get_next_item, start, start + len), |
55dd12204b8e
mpatch: remove dependency on Python.h in mpatch.c
Maciej Fijalkowski <fijall@gmail.com>
parents:
29693
diff
changeset
|
279 mpatch_fold(bins, get_next_item, start + len, end)); |
72 | 280 } |