Mercurial > hg
annotate mercurial/mpatch.c @ 35616:706aa203b396
fileset: add a lightweight file filtering language
This patch was inspired by one that Jun Wu authored for the fb-experimental
repo, to avoid using matcher for efficiency[1]. We want a way to specify what
files will be converted to LFS at commit time. And per discussion, we also want
to specify what files to skip, text diff, or merge in another config option.
The current `lfs.threshold` config option could not satisfy complex needs. I'm
putting it in a core package because Augie floated the idea of also using it for
narrow and sparse.
Yuya suggested farming out to fileset.parse(), which added support for more
symbols. The only fileset element not supported here is 'negate'. (List isn't
supported by filesets either.) I also changed the 'always' token to the 'all()'
predicate for consistency, and introduced 'none()' to improve readability in a
future tracked file based config. The extension operator was changed from '.'
to '**', to match how recursive path globs are specified. Finally, I changed
the path matcher from '/' to 'path:' at Yuya's suggestion, for consistency with
matcher. Unfortunately, ':' is currently reserved in filesets, so this has to
be quoted to be processed as a string instead of a symbol[2]. We should
probably revisit that, because it's seriously ugly. But it's only used by an
experimental extension, and I think using a file based config for LFS may drive
some more tweaks, so I'm settling for this for now.
I reserved all of the glob characters in fileset except '.' and '_' for the
extension test because those are likely valid extension characters.
Sample filter settings:
all() # everything
size(">20MB") # larger than 20MB
!**.txt # except for .txt files
**.zip | **.tar.gz | **.7z # some types of compressed files
"path:bin" # files under "bin" in the project root
[1] https://www.mercurial-scm.org/pipermail/mercurial-devel/2017-December/109387.html
[2] https://www.mercurial-scm.org/pipermail/mercurial-devel/2018-January/109729.html
author | Matt Harbison <matt_harbison@yahoo.com> |
---|---|
date | Wed, 10 Jan 2018 22:23:34 -0500 |
parents | 1f4249c764f1 |
children | 90a274965de7 |
rev | line source |
---|---|
72 | 1 /* |
2 mpatch.c - efficient binary patching for Mercurial | |
3 | |
4 This implements a patch algorithm that's O(m + nlog n) where m is the | |
5 size of the output and n is the number of patches. | |
6 | |
7 Given a list of binary patches, it unpacks each into a hunk list, | |
8 then combines the hunk lists with a treewise recursion to form a | |
9 single hunk list. This hunk list is then applied to the original | |
10 text. | |
11 | |
12 The text (or binary) fragments are copied directly from their source | |
13 Python objects into a preallocated output string to avoid the | |
14 allocation of intermediate Python objects. Working memory is about 2x | |
15 the total number of hunks. | |
16 | |
2859 | 17 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com> |
72 | 18 |
19 This software may be used and distributed according to the terms | |
20 of the GNU General Public License, incorporated herein by reference. | |
21 */ | |
22 | |
23 #include <stdlib.h> | |
24 #include <string.h> | |
2468
1ac0574f1768
mac os x: fixes for 10.2 from chris monson <monpublic@gmail.com>
Vadim Gelfer <vadim.gelfer@gmail.com>
parents:
2083
diff
changeset
|
25 |
29444
284d742e5611
internals: move the bitmanipulation routines into its own file
Maciej Fijalkowski <fijall@gmail.com>
parents:
28782
diff
changeset
|
26 #include "bitmanipulation.h" |
29691
e9a0bcc9314d
mpatch: change Py_ssize_t to ssize_t in places that will be later copied
Maciej Fijalkowski <fijall@gmail.com>
parents:
29444
diff
changeset
|
27 #include "compat.h" |
29694
55dd12204b8e
mpatch: remove dependency on Python.h in mpatch.c
Maciej Fijalkowski <fijall@gmail.com>
parents:
29693
diff
changeset
|
28 #include "mpatch.h" |
72 | 29 |
29741
9a1685c70db4
mpatch: change lalloc() to local function
Yuya Nishihara <yuya@tcha.org>
parents:
29740
diff
changeset
|
30 static struct mpatch_flist *lalloc(ssize_t size) |
72 | 31 { |
29692
6b3a8d034b69
mpatch: provide things that will be exported later with a prefixed name
Maciej Fijalkowski <fijall@gmail.com>
parents:
29691
diff
changeset
|
32 struct mpatch_flist *a = NULL; |
72 | 33 |
3138
cc856c4d91ca
mpatch: Fix for malloc corner case on AIX
Matt Mackall <mpm@selenic.com>
parents:
2859
diff
changeset
|
34 if (size < 1) |
cc856c4d91ca
mpatch: Fix for malloc corner case on AIX
Matt Mackall <mpm@selenic.com>
parents:
2859
diff
changeset
|
35 size = 1; |
cc856c4d91ca
mpatch: Fix for malloc corner case on AIX
Matt Mackall <mpm@selenic.com>
parents:
2859
diff
changeset
|
36 |
29692
6b3a8d034b69
mpatch: provide things that will be exported later with a prefixed name
Maciej Fijalkowski <fijall@gmail.com>
parents:
29691
diff
changeset
|
37 a = (struct mpatch_flist *)malloc(sizeof(struct mpatch_flist)); |
128 | 38 if (a) { |
34633
347c0f4232e1
mpatch: re-wrap wide line with clang-format
Augie Fackler <augie@google.com>
parents:
29749
diff
changeset
|
39 a->base = (struct mpatch_frag *)malloc( |
347c0f4232e1
mpatch: re-wrap wide line with clang-format
Augie Fackler <augie@google.com>
parents:
29749
diff
changeset
|
40 sizeof(struct mpatch_frag) * size); |
2048
8f9660c568b8
Set correct exception for another possible malloc error in mpatch.c
Thomas Arendsen Hein <thomas@intevation.de>
parents:
1978
diff
changeset
|
41 if (a->base) { |
128 | 42 a->head = a->tail = a->base; |
2048
8f9660c568b8
Set correct exception for another possible malloc error in mpatch.c
Thomas Arendsen Hein <thomas@intevation.de>
parents:
1978
diff
changeset
|
43 return a; |
8f9660c568b8
Set correct exception for another possible malloc error in mpatch.c
Thomas Arendsen Hein <thomas@intevation.de>
parents:
1978
diff
changeset
|
44 } |
8f9660c568b8
Set correct exception for another possible malloc error in mpatch.c
Thomas Arendsen Hein <thomas@intevation.de>
parents:
1978
diff
changeset
|
45 free(a); |
128 | 46 } |
1722
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
47 return NULL; |
72 | 48 } |
49 | |
29693
b9b9f9a92481
mpatch: split mpatch into two files
Maciej Fijalkowski <fijall@gmail.com>
parents:
29692
diff
changeset
|
50 void mpatch_lfree(struct mpatch_flist *a) |
72 | 51 { |
128 | 52 if (a) { |
53 free(a->base); | |
54 free(a); | |
55 } | |
72 | 56 } |
57 | |
29692
6b3a8d034b69
mpatch: provide things that will be exported later with a prefixed name
Maciej Fijalkowski <fijall@gmail.com>
parents:
29691
diff
changeset
|
58 static ssize_t lsize(struct mpatch_flist *a) |
72 | 59 { |
60 return a->tail - a->head; | |
61 } | |
62 | |
63 /* move hunks in source that are less cut to dest, compensating | |
64 for changes in offset. the last hunk may be split if necessary. | |
65 */ | |
29692
6b3a8d034b69
mpatch: provide things that will be exported later with a prefixed name
Maciej Fijalkowski <fijall@gmail.com>
parents:
29691
diff
changeset
|
66 static int gather(struct mpatch_flist *dest, struct mpatch_flist *src, int cut, |
34800
761355833867
mpatch: reformat function prototypes with clang-format
Augie Fackler <augie@google.com>
parents:
34634
diff
changeset
|
67 int offset) |
72 | 68 { |
29692
6b3a8d034b69
mpatch: provide things that will be exported later with a prefixed name
Maciej Fijalkowski <fijall@gmail.com>
parents:
29691
diff
changeset
|
69 struct mpatch_frag *d = dest->tail, *s = src->head; |
72 | 70 int postend, c, l; |
71 | |
72 while (s != src->tail) { | |
73 if (s->start + offset >= cut) | |
82 | 74 break; /* we've gone far enough */ |
72 | 75 |
76 postend = offset + s->start + s->len; | |
77 if (postend <= cut) { | |
78 /* save this hunk */ | |
79 offset += s->start + s->len - s->end; | |
80 *d++ = *s++; | |
34634
2e08b69bcd29
mpatch: reflow two oddly formatted else blocks with clang-format
Augie Fackler <augie@google.com>
parents:
34633
diff
changeset
|
81 } else { |
72 | 82 /* break up this hunk */ |
83 c = cut - offset; | |
84 if (s->end < c) | |
85 c = s->end; | |
86 l = cut - offset - s->start; | |
87 if (s->len < l) | |
88 l = s->len; | |
89 | |
90 offset += s->start + l - c; | |
91 | |
92 d->start = s->start; | |
93 d->end = c; | |
94 d->len = l; | |
95 d->data = s->data; | |
96 d++; | |
97 s->start = c; | |
98 s->len = s->len - l; | |
99 s->data = s->data + l; | |
100 | |
82 | 101 break; |
72 | 102 } |
103 } | |
104 | |
105 dest->tail = d; | |
106 src->head = s; | |
107 return offset; | |
108 } | |
109 | |
110 /* like gather, but with no output list */ | |
29692
6b3a8d034b69
mpatch: provide things that will be exported later with a prefixed name
Maciej Fijalkowski <fijall@gmail.com>
parents:
29691
diff
changeset
|
111 static int discard(struct mpatch_flist *src, int cut, int offset) |
72 | 112 { |
29692
6b3a8d034b69
mpatch: provide things that will be exported later with a prefixed name
Maciej Fijalkowski <fijall@gmail.com>
parents:
29691
diff
changeset
|
113 struct mpatch_frag *s = src->head; |
72 | 114 int postend, c, l; |
115 | |
116 while (s != src->tail) { | |
117 if (s->start + offset >= cut) | |
82 | 118 break; |
72 | 119 |
120 postend = offset + s->start + s->len; | |
121 if (postend <= cut) { | |
122 offset += s->start + s->len - s->end; | |
123 s++; | |
34634
2e08b69bcd29
mpatch: reflow two oddly formatted else blocks with clang-format
Augie Fackler <augie@google.com>
parents:
34633
diff
changeset
|
124 } else { |
72 | 125 c = cut - offset; |
126 if (s->end < c) | |
127 c = s->end; | |
128 l = cut - offset - s->start; | |
129 if (s->len < l) | |
130 l = s->len; | |
131 | |
132 offset += s->start + l - c; | |
133 s->start = c; | |
134 s->len = s->len - l; | |
135 s->data = s->data + l; | |
136 | |
82 | 137 break; |
72 | 138 } |
139 } | |
140 | |
141 src->head = s; | |
142 return offset; | |
143 } | |
144 | |
145 /* combine hunk lists a and b, while adjusting b for offset changes in a/ | |
146 this deletes a and b and returns the resultant list. */ | |
29692
6b3a8d034b69
mpatch: provide things that will be exported later with a prefixed name
Maciej Fijalkowski <fijall@gmail.com>
parents:
29691
diff
changeset
|
147 static struct mpatch_flist *combine(struct mpatch_flist *a, |
34800
761355833867
mpatch: reformat function prototypes with clang-format
Augie Fackler <augie@google.com>
parents:
34634
diff
changeset
|
148 struct mpatch_flist *b) |
72 | 149 { |
29692
6b3a8d034b69
mpatch: provide things that will be exported later with a prefixed name
Maciej Fijalkowski <fijall@gmail.com>
parents:
29691
diff
changeset
|
150 struct mpatch_flist *c = NULL; |
6b3a8d034b69
mpatch: provide things that will be exported later with a prefixed name
Maciej Fijalkowski <fijall@gmail.com>
parents:
29691
diff
changeset
|
151 struct mpatch_frag *bh, *ct; |
72 | 152 int offset = 0, post; |
153 | |
128 | 154 if (a && b) |
155 c = lalloc((lsize(a) + lsize(b)) * 2); | |
156 | |
157 if (c) { | |
72 | 158 |
128 | 159 for (bh = b->head; bh != b->tail; bh++) { |
160 /* save old hunks */ | |
161 offset = gather(c, a, bh->start, offset); | |
72 | 162 |
128 | 163 /* discard replaced hunks */ |
164 post = discard(a, bh->end, offset); | |
72 | 165 |
128 | 166 /* insert new hunk */ |
167 ct = c->tail; | |
168 ct->start = bh->start - offset; | |
169 ct->end = bh->end - post; | |
170 ct->len = bh->len; | |
171 ct->data = bh->data; | |
172 c->tail++; | |
173 offset = post; | |
174 } | |
175 | |
176 /* hold on to tail from a */ | |
29692
6b3a8d034b69
mpatch: provide things that will be exported later with a prefixed name
Maciej Fijalkowski <fijall@gmail.com>
parents:
29691
diff
changeset
|
177 memcpy(c->tail, a->head, sizeof(struct mpatch_frag) * lsize(a)); |
128 | 178 c->tail += lsize(a); |
72 | 179 } |
180 | |
29692
6b3a8d034b69
mpatch: provide things that will be exported later with a prefixed name
Maciej Fijalkowski <fijall@gmail.com>
parents:
29691
diff
changeset
|
181 mpatch_lfree(a); |
6b3a8d034b69
mpatch: provide things that will be exported later with a prefixed name
Maciej Fijalkowski <fijall@gmail.com>
parents:
29691
diff
changeset
|
182 mpatch_lfree(b); |
72 | 183 return c; |
184 } | |
185 | |
186 /* decode a binary patch into a hunk list */ | |
29694
55dd12204b8e
mpatch: remove dependency on Python.h in mpatch.c
Maciej Fijalkowski <fijall@gmail.com>
parents:
29693
diff
changeset
|
187 int mpatch_decode(const char *bin, ssize_t len, struct mpatch_flist **res) |
72 | 188 { |
29692
6b3a8d034b69
mpatch: provide things that will be exported later with a prefixed name
Maciej Fijalkowski <fijall@gmail.com>
parents:
29691
diff
changeset
|
189 struct mpatch_flist *l; |
6b3a8d034b69
mpatch: provide things that will be exported later with a prefixed name
Maciej Fijalkowski <fijall@gmail.com>
parents:
29691
diff
changeset
|
190 struct mpatch_frag *lt; |
20167
09e41ac6289d
mpatch: rewrite pointer overflow checks
Matt Mackall <mpm@selenic.com>
parents:
16758
diff
changeset
|
191 int pos = 0; |
72 | 192 |
193 /* assume worst case size, we won't have many of these lists */ | |
28656
b6ed2505d6cf
parsers: fix list sizing rounding error (SEC)
Matt Mackall <mpm@selenic.com>
parents:
20167
diff
changeset
|
194 l = lalloc(len / 12 + 1); |
1722
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
195 if (!l) |
29694
55dd12204b8e
mpatch: remove dependency on Python.h in mpatch.c
Maciej Fijalkowski <fijall@gmail.com>
parents:
29693
diff
changeset
|
196 return MPATCH_ERR_NO_MEM; |
1722
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
197 |
72 | 198 lt = l->tail; |
199 | |
20167
09e41ac6289d
mpatch: rewrite pointer overflow checks
Matt Mackall <mpm@selenic.com>
parents:
16758
diff
changeset
|
200 while (pos >= 0 && pos < len) { |
09e41ac6289d
mpatch: rewrite pointer overflow checks
Matt Mackall <mpm@selenic.com>
parents:
16758
diff
changeset
|
201 lt->start = getbe32(bin + pos); |
09e41ac6289d
mpatch: rewrite pointer overflow checks
Matt Mackall <mpm@selenic.com>
parents:
16758
diff
changeset
|
202 lt->end = getbe32(bin + pos + 4); |
09e41ac6289d
mpatch: rewrite pointer overflow checks
Matt Mackall <mpm@selenic.com>
parents:
16758
diff
changeset
|
203 lt->len = getbe32(bin + pos + 8); |
09e41ac6289d
mpatch: rewrite pointer overflow checks
Matt Mackall <mpm@selenic.com>
parents:
16758
diff
changeset
|
204 lt->data = bin + pos + 12; |
09e41ac6289d
mpatch: rewrite pointer overflow checks
Matt Mackall <mpm@selenic.com>
parents:
16758
diff
changeset
|
205 pos += 12 + lt->len; |
28657
b9714d958e89
parsers: detect short records (SEC)
Matt Mackall <mpm@selenic.com>
parents:
28656
diff
changeset
|
206 if (lt->start > lt->end || lt->len < 0) |
b9714d958e89
parsers: detect short records (SEC)
Matt Mackall <mpm@selenic.com>
parents:
28656
diff
changeset
|
207 break; /* sanity check */ |
72 | 208 lt++; |
209 } | |
210 | |
20167
09e41ac6289d
mpatch: rewrite pointer overflow checks
Matt Mackall <mpm@selenic.com>
parents:
16758
diff
changeset
|
211 if (pos != len) { |
29692
6b3a8d034b69
mpatch: provide things that will be exported later with a prefixed name
Maciej Fijalkowski <fijall@gmail.com>
parents:
29691
diff
changeset
|
212 mpatch_lfree(l); |
29694
55dd12204b8e
mpatch: remove dependency on Python.h in mpatch.c
Maciej Fijalkowski <fijall@gmail.com>
parents:
29693
diff
changeset
|
213 return MPATCH_ERR_CANNOT_BE_DECODED; |
1722
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
214 } |
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
215 |
72 | 216 l->tail = lt; |
29694
55dd12204b8e
mpatch: remove dependency on Python.h in mpatch.c
Maciej Fijalkowski <fijall@gmail.com>
parents:
29693
diff
changeset
|
217 *res = l; |
55dd12204b8e
mpatch: remove dependency on Python.h in mpatch.c
Maciej Fijalkowski <fijall@gmail.com>
parents:
29693
diff
changeset
|
218 return 0; |
72 | 219 } |
220 | |
221 /* calculate the size of resultant text */ | |
29693
b9b9f9a92481
mpatch: split mpatch into two files
Maciej Fijalkowski <fijall@gmail.com>
parents:
29692
diff
changeset
|
222 ssize_t mpatch_calcsize(ssize_t len, struct mpatch_flist *l) |
72 | 223 { |
29691
e9a0bcc9314d
mpatch: change Py_ssize_t to ssize_t in places that will be later copied
Maciej Fijalkowski <fijall@gmail.com>
parents:
29444
diff
changeset
|
224 ssize_t outlen = 0, last = 0; |
29692
6b3a8d034b69
mpatch: provide things that will be exported later with a prefixed name
Maciej Fijalkowski <fijall@gmail.com>
parents:
29691
diff
changeset
|
225 struct mpatch_frag *f = l->head; |
72 | 226 |
227 while (f != l->tail) { | |
1722
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
228 if (f->start < last || f->end > len) { |
29694
55dd12204b8e
mpatch: remove dependency on Python.h in mpatch.c
Maciej Fijalkowski <fijall@gmail.com>
parents:
29693
diff
changeset
|
229 return MPATCH_ERR_INVALID_PATCH; |
1722
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
230 } |
72 | 231 outlen += f->start - last; |
232 last = f->end; | |
233 outlen += f->len; | |
234 f++; | |
235 } | |
236 | |
237 outlen += len - last; | |
238 return outlen; | |
239 } | |
240 | |
29693
b9b9f9a92481
mpatch: split mpatch into two files
Maciej Fijalkowski <fijall@gmail.com>
parents:
29692
diff
changeset
|
241 int mpatch_apply(char *buf, const char *orig, ssize_t len, |
34800
761355833867
mpatch: reformat function prototypes with clang-format
Augie Fackler <augie@google.com>
parents:
34634
diff
changeset
|
242 struct mpatch_flist *l) |
72 | 243 { |
29692
6b3a8d034b69
mpatch: provide things that will be exported later with a prefixed name
Maciej Fijalkowski <fijall@gmail.com>
parents:
29691
diff
changeset
|
244 struct mpatch_frag *f = l->head; |
72 | 245 int last = 0; |
246 char *p = buf; | |
247 | |
248 while (f != l->tail) { | |
1722
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
249 if (f->start < last || f->end > len) { |
29694
55dd12204b8e
mpatch: remove dependency on Python.h in mpatch.c
Maciej Fijalkowski <fijall@gmail.com>
parents:
29693
diff
changeset
|
250 return MPATCH_ERR_INVALID_PATCH; |
1722
681c5c211b92
catch errors and throw exception with invalid binary patch data
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
597
diff
changeset
|
251 } |
72 | 252 memcpy(p, orig + last, f->start - last); |
253 p += f->start - last; | |
254 memcpy(p, f->data, f->len); | |
255 last = f->end; | |
256 p += f->len; | |
257 f++; | |
258 } | |
259 memcpy(p, orig + last, len - last); | |
29694
55dd12204b8e
mpatch: remove dependency on Python.h in mpatch.c
Maciej Fijalkowski <fijall@gmail.com>
parents:
29693
diff
changeset
|
260 return 0; |
72 | 261 } |
262 | |
263 /* recursively generate a patch of all bins between start and end */ | |
34800
761355833867
mpatch: reformat function prototypes with clang-format
Augie Fackler <augie@google.com>
parents:
34634
diff
changeset
|
264 struct mpatch_flist * |
761355833867
mpatch: reformat function prototypes with clang-format
Augie Fackler <augie@google.com>
parents:
34634
diff
changeset
|
265 mpatch_fold(void *bins, struct mpatch_flist *(*get_next_item)(void *, ssize_t), |
761355833867
mpatch: reformat function prototypes with clang-format
Augie Fackler <augie@google.com>
parents:
34634
diff
changeset
|
266 ssize_t start, ssize_t end) |
72 | 267 { |
29694
55dd12204b8e
mpatch: remove dependency on Python.h in mpatch.c
Maciej Fijalkowski <fijall@gmail.com>
parents:
29693
diff
changeset
|
268 ssize_t len; |
72 | 269 |
270 if (start + 1 == end) { | |
271 /* trivial case, output a decoded list */ | |
29694
55dd12204b8e
mpatch: remove dependency on Python.h in mpatch.c
Maciej Fijalkowski <fijall@gmail.com>
parents:
29693
diff
changeset
|
272 return get_next_item(bins, start); |
72 | 273 } |
274 | |
275 /* divide and conquer, memory management is elsewhere */ | |
276 len = (end - start) / 2; | |
29694
55dd12204b8e
mpatch: remove dependency on Python.h in mpatch.c
Maciej Fijalkowski <fijall@gmail.com>
parents:
29693
diff
changeset
|
277 return combine(mpatch_fold(bins, get_next_item, start, start + len), |
34801
1f4249c764f1
mpatch: switch alignment of wrapped line from tab to spaces with clang-format
Augie Fackler <augie@google.com>
parents:
34800
diff
changeset
|
278 mpatch_fold(bins, get_next_item, start + len, end)); |
72 | 279 } |