102 lines.append("%c%s\0%s\n%s\n" % (stemlen, f[stemlen:], fl, n)) |
102 lines.append("%c%s\0%s\n%s\n" % (stemlen, f[stemlen:], fl, n)) |
103 prevf = f |
103 prevf = f |
104 _checkforbidden(files) |
104 _checkforbidden(files) |
105 return ''.join(lines) |
105 return ''.join(lines) |
106 |
106 |
107 class _lazymanifest(dict): |
107 class lazymanifestiter(object): |
108 """This is the pure implementation of lazymanifest. |
108 def __init__(self, lm): |
109 |
109 self.pos = 0 |
110 It has not been optimized *at all* and is not lazy. |
110 self.lm = lm |
111 """ |
|
112 |
|
113 def __init__(self, data): |
|
114 dict.__init__(self) |
|
115 for f, n, fl in _parse(data): |
|
116 self[f] = n, fl |
|
117 |
|
118 def __setitem__(self, k, v): |
|
119 node, flag = v |
|
120 assert node is not None |
|
121 if len(node) > 21: |
|
122 node = node[:21] # match c implementation behavior |
|
123 dict.__setitem__(self, k, (node, flag)) |
|
124 |
111 |
125 def __iter__(self): |
112 def __iter__(self): |
126 return iter(sorted(dict.keys(self))) |
113 return self |
127 |
114 |
128 def iterkeys(self): |
115 def next(self): |
129 return iter(sorted(dict.keys(self))) |
116 try: |
130 |
117 data, pos = self.lm._get(self.pos) |
131 def iterentries(self): |
118 except IndexError: |
132 return ((f, e[0], e[1]) for f, e in sorted(self.iteritems())) |
119 raise StopIteration |
|
120 if pos == -1: |
|
121 self.pos += 1 |
|
122 return data[0] |
|
123 self.pos += 1 |
|
124 zeropos = data.find('\x00', pos) |
|
125 return data[pos:zeropos] |
|
126 |
|
127 class lazymanifestiterentries(object): |
|
128 def __init__(self, lm): |
|
129 self.lm = lm |
|
130 self.pos = 0 |
|
131 |
|
132 def __iter__(self): |
|
133 return self |
|
134 |
|
135 def next(self): |
|
136 try: |
|
137 data, pos = self.lm._get(self.pos) |
|
138 except IndexError: |
|
139 raise StopIteration |
|
140 if pos == -1: |
|
141 self.pos += 1 |
|
142 return data |
|
143 zeropos = data.find('\x00', pos) |
|
144 hashval = unhexlify(data, self.lm.extrainfo[self.pos], |
|
145 zeropos + 1, 40) |
|
146 flags = self.lm._getflags(data, self.pos, zeropos) |
|
147 self.pos += 1 |
|
148 return (data[pos:zeropos], hashval, flags) |
|
149 |
|
150 def unhexlify(data, extra, pos, length): |
|
151 s = data[pos:pos + length].decode('hex') |
|
152 if extra: |
|
153 s += chr(extra & 0xff) |
|
154 return s |
|
155 |
|
156 def _cmp(a, b): |
|
157 return (a > b) - (a < b) |
|
158 |
|
159 class _lazymanifest(object): |
|
160 def __init__(self, data, positions=None, extrainfo=None, extradata=None): |
|
161 if positions is None: |
|
162 self.positions = self.findlines(data) |
|
163 self.extrainfo = [0] * len(self.positions) |
|
164 self.data = data |
|
165 self.extradata = [] |
|
166 else: |
|
167 self.positions = positions[:] |
|
168 self.extrainfo = extrainfo[:] |
|
169 self.extradata = extradata[:] |
|
170 self.data = data |
|
171 |
|
172 def findlines(self, data): |
|
173 if not data: |
|
174 return [] |
|
175 pos = data.find("\n") |
|
176 if pos == -1 or data[-1] != '\n': |
|
177 raise ValueError("Manifest did not end in a newline.") |
|
178 positions = [0] |
|
179 prev = data[:data.find('\x00')] |
|
180 while pos < len(data) - 1 and pos != -1: |
|
181 positions.append(pos + 1) |
|
182 nexts = data[pos + 1:data.find('\x00', pos + 1)] |
|
183 if nexts < prev: |
|
184 raise ValueError("Manifest lines not in sorted order.") |
|
185 prev = nexts |
|
186 pos = data.find("\n", pos + 1) |
|
187 return positions |
|
188 |
|
189 def _get(self, index): |
|
190 # get the position encoded in pos: |
|
191 # positive number is an index in 'data' |
|
192 # negative number is in extrapieces |
|
193 pos = self.positions[index] |
|
194 if pos >= 0: |
|
195 return self.data, pos |
|
196 return self.extradata[-pos - 1], -1 |
|
197 |
|
198 def _getkey(self, pos): |
|
199 if pos >= 0: |
|
200 return self.data[pos:self.data.find('\x00', pos + 1)] |
|
201 return self.extradata[-pos - 1][0] |
|
202 |
|
203 def bsearch(self, key): |
|
204 first = 0 |
|
205 last = len(self.positions) - 1 |
|
206 |
|
207 while first <= last: |
|
208 midpoint = (first + last)//2 |
|
209 nextpos = self.positions[midpoint] |
|
210 candidate = self._getkey(nextpos) |
|
211 r = _cmp(key, candidate) |
|
212 if r == 0: |
|
213 return midpoint |
|
214 else: |
|
215 if r < 0: |
|
216 last = midpoint - 1 |
|
217 else: |
|
218 first = midpoint + 1 |
|
219 return -1 |
|
220 |
|
221 def bsearch2(self, key): |
|
222 # same as the above, but will always return the position |
|
223 # done for performance reasons |
|
224 first = 0 |
|
225 last = len(self.positions) - 1 |
|
226 |
|
227 while first <= last: |
|
228 midpoint = (first + last)//2 |
|
229 nextpos = self.positions[midpoint] |
|
230 candidate = self._getkey(nextpos) |
|
231 r = _cmp(key, candidate) |
|
232 if r == 0: |
|
233 return (midpoint, True) |
|
234 else: |
|
235 if r < 0: |
|
236 last = midpoint - 1 |
|
237 else: |
|
238 first = midpoint + 1 |
|
239 return (first, False) |
|
240 |
|
241 def __contains__(self, key): |
|
242 return self.bsearch(key) != -1 |
|
243 |
|
244 def _getflags(self, data, needle, pos): |
|
245 start = pos + 41 |
|
246 end = data.find("\n", start) |
|
247 if end == -1: |
|
248 end = len(data) - 1 |
|
249 if start == end: |
|
250 return '' |
|
251 return self.data[start:end] |
|
252 |
|
253 def __getitem__(self, key): |
|
254 if not isinstance(key, str): |
|
255 raise TypeError("getitem: manifest keys must be a string.") |
|
256 needle = self.bsearch(key) |
|
257 if needle == -1: |
|
258 raise KeyError |
|
259 data, pos = self._get(needle) |
|
260 if pos == -1: |
|
261 return (data[1], data[2]) |
|
262 zeropos = data.find('\x00', pos) |
|
263 assert 0 <= needle <= len(self.positions) |
|
264 assert len(self.extrainfo) == len(self.positions) |
|
265 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, 40) |
|
266 flags = self._getflags(data, needle, zeropos) |
|
267 return (hashval, flags) |
|
268 |
|
269 def __delitem__(self, key): |
|
270 needle, found = self.bsearch2(key) |
|
271 if not found: |
|
272 raise KeyError |
|
273 cur = self.positions[needle] |
|
274 self.positions = self.positions[:needle] + self.positions[needle + 1:] |
|
275 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1:] |
|
276 if cur >= 0: |
|
277 self.data = self.data[:cur] + '\x00' + self.data[cur + 1:] |
|
278 |
|
279 def __setitem__(self, key, value): |
|
280 if not isinstance(key, str): |
|
281 raise TypeError("setitem: manifest keys must be a string.") |
|
282 if not isinstance(value, tuple) or len(value) != 2: |
|
283 raise TypeError("Manifest values must be a tuple of (node, flags).") |
|
284 hashval = value[0] |
|
285 if not isinstance(hashval, str) or not 20 <= len(hashval) <= 22: |
|
286 raise TypeError("node must be a 20-byte string") |
|
287 flags = value[1] |
|
288 if len(hashval) == 22: |
|
289 hashval = hashval[:-1] |
|
290 if not isinstance(flags, str) or len(flags) > 1: |
|
291 raise TypeError("flags must a 0 or 1 byte string, got %r", flags) |
|
292 needle, found = self.bsearch2(key) |
|
293 if found: |
|
294 # put the item |
|
295 pos = self.positions[needle] |
|
296 if pos < 0: |
|
297 self.extradata[-pos - 1] = (key, hashval, value[1]) |
|
298 else: |
|
299 # just don't bother |
|
300 self.extradata.append((key, hashval, value[1])) |
|
301 self.positions[needle] = -len(self.extradata) |
|
302 else: |
|
303 # not found, put it in with extra positions |
|
304 self.extradata.append((key, hashval, value[1])) |
|
305 self.positions = (self.positions[:needle] + [-len(self.extradata)] |
|
306 + self.positions[needle:]) |
|
307 self.extrainfo = (self.extrainfo[:needle] + [0] + |
|
308 self.extrainfo[needle:]) |
133 |
309 |
134 def copy(self): |
310 def copy(self): |
135 c = _lazymanifest('') |
311 # XXX call _compact like in C? |
136 c.update(self) |
312 return _lazymanifest(self.data, self.positions, self.extrainfo, |
137 return c |
313 self.extradata) |
|
314 |
|
315 def _compact(self): |
|
316 # hopefully not called TOO often |
|
317 if len(self.extradata) == 0: |
|
318 return |
|
319 l = [] |
|
320 last_cut = 0 |
|
321 i = 0 |
|
322 offset = 0 |
|
323 self.extrainfo = [0] * len(self.positions) |
|
324 while i < len(self.positions): |
|
325 if self.positions[i] >= 0: |
|
326 cur = self.positions[i] |
|
327 last_cut = cur |
|
328 while True: |
|
329 self.positions[i] = offset |
|
330 i += 1 |
|
331 if i == len(self.positions) or self.positions[i] < 0: |
|
332 break |
|
333 offset += self.positions[i] - cur |
|
334 cur = self.positions[i] |
|
335 end_cut = self.data.find('\n', cur) |
|
336 if end_cut != -1: |
|
337 end_cut += 1 |
|
338 offset += end_cut - cur |
|
339 l.append(self.data[last_cut:end_cut]) |
|
340 else: |
|
341 while i < len(self.positions) and self.positions[i] < 0: |
|
342 cur = self.positions[i] |
|
343 t = self.extradata[-cur - 1] |
|
344 l.append(self._pack(t)) |
|
345 self.positions[i] = offset |
|
346 if len(t[1]) > 20: |
|
347 self.extrainfo[i] = ord(t[1][21]) |
|
348 offset += len(l[-1]) |
|
349 i += 1 |
|
350 self.data = ''.join(l) |
|
351 self.extradata = [] |
|
352 |
|
353 def _pack(self, d): |
|
354 return d[0] + '\x00' + d[1][:20].encode('hex') + d[2] + '\n' |
|
355 |
|
356 def text(self): |
|
357 self._compact() |
|
358 return self.data |
138 |
359 |
139 def diff(self, m2, clean=False): |
360 def diff(self, m2, clean=False): |
140 '''Finds changes between the current manifest and m2.''' |
361 '''Finds changes between the current manifest and m2.''' |
|
362 # XXX think whether efficiency matters here |
141 diff = {} |
363 diff = {} |
142 |
364 |
143 for fn, e1 in self.iteritems(): |
365 for fn, e1, flags in self.iterentries(): |
144 if fn not in m2: |
366 if fn not in m2: |
145 diff[fn] = e1, (None, '') |
367 diff[fn] = (e1, flags), (None, '') |
146 else: |
368 else: |
147 e2 = m2[fn] |
369 e2 = m2[fn] |
148 if e1 != e2: |
370 if (e1, flags) != e2: |
149 diff[fn] = e1, e2 |
371 diff[fn] = (e1, flags), e2 |
150 elif clean: |
372 elif clean: |
151 diff[fn] = None |
373 diff[fn] = None |
152 |
374 |
153 for fn, e2 in m2.iteritems(): |
375 for fn, e2, flags in m2.iterentries(): |
154 if fn not in self: |
376 if fn not in self: |
155 diff[fn] = (None, ''), e2 |
377 diff[fn] = (None, ''), (e2, flags) |
156 |
378 |
157 return diff |
379 return diff |
158 |
380 |
|
381 def iterentries(self): |
|
382 return lazymanifestiterentries(self) |
|
383 |
|
384 def iterkeys(self): |
|
385 return lazymanifestiter(self) |
|
386 |
|
387 def __iter__(self): |
|
388 return lazymanifestiter(self) |
|
389 |
|
390 def __len__(self): |
|
391 return len(self.positions) |
|
392 |
159 def filtercopy(self, filterfn): |
393 def filtercopy(self, filterfn): |
|
394 # XXX should be optimized |
160 c = _lazymanifest('') |
395 c = _lazymanifest('') |
161 for f, n, fl in self.iterentries(): |
396 for f, n, fl in self.iterentries(): |
162 if filterfn(f): |
397 if filterfn(f): |
163 c[f] = n, fl |
398 c[f] = n, fl |
164 return c |
399 return c |
165 |
|
166 def text(self): |
|
167 """Get the full data of this manifest as a bytestring.""" |
|
168 return _textv1(self.iterentries()) |
|
169 |
400 |
170 try: |
401 try: |
171 _lazymanifest = parsers.lazymanifest |
402 _lazymanifest = parsers.lazymanifest |
172 except AttributeError: |
403 except AttributeError: |
173 pass |
404 pass |