|
1 from __future__ import absolute_import |
|
2 |
|
3 import struct |
|
4 |
|
5 from mercurial.node import hex, nullid |
|
6 from mercurial.i18n import _ |
|
7 from mercurial import ( |
|
8 error, |
|
9 pycompat, |
|
10 util, |
|
11 ) |
|
12 from . import ( |
|
13 basepack, |
|
14 constants, |
|
15 lz4wrapper, |
|
16 shallowutil, |
|
17 ) |
|
18 |
|
# Length in bytes of a binary node hash.
NODELENGTH = 20

# The indicator value in the index for a fulltext entry.
FULLTEXTINDEXMARK = -1
# The indicator value in the index for an entry whose delta base is not
# stored in this pack (see mutabledatapack.createindex).
NOBASEINDEXMARK = -2

# On-disk suffixes for the index and pack files of a datapack pair.
INDEXSUFFIX = '.dataidx'
PACKSUFFIX = '.datapack'
|
27 |
|
class datapackstore(basepack.basepackstore):
    """A store backed by a collection of datapack files in a directory.

    Lookups try every currently-loaded pack; on a miss the pack list is
    refreshed from disk once and the newly discovered packs are tried
    before raising KeyError.
    """

    INDEXSUFFIX = INDEXSUFFIX
    PACKSUFFIX = PACKSUFFIX

    def __init__(self, ui, path):
        super(datapackstore, self).__init__(ui, path)

    def getpack(self, path):
        return datapack(path)

    def get(self, name, node):
        raise RuntimeError("must use getdeltachain with datapackstore")

    def _getfrompacks(self, attrname, name, node):
        """Call pack.<attrname>(name, node) on each pack until one hits.

        The known packs are tried first; if all miss, the pack list is
        refreshed from disk and any new packs are tried before raising
        KeyError.
        """
        for pack in self.packs:
            try:
                return getattr(pack, attrname)(name, node)
            except KeyError:
                pass

        # A miss may just mean new packs appeared on disk since we last
        # scanned; refresh and retry before giving up.
        for pack in self.refresh():
            try:
                return getattr(pack, attrname)(name, node)
            except KeyError:
                pass

        raise KeyError((name, hex(node)))

    def getmeta(self, name, node):
        return self._getfrompacks('getmeta', name, node)

    def getdelta(self, name, node):
        return self._getfrompacks('getdelta', name, node)

    def getdeltachain(self, name, node):
        return self._getfrompacks('getdeltachain', name, node)

    def add(self, name, node, data):
        raise RuntimeError("cannot add to datapackstore")
|
88 |
|
class datapack(basepack.basepack):
    INDEXSUFFIX = INDEXSUFFIX
    PACKSUFFIX = PACKSUFFIX

    # Format is <node><delta offset><pack data offset><pack data size>
    # See the mutabledatapack doccomment for more details.
    INDEXFORMAT = '!20siQQ'
    INDEXENTRYLENGTH = 40

    # v1 adds a metadata section after each delta (see mutabledatapack).
    SUPPORTED_VERSIONS = [0, 1]

    def getmissing(self, keys):
        """Return the list of (name, node) keys not present in this pack."""
        missing = []
        for name, node in keys:
            value = self._find(node)
            if not value:
                missing.append((name, node))

        return missing

    def get(self, name, node):
        raise RuntimeError("must use getdeltachain with datapack (%s:%s)"
                           % (name, hex(node)))

    def getmeta(self, name, node):
        """Return the parsed metadata dict for the given revision.

        Raises KeyError if the node is not in this pack.  Version 0 packs
        cannot store metadata, so an empty dict is returned for them.
        """
        value = self._find(node)
        if value is None:
            raise KeyError((name, hex(node)))

        # version 0 does not support metadata
        if self.VERSION == 0:
            return {}

        node, deltabaseoffset, offset, size = value
        rawentry = self._data[offset:offset + size]

        # see docstring of mutabledatapack for the format; skip straight to
        # the metadata section rather than parsing the whole entry.
        offset = 0
        offset += struct.unpack_from('!H', rawentry, offset)[0] + 2 # filename
        offset += 40 # node, deltabase node
        offset += struct.unpack_from('!Q', rawentry, offset)[0] + 8 # delta

        metalen = struct.unpack_from('!I', rawentry, offset)[0]
        offset += 4

        meta = shallowutil.parsepackmeta(rawentry[offset:offset + metalen])

        return meta

    def getdelta(self, name, node):
        """Return (delta, filename, deltabasenode, meta) for a revision.

        Raises KeyError if the node is not in this pack.
        """
        value = self._find(node)
        if value is None:
            raise KeyError((name, hex(node)))

        node, deltabaseoffset, offset, size = value
        entry = self._readentry(offset, size, getmeta=True)
        filename, node, deltabasenode, delta, meta = entry

        # If we've read a lot of data from the mmap, free some memory.
        self.freememory()

        return delta, filename, deltabasenode, meta

    def getdeltachain(self, name, node):
        """Return the delta chain for a revision.

        The result is a list of (filename, node, filename, deltabasenode,
        delta) tuples, starting at the requested revision and following
        deltabase pointers until a fulltext (or an entry whose base is not
        in this pack) is reached.  Raises KeyError on a miss.
        """
        value = self._find(node)
        if value is None:
            raise KeyError((name, hex(node)))

        params = self.params

        # Precompute chains by walking deltabase pointers through the
        # index; this avoids touching the pack data until we know the
        # whole chain.
        chain = [value]
        deltabaseoffset = value[1]
        entrylen = self.INDEXENTRYLENGTH
        while (deltabaseoffset != FULLTEXTINDEXMARK
               and deltabaseoffset != NOBASEINDEXMARK):
            loc = params.indexstart + deltabaseoffset
            value = struct.unpack(self.INDEXFORMAT,
                                  self._index[loc:loc + entrylen])
            deltabaseoffset = value[1]
            chain.append(value)

        # Read chain data
        deltachain = []
        for node, deltabaseoffset, offset, size in chain:
            filename, node, deltabasenode, delta = self._readentry(offset, size)
            deltachain.append((filename, node, filename, deltabasenode, delta))

        # If we've read a lot of data from the mmap, free some memory.
        self.freememory()

        return deltachain

    def _readentry(self, offset, size, getmeta=False):
        """Parse one pack entry at the given pack offset.

        Returns (filename, node, deltabasenode, delta), plus the metadata
        dict as a fifth element when getmeta is True.
        """
        rawentry = self._data[offset:offset + size]
        self._pagedin += len(rawentry)

        # <2 byte len> + <filename>
        lengthsize = 2
        filenamelen = struct.unpack('!H', rawentry[:2])[0]
        filename = rawentry[lengthsize:lengthsize + filenamelen]

        # <20 byte node> + <20 byte deltabase>
        nodestart = lengthsize + filenamelen
        deltabasestart = nodestart + NODELENGTH
        node = rawentry[nodestart:deltabasestart]
        deltabasenode = rawentry[deltabasestart:deltabasestart + NODELENGTH]

        # <8 byte len> + <delta>
        deltastart = deltabasestart + NODELENGTH
        rawdeltalen = rawentry[deltastart:deltastart + 8]
        deltalen = struct.unpack('!Q', rawdeltalen)[0]

        delta = rawentry[deltastart + 8:deltastart + 8 + deltalen]
        delta = lz4wrapper.lz4decompress(delta)

        if getmeta:
            if self.VERSION == 0:
                # v0 entries have no metadata section
                meta = {}
            else:
                # <4 byte len> + <metadata-list>
                metastart = deltastart + 8 + deltalen
                metalen = struct.unpack_from('!I', rawentry, metastart)[0]

                rawmeta = rawentry[metastart + 4:metastart + 4 + metalen]
                meta = shallowutil.parsepackmeta(rawmeta)
            return filename, node, deltabasenode, delta, meta
        else:
            return filename, node, deltabasenode, delta

    def add(self, name, node, data):
        raise RuntimeError("cannot add to datapack (%s:%s)" % (name, node))

    def _find(self, node):
        """Locate a node in the index via the fanout table plus a bisect.

        Returns the unpacked index tuple (node, deltabaseoffset, offset,
        size), or None if the node is not in this pack.
        """
        params = self.params
        fanoutkey = struct.unpack(params.fanoutstruct,
                                  node[:params.fanoutprefix])[0]
        fanout = self._fanouttable

        start = fanout[fanoutkey] + params.indexstart
        indexend = self._indexend

        # Scan forward to find the first non-same entry, which is the upper
        # bound.
        for i in pycompat.xrange(fanoutkey + 1, params.fanoutcount):
            end = fanout[i] + params.indexstart
            if end != start:
                break
        else:
            end = indexend

        # Bisect between start and end to find node
        index = self._index
        startnode = index[start:start + NODELENGTH]
        endnode = index[end:end + NODELENGTH]
        entrylen = self.INDEXENTRYLENGTH
        if startnode == node:
            entry = index[start:start + entrylen]
        elif endnode == node:
            entry = index[end:end + entrylen]
        else:
            while start < end - entrylen:
                # Floor division: plain '/' produces a float on Python 3
                # (no __future__ division here), which would break the
                # slice arithmetic below.
                mid = start + (end - start) // 2
                # Snap mid to an index-entry boundary.
                mid = mid - ((mid - params.indexstart) % entrylen)
                midnode = index[mid:mid + NODELENGTH]
                if midnode == node:
                    entry = index[mid:mid + entrylen]
                    break
                if node > midnode:
                    start = mid
                    startnode = midnode
                elif node < midnode:
                    end = mid
                    endnode = midnode
            else:
                # Bisect exhausted without a match: node is not present.
                return None

        return struct.unpack(self.INDEXFORMAT, entry)

    def markledger(self, ledger, options=None):
        """Register every entry in this pack with a repack ledger."""
        for filename, node in self:
            ledger.markdataentry(self, filename, node)

    def cleanup(self, ledger):
        """Delete this pack's files if every entry was repacked or gc'ed."""
        entries = ledger.sources.get(self, [])
        allkeys = set(self)
        repackedkeys = set((e.filename, e.node) for e in entries if
                           e.datarepacked or e.gced)

        if len(allkeys - repackedkeys) == 0:
            if self.path not in ledger.created:
                util.unlinkpath(self.indexpath, ignoremissing=True)
                util.unlinkpath(self.packpath, ignoremissing=True)

    def __iter__(self):
        # Iterating a datapack yields (filename, node) pairs.
        for f, n, deltabase, deltalen in self.iterentries():
            yield f, n

    def iterentries(self):
        """Yield (filename, node, deltabase, uncompressedlen) for every
        entry in the pack, in on-disk order."""
        # Start at 1 to skip the header
        offset = 1
        data = self._data
        while offset < self.datasize:
            oldoffset = offset

            # <2 byte len> + <filename>
            filenamelen = struct.unpack('!H', data[offset:offset + 2])[0]
            offset += 2
            filename = data[offset:offset + filenamelen]
            offset += filenamelen

            # <20 byte node>
            node = data[offset:offset + constants.NODESIZE]
            offset += constants.NODESIZE
            # <20 byte deltabase>
            deltabase = data[offset:offset + constants.NODESIZE]
            offset += constants.NODESIZE

            # <8 byte len> + <delta>
            rawdeltalen = data[offset:offset + 8]
            deltalen = struct.unpack('!Q', rawdeltalen)[0]
            offset += 8

            # it has to be at least long enough for the lz4 header.
            assert deltalen >= 4

            # python-lz4 stores the length of the uncompressed field as a
            # little-endian 32-bit integer at the start of the data.
            uncompressedlen = struct.unpack('<I', data[offset:offset + 4])[0]
            offset += deltalen

            if self.VERSION == 1:
                # <4 byte len> + <metadata-list>
                metalen = struct.unpack_from('!I', data, offset)[0]
                offset += 4 + metalen

            yield (filename, node, deltabase, uncompressedlen)

            # If we've read a lot of data from the mmap, free some memory.
            self._pagedin += offset - oldoffset
            if self.freememory():
                data = self._data
|
330 |
|
class mutabledatapack(basepack.mutablebasepack):
    """A class for constructing and serializing a datapack file and index.

    A datapack is a pair of files that contain the revision contents for various
    file revisions in Mercurial. It contains only revision contents (like file
    contents), not any history information.

    It consists of two files, with the following format. All bytes are in
    network byte order (big endian).

    .datapack
        The pack itself is a series of revision deltas with some basic header
        information on each. A revision delta may be a fulltext, represented by
        a deltabasenode equal to the nullid.

        datapack = <version: 1 byte>
                   [<revision>,...]
        revision = <filename len: 2 byte unsigned int>
                   <filename>
                   <node: 20 byte>
                   <deltabasenode: 20 byte>
                   <delta len: 8 byte unsigned int>
                   <delta>
                   <metadata-list len: 4 byte unsigned int> [1]
                   <metadata-list> [1]
        metadata-list = [<metadata-item>, ...]
        metadata-item = <metadata-key: 1 byte>
                        <metadata-value len: 2 byte unsigned>
                        <metadata-value>

        metadata-key could be METAKEYFLAG or METAKEYSIZE or other single byte
        value in the future.

    .dataidx
        The index file consists of two parts, the fanout and the index.

        The index is a list of index entries, sorted by node (one per revision
        in the pack). Each entry has:

        - node (The 20 byte node of the entry; i.e. the commit hash, file node
                hash, etc)
        - deltabase index offset (The location in the index of the deltabase for
                                  this entry. The deltabase is the next delta in
                                  the chain, with the chain eventually
                                  terminating in a full-text, represented by a
                                  deltabase offset of -1. This lets us compute
                                  delta chains from the index, then do
                                  sequential reads from the pack if the revision
                                  are nearby on disk.)
        - pack entry offset (The location of this entry in the datapack)
        - pack content size (The on-disk length of this entry's pack data)

        The fanout is a quick lookup table to reduce the number of steps for
        bisecting the index. It is a series of 4 byte pointers to positions
        within the index. It has 2^16 entries, which corresponds to hash
        prefixes [0000, 0001,..., FFFE, FFFF]. Example: the pointer in slot
        4F0A points to the index position of the first revision whose node
        starts with 4F0A. This saves log(2^16)=16 bisect steps.

        dataidx = <fanouttable>
                  <index>
        fanouttable = [<index offset: 4 byte unsigned int>,...] (2^16 entries)
        index = [<index entry>,...]
        indexentry = <node: 20 byte>
                     <deltabase location: 4 byte signed int>
                     <pack entry offset: 8 byte unsigned int>
                     <pack entry size: 8 byte unsigned int>

    [1]: new in version 1.
    """
    INDEXSUFFIX = INDEXSUFFIX
    PACKSUFFIX = PACKSUFFIX

    # v[01] index format: <node><delta offset><pack data offset><pack data size>
    INDEXFORMAT = datapack.INDEXFORMAT
    INDEXENTRYLENGTH = datapack.INDEXENTRYLENGTH

    # v1 has metadata support
    SUPPORTED_VERSIONS = [0, 1]

    def add(self, name, node, deltabasenode, delta, metadata=None):
        """Append one revision to the pack being built.

        Raises RuntimeError for an over-long name or a node that is not 20
        bytes; silently ignores nodes that were already added.
        """
        # metadata is a dict, ex. {METAKEYFLAG: flag}
        if len(name) > 2**16:
            raise RuntimeError(_("name too long %s") % name)
        if len(node) != 20:
            raise RuntimeError(_("node should be 20 bytes %s") % node)

        if node in self.entries:
            # The revision has already been added
            return

        # TODO: allow configurable compression
        delta = lz4wrapper.lz4compress(delta)

        rawdata = ''.join((
            struct.pack('!H', len(name)), # unsigned 2 byte int
            name,
            node,
            deltabasenode,
            struct.pack('!Q', len(delta)), # unsigned 8 byte int
            delta,
        ))

        if self.VERSION == 1:
            # v1 support metadata
            rawmeta = shallowutil.buildpackmeta(metadata)
            rawdata += struct.pack('!I', len(rawmeta)) # unsigned 4 byte
            rawdata += rawmeta
        else:
            # v0 cannot store metadata, raise if metadata contains flag
            if metadata and metadata.get(constants.METAKEYFLAG, 0) != 0:
                raise error.ProgrammingError('v0 pack cannot store flags')

        offset = self.packfp.tell()

        size = len(rawdata)

        self.entries[node] = (deltabasenode, offset, size)

        self.writeraw(rawdata)

    def createindex(self, nodelocations, indexoffset):
        """Serialize self.entries into the raw index body, sorted by node.

        nodelocations maps node -> index offset of that node's entry, and
        is used to turn deltabase nodes into direct index pointers.
        """
        # .items() instead of the Python-2-only .iteritems(), to match the
        # py3-compatible idioms used elsewhere in this file.
        entries = sorted((n, db, o, s) for n, (db, o, s)
                         in self.entries.items())

        fmt = self.INDEXFORMAT
        indexentries = []
        for node, deltabase, offset, size in entries:
            if deltabase == nullid:
                deltabaselocation = FULLTEXTINDEXMARK
            else:
                # Instead of storing the deltabase node in the index, let's
                # store a pointer directly to the index entry for the deltabase.
                deltabaselocation = nodelocations.get(deltabase,
                                                      NOBASEINDEXMARK)

            indexentries.append(
                struct.pack(fmt, node, deltabaselocation, offset, size))

        # Single join instead of repeated string += (which is quadratic).
        return ''.join(indexentries)