from __future__ import absolute_import

import hashlib
import struct

from mercurial.node import hex, nullid
from mercurial import (
    pycompat,
    util,
)
from . import (
    basepack,
    constants,
    shallowutil,
)
|
# (filename hash, offset, size)
INDEXFORMAT0 = '!20sQQ'
INDEXENTRYLENGTH0 = struct.calcsize(INDEXFORMAT0)
# (filename hash, offset, size, node index offset, node index size)
INDEXFORMAT1 = '!20sQQII'
INDEXENTRYLENGTH1 = struct.calcsize(INDEXFORMAT1)
NODELENGTH = 20

# (node hash, offset into the pack data)
NODEINDEXFORMAT = '!20sQ'
NODEINDEXENTRYLENGTH = struct.calcsize(NODEINDEXFORMAT)

# (node, p1, p2, linknode, copyfrom length)
PACKFORMAT = "!20s20s20s20sH"
PACKENTRYLENGTH = 82
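
# Illustrative sketch (an assumption for clarity, not part of the original
# API): one on-disk revision entry is the fixed-size PACKFORMAT header
# followed immediately by the raw copyfrom path:
#
#   header = struct.pack(PACKFORMAT, node, p1, p2, linknode, len(copyfrom))
#   entry = header + copyfrom
#
# PACKENTRYLENGTH (82 == 4 * 20 + 2) covers only the header; readers skip
# the variable-length copyfrom tail separately.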
|
ENTRYCOUNTSIZE = 4

INDEXSUFFIX = '.histidx'
PACKSUFFIX = '.histpack'

# Positions of the fields in a tuple unpacked with PACKFORMAT
ANC_NODE = 0
ANC_P1NODE = 1
ANC_P2NODE = 2
ANC_LINKNODE = 3
ANC_COPYFROM = 4
|
class historypackstore(basepack.basepackstore):
    INDEXSUFFIX = INDEXSUFFIX
    PACKSUFFIX = PACKSUFFIX

    def getpack(self, path):
        return historypack(path)

    def getancestors(self, name, node, known=None):
        for pack in self.packs:
            try:
                return pack.getancestors(name, node, known=known)
            except KeyError:
                pass

        for pack in self.refresh():
            try:
                return pack.getancestors(name, node, known=known)
            except KeyError:
                pass

        raise KeyError((name, node))

    def getnodeinfo(self, name, node):
        for pack in self.packs:
            try:
                return pack.getnodeinfo(name, node)
            except KeyError:
                pass

        for pack in self.refresh():
            try:
                return pack.getnodeinfo(name, node)
            except KeyError:
                pass

        raise KeyError((name, node))

    def add(self, filename, node, p1, p2, linknode, copyfrom):
        raise RuntimeError("cannot add to historypackstore (%s:%s)"
                           % (filename, hex(node)))
|
class historypack(basepack.basepack):
    INDEXSUFFIX = INDEXSUFFIX
    PACKSUFFIX = PACKSUFFIX

    SUPPORTED_VERSIONS = [0, 1]

    def __init__(self, path):
        super(historypack, self).__init__(path)

        if self.VERSION == 0:
            self.INDEXFORMAT = INDEXFORMAT0
            self.INDEXENTRYLENGTH = INDEXENTRYLENGTH0
        else:
            self.INDEXFORMAT = INDEXFORMAT1
            self.INDEXENTRYLENGTH = INDEXENTRYLENGTH1

    def getmissing(self, keys):
        missing = []
        for name, node in keys:
            try:
                self._findnode(name, node)
            except KeyError:
                missing.append((name, node))

        return missing
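
    # Hypothetical usage sketch (the pack path and node value are assumed
    # for illustration):
    #
    #     pack = historypack('/path/to/pack')
    #     ancestors = pack.getancestors('foo.py', node)
    #     p1, p2, linknode, copyfrom = ancestors[node]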
|
    def getancestors(self, name, node, known=None):
        """Returns as many ancestors as we're aware of.

        return value: {
            node: (p1, p2, linknode, copyfrom),
            ...
        }
        """
        if known and node in known:
            return {}

        ancestors = self._getancestors(name, node, known=known)
        results = {}
        for ancnode, p1, p2, linknode, copyfrom in ancestors:
            results[ancnode] = (p1, p2, linknode, copyfrom)

        if not results:
            raise KeyError((name, node))
        return results
|
    def getnodeinfo(self, name, node):
        # Drop the node from the tuple before returning, since the result
        # should just be (p1, p2, linknode, copyfrom)
        return self._findnode(name, node)[1:]
|
    def _getancestors(self, name, node, known=None):
        if known is None:
            known = set()
        section = self._findsection(name)
        filename, offset, size, nodeindexoffset, nodeindexsize = section
        pending = set((node,))
        o = 0
        while o < size:
            if not pending:
                break
            entry, copyfrom = self._readentry(offset + o)
            o += PACKENTRYLENGTH
            if copyfrom:
                o += len(copyfrom)

            ancnode = entry[ANC_NODE]
            if ancnode in pending:
                pending.remove(ancnode)
                p1node = entry[ANC_P1NODE]
                p2node = entry[ANC_P2NODE]
                if p1node != nullid and p1node not in known:
                    pending.add(p1node)
                if p2node != nullid and p2node not in known:
                    pending.add(p2node)

                yield (ancnode, p1node, p2node, entry[ANC_LINKNODE], copyfrom)
|
    def _readentry(self, offset):
        data = self._data
        entry = struct.unpack(PACKFORMAT,
                              data[offset:offset + PACKENTRYLENGTH])
        copyfrom = None
        copyfromlen = entry[ANC_COPYFROM]
        if copyfromlen != 0:
            offset += PACKENTRYLENGTH
            copyfrom = data[offset:offset + copyfromlen]
        return entry, copyfrom
|
    def add(self, filename, node, p1, p2, linknode, copyfrom):
        raise RuntimeError("cannot add to historypack (%s:%s)" %
                           (filename, hex(node)))
|
    def _findnode(self, name, node):
        if self.VERSION == 0:
            ancestors = self._getancestors(name, node)
            for ancnode, p1node, p2node, linknode, copyfrom in ancestors:
                if ancnode == node:
                    return (ancnode, p1node, p2node, linknode, copyfrom)
        else:
            section = self._findsection(name)
            nodeindexoffset, nodeindexsize = section[3:]
            entry = self._bisect(node, nodeindexoffset,
                                 nodeindexoffset + nodeindexsize,
                                 NODEINDEXENTRYLENGTH)
            if entry is not None:
                node, offset = struct.unpack(NODEINDEXFORMAT, entry)
                entry, copyfrom = self._readentry(offset)
                # Drop the copyfromlen from the end of entry, and replace it
                # with the copyfrom string.
                return entry[:4] + (copyfrom,)

        raise KeyError("unable to find history for %s:%s" % (name, hex(node)))
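
    # Lookup strategy: hash the filename, narrow the bisect window with the
    # fanout table, then bisect the file index for the matching hash entry.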
|
    def _findsection(self, name):
        params = self.params
        namehash = hashlib.sha1(name).digest()
        fanoutkey = struct.unpack(params.fanoutstruct,
                                  namehash[:params.fanoutprefix])[0]
        fanout = self._fanouttable

        start = fanout[fanoutkey] + params.indexstart
        indexend = self._indexend

        for i in pycompat.xrange(fanoutkey + 1, params.fanoutcount):
            end = fanout[i] + params.indexstart
            if end != start:
                break
        else:
            end = indexend

        entry = self._bisect(namehash, start, end, self.INDEXENTRYLENGTH)
        if not entry:
            raise KeyError(name)

        rawentry = struct.unpack(self.INDEXFORMAT, entry)
        if self.VERSION == 0:
            x, offset, size = rawentry
            nodeindexoffset = None
            nodeindexsize = None
        else:
            x, offset, size, nodeindexoffset, nodeindexsize = rawentry
            rawnamelen = self._index[nodeindexoffset:nodeindexoffset +
                                     constants.FILENAMESIZE]
            actualnamelen = struct.unpack('!H', rawnamelen)[0]
            nodeindexoffset += constants.FILENAMESIZE
            actualname = self._index[nodeindexoffset:nodeindexoffset +
                                     actualnamelen]
            if actualname != name:
                raise KeyError("found file name %s when looking for %s" %
                               (actualname, name))
            nodeindexoffset += actualnamelen

        filenamelength = struct.unpack('!H', self._data[offset:offset +
                                             constants.FILENAMESIZE])[0]
        offset += constants.FILENAMESIZE

        actualname = self._data[offset:offset + filenamelength]
        offset += filenamelength

        if name != actualname:
            raise KeyError("found file name %s when looking for %s" %
                           (actualname, name))

        # Skip entry list size
        offset += ENTRYCOUNTSIZE

        nodelistoffset = offset
        nodelistsize = (size - constants.FILENAMESIZE - filenamelength -
                        ENTRYCOUNTSIZE)
        return (name, nodelistoffset, nodelistsize,
                nodeindexoffset, nodeindexsize)
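
    # Note: _bisect assumes [start, end) is a sorted array of fixed-size
    # entries that each begin with a 20-byte node, so the midpoint is
    # realigned to an entry boundary before every comparison.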
|
    def _bisect(self, node, start, end, entrylen):
        # Bisect between start and end to find node
        origstart = start
        startnode = self._index[start:start + NODELENGTH]
        endnode = self._index[end:end + NODELENGTH]

        if startnode == node:
            return self._index[start:start + entrylen]
        elif endnode == node:
            return self._index[end:end + entrylen]
        else:
            while start < end - entrylen:
                # Integer division keeps mid an int offset (a float under
                # Python 3's true division would break the slicing below)
                mid = start + (end - start) // 2
                mid = mid - ((mid - origstart) % entrylen)
                midnode = self._index[mid:mid + NODELENGTH]
                if midnode == node:
                    return self._index[mid:mid + entrylen]
                if node > midnode:
                    start = mid
                    startnode = midnode
                elif node < midnode:
                    end = mid
                    endnode = midnode
        return None
|
    def markledger(self, ledger, options=None):
        for filename, node in self:
            ledger.markhistoryentry(self, filename, node)

    def cleanup(self, ledger):
        entries = ledger.sources.get(self, [])
        allkeys = set(self)
        repackedkeys = set((e.filename, e.node) for e in entries if
                           e.historyrepacked)

        if len(allkeys - repackedkeys) == 0:
            if self.path not in ledger.created:
                util.unlinkpath(self.indexpath, ignoremissing=True)
                util.unlinkpath(self.packpath, ignoremissing=True)
|
    def __iter__(self):
        for f, n, x, x, x, x in self.iterentries():
            yield f, n
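
    # Sketch of consuming the full iterator (process() is a placeholder
    # assumed for illustration):
    #
    #     for filename, node, p1, p2, linknode, copyfrom in pack.iterentries():
    #         process(filename, node, linknode)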
|
    def iterentries(self):
        # Start at 1 to skip the header
        offset = 1
        while offset < self.datasize:
            data = self._data
            # <2 byte len> + <filename>
            filenamelen = struct.unpack('!H', data[offset:offset +
                                               constants.FILENAMESIZE])[0]
            offset += constants.FILENAMESIZE
            filename = data[offset:offset + filenamelen]
            offset += filenamelen

            revcount = struct.unpack('!I', data[offset:offset +
                                           ENTRYCOUNTSIZE])[0]
            offset += ENTRYCOUNTSIZE

            for i in pycompat.xrange(revcount):
                entry = struct.unpack(PACKFORMAT, data[offset:offset +
                                                  PACKENTRYLENGTH])
                offset += PACKENTRYLENGTH

                copyfrom = data[offset:offset + entry[ANC_COPYFROM]]
                offset += entry[ANC_COPYFROM]

                yield (filename, entry[ANC_NODE], entry[ANC_P1NODE],
                       entry[ANC_P2NODE], entry[ANC_LINKNODE], copyfrom)

                self._pagedin += PACKENTRYLENGTH

            # If we've read a lot of data from the mmap, free some memory.
            self.freememory()
|
class mutablehistorypack(basepack.mutablebasepack):
    """A class for constructing and serializing a histpack file and index.

    A history pack is a pair of files that contain the revision history for
    various file revisions in Mercurial. It contains only revision history
    (like parent pointers and linknodes), not any revision content
    information.

    It consists of two files, with the following format:

    .histpack
        The pack itself is a series of file revisions with some basic header
        information on each.

        histpack = <version: 1 byte>
                   [<filesection>,...]
        filesection = <filename len: 2 byte unsigned int>
                      <filename>
                      <revision count: 4 byte unsigned int>
                      [<revision>,...]
        revision = <node: 20 byte>
                   <p1node: 20 byte>
                   <p2node: 20 byte>
                   <linknode: 20 byte>
                   <copyfromlen: 2 byte>
                   <copyfrom>

        The revisions within each filesection are stored in topological order
        (newest first). If a given entry has a parent from another file (a
        copy) then p1node is the node from the other file, and copyfrom is
        the filepath of the other file.

    .histidx
        The index file provides a mapping from filename to the file section
        in the histpack. In V1 it also contains sub-indexes for specific
        nodes within each file. It consists of three parts, the fanout, the
        file index and the node indexes.

        The file index is a list of index entries, sorted by filename hash
        (one per file section in the pack). Each entry has:

        - node (The 20 byte hash of the filename)
        - pack entry offset (The location of this file section in the
          histpack)
        - pack content size (The on-disk length of this file section's pack
          data)
        - node index offset (The location of the file's node index in the
          index file) [1]
        - node index size (The on-disk length of this file's node index) [1]

        The fanout is a quick lookup table to reduce the number of steps for
        bisecting the index. It is a series of 4 byte pointers to positions
        within the index. It has 2^16 entries, which corresponds to hash
        prefixes [00, 01, 02, ..., FD, FE, FF]. Example: the pointer in slot
        4F points to the index position of the first revision whose node
        starts with 4F. This saves log(2^16) bisect steps.

        histidx = <fanouttable>
                  <file count: 8 byte unsigned> [1]
                  <fileindex>
                  <node count: 8 byte unsigned> [1]
                  [<nodeindex>,...] [1]
        fanouttable = [<index offset: 4 byte unsigned int>,...] (2^16 entries)

        fileindex = [<file index entry>,...]
        fileindexentry = <node: 20 byte>
                         <pack file section offset: 8 byte unsigned int>
                         <pack file section size: 8 byte unsigned int>
                         <node index offset: 4 byte unsigned int> [1]
                         <node index size: 4 byte unsigned int> [1]
        nodeindex = <filename>[<node index entry>,...] [1]
        filename = <filename len: 2 byte unsigned int><filename value> [1]
        nodeindexentry = <node: 20 byte> [1]
                         <pack file node offset: 8 byte unsigned int> [1]

    [1]: new in version 1.
    """
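
    # Hypothetical usage sketch (the ui object and pack directory are
    # assumed):
    #
    #     pack = mutablehistorypack(ui, '/path/to/packs', version=1)
    #     pack.add(filename, node, p1, p2, linknode, copyfrom)
    #     pack.close()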
|
    INDEXSUFFIX = INDEXSUFFIX
    PACKSUFFIX = PACKSUFFIX

    SUPPORTED_VERSIONS = [0, 1]

    def __init__(self, ui, packpath, version=0):
        # internal config: remotefilelog.historypackv1
        if version == 0 and ui.configbool('remotefilelog', 'historypackv1'):
            version = 1

        super(mutablehistorypack, self).__init__(ui, packpath, version=version)
        self.files = {}
        self.entrylocations = {}
        self.fileentries = {}

        if version == 0:
            self.INDEXFORMAT = INDEXFORMAT0
            self.INDEXENTRYLENGTH = INDEXENTRYLENGTH0
        else:
            self.INDEXFORMAT = INDEXFORMAT1
            self.INDEXENTRYLENGTH = INDEXENTRYLENGTH1

        self.NODEINDEXFORMAT = NODEINDEXFORMAT
        self.NODEINDEXENTRYLENGTH = NODEINDEXENTRYLENGTH

    def add(self, filename, node, p1, p2, linknode, copyfrom):
        copyfrom = copyfrom or ''
        copyfromlen = struct.pack('!H', len(copyfrom))
        self.fileentries.setdefault(filename, []).append((node, p1, p2,
                                                          linknode,
                                                          copyfromlen,
                                                          copyfrom))
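
    # Entries added via add() are buffered in memory; nothing hits disk
    # until close() runs _write() below, one sorted file section at a time.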
|
    def _write(self):
        for filename in sorted(self.fileentries):
            entries = self.fileentries[filename]
            sectionstart = self.packfp.tell()

            # Write the file section content
            entrymap = dict((e[0], e) for e in entries)
            def parentfunc(node):
                x, p1, p2, x, x, x = entrymap[node]
                parents = []
                if p1 != nullid:
                    parents.append(p1)
                if p2 != nullid:
                    parents.append(p2)
                return parents

            # sortnodes emits parents before children; reverse the result so
            # entries are written newest first, as the histpack format expects
            sortednodes = list(reversed(shallowutil.sortnodes(
                (e[0] for e in entries),
                parentfunc)))

            # Write the file section header
            self.writeraw("%s%s%s" % (
                struct.pack('!H', len(filename)),
                filename,
                struct.pack('!I', len(sortednodes)),
            ))

            sectionlen = constants.FILENAMESIZE + len(filename) + 4

            rawstrings = []

            # Record the node locations for the index
            locations = self.entrylocations.setdefault(filename, {})
            offset = sectionstart + sectionlen
            for node in sortednodes:
                locations[node] = offset
                raw = '%s%s%s%s%s%s' % entrymap[node]
                rawstrings.append(raw)
                offset += len(raw)

            rawdata = ''.join(rawstrings)
            sectionlen += len(rawdata)

            self.writeraw(rawdata)

            # Record metadata for the index
            self.files[filename] = (sectionstart, sectionlen)
            node = hashlib.sha1(filename).digest()
            self.entries[node] = node
|
    def close(self, ledger=None):
        if self._closed:
            return

        self._write()

        return super(mutablehistorypack, self).close(ledger=ledger)
|
    def createindex(self, nodelocations, indexoffset):
        fileindexformat = self.INDEXFORMAT
        fileindexlength = self.INDEXENTRYLENGTH
        nodeindexformat = self.NODEINDEXFORMAT
        nodeindexlength = self.NODEINDEXENTRYLENGTH
        version = self.VERSION

        files = ((hashlib.sha1(filename).digest(), filename, offset, size)
                 for filename, (offset, size) in self.files.iteritems())
        files = sorted(files)

        # node index is after file index size, file index, and node index size
        indexlensize = struct.calcsize('!Q')
        nodeindexoffset = (indexoffset + indexlensize +
                           (len(files) * fileindexlength) + indexlensize)

        fileindexentries = []
        nodeindexentries = []
        nodecount = 0
        for namehash, filename, offset, size in files:
            # File section index
            if version == 0:
                rawentry = struct.pack(fileindexformat, namehash, offset, size)
            else:
                nodelocations = self.entrylocations[filename]

                nodeindexsize = len(nodelocations) * nodeindexlength

                rawentry = struct.pack(fileindexformat, namehash, offset, size,
                                       nodeindexoffset, nodeindexsize)

                # Node index
                nodeindexentries.append(struct.pack(constants.FILENAMESTRUCT,
                                                    len(filename)) + filename)
                nodeindexoffset += constants.FILENAMESIZE + len(filename)

                for node, location in sorted(nodelocations.iteritems()):
                    nodeindexentries.append(struct.pack(nodeindexformat, node,
                                                        location))
                    nodecount += 1

                nodeindexoffset += len(nodelocations) * nodeindexlength

            fileindexentries.append(rawentry)

        nodecountraw = ''
        if version == 1:
            nodecountraw = struct.pack('!Q', nodecount)
        return (''.join(fileindexentries) + nodecountraw +
                ''.join(nodeindexentries))