from node import nullid, bin, hex, short
from i18n import _
import util
import encoding
import error
from array import array
import errno
import time

# Tags computation can be expensive and caches exist to make it fast in
# the common case.
#
# The "hgtagsfnodes1" cache file caches the .hgtags filenode values for
# each revision in the repository. The file is effectively an array of
# fixed length records. Read the docs for "hgtagsfnodescache" for technical
# details.
#
# The .hgtags filenode cache grows in proportion to the length of the
# changelog. The file is truncated when the changelog is stripped.
#
# The purpose of the filenode cache is to avoid the most expensive part
# of finding global tags, which is looking up the .hgtags filenode in the
# manifest for each head. This can take dozens of milliseconds or over
# 100ms for repositories with very large manifests. Multiplied by dozens
# or even hundreds of heads, that adds up to a significant performance
# concern.
#
# The "tags" cache stores information about heads and the history of tags.
#
# The cache file consists of two parts. The first part maps head nodes
# to .hgtags filenodes. The second part is a history of tags. The two
# parts are separated by an empty line.
#
# The filenodes part of "tags" has effectively been superseded by
# "hgtagsfnodes1." It is being kept around for backwards compatibility.
#
# The first part consists of lines of the form:
#
#   <headrev> <headnode> [<hgtagsnode>]
#
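# A minimal parsing sketch for one line of the first part (illustrative
# only; the real reading code lives elsewhere in this module):
#
#   fields = line.split()
#   headrev = int(fields[0])
#   headnode = bin(fields[1])
#   hgtagsnode = bin(fields[2]) if len(fields) > 2 else None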

# ...

    # Now we have to lookup the .hgtags filenode for every new head.
    # This is the most expensive part of finding tags, so performance
    # depends primarily on the size of newheads. Worst case: no cache
    # file, so newheads == repoheads.
    fnodescache = hgtagsfnodescache(repo.unfiltered())
    for head in reversed(newheads):
        fnode = fnodescache.getfnode(head)
        if fnode != nullid:
            cachefnode[head] = fnode

    fnodescache.write()

    duration = time.time() - starttime
    ui.log('tagscache',
           '%d/%d cache hits/lookups in %0.4f '
           'seconds\n',
           fnodescache.hitcount, fnodescache.lookupcount, duration)

    # Caller has to iterate over all heads, but can use the filenodes in
    # cachefnode to get to each .hgtags revision quickly.
    return (repoheads, cachefnode, None, True)

# ...

    try:
        cachefile.close()
    except (OSError, IOError):
        pass

_fnodescachefile = 'cache/hgtagsfnodes1'
_fnodesrecsize = 4 + 20  # changeset fragment + filenode
_fnodesmissingrec = '\xff' * 24
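
# For illustration (comment only; this mirrors the arithmetic used by
# hgtagsfnodescache.getfnode below): the record for changelog revision
# ``rev`` starts at byte ``rev * _fnodesrecsize``. Its first 4 bytes are a
# changeset node fragment and the remaining 20 bytes the .hgtags filenode,
# so a repository with 100,000 revisions needs 100,000 * 24 = 2,400,000
# bytes of cache (roughly 2.4 MB):
#
#   offset = rev * _fnodesrecsize
#   record = raw[offset:offset + _fnodesrecsize]
#   nodeprefix, fnode = record[0:4], record[4:]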

class hgtagsfnodescache(object):
    """Persistent cache mapping revisions to .hgtags filenodes.

    The cache is an array of records. Each item in the array corresponds to
    a changelog revision. Values in the array contain the first 4 bytes of
    the node hash and the 20 byte .hgtags filenode for that revision.

    The first 4 bytes are present as a form of verification. Repository
    stripping and rewriting may change the node at a numeric revision in the
    changelog. The changeset fragment serves as a verifier to detect
    rewriting. This logic is shared with the rev branch cache (see
    branchmap.py).

    The instance holds in memory the full cache content but entries are
    only parsed on read.

    Entries are looked up with ``getfnode()``, which accepts a changeset
    node. Missing entries are computed and cached automatically on access.
    """
    def __init__(self, repo):
        assert repo.filtername is None

        self._repo = repo

        # Only for reporting purposes.
        self.lookupcount = 0
        self.hitcount = 0

        self._raw = array('c')

        data = repo.vfs.tryread(_fnodescachefile)
        self._raw.fromstring(data)

        # The end state of self._raw is an array that is of the exact length
        # required to hold a record for every revision in the repository.
        # We truncate or extend the array as necessary. self._dirtyoffset is
        # defined to be the start offset at which we need to write the output
        # file. This offset is also adjusted when new entries are calculated
        # for array members.
        cllen = len(repo.changelog)
        wantedlen = cllen * _fnodesrecsize
        rawlen = len(self._raw)

        self._dirtyoffset = None

        if rawlen < wantedlen:
            self._dirtyoffset = rawlen
            self._raw.extend('\xff' * (wantedlen - rawlen))
        elif rawlen > wantedlen:
            # There's no easy way to truncate array instances. This seems
            # slightly less evil than copying a potentially large array slice.
            for i in range(rawlen - wantedlen):
                self._raw.pop()
            self._dirtyoffset = len(self._raw)

    def getfnode(self, node):
        """Obtain the filenode of the .hgtags file at a specified revision.

        If the value is in the cache, the entry will be validated and returned.
        Otherwise, the filenode will be computed and returned.

        If an .hgtags file does not exist at the specified revision, nullid
        is returned.
        """
        ctx = self._repo[node]
        rev = ctx.rev()

        self.lookupcount += 1

        offset = rev * _fnodesrecsize
        record = self._raw[offset:offset + _fnodesrecsize].tostring()
        properprefix = node[0:4]

        # Validate and return existing entry.
        if record != _fnodesmissingrec:
            fileprefix = record[0:4]

            if fileprefix == properprefix:
                self.hitcount += 1
                return record[4:]

            # Fall through.

        # If we get here, the entry is either missing or invalid. Populate it.
        try:
            fnode = ctx.filenode('.hgtags')
        except error.LookupError:
            # No .hgtags file on this revision.
            fnode = nullid

        # Slices on array instances only accept other array.
        entry = array('c', properprefix + fnode)
        self._raw[offset:offset + _fnodesrecsize] = entry
        # self._dirtyoffset could be None.
        self._dirtyoffset = min(self._dirtyoffset, offset) or 0

        return fnode
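
    # Illustrative note (comment only, not executed): the 4-byte prefix
    # comparison in getfnode() is what invalidates stale records after
    # history rewriting. If the node at ``rev`` changes, the stored prefix
    # no longer matches:
    #
    #   record[0:4] == node[0:4]    # False -> entry recomputed and rewritten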

    def write(self):
        """Perform all necessary writes to cache file.

        This may no-op if no writes are needed or if a write lock could
        not be obtained.
        """
        if self._dirtyoffset is None:
            return

        data = self._raw[self._dirtyoffset:]
        if not data:
            return

        repo = self._repo

        try:
            lock = repo.wlock(wait=False)
        except error.LockHeld:
            repo.ui.log('tagscache',
                        'not writing .hg/%s because lock held\n' %
                        (_fnodescachefile))
            return

        try:
            try:
                f = repo.vfs.open(_fnodescachefile, 'ab')
                try:
                    # if the file has been truncated
                    actualoffset = f.tell()
                    if actualoffset < self._dirtyoffset:
                        self._dirtyoffset = actualoffset
                        data = self._raw[self._dirtyoffset:]
                    f.seek(self._dirtyoffset)
                    f.truncate()
                    repo.ui.log('tagscache',
                                'writing %d bytes to %s\n' % (
                                len(data), _fnodescachefile))
                    f.write(data)
                    self._dirtyoffset = None
                finally:
                    f.close()
            except (IOError, OSError), inst:
                repo.ui.log('tagscache',
                            "couldn't write %s: %s\n" % (
                            _fnodescachefile, inst))
        finally:
            lock.release()
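
# Typical calling pattern, for illustration (this mirrors the tags
# resolution code earlier in this module; it is not executed here):
#
#   fnodescache = hgtagsfnodescache(repo.unfiltered())
#   for head in reversed(newheads):
#       fnode = fnodescache.getfnode(head)
#       if fnode != nullid:
#           cachefnode[head] = fnode
#   fnodescache.write()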