mercurial/tags.py
changeset 24735 07200e3332a1
parent 24532 f5de208a635c
child 24737 b061a2049662
equal deleted inserted replaced
24734:fb6cb1b82f4f 24735:07200e3332a1
    13 from node import nullid, bin, hex, short
    13 from node import nullid, bin, hex, short
    14 from i18n import _
    14 from i18n import _
    15 import util
    15 import util
    16 import encoding
    16 import encoding
    17 import error
    17 import error
       
    18 from array import array
    18 import errno
    19 import errno
    19 import time
    20 import time
    20 
    21 
    21 # The tags cache stores information about heads and the history of tags.
    22 # Tags computation can be expensive and caches exist to make it fast in
       
    23 # the common case.
       
    24 #
       
    25 # The "hgtagsfnodes1" cache file caches the .hgtags filenode values for
       
    26 # each revision in the repository. The file is effectively an array of
       
    27 # fixed length records. Read the docs for "hgtagsfnodescache" for technical
       
    28 # details.
       
    29 #
       
    30 # The .hgtags filenode cache grows in proportion to the length of the
       
    31 # changelog. The file is truncated when the changelog is stripped.
       
    32 #
       
    33 # The purpose of the filenode cache is to avoid the most expensive part
       
    34 # of finding global tags, which is looking up the .hgtags filenode in the
       
    35 # manifest for each head. This can take dozens or over 100ms for
       
    36 # repositories with very large manifests. Multiplied by dozens or even
       
    37 # hundreds of heads and there is a significant performance concern.
       
    38 #
       
    39 # The "tags" cache stores information about heads and the history of tags.
    22 #
    40 #
    23 # The cache file consists of two parts. The first part maps head nodes
    41 # The cache file consists of two parts. The first part maps head nodes
    24 # to .hgtags filenodes. The second part is a history of tags. The two
    42 # to .hgtags filenodes. The second part is a history of tags. The two
    25 # parts are separated by an empty line.
    43 # parts are separated by an empty line.
       
    44 #
       
    45 # The filenodes part of "tags" has effectively been superseded by
       
    46 # "hgtagsfnodes1." It is being kept around for backwards compatibility.
    26 #
    47 #
    27 # The first part consists of lines of the form:
    48 # The first part consists of lines of the form:
    28 #
    49 #
    29 #   <headrev> <headnode> [<hgtagsnode>]
    50 #   <headrev> <headnode> [<hgtagsnode>]
    30 #
    51 #
    37 #
    58 #
    38 # The filenode cache is ordered from tip to oldest (which is part of why
    59 # The filenode cache is ordered from tip to oldest (which is part of why
    39 # <headrev> is there: a quick check of the tip from when the cache was
    60 # <headrev> is there: a quick check of the tip from when the cache was
    40 # written against the current tip is all that is needed to check whether
    61 # written against the current tip is all that is needed to check whether
    41 # the cache is up to date).
    62 # the cache is up to date).
    42 #
       
    43 # The purpose of the filenode cache is to avoid the most expensive part
       
    44 # of finding global tags, which is looking up the .hgtags filenode in the
       
    45 # manifest for each head. This can take over a minute on repositories
       
    46 # that have large manifests and many heads.
       
    47 #
    63 #
    48 # The second part of the tags cache consists of lines of the form:
    64 # The second part of the tags cache consists of lines of the form:
    49 #
    65 #
    50 #   <node> <tag>
    66 #   <node> <tag>
    51 #
    67 #
   322 
   338 
   323     # Now we have to lookup the .hgtags filenode for every new head.
   339     # Now we have to lookup the .hgtags filenode for every new head.
   324     # This is the most expensive part of finding tags, so performance
   340     # This is the most expensive part of finding tags, so performance
   325     # depends primarily on the size of newheads.  Worst case: no cache
   341     # depends primarily on the size of newheads.  Worst case: no cache
   326     # file, so newheads == repoheads.
   342     # file, so newheads == repoheads.
       
   343     fnodescache = hgtagsfnodescache(repo.unfiltered())
   327     for head in reversed(newheads):
   344     for head in reversed(newheads):
   328         cctx = repo[head]
   345         fnode = fnodescache.getfnode(head)
   329         try:
   346         if fnode != nullid:
   330             fnode = cctx.filenode('.hgtags')
       
   331             cachefnode[head] = fnode
   347             cachefnode[head] = fnode
   332         except error.LookupError:
   348 
   333             # no .hgtags file on this head
   349     fnodescache.write()
   334             pass
       
   335 
   350 
   336     duration = time.time() - starttime
   351     duration = time.time() - starttime
   337     ui.log('tagscache',
   352     ui.log('tagscache',
   338            'resolved %d tags cache entries from %d manifests in %0.4f '
   353            '%d/%d cache hits/lookups in %0.4f '
   339            'seconds\n',
   354            'seconds\n',
   340            len(cachefnode), len(newheads), duration)
   355            fnodescache.hitcount, fnodescache.lookupcount, duration)
   341 
   356 
   342     # Caller has to iterate over all heads, but can use the filenodes in
   357     # Caller has to iterate over all heads, but can use the filenodes in
   343     # cachefnode to get to each .hgtags revision quickly.
   358     # cachefnode to get to each .hgtags revision quickly.
   344     return (repoheads, cachefnode, None, True)
   359     return (repoheads, cachefnode, None, True)
   345 
   360 
   386 
   401 
   387     try:
   402     try:
   388         cachefile.close()
   403         cachefile.close()
   389     except (OSError, IOError):
   404     except (OSError, IOError):
   390         pass
   405         pass
       
   406 
       
# Path (relative to .hg) of the persistent .hgtags filenode cache file.
_fnodescachefile = 'cache/hgtagsfnodes1'
# Size of one cache record: 4-byte changeset-node prefix + 20-byte filenode.
_fnodesrecsize = 4 + 20 # changeset fragment + filenode
# Sentinel record value meaning "entry not yet computed" (all 0xff bytes).
_fnodesmissingrec = '\xff' * 24
       
   410 
       
   411 class hgtagsfnodescache(object):
       
   412     """Persistent cache mapping revisions to .hgtags filenodes.
       
   413 
       
   414     The cache is an array of records. Each item in the array corresponds to
       
   415     a changelog revision. Values in the array contain the first 4 bytes of
       
   416     the node hash and the 20 bytes .hgtags filenode for that revision.
       
   417 
       
   418     The first 4 bytes are present as a form of verification. Repository
       
   419     stripping and rewriting may change the node at a numeric revision in the
       
   420     changelog. The changeset fragment serves as a verifier to detect
       
   421     rewriting. This logic is shared with the rev branch cache (see
       
   422     branchmap.py).
       
   423 
       
   424     The instance holds in memory the full cache content but entries are
       
   425     only parsed on read.
       
   426 
       
   427     Instances behave like lists. ``c[i]`` works where i is a rev or
       
   428     changeset node. Missing indexes are populated automatically on access.
       
   429     """
       
   430     def __init__(self, repo):
       
   431         assert repo.filtername is None
       
   432 
       
   433         self._repo = repo
       
   434 
       
   435         # Only for reporting purposes.
       
   436         self.lookupcount = 0
       
   437         self.hitcount = 0
       
   438 
       
   439         self._raw = array('c')
       
   440 
       
   441         data = repo.vfs.tryread(_fnodescachefile)
       
   442         self._raw.fromstring(data)
       
   443 
       
   444         # The end state of self._raw is an array that is of the exact length
       
   445         # required to hold a record for every revision in the repository.
       
   446         # We truncate or extend the array as necessary. self._dirtyoffset is
       
   447         # defined to be the start offset at which we need to write the output
       
   448         # file. This offset is also adjusted when new entries are calculated
       
   449         # for array members.
       
   450         cllen = len(repo.changelog)
       
   451         wantedlen = cllen * _fnodesrecsize
       
   452         rawlen = len(self._raw)
       
   453 
       
   454         self._dirtyoffset = None
       
   455 
       
   456         if rawlen < wantedlen:
       
   457             self._dirtyoffset = rawlen
       
   458             self._raw.extend('\xff' * (wantedlen - rawlen))
       
   459         elif rawlen > wantedlen:
       
   460             # There's no easy way to truncate array instances. This seems
       
   461             # slightly less evil than copying a potentially large array slice.
       
   462             for i in range(rawlen - wantedlen):
       
   463                 self._raw.pop()
       
   464             self._dirtyoffset = len(self._raw)
       
   465 
       
   466     def getfnode(self, node):
       
   467         """Obtain the filenode of the .hgtags file at a specified revision.
       
   468 
       
   469         If the value is in the cache, the entry will be validated and returned.
       
   470         Otherwise, the filenode will be computed and returned.
       
   471 
       
   472         If an .hgtags does not exist at the specified revision, nullid is
       
   473         returned.
       
   474         """
       
   475         ctx = self._repo[node]
       
   476         rev = ctx.rev()
       
   477 
       
   478         self.lookupcount += 1
       
   479 
       
   480         offset = rev * _fnodesrecsize
       
   481         record = self._raw[offset:offset + _fnodesrecsize].tostring()
       
   482         properprefix = node[0:4]
       
   483 
       
   484         # Validate and return existing entry.
       
   485         if record != _fnodesmissingrec:
       
   486             fileprefix = record[0:4]
       
   487 
       
   488             if fileprefix == properprefix:
       
   489                 self.hitcount += 1
       
   490                 return record[4:]
       
   491 
       
   492             # Fall through.
       
   493 
       
   494         # If we get here, the entry is either missing or invalid. Populate it.
       
   495         try:
       
   496             fnode = ctx.filenode('.hgtags')
       
   497         except error.LookupError:
       
   498             # No .hgtags file on this revision.
       
   499             fnode = nullid
       
   500 
       
   501         # Slices on array instances only accept other array.
       
   502         entry = array('c', properprefix + fnode)
       
   503         self._raw[offset:offset + _fnodesrecsize] = entry
       
   504         # self._dirtyoffset could be None.
       
   505         self._dirtyoffset = min(self._dirtyoffset, offset) or 0
       
   506 
       
   507         return fnode
       
   508 
       
   509     def write(self):
       
   510         """Perform all necessary writes to cache file.
       
   511 
       
   512         This may no-op if no writes are needed or if a write lock could
       
   513         not be obtained.
       
   514         """
       
   515         if self._dirtyoffset is None:
       
   516             return
       
   517 
       
   518         data = self._raw[self._dirtyoffset:]
       
   519         if not data:
       
   520             return
       
   521 
       
   522         repo = self._repo
       
   523 
       
   524         try:
       
   525             lock = repo.wlock(wait=False)
       
   526         except error.LockHeld:
       
   527             repo.ui.log('tagscache',
       
   528                         'not writing .hg/%s because lock held\n' %
       
   529                         (_fnodescachefile))
       
   530             return
       
   531 
       
   532         try:
       
   533             try:
       
   534                 f = repo.vfs.open(_fnodescachefile, 'ab')
       
   535                 try:
       
   536                     # if the file has been truncated
       
   537                     actualoffset = f.tell()
       
   538                     if actualoffset < self._dirtyoffset:
       
   539                         self._dirtyoffset = actualoffset
       
   540                         data = self._raw[self._dirtyoffset:]
       
   541                     f.seek(self._dirtyoffset)
       
   542                     f.truncate()
       
   543                     repo.ui.log('tagscache',
       
   544                                 'writing %d bytes to %s\n' % (
       
   545                                 len(data), _fnodescachefile))
       
   546                     f.write(data)
       
   547                     self._dirtyoffset = None
       
   548                 finally:
       
   549                     f.close()
       
   550             except (IOError, OSError), inst:
       
   551                 repo.ui.log('tagscache',
       
   552                             "couldn't write %s: %s\n" % (
       
   553                             _fnodescachefile, inst))
       
   554         finally:
       
   555             lock.release()