hgext/fastannotate/context.py
changeset 39210 1ddb296e0dee
child 39380 1099d9bbdf9a
equal deleted inserted replaced
39209:1af95139e5ec 39210:1ddb296e0dee
       
     1 # Copyright 2016-present Facebook. All Rights Reserved.
       
     2 #
       
     3 # context: context needed to annotate a file
       
     4 #
       
     5 # This software may be used and distributed according to the terms of the
       
     6 # GNU General Public License version 2 or any later version.
       
     7 
       
     8 from __future__ import absolute_import
       
     9 
       
    10 import collections
       
    11 import contextlib
       
    12 import hashlib
       
    13 import os
       
    14 
       
    15 from mercurial.i18n import _
       
    16 from mercurial import (
       
    17     error,
       
    18     linelog as linelogmod,
       
    19     lock as lockmod,
       
    20     mdiff,
       
    21     node,
       
    22     pycompat,
       
    23     scmutil,
       
    24     util,
       
    25 )
       
    26 
       
    27 from . import (
       
    28     error as faerror,
       
    29     revmap as revmapmod,
       
    30 )
       
    31 
       
    32 # given path, get filelog, cached
       
    33 @util.lrucachefunc
       
    34 def _getflog(repo, path):
       
    35     return repo.file(path)
       
    36 
       
    37 # extracted from mercurial.context.basefilectx.annotate
       
    38 def _parents(f, follow=True):
       
    39     # Cut _descendantrev here to mitigate the penalty of lazy linkrev
       
    40     # adjustment. Otherwise, p._adjustlinkrev() would walk changelog
       
    41     # from the topmost introrev (= srcrev) down to p.linkrev() if it
       
    42     # isn't an ancestor of the srcrev.
       
    43     f._changeid
       
    44     pl = f.parents()
       
    45 
       
    46     # Don't return renamed parents if we aren't following.
       
    47     if not follow:
       
    48         pl = [p for p in pl if p.path() == f.path()]
       
    49 
       
    50     # renamed filectx won't have a filelog yet, so set it
       
    51     # from the cache to save time
       
    52     for p in pl:
       
    53         if not '_filelog' in p.__dict__:
       
    54             p._filelog = _getflog(f._repo, p.path())
       
    55 
       
    56     return pl
       
    57 
       
    58 # extracted from mercurial.context.basefilectx.annotate. slightly modified
       
    59 # so it takes a fctx instead of a pair of text and fctx.
       
    60 def _decorate(fctx):
       
    61     text = fctx.data()
       
    62     linecount = text.count('\n')
       
    63     if text and not text.endswith('\n'):
       
    64         linecount += 1
       
    65     return ([(fctx, i) for i in pycompat.xrange(linecount)], text)
       
    66 
       
    67 # extracted from mercurial.context.basefilectx.annotate. slightly modified
       
    68 # so it takes an extra "blocks" parameter calculated elsewhere, instead of
       
    69 # calculating diff here.
       
    70 def _pair(parent, child, blocks):
       
    71     for (a1, a2, b1, b2), t in blocks:
       
    72         # Changed blocks ('!') or blocks made only of blank lines ('~')
       
    73         # belong to the child.
       
    74         if t == '=':
       
    75             child[0][b1:b2] = parent[0][a1:a2]
       
    76     return child
       
    77 
       
# like scmutil.revsingle, but wrapped in an lru cache, so that the returned
# objects and their states (like manifests) can be reused across calls
_revsingle = util.lrucachefunc(scmutil.revsingle)
       
    81 
       
    82 def resolvefctx(repo, rev, path, resolverev=False, adjustctx=None):
       
    83     """(repo, str, str) -> fctx
       
    84 
       
    85     get the filectx object from repo, rev, path, in an efficient way.
       
    86 
       
    87     if resolverev is True, "rev" is a revision specified by the revset
       
    88     language, otherwise "rev" is a nodeid, or a revision number that can
       
    89     be consumed by repo.__getitem__.
       
    90 
       
    91     if adjustctx is not None, the returned fctx will point to a changeset
       
    92     that introduces the change (last modified the file). if adjustctx
       
    93     is 'linkrev', trust the linkrev and do not adjust it. this is noticeably
       
    94     faster for big repos but is incorrect for some cases.
       
    95     """
       
    96     if resolverev and not isinstance(rev, int) and rev is not None:
       
    97         ctx = _revsingle(repo, rev)
       
    98     else:
       
    99         ctx = repo[rev]
       
   100 
       
   101     # If we don't need to adjust the linkrev, create the filectx using the
       
   102     # changectx instead of using ctx[path]. This means it already has the
       
   103     # changectx information, so blame -u will be able to look directly at the
       
   104     # commitctx object instead of having to resolve it by going through the
       
   105     # manifest. In a lazy-manifest world this can prevent us from downloading a
       
   106     # lot of data.
       
   107     if adjustctx is None:
       
   108         # ctx.rev() is None means it's the working copy, which is a special
       
   109         # case.
       
   110         if ctx.rev() is None:
       
   111             fctx = ctx[path]
       
   112         else:
       
   113             fctx = repo.filectx(path, changeid=ctx.rev())
       
   114     else:
       
   115         fctx = ctx[path]
       
   116         if adjustctx == 'linkrev':
       
   117             introrev = fctx.linkrev()
       
   118         else:
       
   119             introrev = fctx.introrev()
       
   120         if introrev != ctx.rev():
       
   121             fctx._changeid = introrev
       
   122             fctx._changectx = repo[introrev]
       
   123     return fctx
       
   124 
       
   125 # like mercurial.store.encodedir, but use linelog suffixes: .m, .l, .lock
       
   126 def encodedir(path):
       
   127     return (path
       
   128             .replace('.hg/', '.hg.hg/')
       
   129             .replace('.l/', '.l.hg/')
       
   130             .replace('.m/', '.m.hg/')
       
   131             .replace('.lock/', '.lock.hg/'))
       
   132 
       
   133 def hashdiffopts(diffopts):
       
   134     diffoptstr = str(sorted(
       
   135         (k, getattr(diffopts, k))
       
   136         for k in mdiff.diffopts.defaults.iterkeys()
       
   137     ))
       
   138     return hashlib.sha1(diffoptstr).hexdigest()[:6]
       
   139 
       
# hash of the default diffopts, computed once at import time; shortstr
# compares against it to omit the diffopts part for default options
_defaultdiffopthash = hashdiffopts(mdiff.defaultopts)
       
   141 
       
   142 class annotateopts(object):
       
   143     """like mercurial.mdiff.diffopts, but is for annotate
       
   144 
       
   145     followrename: follow renames, like "hg annotate -f"
       
   146     followmerge: follow p2 of a merge changeset, otherwise p2 is ignored
       
   147     """
       
   148 
       
   149     defaults = {
       
   150         'diffopts': None,
       
   151         'followrename': True,
       
   152         'followmerge': True,
       
   153     }
       
   154 
       
   155     def __init__(self, **opts):
       
   156         for k, v in self.defaults.iteritems():
       
   157             setattr(self, k, opts.get(k, v))
       
   158 
       
   159     @util.propertycache
       
   160     def shortstr(self):
       
   161         """represent opts in a short string, suitable for a directory name"""
       
   162         result = ''
       
   163         if not self.followrename:
       
   164             result += 'r0'
       
   165         if not self.followmerge:
       
   166             result += 'm0'
       
   167         if self.diffopts is not None:
       
   168             assert isinstance(self.diffopts, mdiff.diffopts)
       
   169             diffopthash = hashdiffopts(self.diffopts)
       
   170             if diffopthash != _defaultdiffopthash:
       
   171                 result += 'i' + diffopthash
       
   172         return result or 'default'
       
   173 
       
# annotateopts instance with every option left at its default value
defaultopts = annotateopts()
       
   175 
       
   176 class _annotatecontext(object):
       
   177     """do not use this class directly as it does not use lock to protect
       
   178     writes. use "with annotatecontext(...)" instead.
       
   179     """
       
   180 
       
   181     def __init__(self, repo, path, linelogpath, revmappath, opts):
       
   182         self.repo = repo
       
   183         self.ui = repo.ui
       
   184         self.path = path
       
   185         self.opts = opts
       
   186         self.linelogpath = linelogpath
       
   187         self.revmappath = revmappath
       
   188         self._linelog = None
       
   189         self._revmap = None
       
   190         self._node2path = {} # {str: str}
       
   191 
       
   192     @property
       
   193     def linelog(self):
       
   194         if self._linelog is None:
       
   195             if os.path.exists(self.linelogpath):
       
   196                 with open(self.linelogpath, 'rb') as f:
       
   197                     try:
       
   198                         self._linelog = linelogmod.linelog.fromdata(f.read())
       
   199                     except linelogmod.LineLogError:
       
   200                         self._linelog = linelogmod.linelog()
       
   201             else:
       
   202                 self._linelog = linelogmod.linelog()
       
   203         return self._linelog
       
   204 
       
   205     @property
       
   206     def revmap(self):
       
   207         if self._revmap is None:
       
   208             self._revmap = revmapmod.revmap(self.revmappath)
       
   209         return self._revmap
       
   210 
       
   211     def close(self):
       
   212         if self._revmap is not None:
       
   213             self._revmap.flush()
       
   214             self._revmap = None
       
   215         if self._linelog is not None:
       
   216             with open(self.linelogpath, 'wb') as f:
       
   217                 f.write(self._linelog.encode())
       
   218             self._linelog = None
       
   219 
       
   220     __del__ = close
       
   221 
       
   222     def rebuild(self):
       
   223         """delete linelog and revmap, useful for rebuilding"""
       
   224         self.close()
       
   225         self._node2path.clear()
       
   226         _unlinkpaths([self.revmappath, self.linelogpath])
       
   227 
       
   228     @property
       
   229     def lastnode(self):
       
   230         """return last node in revmap, or None if revmap is empty"""
       
   231         if self._revmap is None:
       
   232             # fast path, read revmap without loading its full content
       
   233             return revmapmod.getlastnode(self.revmappath)
       
   234         else:
       
   235             return self._revmap.rev2hsh(self._revmap.maxrev)
       
   236 
       
   237     def isuptodate(self, master, strict=True):
       
   238         """return True if the revmap / linelog is up-to-date, or the file
       
   239         does not exist in the master revision. False otherwise.
       
   240 
       
   241         it tries to be fast and could return false negatives, because of the
       
   242         use of linkrev instead of introrev.
       
   243 
       
   244         useful for both server and client to decide whether to update
       
   245         fastannotate cache or not.
       
   246 
       
   247         if strict is True, even if fctx exists in the revmap, but is not the
       
   248         last node, isuptodate will return False. it's good for performance - no
       
   249         expensive check was done.
       
   250 
       
   251         if strict is False, if fctx exists in the revmap, this function may
       
   252         return True. this is useful for the client to skip downloading the
       
   253         cache if the client's master is behind the server's.
       
   254         """
       
   255         lastnode = self.lastnode
       
   256         try:
       
   257             f = self._resolvefctx(master, resolverev=True)
       
   258             # choose linkrev instead of introrev as the check is meant to be
       
   259             # *fast*.
       
   260             linknode = self.repo.changelog.node(f.linkrev())
       
   261             if not strict and lastnode and linknode != lastnode:
       
   262                 # check if f.node() is in the revmap. note: this loads the
       
   263                 # revmap and can be slow.
       
   264                 return self.revmap.hsh2rev(linknode) is not None
       
   265             # avoid resolving old manifest, or slow adjustlinkrev to be fast,
       
   266             # false negatives are acceptable in this case.
       
   267             return linknode == lastnode
       
   268         except LookupError:
       
   269             # master does not have the file, or the revmap is ahead
       
   270             return True
       
   271 
       
    def annotate(self, rev, master=None, showpath=False, showlines=False):
        """incrementally update the cache so it includes revisions in the main
        branch till 'master'. and run annotate on 'rev', which may or may not be
        included in the main branch.

        rev: the revision to annotate. an int is converted to a hex node
        first so that the fast path can be hit.

        master: the head of the main branch. if master is None (or otherwise
        falsy), do not update linelog. it is also ignored if the file does
        not exist there or if it is already in the revmap.

        the first value returned is the annotate result, it is [(node, linenum)]
        by default. [(node, linenum, path)] if showpath is True.

        if showlines is True, a second value will be returned, it is a list of
        corresponding line contents.

        raises error.Abort if master resolves to the working directory.
        """

        # the fast path test requires commit hash, convert rev number to hash,
        # so it may hit the fast path. note: in the "fctx" mode, the "annotate"
        # command could give us a revision number even if the user passes a
        # commit hash.
        if isinstance(rev, int):
            rev = node.hex(self.repo.changelog.node(rev))

        # fast path: if rev is in the main branch already
        directly, revfctx = self.canannotatedirectly(rev)
        if directly:
            if self.ui.debugflag:
                # the second value printed is whether revfctx has a 'node'
                # attribute, i.e. whether a fctx had to be resolved
                self.ui.debug('fastannotate: %s: using fast path '
                              '(resolved fctx: %s)\n'
                              % (self.path, util.safehasattr(revfctx, 'node')))
            return self.annotatedirectly(revfctx, showpath, showlines)

        # resolve master
        masterfctx = None
        if master:
            try:
                masterfctx = self._resolvefctx(master, resolverev=True,
                                               adjustctx=True)
            except LookupError: # master does not have the file
                pass
            else:
                if masterfctx in self.revmap: # no need to update linelog
                    masterfctx = None

        #                  ... - @ <- rev (can be an arbitrary changeset,
        #                 /                not necessarily a descendant
        #      master -> o                 of master)
        #                |
        #     a merge -> o         'o': new changesets in the main branch
        #                |\        '#': revisions in the main branch that
        #                o *            exist in linelog / revmap
        #                | .       '*': changesets in side branches, or
        # last master -> # .            descendants of master
        #                | .
        #                # *       joint: '#', and is a parent of a '*'
        #                |/
        #     a joint -> # ^^^^ --- side branches
        #                |
        #                ^ --- main branch (in linelog)

        # these DFSes are similar to the traditional annotate algorithm.
        # we cannot really reuse the code for perf reason.

        # 1st DFS calculates merges, joint points, and needed.
        # "needed" is a simple reference counting dict to free items in
        # "hist", reducing its memory usage otherwise could be huge.
        initvisit = [revfctx]
        if masterfctx:
            if masterfctx.rev() is None:
                raise error.Abort(_('cannot update linelog to wdir()'),
                                  hint=_('set fastannotate.mainbranch'))
            initvisit.append(masterfctx)
        visit = initvisit[:]
        pcache = {} # {fctx: [parent fctx]}, for fctxs not in the revmap
        needed = {revfctx: 1}
        hist = {} # {fctx: ([(llrev or fctx, linenum)], text)}
        while visit:
            f = visit.pop()
            if f in pcache or f in hist:
                continue
            if f in self.revmap: # in the old main branch, it's a joint
                # joints are annotated straight from the linelog and the
                # walk does not go past them
                llrev = self.revmap.hsh2rev(f.node())
                self.linelog.annotate(llrev)
                result = self.linelog.annotateresult
                hist[f] = (result, f.data())
                continue
            pl = self._parentfunc(f)
            pcache[f] = pl
            for p in pl:
                needed[p] = needed.get(p, 0) + 1
                if p not in pcache:
                    visit.append(p)

        # 2nd (simple) DFS calculates new changesets in the main branch
        # ('o' nodes in the above graph), so we know when to update linelog.
        # it only follows the first parent, starting from master.
        newmainbranch = set()
        f = masterfctx
        while f and f not in self.revmap:
            newmainbranch.add(f)
            pl = pcache[f]
            if pl:
                f = pl[0]
            else:
                f = None
                break

        # f, if present, is the position where the last build stopped at, and
        # should be the "master" last time. check to see if we can continue
        # building the linelog incrementally. (we cannot if diverged)
        if masterfctx is not None:
            self._checklastmasterhead(f)

        if self.ui.debugflag:
            if newmainbranch:
                self.ui.debug('fastannotate: %s: %d new changesets in the main'
                              ' branch\n' % (self.path, len(newmainbranch)))
            elif not hist: # no joints, no updates
                self.ui.debug('fastannotate: %s: linelog cannot help in '
                              'annotating this revision\n' % self.path)

        # prepare annotateresult so we can update linelog incrementally
        self.linelog.annotate(self.linelog.maxrev)

        # 3rd DFS does the actual annotate
        visit = initvisit[:]
        progress = 0
        while visit:
            f = visit[-1]
            if f in hist:
                visit.pop()
                continue

            # f stays on the stack until all of its parents are in hist, so
            # parents are always annotated before their children
            ready = True
            pl = pcache[f]
            for p in pl:
                if p not in hist:
                    ready = False
                    visit.append(p)
            if not ready:
                continue

            visit.pop()
            blocks = None # mdiff blocks, used for appending linelog
            ismainbranch = (f in newmainbranch)
            # curr is the same as the traditional annotate algorithm,
            # if we only care about linear history (do not follow merge),
            # then curr is not actually used.
            assert f not in hist
            curr = _decorate(f)
            for i, p in enumerate(pl):
                bs = list(self._diffblocks(hist[p][1], curr[1]))
                # only the diff against the first parent is written to the
                # linelog
                if i == 0 and ismainbranch:
                    blocks = bs
                curr = _pair(hist[p], curr, bs)
                # drop the parent's entry once its last child has used it
                if needed[p] == 1:
                    del hist[p]
                    del needed[p]
                else:
                    needed[p] -= 1

            hist[f] = curr
            del pcache[f]

            if ismainbranch: # need to write to linelog
                if not self.ui.quiet:
                    progress += 1
                    self.ui.progress(_('building cache'), progress,
                                     total=len(newmainbranch))
                bannotated = None
                if len(pl) == 2 and self.opts.followmerge: # merge
                    bannotated = curr[0]
                if blocks is None: # no parents, add an empty one
                    blocks = list(self._diffblocks('', curr[1]))
                self._appendrev(f, blocks, bannotated)
            elif showpath: # not append linelog, but we need to record path
                self._node2path[f.node()] = f.path()

        if progress: # clean progress bar
            self.ui.write()

        # hist entries hold either linelog revs (ints) or fctxs; convert
        # both to nodes
        result = [
            ((self.revmap.rev2hsh(fr) if isinstance(fr, int) else fr.node()), l)
            for fr, l in hist[revfctx][0]] # [(node, linenumber)]
        return self._refineannotateresult(result, revfctx, showpath, showlines)
       
   454 
       
   455     def canannotatedirectly(self, rev):
       
   456         """(str) -> bool, fctx or node.
       
   457         return (True, f) if we can annotate without updating the linelog, pass
       
   458         f to annotatedirectly.
       
   459         return (False, f) if we need extra calculation. f is the fctx resolved
       
   460         from rev.
       
   461         """
       
   462         result = True
       
   463         f = None
       
   464         if not isinstance(rev, int) and rev is not None:
       
   465             hsh = {20: bytes, 40: node.bin}.get(len(rev), lambda x: None)(rev)
       
   466             if hsh is not None and (hsh, self.path) in self.revmap:
       
   467                 f = hsh
       
   468         if f is None:
       
   469             adjustctx = 'linkrev' if self._perfhack else True
       
   470             f = self._resolvefctx(rev, adjustctx=adjustctx, resolverev=True)
       
   471             result = f in self.revmap
       
   472             if not result and self._perfhack:
       
   473                 # redo the resolution without perfhack - as we are going to
       
   474                 # do write operations, we need a correct fctx.
       
   475                 f = self._resolvefctx(rev, adjustctx=True, resolverev=True)
       
   476         return result, f
       
   477 
       
   478     def annotatealllines(self, rev, showpath=False, showlines=False):
       
   479         """(rev : str) -> [(node : str, linenum : int, path : str)]
       
   480 
       
   481         the result has the same format with annotate, but include all (including
       
   482         deleted) lines up to rev. call this after calling annotate(rev, ...) for
       
   483         better performance and accuracy.
       
   484         """
       
   485         revfctx = self._resolvefctx(rev, resolverev=True, adjustctx=True)
       
   486 
       
   487         # find a chain from rev to anything in the mainbranch
       
   488         if revfctx not in self.revmap:
       
   489             chain = [revfctx]
       
   490             a = ''
       
   491             while True:
       
   492                 f = chain[-1]
       
   493                 pl = self._parentfunc(f)
       
   494                 if not pl:
       
   495                     break
       
   496                 if pl[0] in self.revmap:
       
   497                     a = pl[0].data()
       
   498                     break
       
   499                 chain.append(pl[0])
       
   500 
       
   501             # both self.linelog and self.revmap is backed by filesystem. now
       
   502             # we want to modify them but do not want to write changes back to
       
   503             # files. so we create in-memory objects and copy them. it's like
       
   504             # a "fork".
       
   505             linelog = linelogmod.linelog()
       
   506             linelog.copyfrom(self.linelog)
       
   507             linelog.annotate(linelog.maxrev)
       
   508             revmap = revmapmod.revmap()
       
   509             revmap.copyfrom(self.revmap)
       
   510 
       
   511             for f in reversed(chain):
       
   512                 b = f.data()
       
   513                 blocks = list(self._diffblocks(a, b))
       
   514                 self._doappendrev(linelog, revmap, f, blocks)
       
   515                 a = b
       
   516         else:
       
   517             # fastpath: use existing linelog, revmap as we don't write to them
       
   518             linelog = self.linelog
       
   519             revmap = self.revmap
       
   520 
       
   521         lines = linelog.getalllines()
       
   522         hsh = revfctx.node()
       
   523         llrev = revmap.hsh2rev(hsh)
       
   524         result = [(revmap.rev2hsh(r), l) for r, l in lines if r <= llrev]
       
   525         # cannot use _refineannotateresult since we need custom logic for
       
   526         # resolving line contents
       
   527         if showpath:
       
   528             result = self._addpathtoresult(result, revmap)
       
   529         if showlines:
       
   530             linecontents = self._resolvelines(result, revmap, linelog)
       
   531             result = (result, linecontents)
       
   532         return result
       
   533 
       
    def _resolvelines(self, annotateresult, revmap, linelog):
        """(annotateresult) -> [line]. designed for annotatealllines.
        this is probably the most inefficient code in the whole fastannotate
        directory. but we have made a decision that the linelog does not
        store line contents. so getting them requires random accesses to
        the revlog data, since they can be many, it can be very slow.

        annotateresult: a list whose entries start with (node, linenum).
        revmap, linelog: the objects used to map nodes to linelog revs and
        to annotate them.

        returns a list of line contents, one for each annotateresult entry.
        raises faerror.CorruptedFileError if an annotated revision has a
        different number of lines than the file content.
        """
        # [llrev]
        revs = [revmap.hsh2rev(l[0]) for l in annotateresult]
        result = [None] * len(annotateresult)
        # {(rev, linenum): [lineindex]}
        key2idxs = collections.defaultdict(list)
        for i in pycompat.xrange(len(result)):
            key2idxs[(revs[i], annotateresult[i][1])].append(i)
        # every pass picks one revision, annotates it, and removes the keys
        # it resolved from key2idxs
        while key2idxs:
            # find an unresolved line and its linelog rev to annotate
            hsh = None
            try:
                # pick any key whose revision is not flagged as side branch
                for (rev, _linenum), idxs in key2idxs.iteritems():
                    if revmap.rev2flag(rev) & revmapmod.sidebranchflag:
                        continue
                    hsh = annotateresult[idxs[0]][0]
                    break
            # NOTE(review): a for loop ends normally when its iterator is
            # exhausted, so this handler looks unreachable - confirm
            except StopIteration: # no more unresolved lines
                return result
            if hsh is None:
                # the remaining key2idxs are not in main branch, resolving them
                # using the hard way...
                revlines = {} # {rev: [line]}, file content read once per rev
                for (rev, linenum), idxs in key2idxs.iteritems():
                    if rev not in revlines:
                        hsh = annotateresult[idxs[0]][0]
                        if self.ui.debugflag:
                            self.ui.debug('fastannotate: reading %s line #%d '
                                          'to resolve lines %r\n'
                                          % (node.short(hsh), linenum, idxs))
                        fctx = self._resolvefctx(hsh, revmap.rev2path(rev))
                        lines = mdiff.splitnewlines(fctx.data())
                        revlines[rev] = lines
                    for idx in idxs:
                        result[idx] = revlines[rev][linenum]
                assert all(x is not None for x in result)
                return result

            # run the annotate and the lines should match to the file content
            self.ui.debug('fastannotate: annotate %s to resolve lines\n'
                          % node.short(hsh))
            # rev is still bound to the key the search loop above stopped at
            linelog.annotate(rev)
            fctx = self._resolvefctx(hsh, revmap.rev2path(rev))
            annotated = linelog.annotateresult
            lines = mdiff.splitnewlines(fctx.data())
            if len(lines) != len(annotated):
                raise faerror.CorruptedFileError('unexpected annotated lines')
            # resolve lines from the annotate result
            for i, line in enumerate(lines):
                k = annotated[i]
                if k in key2idxs:
                    for idx in key2idxs[k]:
                        result[idx] = line
                    del key2idxs[k]
        return result
       
   595 
       
   596     def annotatedirectly(self, f, showpath, showlines):
       
   597         """like annotate, but when we know that f is in linelog.
       
   598         f can be either a 20-char str (node) or a fctx. this is for perf - in
       
   599         the best case, the user provides a node and we don't need to read the
       
   600         filelog or construct any filecontext.
       
   601         """
       
   602         if isinstance(f, str):
       
   603             hsh = f
       
   604         else:
       
   605             hsh = f.node()
       
   606         llrev = self.revmap.hsh2rev(hsh)
       
   607         if not llrev:
       
   608             raise faerror.CorruptedFileError('%s is not in revmap'
       
   609                                              % node.hex(hsh))
       
   610         if (self.revmap.rev2flag(llrev) & revmapmod.sidebranchflag) != 0:
       
   611             raise faerror.CorruptedFileError('%s is not in revmap mainbranch'
       
   612                                              % node.hex(hsh))
       
   613         self.linelog.annotate(llrev)
       
   614         result = [(self.revmap.rev2hsh(r), l)
       
   615                   for r, l in self.linelog.annotateresult]
       
   616         return self._refineannotateresult(result, f, showpath, showlines)
       
   617 
       
   618     def _refineannotateresult(self, result, f, showpath, showlines):
       
   619         """add the missing path or line contents, they can be expensive.
       
   620         f could be either node or fctx.
       
   621         """
       
   622         if showpath:
       
   623             result = self._addpathtoresult(result)
       
   624         if showlines:
       
   625             if isinstance(f, str): # f: node or fctx
       
   626                 llrev = self.revmap.hsh2rev(f)
       
   627                 fctx = self._resolvefctx(f, self.revmap.rev2path(llrev))
       
   628             else:
       
   629                 fctx = f
       
   630             lines = mdiff.splitnewlines(fctx.data())
       
   631             if len(lines) != len(result): # linelog is probably corrupted
       
   632                 raise faerror.CorruptedFileError()
       
   633             result = (result, lines)
       
   634         return result
       
   635 
       
   636     def _appendrev(self, fctx, blocks, bannotated=None):
       
   637         self._doappendrev(self.linelog, self.revmap, fctx, blocks, bannotated)
       
   638 
       
   639     def _diffblocks(self, a, b):
       
   640         return mdiff.allblocks(a, b, self.opts.diffopts)
       
   641 
       
   642     @staticmethod
       
   643     def _doappendrev(linelog, revmap, fctx, blocks, bannotated=None):
       
   644         """append a revision to linelog and revmap"""
       
   645 
       
   646         def getllrev(f):
       
   647             """(fctx) -> int"""
       
   648             # f should not be a linelog revision
       
   649             if isinstance(f, int):
       
   650                 raise error.ProgrammingError('f should not be an int')
       
   651             # f is a fctx, allocate linelog rev on demand
       
   652             hsh = f.node()
       
   653             rev = revmap.hsh2rev(hsh)
       
   654             if rev is None:
       
   655                 rev = revmap.append(hsh, sidebranch=True, path=f.path())
       
   656             return rev
       
   657 
       
   658         # append sidebranch revisions to revmap
       
   659         siderevs = []
       
   660         siderevmap = {} # node: int
       
   661         if bannotated is not None:
       
   662             for (a1, a2, b1, b2), op in blocks:
       
   663                 if op != '=':
       
   664                     # f could be either linelong rev, or fctx.
       
   665                     siderevs += [f for f, l in bannotated[b1:b2]
       
   666                                  if not isinstance(f, int)]
       
   667         siderevs = set(siderevs)
       
   668         if fctx in siderevs: # mainnode must be appended seperately
       
   669             siderevs.remove(fctx)
       
   670         for f in siderevs:
       
   671             siderevmap[f] = getllrev(f)
       
   672 
       
   673         # the changeset in the main branch, could be a merge
       
   674         llrev = revmap.append(fctx.node(), path=fctx.path())
       
   675         siderevmap[fctx] = llrev
       
   676 
       
   677         for (a1, a2, b1, b2), op in reversed(blocks):
       
   678             if op == '=':
       
   679                 continue
       
   680             if bannotated is None:
       
   681                 linelog.replacelines(llrev, a1, a2, b1, b2)
       
   682             else:
       
   683                 blines = [((r if isinstance(r, int) else siderevmap[r]), l)
       
   684                           for r, l in bannotated[b1:b2]]
       
   685                 linelog.replacelines_vec(llrev, a1, a2, blines)
       
   686 
       
   687     def _addpathtoresult(self, annotateresult, revmap=None):
       
   688         """(revmap, [(node, linenum)]) -> [(node, linenum, path)]"""
       
   689         if revmap is None:
       
   690             revmap = self.revmap
       
   691 
       
   692         def _getpath(nodeid):
       
   693             path = self._node2path.get(nodeid)
       
   694             if path is None:
       
   695                 path = revmap.rev2path(revmap.hsh2rev(nodeid))
       
   696                 self._node2path[nodeid] = path
       
   697             return path
       
   698 
       
   699         return [(n, l, _getpath(n)) for n, l in annotateresult]
       
   700 
       
   701     def _checklastmasterhead(self, fctx):
       
   702         """check if fctx is the master's head last time, raise if not"""
       
   703         if fctx is None:
       
   704             llrev = 0
       
   705         else:
       
   706             llrev = self.revmap.hsh2rev(fctx.node())
       
   707             if not llrev:
       
   708                 raise faerror.CannotReuseError()
       
   709         if self.linelog.maxrev != llrev:
       
   710             raise faerror.CannotReuseError()
       
   711 
       
   712     @util.propertycache
       
   713     def _parentfunc(self):
       
   714         """-> (fctx) -> [fctx]"""
       
   715         followrename = self.opts.followrename
       
   716         followmerge = self.opts.followmerge
       
   717         def parents(f):
       
   718             pl = _parents(f, follow=followrename)
       
   719             if not followmerge:
       
   720                 pl = pl[:1]
       
   721             return pl
       
   722         return parents
       
   723 
       
   724     @util.propertycache
       
   725     def _perfhack(self):
       
   726         return self.ui.configbool('fastannotate', 'perfhack')
       
   727 
       
   728     def _resolvefctx(self, rev, path=None, **kwds):
       
   729         return resolvefctx(self.repo, rev, (path or self.path), **kwds)
       
   730 
       
   731 def _unlinkpaths(paths):
       
   732     """silent, best-effort unlink"""
       
   733     for path in paths:
       
   734         try:
       
   735             util.unlink(path)
       
   736         except OSError:
       
   737             pass
       
   738 
       
   739 class pathhelper(object):
       
   740     """helper for getting paths for lockfile, linelog and revmap"""
       
   741 
       
   742     def __init__(self, repo, path, opts=defaultopts):
       
   743         # different options use different directories
       
   744         self._vfspath = os.path.join('fastannotate',
       
   745                                      opts.shortstr, encodedir(path))
       
   746         self._repo = repo
       
   747 
       
   748     @property
       
   749     def dirname(self):
       
   750         return os.path.dirname(self._repo.vfs.join(self._vfspath))
       
   751 
       
   752     @property
       
   753     def linelogpath(self):
       
   754         return self._repo.vfs.join(self._vfspath + '.l')
       
   755 
       
   756     def lock(self):
       
   757         return lockmod.lock(self._repo.vfs, self._vfspath + '.lock')
       
   758 
       
   759     @contextlib.contextmanager
       
   760     def _lockflock(self):
       
   761         """the same as 'lock' but use flock instead of lockmod.lock, to avoid
       
   762         creating temporary symlinks."""
       
   763         import fcntl
       
   764         lockpath = self.linelogpath
       
   765         util.makedirs(os.path.dirname(lockpath))
       
   766         lockfd = os.open(lockpath, os.O_RDONLY | os.O_CREAT, 0o664)
       
   767         fcntl.flock(lockfd, fcntl.LOCK_EX)
       
   768         try:
       
   769             yield
       
   770         finally:
       
   771             fcntl.flock(lockfd, fcntl.LOCK_UN)
       
   772             os.close(lockfd)
       
   773 
       
   774     @property
       
   775     def revmappath(self):
       
   776         return self._repo.vfs.join(self._vfspath + '.m')
       
   777 
       
   778 @contextlib.contextmanager
       
   779 def annotatecontext(repo, path, opts=defaultopts, rebuild=False):
       
   780     """context needed to perform (fast) annotate on a file
       
   781 
       
   782     an annotatecontext of a single file consists of two structures: the
       
   783     linelog and the revmap. this function takes care of locking. only 1
       
   784     process is allowed to write that file's linelog and revmap at a time.
       
   785 
       
   786     when something goes wrong, this function will assume the linelog and the
       
   787     revmap are in a bad state, and remove them from disk.
       
   788 
       
   789     use this function in the following way:
       
   790 
       
   791         with annotatecontext(...) as actx:
       
   792             actx. ....
       
   793     """
       
   794     helper = pathhelper(repo, path, opts)
       
   795     util.makedirs(helper.dirname)
       
   796     revmappath = helper.revmappath
       
   797     linelogpath = helper.linelogpath
       
   798     actx = None
       
   799     try:
       
   800         with helper.lock():
       
   801             actx = _annotatecontext(repo, path, linelogpath, revmappath, opts)
       
   802             if rebuild:
       
   803                 actx.rebuild()
       
   804             yield actx
       
   805     except Exception:
       
   806         if actx is not None:
       
   807             actx.rebuild()
       
   808         repo.ui.debug('fastannotate: %s: cache broken and deleted\n' % path)
       
   809         raise
       
   810     finally:
       
   811         if actx is not None:
       
   812             actx.close()
       
   813 
       
   814 def fctxannotatecontext(fctx, follow=True, diffopts=None, rebuild=False):
       
   815     """like annotatecontext but get the context from a fctx. convenient when
       
   816     used in fctx.annotate
       
   817     """
       
   818     repo = fctx._repo
       
   819     path = fctx._path
       
   820     if repo.ui.configbool('fastannotate', 'forcefollow', True):
       
   821         follow = True
       
   822     aopts = annotateopts(diffopts=diffopts, followrename=follow)
       
   823     return annotatecontext(repo, path, aopts, rebuild)