comparison hgext/fastannotate/context.py @ 39210:1ddb296e0dee

fastannotate: initial import from Facebook's hg-experimental I made as few changes as I could to get the tests to pass, but this was a bit involved due to some churn in the blame code since someone last gave fastannotate any TLC. There's still follow-up work here to rip out support for old versions of hg and to integrate the protocol with modern standards. Some performance numbers (all on my 2016 MacBook Pro with a 2.6Ghz i7): Mercurial mercurial/manifest.py traditional blame time: real 1.050 secs (user 0.990+0.000 sys 0.060+0.000) build cache time: real 5.900 secs (user 5.720+0.000 sys 0.110+0.000) fastannotate time: real 0.120 secs (user 0.100+0.000 sys 0.020+0.000) Mercurial mercurial/localrepo.py traditional blame time: real 3.330 secs (user 3.220+0.000 sys 0.070+0.000) build cache time: real 30.610 secs (user 30.190+0.000 sys 0.230+0.000) fastannotate time: real 0.180 secs (user 0.160+0.000 sys 0.020+0.000) mozilla-central dom/ipc/ContentParent.cpp traditional blame time: real 7.640 secs (user 7.210+0.000 sys 0.380+0.000) build cache time: real 98.650 secs (user 97.000+0.000 sys 0.950+0.000) fastannotate time: real 1.580 secs (user 1.340+0.000 sys 0.240+0.000) mozilla-central dom/base/nsDocument.cpp traditional blame time: real 17.110 secs (user 16.490+0.000 sys 0.500+0.000) build cache time: real 399.750 secs (user 394.520+0.000 sys 2.610+0.000) fastannotate time: real 1.780 secs (user 1.530+0.000 sys 0.240+0.000) So building the cache is expensive (but might be faster with xdiff enabled), but the blame results are *way* faster. Differential Revision: https://phab.mercurial-scm.org/D3994
author Augie Fackler <augie@google.com>
date Mon, 30 Jul 2018 22:50:00 -0400
parents
children 1099d9bbdf9a
comparison
equal deleted inserted replaced
39209:1af95139e5ec 39210:1ddb296e0dee
1 # Copyright 2016-present Facebook. All Rights Reserved.
2 #
3 # context: context needed to annotate a file
4 #
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
7
8 from __future__ import absolute_import
9
10 import collections
11 import contextlib
12 import hashlib
13 import os
14
15 from mercurial.i18n import _
16 from mercurial import (
17 error,
18 linelog as linelogmod,
19 lock as lockmod,
20 mdiff,
21 node,
22 pycompat,
23 scmutil,
24 util,
25 )
26
27 from . import (
28 error as faerror,
29 revmap as revmapmod,
30 )
31
# filelog lookup for a path, memoized through util.lrucachefunc
@util.lrucachefunc
def _getflog(repo, path):
    """(repo, path) -> filelog, as returned by repo.file(path)"""
    filelog = repo.file(path)
    return filelog
36
37 # extracted from mercurial.context.basefilectx.annotate
38 def _parents(f, follow=True):
39 # Cut _descendantrev here to mitigate the penalty of lazy linkrev
40 # adjustment. Otherwise, p._adjustlinkrev() would walk changelog
41 # from the topmost introrev (= srcrev) down to p.linkrev() if it
42 # isn't an ancestor of the srcrev.
43 f._changeid
44 pl = f.parents()
45
46 # Don't return renamed parents if we aren't following.
47 if not follow:
48 pl = [p for p in pl if p.path() == f.path()]
49
50 # renamed filectx won't have a filelog yet, so set it
51 # from the cache to save time
52 for p in pl:
53 if not '_filelog' in p.__dict__:
54 p._filelog = _getflog(f._repo, p.path())
55
56 return pl
57
# adapted from mercurial.context.basefilectx.annotate; takes a fctx rather
# than a (text, fctx) pair.
def _decorate(fctx):
    """(fctx) -> ([(fctx, linenum)], text)

    pair every line of the file content with (fctx, 0-based line number).
    a final line without a trailing newline still counts as a line.
    """
    content = fctx.data()
    nlines = content.count('\n')
    if content and not content.endswith('\n'):
        # the last line is not newline-terminated
        nlines += 1
    annotated = [(fctx, lineno) for lineno in pycompat.xrange(nlines)]
    return (annotated, content)
66
67 # extracted from mercurial.context.basefilectx.annotate. slightly modified
68 # so it takes an extra "blocks" parameter calculated elsewhere, instead of
69 # calculating diff here.
70 def _pair(parent, child, blocks):
71 for (a1, a2, b1, b2), t in blocks:
72 # Changed blocks ('!') or blocks made only of blank lines ('~')
73 # belong to the child.
74 if t == '=':
75 child[0][b1:b2] = parent[0][a1:a2]
76 return child
77
# like scmutil.revsingle, but wrapped in an lru cache so that the objects it
# returns, and the state they hold (like manifests), can be reused across
# calls
_revsingle = util.lrucachefunc(scmutil.revsingle)
81
def resolvefctx(repo, rev, path, resolverev=False, adjustctx=None):
    """(repo, str, str) -> fctx

    get the filectx object from repo, rev, path, in an efficient way.

    if resolverev is True, "rev" is a revision specified by the revset
    language, otherwise "rev" is a nodeid, or a revision number that can
    be consumed by repo.__getitem__.

    if adjustctx is not None, the returned fctx will point to a changeset
    that introduces the change (last modified the file). if adjustctx
    is 'linkrev', trust the linkrev and do not adjust it. this is noticeably
    faster for big repos but is incorrect for some cases.
    """
    # ints and None are never treated as revset expressions
    userevset = (resolverev and rev is not None
                 and not isinstance(rev, int))
    ctx = _revsingle(repo, rev) if userevset else repo[rev]

    if adjustctx is None:
        # If we don't need to adjust the linkrev, create the filectx using
        # the changectx instead of using ctx[path]. This means it already
        # has the changectx information, so blame -u will be able to look
        # directly at the commitctx object instead of having to resolve it
        # by going through the manifest. In a lazy-manifest world this can
        # prevent us from downloading a lot of data.
        ctxrev = ctx.rev()
        if ctxrev is None:
            # ctx.rev() is None means it's the working copy, which is a
            # special case.
            return ctx[path]
        return repo.filectx(path, changeid=ctxrev)

    fctx = ctx[path]
    if adjustctx == 'linkrev':
        introrev = fctx.linkrev()
    else:
        introrev = fctx.introrev()
    if introrev != ctx.rev():
        # re-point the fctx at the introducing changeset
        fctx._changeid = introrev
        fctx._changectx = repo[introrev]
    return fctx
124
# like mercurial.store.encodedir, but use linelog suffixes: .m, .l, .lock
def encodedir(path):
    """append '.hg' to every directory component ending in .hg, .l, .m or
    .lock, and return the resulting path
    """
    # applied one after another, in this order
    for suffix in ('.hg/', '.l/', '.m/', '.lock/'):
        path = path.replace(suffix, suffix[:-1] + '.hg/')
    return path
132
def hashdiffopts(diffopts):
    """(diffopts) -> str, a 6-hex-digit fingerprint of the diff options

    the fingerprint is the start of the sha1 of the sorted (name, value)
    pairs for every option name listed in mdiff.diffopts.defaults.
    """
    # iterate the dict directly: dict.iterkeys() only exists on Python 2
    diffoptstr = str(sorted(
        (k, getattr(diffopts, k))
        for k in mdiff.diffopts.defaults
    ))
    # hashlib only accepts bytes. on Python 2 str() already returns bytes,
    # so the digest there is unchanged.
    if not isinstance(diffoptstr, bytes):
        diffoptstr = diffoptstr.encode('utf-8')
    return hashlib.sha1(diffoptstr).hexdigest()[:6]
139
# fingerprint of the default diff options, computed once at import time.
# annotateopts.shortstr compares against it to detect customized diffopts.
_defaultdiffopthash = hashdiffopts(mdiff.defaultopts)
141
class annotateopts(object):
    """like mercurial.mdiff.diffopts, but is for annotate

    diffopts: a mdiff.diffopts object, or None
    followrename: follow renames, like "hg annotate -f"
    followmerge: follow p2 of a merge changeset, otherwise p2 is ignored
    """

    defaults = {
        'diffopts': None,
        'followrename': True,
        'followmerge': True,
    }

    def __init__(self, **opts):
        """set one attribute per key in "defaults", taking the value from
        opts when given. keys of opts not listed in "defaults" are ignored.
        """
        # .items() works on both Python 2 and 3; .iteritems() is Python 2
        # only
        for k, v in self.defaults.items():
            setattr(self, k, opts.get(k, v))

    @util.propertycache
    def shortstr(self):
        """represent opts in a short string, suitable for a directory name"""
        result = ''
        if not self.followrename:
            result += 'r0'
        if not self.followmerge:
            result += 'm0'
        if self.diffopts is not None:
            assert isinstance(self.diffopts, mdiff.diffopts)
            diffopthash = hashdiffopts(self.diffopts)
            # only non-default diff options contribute to the name
            if diffopthash != _defaultdiffopthash:
                result += 'i' + diffopthash
        return result or 'default'
173
# annotateopts with every option at its default; used as the default "opts"
# argument of pathhelper and annotatecontext
defaultopts = annotateopts()
175
class _annotatecontext(object):
    """do not use this class directly as it does not use lock to protect
    writes. use "with annotatecontext(...)" instead.
    """

    def __init__(self, repo, path, linelogpath, revmappath, opts):
        self.repo = repo
        self.ui = repo.ui
        self.path = path
        self.opts = opts
        self.linelogpath = linelogpath
        self.revmappath = revmappath
        self._linelog = None  # loaded on demand by the "linelog" property
        self._revmap = None  # loaded on demand by the "revmap" property
        self._node2path = {}  # {node: path}

    @property
    def linelog(self):
        """the linelog object, read from linelogpath on first access. an
        empty linelog is used if the file is missing or cannot be decoded.
        """
        if self._linelog is None:
            if os.path.exists(self.linelogpath):
                with open(self.linelogpath, 'rb') as f:
                    try:
                        self._linelog = linelogmod.linelog.fromdata(f.read())
                    except linelogmod.LineLogError:
                        self._linelog = linelogmod.linelog()
            else:
                self._linelog = linelogmod.linelog()
        return self._linelog

    @property
    def revmap(self):
        """the revmap object for revmappath, created on first access"""
        if self._revmap is None:
            self._revmap = revmapmod.revmap(self.revmappath)
        return self._revmap

    def close(self):
        """flush the revmap and write the linelog to linelogpath if they
        were loaded, then forget the in-memory objects
        """
        if self._revmap is not None:
            self._revmap.flush()
            self._revmap = None
        if self._linelog is not None:
            with open(self.linelogpath, 'wb') as f:
                f.write(self._linelog.encode())
            self._linelog = None

    # also run close() when the object is finalized
    __del__ = close

    def rebuild(self):
        """delete linelog and revmap, useful for rebuilding"""
        self.close()
        self._node2path.clear()
        _unlinkpaths([self.revmappath, self.linelogpath])

    @property
    def lastnode(self):
        """return last node in revmap, or None if revmap is empty"""
        if self._revmap is None:
            # fast path, read revmap without loading its full content
            return revmapmod.getlastnode(self.revmappath)
        else:
            return self._revmap.rev2hsh(self._revmap.maxrev)

    def isuptodate(self, master, strict=True):
        """return True if the revmap / linelog is up-to-date, or the file
        does not exist in the master revision. False otherwise.

        it tries to be fast and could return false negatives, because of the
        use of linkrev instead of introrev.

        useful for both server and client to decide whether to update
        fastannotate cache or not.

        if strict is True, even if fctx exists in the revmap, but is not the
        last node, isuptodate will return False. it's good for performance - no
        expensive check was done.

        if strict is False, if fctx exists in the revmap, this function may
        return True. this is useful for the client to skip downloading the
        cache if the client's master is behind the server's.
        """
        lastnode = self.lastnode
        try:
            f = self._resolvefctx(master, resolverev=True)
            # choose linkrev instead of introrev as the check is meant to be
            # *fast*.
            linknode = self.repo.changelog.node(f.linkrev())
            if not strict and lastnode and linknode != lastnode:
                # check if f.node() is in the revmap. note: this loads the
                # revmap and can be slow.
                return self.revmap.hsh2rev(linknode) is not None
            # avoid resolving old manifest, or slow adjustlinkrev to be fast,
            # false negatives are acceptable in this case.
            return linknode == lastnode
        except LookupError:
            # master does not have the file, or the revmap is ahead
            return True

    def annotate(self, rev, master=None, showpath=False, showlines=False):
        """incrementally update the cache so it includes revisions in the main
        branch till 'master'. and run annotate on 'rev', which may or may not be
        included in the main branch.

        if master is None, do not update linelog.

        the first value returned is the annotate result, it is [(node, linenum)]
        by default. [(node, linenum, path)] if showpath is True.

        if showlines is True, a second value will be returned, it is a list of
        corresponding line contents.
        """

        # the fast path test requires commit hash, convert rev number to hash,
        # so it may hit the fast path. note: in the "fctx" mode, the "annotate"
        # command could give us a revision number even if the user passes a
        # commit hash.
        if isinstance(rev, int):
            rev = node.hex(self.repo.changelog.node(rev))

        # fast path: if rev is in the main branch already
        directly, revfctx = self.canannotatedirectly(rev)
        if directly:
            if self.ui.debugflag:
                self.ui.debug('fastannotate: %s: using fast path '
                              '(resolved fctx: %s)\n'
                              % (self.path, util.safehasattr(revfctx, 'node')))
            return self.annotatedirectly(revfctx, showpath, showlines)

        # resolve master
        masterfctx = None
        if master:
            try:
                masterfctx = self._resolvefctx(master, resolverev=True,
                                               adjustctx=True)
            except LookupError: # master does not have the file
                pass
            else:
                if masterfctx in self.revmap: # no need to update linelog
                    masterfctx = None

        #                  ... - @ <- rev (can be an arbitrary changeset,
        #                 /                not necessarily a descendant
        #      master -> o                 of master)
        #                |
        #     a merge -> o         'o': new changesets in the main branch
        #                |\        '#': revisions in the main branch that
        #                o *            exist in linelog / revmap
        #                | .       '*': changesets in side branches, or
        # last master -> # .            descendants of master
        #                | .
        #                # *       joint: '#', and is a parent of a '*'
        #                |/
        #     a joint -> # ^^^^ --- side branches
        #                |
        #                ^ --- main branch (in linelog)

        # these DFSes are similar to the traditional annotate algorithm.
        # we cannot really reuse the code for perf reason.

        # 1st DFS calculates merges, joint points, and needed.
        # "needed" is a simple reference counting dict to free items in
        # "hist", reducing its memory usage otherwise could be huge.
        initvisit = [revfctx]
        if masterfctx:
            if masterfctx.rev() is None:
                raise error.Abort(_('cannot update linelog to wdir()'),
                                  hint=_('set fastannotate.mainbranch'))
            initvisit.append(masterfctx)
        visit = initvisit[:]
        pcache = {}
        needed = {revfctx: 1}
        hist = {} # {fctx: ([(llrev or fctx, linenum)], text)}
        while visit:
            f = visit.pop()
            if f in pcache or f in hist:
                continue
            if f in self.revmap: # in the old main branch, it's a joint
                llrev = self.revmap.hsh2rev(f.node())
                self.linelog.annotate(llrev)
                result = self.linelog.annotateresult
                hist[f] = (result, f.data())
                continue
            pl = self._parentfunc(f)
            pcache[f] = pl
            for p in pl:
                needed[p] = needed.get(p, 0) + 1
                if p not in pcache:
                    visit.append(p)

        # 2nd (simple) DFS calculates new changesets in the main branch
        # ('o' nodes in the above graph), so we know when to update linelog.
        newmainbranch = set()
        f = masterfctx
        while f and f not in self.revmap:
            newmainbranch.add(f)
            pl = pcache[f]
            if pl:
                f = pl[0]
            else:
                f = None
                break

        # f, if present, is the position where the last build stopped at, and
        # should be the "master" last time. check to see if we can continue
        # building the linelog incrementally. (we cannot if diverged)
        if masterfctx is not None:
            self._checklastmasterhead(f)

        if self.ui.debugflag:
            if newmainbranch:
                self.ui.debug('fastannotate: %s: %d new changesets in the main'
                              ' branch\n' % (self.path, len(newmainbranch)))
            elif not hist: # no joints, no updates
                self.ui.debug('fastannotate: %s: linelog cannot help in '
                              'annotating this revision\n' % self.path)

        # prepare annotateresult so we can update linelog incrementally
        self.linelog.annotate(self.linelog.maxrev)

        # 3rd DFS does the actual annotate
        visit = initvisit[:]
        progress = 0
        while visit:
            f = visit[-1]
            if f in hist:
                visit.pop()
                continue

            # f can only be processed once all of its parents are in hist
            ready = True
            pl = pcache[f]
            for p in pl:
                if p not in hist:
                    ready = False
                    visit.append(p)
            if not ready:
                continue

            visit.pop()
            blocks = None # mdiff blocks, used for appending linelog
            ismainbranch = (f in newmainbranch)
            # curr is the same as the traditional annotate algorithm,
            # if we only care about linear history (do not follow merge),
            # then curr is not actually used.
            assert f not in hist
            curr = _decorate(f)
            for i, p in enumerate(pl):
                bs = list(self._diffblocks(hist[p][1], curr[1]))
                if i == 0 and ismainbranch:
                    blocks = bs
                curr = _pair(hist[p], curr, bs)
                # drop the parent from hist once its last user is done
                if needed[p] == 1:
                    del hist[p]
                    del needed[p]
                else:
                    needed[p] -= 1

            hist[f] = curr
            del pcache[f]

            if ismainbranch: # need to write to linelog
                if not self.ui.quiet:
                    progress += 1
                    self.ui.progress(_('building cache'), progress,
                                     total=len(newmainbranch))
                bannotated = None
                if len(pl) == 2 and self.opts.followmerge: # merge
                    bannotated = curr[0]
                if blocks is None: # no parents, add an empty one
                    blocks = list(self._diffblocks('', curr[1]))
                self._appendrev(f, blocks, bannotated)
            elif showpath: # not append linelog, but we need to record path
                self._node2path[f.node()] = f.path()

        if progress: # clean progress bar
            self.ui.write()

        result = [
            ((self.revmap.rev2hsh(fr) if isinstance(fr, int) else fr.node()), l)
            for fr, l in hist[revfctx][0]] # [(node, linenumber)]
        return self._refineannotateresult(result, revfctx, showpath, showlines)

    def canannotatedirectly(self, rev):
        """(str) -> bool, fctx or node.
        return (True, f) if we can annotate without updating the linelog, pass
        f to annotatedirectly.
        return (False, f) if we need extra calculation. f is the fctx resolved
        from rev.
        """
        result = True
        f = None
        if not isinstance(rev, int) and rev is not None:
            # a 20-char rev is taken as a binary node, a 40-char rev as a hex
            # node; any other length is not treated as a node
            hsh = {20: bytes, 40: node.bin}.get(len(rev), lambda x: None)(rev)
            if hsh is not None and (hsh, self.path) in self.revmap:
                f = hsh
        if f is None:
            adjustctx = 'linkrev' if self._perfhack else True
            f = self._resolvefctx(rev, adjustctx=adjustctx, resolverev=True)
            result = f in self.revmap
            if not result and self._perfhack:
                # redo the resolution without perfhack - as we are going to
                # do write operations, we need a correct fctx.
                f = self._resolvefctx(rev, adjustctx=True, resolverev=True)
        return result, f

    def annotatealllines(self, rev, showpath=False, showlines=False):
        """(rev : str) -> [(node : str, linenum : int, path : str)]

        the result has the same format with annotate, but include all (including
        deleted) lines up to rev. call this after calling annotate(rev, ...) for
        better performance and accuracy.
        """
        revfctx = self._resolvefctx(rev, resolverev=True, adjustctx=True)

        # find a chain from rev to anything in the mainbranch
        if revfctx not in self.revmap:
            chain = [revfctx]
            a = ''
            while True:
                f = chain[-1]
                pl = self._parentfunc(f)
                if not pl:
                    break
                if pl[0] in self.revmap:
                    a = pl[0].data()
                    break
                chain.append(pl[0])

            # both self.linelog and self.revmap is backed by filesystem. now
            # we want to modify them but do not want to write changes back to
            # files. so we create in-memory objects and copy them. it's like
            # a "fork".
            linelog = linelogmod.linelog()
            linelog.copyfrom(self.linelog)
            linelog.annotate(linelog.maxrev)
            revmap = revmapmod.revmap()
            revmap.copyfrom(self.revmap)

            # replay the chain, oldest first, on the in-memory copies
            for f in reversed(chain):
                b = f.data()
                blocks = list(self._diffblocks(a, b))
                self._doappendrev(linelog, revmap, f, blocks)
                a = b
        else:
            # fastpath: use existing linelog, revmap as we don't write to them
            linelog = self.linelog
            revmap = self.revmap

        lines = linelog.getalllines()
        hsh = revfctx.node()
        llrev = revmap.hsh2rev(hsh)
        result = [(revmap.rev2hsh(r), l) for r, l in lines if r <= llrev]
        # cannot use _refineannotateresult since we need custom logic for
        # resolving line contents
        if showpath:
            result = self._addpathtoresult(result, revmap)
        if showlines:
            linecontents = self._resolvelines(result, revmap, linelog)
            result = (result, linecontents)
        return result

    def _resolvelines(self, annotateresult, revmap, linelog):
        """(annotateresult) -> [line]. designed for annotatealllines.
        this is probably the most inefficient code in the whole fastannotate
        directory. but we have made a decision that the linelog does not
        store line contents. so getting them requires random accesses to
        the revlog data, since they can be many, it can be very slow.
        """
        # [llrev]
        revs = [revmap.hsh2rev(l[0]) for l in annotateresult]
        result = [None] * len(annotateresult)
        # {(rev, linenum): [lineindex]}
        key2idxs = collections.defaultdict(list)
        for i in pycompat.xrange(len(result)):
            key2idxs[(revs[i], annotateresult[i][1])].append(i)
        while key2idxs:
            # find an unresolved line and its linelog rev to annotate
            hsh = None
            try:
                # .items() works on both Python 2 and 3; key2idxs is not
                # modified while it is being iterated here
                for (rev, _linenum), idxs in key2idxs.items():
                    if revmap.rev2flag(rev) & revmapmod.sidebranchflag:
                        continue
                    hsh = annotateresult[idxs[0]][0]
                    break
            except StopIteration: # no more unresolved lines
                return result
            if hsh is None:
                # the remaining key2idxs are not in main branch, resolving them
                # using the hard way...
                revlines = {}
                for (rev, linenum), idxs in key2idxs.items():
                    if rev not in revlines:
                        hsh = annotateresult[idxs[0]][0]
                        if self.ui.debugflag:
                            self.ui.debug('fastannotate: reading %s line #%d '
                                          'to resolve lines %r\n'
                                          % (node.short(hsh), linenum, idxs))
                        fctx = self._resolvefctx(hsh, revmap.rev2path(rev))
                        lines = mdiff.splitnewlines(fctx.data())
                        revlines[rev] = lines
                    for idx in idxs:
                        result[idx] = revlines[rev][linenum]
                assert all(x is not None for x in result)
                return result

            # run the annotate and the lines should match to the file content
            self.ui.debug('fastannotate: annotate %s to resolve lines\n'
                          % node.short(hsh))
            linelog.annotate(rev)
            fctx = self._resolvefctx(hsh, revmap.rev2path(rev))
            annotated = linelog.annotateresult
            lines = mdiff.splitnewlines(fctx.data())
            if len(lines) != len(annotated):
                raise faerror.CorruptedFileError('unexpected annotated lines')
            # resolve lines from the annotate result
            for i, line in enumerate(lines):
                k = annotated[i]
                if k in key2idxs:
                    for idx in key2idxs[k]:
                        result[idx] = line
                    del key2idxs[k]
        return result

    def annotatedirectly(self, f, showpath, showlines):
        """like annotate, but when we know that f is in linelog.
        f can be either a 20-byte node or a fctx. this is for perf - in
        the best case, the user provides a node and we don't need to read the
        filelog or construct any filecontext.

        raises faerror.CorruptedFileError if f is not in the revmap, or is
        flagged as a side branch revision there.
        """
        # nodes are bytes. on Python 2 bytes is str, so this is the same
        # check there, and it stays correct on Python 3.
        if isinstance(f, bytes):
            hsh = f
        else:
            hsh = f.node()
        llrev = self.revmap.hsh2rev(hsh)
        if not llrev:
            raise faerror.CorruptedFileError('%s is not in revmap'
                                             % node.hex(hsh))
        if (self.revmap.rev2flag(llrev) & revmapmod.sidebranchflag) != 0:
            raise faerror.CorruptedFileError('%s is not in revmap mainbranch'
                                             % node.hex(hsh))
        self.linelog.annotate(llrev)
        result = [(self.revmap.rev2hsh(r), l)
                  for r, l in self.linelog.annotateresult]
        return self._refineannotateresult(result, f, showpath, showlines)

    def _refineannotateresult(self, result, f, showpath, showlines):
        """add the missing path or line contents, they can be expensive.
        f could be either node or fctx.

        raises faerror.CorruptedFileError if showlines is set and the number
        of lines in the file differs from the number of annotated lines.
        """
        if showpath:
            result = self._addpathtoresult(result)
        if showlines:
            # nodes are bytes (bytes is str on Python 2)
            if isinstance(f, bytes): # f: node or fctx
                llrev = self.revmap.hsh2rev(f)
                fctx = self._resolvefctx(f, self.revmap.rev2path(llrev))
            else:
                fctx = f
            lines = mdiff.splitnewlines(fctx.data())
            if len(lines) != len(result): # linelog is probably corrupted
                raise faerror.CorruptedFileError()
            result = (result, lines)
        return result

    def _appendrev(self, fctx, blocks, bannotated=None):
        """append fctx to this context's own linelog and revmap"""
        self._doappendrev(self.linelog, self.revmap, fctx, blocks, bannotated)

    def _diffblocks(self, a, b):
        """diff blocks between texts a and b, using the configured diffopts"""
        return mdiff.allblocks(a, b, self.opts.diffopts)

    @staticmethod
    def _doappendrev(linelog, revmap, fctx, blocks, bannotated=None):
        """append a revision to linelog and revmap"""

        def getllrev(f):
            """(fctx) -> int"""
            # f should not be a linelog revision
            if isinstance(f, int):
                raise error.ProgrammingError('f should not be an int')
            # f is a fctx, allocate linelog rev on demand
            hsh = f.node()
            rev = revmap.hsh2rev(hsh)
            if rev is None:
                rev = revmap.append(hsh, sidebranch=True, path=f.path())
            return rev

        # append sidebranch revisions to revmap
        siderevs = []
        siderevmap = {} # node: int
        if bannotated is not None:
            for (a1, a2, b1, b2), op in blocks:
                if op != '=':
                    # f could be either linelog rev, or fctx.
                    siderevs += [f for f, l in bannotated[b1:b2]
                                 if not isinstance(f, int)]
        siderevs = set(siderevs)
        if fctx in siderevs: # mainnode must be appended separately
            siderevs.remove(fctx)
        for f in siderevs:
            siderevmap[f] = getllrev(f)

        # the changeset in the main branch, could be a merge
        llrev = revmap.append(fctx.node(), path=fctx.path())
        siderevmap[fctx] = llrev

        # the non-equal blocks are applied in reverse order
        for (a1, a2, b1, b2), op in reversed(blocks):
            if op == '=':
                continue
            if bannotated is None:
                linelog.replacelines(llrev, a1, a2, b1, b2)
            else:
                blines = [((r if isinstance(r, int) else siderevmap[r]), l)
                          for r, l in bannotated[b1:b2]]
                linelog.replacelines_vec(llrev, a1, a2, blines)

    def _addpathtoresult(self, annotateresult, revmap=None):
        """(revmap, [(node, linenum)]) -> [(node, linenum, path)]"""
        if revmap is None:
            revmap = self.revmap

        def _getpath(nodeid):
            # self._node2path caches the lookups
            path = self._node2path.get(nodeid)
            if path is None:
                path = revmap.rev2path(revmap.hsh2rev(nodeid))
                self._node2path[nodeid] = path
            return path

        return [(n, l, _getpath(n)) for n, l in annotateresult]

    def _checklastmasterhead(self, fctx):
        """check if fctx is the master's head last time, raise if not"""
        if fctx is None:
            llrev = 0
        else:
            llrev = self.revmap.hsh2rev(fctx.node())
            if not llrev:
                raise faerror.CannotReuseError()
        if self.linelog.maxrev != llrev:
            raise faerror.CannotReuseError()

    @util.propertycache
    def _parentfunc(self):
        """-> (fctx) -> [fctx]"""
        followrename = self.opts.followrename
        followmerge = self.opts.followmerge
        def parents(f):
            pl = _parents(f, follow=followrename)
            if not followmerge:
                # keep at most the first parent
                pl = pl[:1]
            return pl
        return parents

    @util.propertycache
    def _perfhack(self):
        """value of the fastannotate.perfhack config option"""
        return self.ui.configbool('fastannotate', 'perfhack')

    def _resolvefctx(self, rev, path=None, **kwds):
        """resolvefctx on self.repo; path defaults to self.path"""
        return resolvefctx(self.repo, rev, (path or self.path), **kwds)
730
def _unlinkpaths(paths):
    """try to unlink each of the given paths, ignoring any OSError"""
    for target in paths:
        try:
            util.unlink(target)
        except OSError:
            # best effort only: carry on with the remaining paths
            continue
738
class pathhelper(object):
    """helper for getting paths for lockfile, linelog and revmap"""

    def __init__(self, repo, path, opts=defaultopts):
        # different options use different directories
        self._vfspath = os.path.join('fastannotate',
                                     opts.shortstr, encodedir(path))
        self._repo = repo

    @property
    def dirname(self):
        """directory containing the cache files for this path"""
        return os.path.dirname(self._repo.vfs.join(self._vfspath))

    @property
    def linelogpath(self):
        """path of the linelog file (suffix '.l')"""
        return self._repo.vfs.join(self._vfspath + '.l')

    def lock(self):
        """create a lockmod.lock on the '.lock' file for this path"""
        return lockmod.lock(self._repo.vfs, self._vfspath + '.lock')

    @contextlib.contextmanager
    def _lockflock(self):
        """the same as 'lock' but use flock instead of lockmod.lock, to avoid
        creating temporary symlinks."""
        import fcntl
        # note: the flock is taken on the linelog file itself
        lockpath = self.linelogpath
        util.makedirs(os.path.dirname(lockpath))
        lockfd = os.open(lockpath, os.O_RDONLY | os.O_CREAT, 0o664)
        # the outer try guarantees the descriptor is closed even when
        # acquiring or releasing the flock raises
        try:
            fcntl.flock(lockfd, fcntl.LOCK_EX)
            try:
                yield
            finally:
                fcntl.flock(lockfd, fcntl.LOCK_UN)
        finally:
            os.close(lockfd)

    @property
    def revmappath(self):
        """path of the revmap file (suffix '.m')"""
        return self._repo.vfs.join(self._vfspath + '.m')
777
@contextlib.contextmanager
def annotatecontext(repo, path, opts=defaultopts, rebuild=False):
    """context needed to perform (fast) annotate on a file

    an annotatecontext of a single file consists of two structures: the
    linelog and the revmap. this function takes care of locking. only 1
    process is allowed to write that file's linelog and revmap at a time.

    when something goes wrong, this function will assume the linelog and the
    revmap are in a bad state, and remove them from disk.

    if rebuild is True, the existing linelog and revmap are deleted before
    the context is handed to the caller.

    use this function in the following way:

        with annotatecontext(...) as actx:
            actx. ....
    """
    helper = pathhelper(repo, path, opts)
    util.makedirs(helper.dirname)
    revmappath = helper.revmappath
    linelogpath = helper.linelogpath
    actx = None
    try:
        with helper.lock():
            actx = _annotatecontext(repo, path, linelogpath, revmappath, opts)
            if rebuild:
                actx.rebuild()
            # the caller's "with" body runs here, while the lock is held
            yield actx
    except Exception:
        # any exception (including one raised by the caller's "with" body)
        # is treated as a sign of a broken cache: delete it and re-raise
        # NOTE(review): the "with helper.lock()" block has already exited
        # here, so this rebuild() (and the close() below) appear to run
        # without the lock - confirm this is intended
        if actx is not None:
            actx.rebuild()
        repo.ui.debug('fastannotate: %s: cache broken and deleted\n' % path)
        raise
    finally:
        # flush whatever is still loaded in memory back to disk
        if actx is not None:
            actx.close()
813
def fctxannotatecontext(fctx, follow=True, diffopts=None, rebuild=False):
    """like annotatecontext, but take repo and path from a fctx. convenient
    when used in fctx.annotate

    the fastannotate.forcefollow config option (default True) overrides
    "follow" and always follows renames.
    """
    repo = fctx._repo
    path = fctx._path
    forcefollow = repo.ui.configbool('fastannotate', 'forcefollow', True)
    followrename = True if forcefollow else follow
    return annotatecontext(
        repo,
        path,
        annotateopts(diffopts=diffopts, followrename=followrename),
        rebuild,
    )