comparison hgext/graphlog.py @ 16777:058e14da7044

graphlog: turn getlogrevs() into a generator This improves the poor "time to first changeset" compared to the original log command. When running: $ hg log -u user log will enumerate the changelog and display matching revisions when they are found. But: $ hg log -G -u user will first find all revisions matching the user then start to display them. Initially, I considered turning revset.match() into a generator. This is doable but requires a fair amount of work. Instead, cmdutil.increasingwindows() is reused to call the revset matcher repeatedly. This has the nice properties of: - Let us reorder the windows after filtering, which is necessary as the matcher can reorder inputs but is an internal detail not a feature. - Let us feed the matcher with windows in changelog order, which is good for performances. - Have a generator designed for log-like commands, returning small windows at first then batching larger ones. I feel that calling the matcher multiple times is correct, at least with the revsets involved in getlogrevs() because they are: - stateless (no limit()) - respecting f(a|b) = f(a) | f(b), though I have no valid argument about that. Known issues compared to log code: - Calling the revset matcher multiple times can be slow when revset functions have to create expensive data structure for filtering. This will be addressed in a followup. - Predicate combinations like "--user foo --user bar" or "--user foo and --branch bar" are inherently slower because all input revision are checked against the first condition, then against the second, and so forth. log would enumerate the input revisions once and check each of them once against all conditions, which is faster. There are solutions but nothing cheap to implement. Some numbers against mozilla repository: first line total * hg log -u rnewman /Users/pmezard/bin/hg-2.2 0.148s 7.293s /Users/pmezard/bin/hgdev 0.132s 5.747s * hg log -u rnewman -u girard /Users/pmezard/bin/hg-2.2 0.146s 7.323s /Users/pmezard/bin/hgdev 0.136s 11.096s * hg log -l 10 /Users/pmezard/bin/hg-2.2 0.137s 0.153s /Users/pmezard/bin/hgdev 0.128s 0.144s * hg log -l 10 -u rnewman /Users/pmezard/bin/hg-2.2 0.146s 0.265s /Users/pmezard/bin/hgdev 0.133s 0.236s * hg log -b GECKO193a2_20100228_RELBRANCH /Users/pmezard/bin/hg-2.2 2.332s 6.618s /Users/pmezard/bin/hgdev 1.972s 5.543s * hg log xulrunner /Users/pmezard/bin/hg-2.2 5.829s 5.958s /Users/pmezard/bin/hgdev 0.194s 6.017s * hg log --follow xulrunner/build.mk /Users/pmezard/bin/hg-2.2 0.353s 0.438s /Users/pmezard/bin/hgdev 0.394s 0.580s * hg log -u girard tools /Users/pmezard/bin/hg-2.2 5.853s 6.012s /Users/pmezard/bin/hgdev 0.195s 6.030s * hg log -b COMM2000_20110314_RELBRANCH --copies /Users/pmezard/bin/hg-2.2 2.231s 6.653s /Users/pmezard/bin/hgdev 1.897s 5.585s * hg log --follow /Users/pmezard/bin/hg-2.2 0.137s 14.140s /Users/pmezard/bin/hgdev 0.381s 44.246s * hg log --follow -r 80000:90000 /Users/pmezard/bin/hg-2.2 0.127s 1.611s /Users/pmezard/bin/hgdev 0.147s 1.847s * hg log --follow -r 90000:80000 /Users/pmezard/bin/hg-2.2 0.130s 1.702s /Users/pmezard/bin/hgdev 0.368s 6.106s * hg log --follow -r 80000:90000 js/src/jsproxy.cpp /Users/pmezard/bin/hg-2.2 0.343s 0.388s /Users/pmezard/bin/hgdev 0.437s 0.631s * hg log --follow -r 90000:80000 js/src/jsproxy.cpp /Users/pmezard/bin/hg-2.2 0.342s 0.389s /Users/pmezard/bin/hgdev 0.442s 0.628s
author Patrick Mezard <patrick@mezard.eu>
date Tue, 08 May 2012 22:43:44 +0200
parents 38caf405d010
children 2e13c1bd34dc
comparison
equal deleted inserted replaced
16776:5088d0b9a9a1 16777:058e14da7044
390 else: 390 else:
391 expr = None 391 expr = None
392 return expr, filematcher 392 return expr, filematcher
393 393
394 def getlogrevs(repo, pats, opts): 394 def getlogrevs(repo, pats, opts):
395 """Return (revs, expr, filematcher) where revs is a list of 395 """Return (revs, expr, filematcher) where revs is an iterable of
396 revision numbers, expr is a revset string built from log options 396 revision numbers, expr is a revset string built from log options
397 and file patterns or None, and used to filter 'revs'. If --stat or 397 and file patterns or None, and used to filter 'revs'. If --stat or
398 --patch are not passed filematcher is None. Otherwise it is a 398 --patch are not passed filematcher is None. Otherwise it is a
399 callable taking a revision number and returning a match objects 399 callable taking a revision number and returning a match objects
400 filtering the files to be detailed when displaying the revision. 400 filtering the files to be detailed when displaying the revision.
401 """ 401 """
402 def increasingrevs(repo, revs, matcher):
403 # The sorted input rev sequence is chopped in sub-sequences
404 # which are sorted in ascending order and passed to the
405 # matcher. The filtered revs are sorted again as they were in
406 # the original sub-sequence. This achieve several things:
407 #
408 # - getlogrevs() now returns a generator which behaviour is
409 # adapted to log need. First results come fast, last ones
410 # are batched for performances.
411 #
412 # - revset matchers often operate faster on revision in
413 # changelog order, because most filters deal with the
414 # changelog.
415 #
416 # - revset matchers can reorder revisions. "A or B" typically
417 # returns returns the revision matching A then the revision
418 # matching B. We want to hide this internal implementation
419 # detail from the caller, and sorting the filtered revision
420 # again achieves this.
421 for i, window in cmdutil.increasingwindows(0, len(revs), windowsize=1):
422 orevs = revs[i:i + window]
423 nrevs = set(matcher(repo, sorted(orevs)))
424 for rev in orevs:
425 if rev in nrevs:
426 yield rev
427
402 if not len(repo): 428 if not len(repo):
403 return [], None, None 429 return iter([]), None, None
404 # Default --rev value depends on --follow but --follow behaviour 430 # Default --rev value depends on --follow but --follow behaviour
405 # depends on revisions resolved from --rev... 431 # depends on revisions resolved from --rev...
406 follow = opts.get('follow') or opts.get('follow_first') 432 follow = opts.get('follow') or opts.get('follow_first')
407 if opts.get('rev'): 433 if opts.get('rev'):
408 revs = scmutil.revrange(repo, opts['rev']) 434 revs = scmutil.revrange(repo, opts['rev'])
410 if follow and len(repo) > 0: 436 if follow and len(repo) > 0:
411 revs = scmutil.revrange(repo, ['.:0']) 437 revs = scmutil.revrange(repo, ['.:0'])
412 else: 438 else:
413 revs = range(len(repo) - 1, -1, -1) 439 revs = range(len(repo) - 1, -1, -1)
414 if not revs: 440 if not revs:
415 return [], None, None 441 return iter([]), None, None
416 expr, filematcher = _makelogrevset(repo, pats, opts, revs) 442 expr, filematcher = _makelogrevset(repo, pats, opts, revs)
417 if expr: 443 if expr:
418 # Evaluate revisions in changelog order for performance 444 matcher = revset.match(repo.ui, expr)
419 # reasons but preserve the original sequence order in the 445 revs = increasingrevs(repo, revs, matcher)
420 # filtered result.
421 matched = set(revset.match(repo.ui, expr)(repo, sorted(revs)))
422 revs = [r for r in revs if r in matched]
423 if not opts.get('hidden'): 446 if not opts.get('hidden'):
424 # --hidden is still experimental and not worth a dedicated revset 447 # --hidden is still experimental and not worth a dedicated revset
425 # yet. Fortunately, filtering revision number is fast. 448 # yet. Fortunately, filtering revision number is fast.
426 revs = [r for r in revs if r not in repo.changelog.hiddenrevs] 449 revs = (r for r in revs if r not in repo.changelog.hiddenrevs)
450 else:
451 revs = iter(revs)
427 return revs, expr, filematcher 452 return revs, expr, filematcher
428 453
429 def generate(ui, dag, displayer, showparents, edgefn, getrenamed=None, 454 def generate(ui, dag, displayer, showparents, edgefn, getrenamed=None,
430 filematcher=None): 455 filematcher=None):
431 seen, state = [], asciistate() 456 seen, state = [], asciistate()