view mercurial/graphmod.py @ 16777:058e14da7044

graphlog: turn getlogrevs() into a generator This improves the poor "time to first changeset" compared to the original log command. When running: $ hg log -u user log will enumerate the changelog and display matching revisions when they are found. But: $ hg log -G -u user will first find all revisions matching the user then start to display them. Initially, I considered turning revset.match() into a generator. This is doable but requires a fair amount of work. Instead, cmdutil.increasingwindows() is reused to call the revset matcher repeatedly. This has the nice properties of: - Let us reorder the windows after filtering, which is necessary as the matcher can reorder inputs but is an internal detail not a feature. - Let us feed the matcher with windows in changelog order, which is good for performances. - Have a generator designed for log-like commands, returning small windows at first then batching larger ones. I feel that calling the matcher multiple times is correct, at least with the revsets involved in getlogrevs() because they are: - stateless (no limit()) - respecting f(a|b) = f(a) | f(b), though I have no valid argument about that. Known issues compared to log code: - Calling the revset matcher multiple times can be slow when revset functions have to create expensive data structure for filtering. This will be addressed in a followup. - Predicate combinations like "--user foo --user bar" or "--user foo and --branch bar" are inherently slower because all input revision are checked against the first condition, then against the second, and so forth. log would enumerate the input revisions once and check each of them once against all conditions, which is faster. There are solutions but nothing cheap to implement. Some numbers against mozilla repository: first line total * hg log -u rnewman /Users/pmezard/bin/hg-2.2 0.148s 7.293s /Users/pmezard/bin/hgdev 0.132s 5.747s * hg log -u rnewman -u girard /Users/pmezard/bin/hg-2.2 0.146s 7.323s /Users/pmezard/bin/hgdev 0.136s 11.096s * hg log -l 10 /Users/pmezard/bin/hg-2.2 0.137s 0.153s /Users/pmezard/bin/hgdev 0.128s 0.144s * hg log -l 10 -u rnewman /Users/pmezard/bin/hg-2.2 0.146s 0.265s /Users/pmezard/bin/hgdev 0.133s 0.236s * hg log -b GECKO193a2_20100228_RELBRANCH /Users/pmezard/bin/hg-2.2 2.332s 6.618s /Users/pmezard/bin/hgdev 1.972s 5.543s * hg log xulrunner /Users/pmezard/bin/hg-2.2 5.829s 5.958s /Users/pmezard/bin/hgdev 0.194s 6.017s * hg log --follow xulrunner/build.mk /Users/pmezard/bin/hg-2.2 0.353s 0.438s /Users/pmezard/bin/hgdev 0.394s 0.580s * hg log -u girard tools /Users/pmezard/bin/hg-2.2 5.853s 6.012s /Users/pmezard/bin/hgdev 0.195s 6.030s * hg log -b COMM2000_20110314_RELBRANCH --copies /Users/pmezard/bin/hg-2.2 2.231s 6.653s /Users/pmezard/bin/hgdev 1.897s 5.585s * hg log --follow /Users/pmezard/bin/hg-2.2 0.137s 14.140s /Users/pmezard/bin/hgdev 0.381s 44.246s * hg log --follow -r 80000:90000 /Users/pmezard/bin/hg-2.2 0.127s 1.611s /Users/pmezard/bin/hgdev 0.147s 1.847s * hg log --follow -r 90000:80000 /Users/pmezard/bin/hg-2.2 0.130s 1.702s /Users/pmezard/bin/hgdev 0.368s 6.106s * hg log --follow -r 80000:90000 js/src/jsproxy.cpp /Users/pmezard/bin/hg-2.2 0.343s 0.388s /Users/pmezard/bin/hgdev 0.437s 0.631s * hg log --follow -r 90000:80000 js/src/jsproxy.cpp /Users/pmezard/bin/hg-2.2 0.342s 0.389s /Users/pmezard/bin/hgdev 0.442s 0.628s
author Patrick Mezard <patrick@mezard.eu>
date Tue, 08 May 2012 22:43:44 +0200
parents 6e4de55a41a4
children 0849d725e2f9
line wrap: on
line source

# Revision graph generator for Mercurial
#
# Copyright 2008 Dirkjan Ochtman <dirkjan@ochtman.nl>
# Copyright 2007 Joel Rosdahl <joel@rosdahl.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""supports walking the history as DAGs suitable for graphical output

The most basic format we use is that of::

  (id, type, data, [parentids])

The node and parent ids are arbitrary integers which identify a node in the
context of the graph returned. Type is a constant specifying the node type.
Data depends on type.
"""

from mercurial.node import nullrev
import util

CHANGESET = 'C'

def dagwalker(repo, revs):
    """cset DAG generator yielding (id, CHANGESET, ctx, [parentids]) tuples

    This generator function walks through revisions (which should be ordered
    from bigger to lower). It returns a tuple for each node. The node and parent
    ids are arbitrary integers which identify a node in the context of the graph
    returned.
    """
    if not revs:
        return

    cl = repo.changelog
    lowestrev = min(revs)
    gpcache = {}

    knownrevs = set(revs)
    for rev in revs:
        ctx = repo[rev]
        parents = sorted(set([p.rev() for p in ctx.parents()
                              if p.rev() in knownrevs]))
        mpars = [p.rev() for p in ctx.parents() if
                 p.rev() != nullrev and p.rev() not in parents]

        for mpar in mpars:
            gp = gpcache.get(mpar)
            if gp is None:
                gp = gpcache[mpar] = grandparent(cl, lowestrev, revs, mpar)
            if not gp:
                parents.append(mpar)
            else:
                parents.extend(g for g in gp if g not in parents)

        yield (ctx.rev(), CHANGESET, ctx, parents)

def nodes(repo, nodes):
    """cset DAG generator yielding (id, CHANGESET, ctx, [parentids]) tuples

    This generator function walks the given nodes. It only returns parents
    that are in nodes, too.
    """
    include = set(nodes)
    for node in nodes:
        ctx = repo[node]
        parents = set([p.rev() for p in ctx.parents() if p.node() in include])
        yield (ctx.rev(), CHANGESET, ctx, sorted(parents))

def colored(dag, repo):
    """annotates a DAG with colored edge information

    For each DAG node this function emits tuples::

      (id, type, data, (col, color), [(col, nextcol, color)])

    with the following new elements:

      - Tuple (col, color) with column and color index for the current node
      - A list of tuples indicating the edges between the current node and its
        parents.
    """
    seen = []
    colors = {}
    newcolor = 1
    config = {}

    for key, val in repo.ui.configitems('graph'):
        if '.' in key:
            branch, setting = key.rsplit('.', 1)
            # Validation
            if setting == "width" and val.isdigit():
                config.setdefault(branch, {})[setting] = int(val)
            elif setting == "color" and val.isalnum():
                config.setdefault(branch, {})[setting] = val

    if config:
        getconf = util.lrucachefunc(
            lambda rev: config.get(repo[rev].branch(), {}))
    else:
        getconf = lambda rev: {}

    for (cur, type, data, parents) in dag:

        # Compute seen and next
        if cur not in seen:
            seen.append(cur) # new head
            colors[cur] = newcolor
            newcolor += 1

        col = seen.index(cur)
        color = colors.pop(cur)
        next = seen[:]

        # Add parents to next
        addparents = [p for p in parents if p not in next]
        next[col:col + 1] = addparents

        # Set colors for the parents
        for i, p in enumerate(addparents):
            if not i:
                colors[p] = color
            else:
                colors[p] = newcolor
                newcolor += 1

        # Add edges to the graph
        edges = []
        for ecol, eid in enumerate(seen):
            if eid in next:
                bconf = getconf(eid)
                edges.append((
                    ecol, next.index(eid), colors[eid],
                    bconf.get('width', -1),
                    bconf.get('color', '')))
            elif eid == cur:
                for p in parents:
                    bconf = getconf(p)
                    edges.append((
                        ecol, next.index(p), color,
                        bconf.get('width', -1),
                        bconf.get('color', '')))

        # Yield and move on
        yield (cur, type, data, (col, color), edges)
        seen = next

def grandparent(cl, lowestrev, roots, head):
    """Return all ancestors of head in roots which revision is
    greater or equal to lowestrev.
    """
    pending = set([head])
    seen = set()
    kept = set()
    llowestrev = max(nullrev, lowestrev)
    while pending:
        r = pending.pop()
        if r >= llowestrev and r not in seen:
            if r in roots:
                kept.add(r)
            else:
                pending.update([p for p in cl.parentrevs(r)])
            seen.add(r)
    return sorted(kept)