view mercurial/treediscovery.py @ 25527:262e6ad93885

phases: really fix native phase computation For some reason (probably rebase issue, leprechaun or badly resolved .rej) 1635579f9baf contains only half of the emailed patches and do not fix the bug. This patch adds the other half and enable the sweet native computation for real. As expected this provide massive speedup along the board. revset #0: not public() plain first 0) 0.011960 0.010523 1) 0.000465 3% 0.000492 4% revset #1: (tip~1000::) - public() plain first 0) 0.025700 0.025169 1) 0.002864 11% 0.001899 7% revset #2: not public() and branch("default") plain first 0) 0.022842 0.020863 1) 0.011418 49% 0.010948 52% However, it has a less impact (even bad) on first result time in simple situation. This comes from the overhead of building the set and filtering it. This is especially true on my Mercurial repository (used here) where about 1/3 of the changesets are non public and hidden. This could be mitigated by a caching of the set and a better usage of smartset in '_notpublic'. (But this won't happen in this patch because the win is massive everywhere else). revset #0: not public() last 0) 0.000081 1) 0.000493 x6.1 <-- bad impact revset #1: (tip~1000::) - public() last 0) 0.013966 1) 0.002737 19% revset #2: not public() and branch("default") last 0) 0.011021 1) 0.011038 The effect mostly disappear when the number of non-public changesets is small and/or the repo get bigger. Result for Mozilla central: Mozilla revset #0: not public() plain first last 0) 0.092787 0.084094 0.000080 1) 0.000054 0% 0.000083 0% 0.000083 revset #1: (tip~1000::) - public() plain first last 0) 0.215607 0.183996 0.124962 1) 0.031620 14% 0.006616 3% 0.031168 24% revset #2: not public() and branch("default") plain first last 0) 0.092626 0.082687 0.000162 1) 0.000139 0% 0.000165 0% 0.000167
author Pierre-Yves David <pierre-yves.david@fb.com>
date Wed, 10 Jun 2015 19:26:16 -0700
parents 0ca8410ea345
children c6d049a5de43
line wrap: on
line source

# discovery.py - protocol changeset discovery functions
#
# Copyright 2010 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

import collections
from node import nullid, short
from i18n import _
import util, error

def findcommonincoming(repo, remote, heads=None, force=False):
    """Return a tuple (common, fetch, heads) used to identify the common
    subset of nodes between repo and remote.

    "common" is a list of (at least) the heads of the common subset.
    "fetch" is a list of roots of the nodes that would be incoming, to be
      supplied to changegroupsubset.
    "heads" is either the supplied heads, or else the remote's heads.
    """

    knownnode = repo.changelog.hasnode
    search = []
    fetch = set()
    seen = set()
    seenbranch = set()
    base = set()

    if not heads:
        heads = remote.heads()

    if repo.changelog.tip() == nullid:
        base.add(nullid)
        if heads != [nullid]:
            return [nullid], [nullid], list(heads)
        return [nullid], [], heads

    # assume we're closer to the tip than the root
    # and start by examining the heads
    repo.ui.status(_("searching for changes\n"))

    unknown = []
    for h in heads:
        if not knownnode(h):
            unknown.append(h)
        else:
            base.add(h)

    if not unknown:
        return list(base), [], list(heads)

    req = set(unknown)
    reqcnt = 0

    # search through remote branches
    # a 'branch' here is a linear segment of history, with four parts:
    # head, root, first parent, second parent
    # (a branch always has two parents (or none) by definition)
    unknown = collections.deque(remote.branches(unknown))
    while unknown:
        r = []
        while unknown:
            n = unknown.popleft()
            if n[0] in seen:
                continue

            repo.ui.debug("examining %s:%s\n"
                          % (short(n[0]), short(n[1])))
            if n[0] == nullid: # found the end of the branch
                pass
            elif n in seenbranch:
                repo.ui.debug("branch already found\n")
                continue
            elif n[1] and knownnode(n[1]): # do we know the base?
                repo.ui.debug("found incomplete branch %s:%s\n"
                              % (short(n[0]), short(n[1])))
                search.append(n[0:2]) # schedule branch range for scanning
                seenbranch.add(n)
            else:
                if n[1] not in seen and n[1] not in fetch:
                    if knownnode(n[2]) and knownnode(n[3]):
                        repo.ui.debug("found new changeset %s\n" %
                                      short(n[1]))
                        fetch.add(n[1]) # earliest unknown
                    for p in n[2:4]:
                        if knownnode(p):
                            base.add(p) # latest known

                for p in n[2:4]:
                    if p not in req and not knownnode(p):
                        r.append(p)
                        req.add(p)
            seen.add(n[0])

        if r:
            reqcnt += 1
            repo.ui.progress(_('searching'), reqcnt, unit=_('queries'))
            repo.ui.debug("request %d: %s\n" %
                        (reqcnt, " ".join(map(short, r))))
            for p in xrange(0, len(r), 10):
                for b in remote.branches(r[p:p + 10]):
                    repo.ui.debug("received %s:%s\n" %
                                  (short(b[0]), short(b[1])))
                    unknown.append(b)

    # do binary search on the branches we found
    while search:
        newsearch = []
        reqcnt += 1
        repo.ui.progress(_('searching'), reqcnt, unit=_('queries'))
        for n, l in zip(search, remote.between(search)):
            l.append(n[1])
            p = n[0]
            f = 1
            for i in l:
                repo.ui.debug("narrowing %d:%d %s\n" % (f, len(l), short(i)))
                if knownnode(i):
                    if f <= 2:
                        repo.ui.debug("found new branch changeset %s\n" %
                                          short(p))
                        fetch.add(p)
                        base.add(i)
                    else:
                        repo.ui.debug("narrowed branch search to %s:%s\n"
                                      % (short(p), short(i)))
                        newsearch.append((p, i))
                    break
                p, f = i, f * 2
            search = newsearch

    # sanity check our fetch list
    for f in fetch:
        if knownnode(f):
            raise error.RepoError(_("already have changeset ")
                                  + short(f[:4]))

    base = list(base)
    if base == [nullid]:
        if force:
            repo.ui.warn(_("warning: repository is unrelated\n"))
        else:
            raise util.Abort(_("repository is unrelated"))

    repo.ui.debug("found new changesets starting at " +
                 " ".join([short(f) for f in fetch]) + "\n")

    repo.ui.progress(_('searching'), None)
    repo.ui.debug("%d total queries\n" % reqcnt)

    return base, list(fetch), heads