view mercurial/setdiscovery.py @ 14512:8c8b55733cbd

COPYING: refresh with current address from fsf.org
author Matt Mackall <mpm@selenic.com>
date Thu, 02 Jun 2011 11:17:02 -0500
parents 2bf60f158ecb
children f03c82d1f50a
line wrap: on
line source

# setdiscovery.py - improved discovery of common nodeset for mercurial
#
# Copyright 2010 Benoit Boissinot <bboissin@gmail.com>
# and Peter Arrenbrecht <peter@arrenbrecht.ch>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from node import nullid
from i18n import _
import random, collections, util, dagutil

def _updatesample(dag, nodes, sample, always, quicksamplesize=0):
    # if nodes is empty we scan the entire graph
    if nodes:
        heads = dag.headsetofconnecteds(nodes)
    else:
        heads = dag.heads()
    dist = {}
    visit = collections.deque(heads)
    seen = set()
    factor = 1
    while visit:
        curr = visit.popleft()
        if curr in seen:
            continue
        d = dist.setdefault(curr, 1)
        if d > factor:
            factor *= 2
        if d == factor:
            if curr not in always: # need this check for the early exit below
                sample.add(curr)
                if quicksamplesize and (len(sample) >= quicksamplesize):
                    return
        seen.add(curr)
        for p in dag.parents(curr):
            if not nodes or p in nodes:
                dist.setdefault(p, d + 1)
                visit.append(p)

def _setupsample(dag, nodes, size):
    if len(nodes) <= size:
        return set(nodes), None, 0
    always = set(dag.heads())
    desiredlen = size - len(always)
    if desiredlen <= 0:
        # This could be bad if there are very many heads, all unknown to the
        # server. We're counting on long request support here.
        return always, None, desiredlen
    return always, set(), desiredlen

def _takequicksample(dag, nodes, size, initial):
    always, sample, desiredlen = _setupsample(dag, nodes, size)
    if sample is None:
        return always
    if initial:
        fromset = None
    else:
        fromset = nodes
    _updatesample(dag, fromset, sample, always, quicksamplesize=desiredlen)
    sample.update(always)
    return sample

def _takefullsample(dag, nodes, size):
    always, sample, desiredlen = _setupsample(dag, nodes, size)
    if sample is None:
        return always
    # update from heads
    _updatesample(dag, nodes, sample, always)
    # update from roots
    _updatesample(dag.inverse(), nodes, sample, always)
    assert sample
    if len(sample) > desiredlen:
        sample = set(random.sample(sample, desiredlen))
    elif len(sample) < desiredlen:
        more = desiredlen - len(sample)
        sample.update(random.sample(list(nodes - sample - always), more))
    sample.update(always)
    return sample

def findcommonheads(ui, local, remote,
                    initialsamplesize=100,
                    fullsamplesize=200,
                    abortwhenunrelated=True):
    '''Return a tuple (common, anyincoming, remoteheads) used to identify
    missing nodes from or in remote.

    shortcutlocal determines whether we try use direct access to localrepo if
    remote is actually local.
    '''
    roundtrips = 0
    cl = local.changelog
    dag = dagutil.revlogdag(cl)
    nodes = dag.nodeset()

    # early exit if we know all the specified server heads already
    ui.debug("query 1; heads\n")
    roundtrips += 1
    srvheadhashes = remote.heads()

    ## TODO We might want to request an additional random sample of the server's
    ## nodes batched with the heads query here.

    if cl.tip() == nullid:
        if srvheadhashes != [nullid]:
            return [nullid], True, srvheadhashes
        return [nullid], False, []

    # start actual discovery (we note this before the next "if" for
    # compatibility reasons)
    ui.status(_("searching for changes\n"))

    srvheads = dag.internalizeall(srvheadhashes, filterunknown=True)
    if len(srvheads) == len(srvheadhashes):
        ui.note("all remote heads known locally\n")
        return (srvheadhashes, False, srvheadhashes,)

    # full blown discovery
    undecided = nodes # own nodes where I don't know if the server knows them
    common = set() # own nodes I know we both know
    missing = set() # own nodes I know the server lacks

    # treat remote heads as a first implicit sample response
    common.update(dag.ancestorset(srvheads))
    undecided.difference_update(common)
    # use cheapish initial sample
    if common:
        ui.debug("taking initial sample\n")
        sample = _takefullsample(dag, undecided, size=fullsamplesize)
    else:
        ui.debug("taking quick initial sample\n")
        sample = _takequicksample(dag, nodes, size=initialsamplesize,
                                  initial=True)

    roundtrips += 1
    ui.progress(_('searching'), roundtrips, unit=_('queries'))
    ui.debug("query %i; still undecided: %i, sample size is: %i\n"
             % (roundtrips, len(undecided), len(sample)))
    # indices between sample and externalized version must match
    sample = list(sample)
    yesno = remote.known(dag.externalizeall(sample))

    while undecided:
        commoninsample = set(n for i, n in enumerate(sample) if yesno[i])
        common.update(dag.ancestorset(commoninsample, common))

        missinginsample = [n for i, n in enumerate(sample) if not yesno[i]]
        missing.update(dag.descendantset(missinginsample, missing))

        undecided.difference_update(missing)
        undecided.difference_update(common)

        if not undecided:
            break

        ui.note("sampling from both directions\n")
        sample = _takefullsample(dag, undecided, size=fullsamplesize)

        roundtrips += 1
        ui.progress(_('searching'), roundtrips, unit=_('queries'))
        ui.debug("query %i; still undecided: %i, sample size is: %i\n"
                 % (roundtrips, len(undecided), len(sample)))
        # indices between sample and externalized version must match
        sample = list(sample)
        yesno = remote.known(dag.externalizeall(sample))

    result = dag.headsetofconnecteds(common)
    ui.progress(_('searching'), None)
    ui.debug("%d total queries\n" % roundtrips)

    if not result and srvheadhashes != [nullid]:
        if abortwhenunrelated:
            raise util.Abort(_("repository is unrelated"))
        else:
            ui.warn(_("warning: repository is unrelated\n"))
        return (set([nullid]), True, srvheadhashes,)

    return (dag.externalizeall(result), True, srvheadhashes,)