mercurial/setdiscovery.py
changeset 14164 cb98fed52495
child 14206 2bf60f158ecb
equal deleted inserted replaced
14163:38184a72d793 14164:cb98fed52495
       
     1 # setdiscovery.py - improved discovery of common nodeset for mercurial
       
     2 #
       
     3 # Copyright 2010 Benoit Boissinot <bboissin@gmail.com>
       
     4 # and Peter Arrenbrecht <peter@arrenbrecht.ch>
       
     5 #
       
     6 # This software may be used and distributed according to the terms of the
       
     7 # GNU General Public License version 2 or any later version.
       
     8 
       
     9 from node import nullid
       
    10 from i18n import _
       
    11 import random, collections, util, dagutil
       
    12 
       
    13 def _updatesample(dag, nodes, sample, always, quicksamplesize=0):
       
    14     # if nodes is empty we scan the entire graph
       
    15     if nodes:
       
    16         heads = dag.headsetofconnecteds(nodes)
       
    17     else:
       
    18         heads = dag.heads()
       
    19     dist = {}
       
    20     visit = collections.deque(heads)
       
    21     seen = set()
       
    22     factor = 1
       
    23     while visit:
       
    24         curr = visit.popleft()
       
    25         if curr in seen:
       
    26             continue
       
    27         d = dist.setdefault(curr, 1)
       
    28         if d > factor:
       
    29             factor *= 2
       
    30         if d == factor:
       
    31             if curr not in always: # need this check for the early exit below
       
    32                 sample.add(curr)
       
    33                 if quicksamplesize and (len(sample) >= quicksamplesize):
       
    34                     return
       
    35         seen.add(curr)
       
    36         for p in dag.parents(curr):
       
    37             if not nodes or p in nodes:
       
    38                 dist.setdefault(p, d + 1)
       
    39                 visit.append(p)
       
    40 
       
    41 def _setupsample(dag, nodes, size):
       
    42     if len(nodes) <= size:
       
    43         return set(nodes), None, 0
       
    44     always = set(dag.heads())
       
    45     desiredlen = size - len(always)
       
    46     if desiredlen <= 0:
       
    47         # This could be bad if there are very many heads, all unknown to the
       
    48         # server. We're counting on long request support here.
       
    49         return always, None, desiredlen
       
    50     return always, set(), desiredlen
       
    51 
       
    52 def _takequicksample(dag, nodes, size, initial):
       
    53     always, sample, desiredlen = _setupsample(dag, nodes, size)
       
    54     if sample is None:
       
    55         return always
       
    56     if initial:
       
    57         fromset = None
       
    58     else:
       
    59         fromset = nodes
       
    60     _updatesample(dag, fromset, sample, always, quicksamplesize=desiredlen)
       
    61     sample.update(always)
       
    62     return sample
       
    63 
       
    64 def _takefullsample(dag, nodes, size):
       
    65     always, sample, desiredlen = _setupsample(dag, nodes, size)
       
    66     if sample is None:
       
    67         return always
       
    68     # update from heads
       
    69     _updatesample(dag, nodes, sample, always)
       
    70     # update from roots
       
    71     _updatesample(dag.inverse(), nodes, sample, always)
       
    72     assert sample
       
    73     if len(sample) > desiredlen:
       
    74         sample = set(random.sample(sample, desiredlen))
       
    75     elif len(sample) < desiredlen:
       
    76         more = desiredlen - len(sample)
       
    77         sample.update(random.sample(list(nodes - sample - always), more))
       
    78     sample.update(always)
       
    79     return sample
       
    80 
       
    81 def findcommonheads(ui, local, remote,
       
    82                     initialsamplesize=100,
       
    83                     fullsamplesize=200,
       
    84                     abortwhenunrelated=True):
       
    85     '''Return a tuple (common, anyincoming, remoteheads) used to identify missing
       
    86     nodes from or in remote.
       
    87 
       
    88     shortcutlocal determines whether we try use direct access to localrepo if
       
    89     remote is actually local.
       
    90     '''
       
    91     roundtrips = 0
       
    92     cl = local.changelog
       
    93     dag = dagutil.revlogdag(cl)
       
    94     nodes = dag.nodeset()
       
    95 
       
    96     # early exit if we know all the specified server heads already
       
    97     ui.debug("query 1; heads\n")
       
    98     roundtrips += 1
       
    99     srvheadhashes = remote.heads()
       
   100 
       
   101     ## TODO We might want to request an additional random sample of the server's
       
   102     ## nodes batched with the heads query here.
       
   103 
       
   104     if cl.tip() == nullid:
       
   105         if srvheadhashes != [nullid]:
       
   106             return [nullid], True, srvheadhashes
       
   107         return [nullid], False, []
       
   108 
       
   109     # start actual discovery (we note this before the next "if" for compatibility
       
   110     # reasons)
       
   111     ui.status(_("searching for changes\n"))
       
   112 
       
   113     srvheads = dag.internalizeall(srvheadhashes, filterunknown=True)
       
   114     if len(srvheads) == len(srvheadhashes):
       
   115         ui.note("all remote heads known locally\n")
       
   116         return (srvheadhashes, False, srvheadhashes,)
       
   117 
       
   118     # full blown discovery
       
   119     undecided = nodes # own nodes where I don't know if the server knows them
       
   120     common = set() # own nodes I know we both know
       
   121     missing = set() # own nodes I know the server lacks
       
   122 
       
   123     # treat remote heads as a first implicit sample response
       
   124     common.update(dag.ancestorset(srvheads))
       
   125     undecided.difference_update(common)
       
   126     # use cheapish initial sample
       
   127     if common:
       
   128         ui.debug("taking initial sample\n")
       
   129         sample = _takefullsample(dag, undecided, size=fullsamplesize)
       
   130     else:
       
   131         ui.debug("taking quick initial sample\n")
       
   132         sample = _takequicksample(dag, nodes, size=initialsamplesize,
       
   133                                   initial=True)
       
   134 
       
   135     roundtrips += 1
       
   136     ui.progress(_('searching'), roundtrips, unit=_('queries'))
       
   137     ui.debug("query %i; still undecided: %i, sample size is: %i\n"
       
   138              % (roundtrips, len(undecided), len(sample)))
       
   139     # indices between sample and externalized version must match
       
   140     sample = list(sample)
       
   141     yesno = remote.known(dag.externalizeall(sample))
       
   142 
       
   143     while undecided:
       
   144         commoninsample = set(n for i, n in enumerate(sample) if yesno[i])
       
   145         common.update(dag.ancestorset(commoninsample, common))
       
   146 
       
   147         missinginsample = [n for i, n in enumerate(sample) if not yesno[i]]
       
   148         missing.update(dag.descendantset(missinginsample, missing))
       
   149 
       
   150         undecided.difference_update(missing)
       
   151         undecided.difference_update(common)
       
   152 
       
   153         if not undecided:
       
   154             break
       
   155 
       
   156         ui.note("sampling from both directions\n")
       
   157         sample = _takefullsample(dag, undecided, size=fullsamplesize)
       
   158 
       
   159         roundtrips += 1
       
   160         ui.progress(_('searching'), roundtrips, unit=_('queries'))
       
   161         ui.debug("query %i; still undecided: %i, sample size is: %i\n"
       
   162                  % (roundtrips, len(undecided), len(sample)))
       
   163         # indices between sample and externalized version must match
       
   164         sample = list(sample)
       
   165         yesno = remote.known(dag.externalizeall(sample))
       
   166 
       
   167     result = dag.headsetofconnecteds(common)
       
   168     ui.progress(_('searching'), None)
       
   169     ui.debug("%d total queries\n" % roundtrips)
       
   170 
       
   171     if not result and srvheadhashes != [nullid]:
       
   172         if abortwhenunrelated:
       
   173             raise util.Abort(_("repository is unrelated"))
       
   174         else:
       
   175             ui.warn(_("warning: repository is unrelated\n"))
       
   176         return (set([nullid]), True, srvheadhashes,)
       
   177 
       
   178     return (dag.externalizeall(result), True, srvheadhashes,)