|
1 # setdiscovery.py - improved discovery of common nodeset for mercurial |
|
2 # |
|
3 # Copyright 2010 Benoit Boissinot <bboissin@gmail.com> |
|
4 # and Peter Arrenbrecht <peter@arrenbrecht.ch> |
|
5 # |
|
6 # This software may be used and distributed according to the terms of the |
|
7 # GNU General Public License version 2 or any later version. |
|
8 |
|
9 from node import nullid |
|
10 from i18n import _ |
|
11 import random, collections, util, dagutil |
|
12 |
|
13 def _updatesample(dag, nodes, sample, always, quicksamplesize=0): |
|
14 # if nodes is empty we scan the entire graph |
|
15 if nodes: |
|
16 heads = dag.headsetofconnecteds(nodes) |
|
17 else: |
|
18 heads = dag.heads() |
|
19 dist = {} |
|
20 visit = collections.deque(heads) |
|
21 seen = set() |
|
22 factor = 1 |
|
23 while visit: |
|
24 curr = visit.popleft() |
|
25 if curr in seen: |
|
26 continue |
|
27 d = dist.setdefault(curr, 1) |
|
28 if d > factor: |
|
29 factor *= 2 |
|
30 if d == factor: |
|
31 if curr not in always: # need this check for the early exit below |
|
32 sample.add(curr) |
|
33 if quicksamplesize and (len(sample) >= quicksamplesize): |
|
34 return |
|
35 seen.add(curr) |
|
36 for p in dag.parents(curr): |
|
37 if not nodes or p in nodes: |
|
38 dist.setdefault(p, d + 1) |
|
39 visit.append(p) |
|
40 |
|
41 def _setupsample(dag, nodes, size): |
|
42 if len(nodes) <= size: |
|
43 return set(nodes), None, 0 |
|
44 always = set(dag.heads()) |
|
45 desiredlen = size - len(always) |
|
46 if desiredlen <= 0: |
|
47 # This could be bad if there are very many heads, all unknown to the |
|
48 # server. We're counting on long request support here. |
|
49 return always, None, desiredlen |
|
50 return always, set(), desiredlen |
|
51 |
|
52 def _takequicksample(dag, nodes, size, initial): |
|
53 always, sample, desiredlen = _setupsample(dag, nodes, size) |
|
54 if sample is None: |
|
55 return always |
|
56 if initial: |
|
57 fromset = None |
|
58 else: |
|
59 fromset = nodes |
|
60 _updatesample(dag, fromset, sample, always, quicksamplesize=desiredlen) |
|
61 sample.update(always) |
|
62 return sample |
|
63 |
|
64 def _takefullsample(dag, nodes, size): |
|
65 always, sample, desiredlen = _setupsample(dag, nodes, size) |
|
66 if sample is None: |
|
67 return always |
|
68 # update from heads |
|
69 _updatesample(dag, nodes, sample, always) |
|
70 # update from roots |
|
71 _updatesample(dag.inverse(), nodes, sample, always) |
|
72 assert sample |
|
73 if len(sample) > desiredlen: |
|
74 sample = set(random.sample(sample, desiredlen)) |
|
75 elif len(sample) < desiredlen: |
|
76 more = desiredlen - len(sample) |
|
77 sample.update(random.sample(list(nodes - sample - always), more)) |
|
78 sample.update(always) |
|
79 return sample |
|
80 |
|
81 def findcommonheads(ui, local, remote, |
|
82 initialsamplesize=100, |
|
83 fullsamplesize=200, |
|
84 abortwhenunrelated=True): |
|
85 '''Return a tuple (common, anyincoming, remoteheads) used to identify missing |
|
86 nodes from or in remote. |
|
87 |
|
88 shortcutlocal determines whether we try use direct access to localrepo if |
|
89 remote is actually local. |
|
90 ''' |
|
91 roundtrips = 0 |
|
92 cl = local.changelog |
|
93 dag = dagutil.revlogdag(cl) |
|
94 nodes = dag.nodeset() |
|
95 |
|
96 # early exit if we know all the specified server heads already |
|
97 ui.debug("query 1; heads\n") |
|
98 roundtrips += 1 |
|
99 srvheadhashes = remote.heads() |
|
100 |
|
101 ## TODO We might want to request an additional random sample of the server's |
|
102 ## nodes batched with the heads query here. |
|
103 |
|
104 if cl.tip() == nullid: |
|
105 if srvheadhashes != [nullid]: |
|
106 return [nullid], True, srvheadhashes |
|
107 return [nullid], False, [] |
|
108 |
|
109 # start actual discovery (we note this before the next "if" for compatibility |
|
110 # reasons) |
|
111 ui.status(_("searching for changes\n")) |
|
112 |
|
113 srvheads = dag.internalizeall(srvheadhashes, filterunknown=True) |
|
114 if len(srvheads) == len(srvheadhashes): |
|
115 ui.note("all remote heads known locally\n") |
|
116 return (srvheadhashes, False, srvheadhashes,) |
|
117 |
|
118 # full blown discovery |
|
119 undecided = nodes # own nodes where I don't know if the server knows them |
|
120 common = set() # own nodes I know we both know |
|
121 missing = set() # own nodes I know the server lacks |
|
122 |
|
123 # treat remote heads as a first implicit sample response |
|
124 common.update(dag.ancestorset(srvheads)) |
|
125 undecided.difference_update(common) |
|
126 # use cheapish initial sample |
|
127 if common: |
|
128 ui.debug("taking initial sample\n") |
|
129 sample = _takefullsample(dag, undecided, size=fullsamplesize) |
|
130 else: |
|
131 ui.debug("taking quick initial sample\n") |
|
132 sample = _takequicksample(dag, nodes, size=initialsamplesize, |
|
133 initial=True) |
|
134 |
|
135 roundtrips += 1 |
|
136 ui.progress(_('searching'), roundtrips, unit=_('queries')) |
|
137 ui.debug("query %i; still undecided: %i, sample size is: %i\n" |
|
138 % (roundtrips, len(undecided), len(sample))) |
|
139 # indices between sample and externalized version must match |
|
140 sample = list(sample) |
|
141 yesno = remote.known(dag.externalizeall(sample)) |
|
142 |
|
143 while undecided: |
|
144 commoninsample = set(n for i, n in enumerate(sample) if yesno[i]) |
|
145 common.update(dag.ancestorset(commoninsample, common)) |
|
146 |
|
147 missinginsample = [n for i, n in enumerate(sample) if not yesno[i]] |
|
148 missing.update(dag.descendantset(missinginsample, missing)) |
|
149 |
|
150 undecided.difference_update(missing) |
|
151 undecided.difference_update(common) |
|
152 |
|
153 if not undecided: |
|
154 break |
|
155 |
|
156 ui.note("sampling from both directions\n") |
|
157 sample = _takefullsample(dag, undecided, size=fullsamplesize) |
|
158 |
|
159 roundtrips += 1 |
|
160 ui.progress(_('searching'), roundtrips, unit=_('queries')) |
|
161 ui.debug("query %i; still undecided: %i, sample size is: %i\n" |
|
162 % (roundtrips, len(undecided), len(sample))) |
|
163 # indices between sample and externalized version must match |
|
164 sample = list(sample) |
|
165 yesno = remote.known(dag.externalizeall(sample)) |
|
166 |
|
167 result = dag.headsetofconnecteds(common) |
|
168 ui.progress(_('searching'), None) |
|
169 ui.debug("%d total queries\n" % roundtrips) |
|
170 |
|
171 if not result and srvheadhashes != [nullid]: |
|
172 if abortwhenunrelated: |
|
173 raise util.Abort(_("repository is unrelated")) |
|
174 else: |
|
175 ui.warn(_("warning: repository is unrelated\n")) |
|
176 return (set([nullid]), True, srvheadhashes,) |
|
177 |
|
178 return (dag.externalizeall(result), True, srvheadhashes,) |