comparison mercurial/treediscovery.py @ 14164:cb98fed52495

discovery: add new set-based discovery Adds a new discovery method based on repeatedly sampling the still undecided subset of the local node graph to determine the set of nodes common to both the client and the server. For small differences between client and server, it uses about the same or slightly fewer roundtrips than the old tree-based discovery. For larger differences, it typically reduces the number of roundtrips drastically (from 150 to 4, for instance). The old discovery code now lives in treediscovery.py, the new code is in setdiscovery.py. Still missing is a hook for extensions to contribute nodes to the initial sample. For instance, Augie's remotebranches could contribute the last known state of the server's heads. Credits for the actual sampler and computing common heads instead of bases go to Benoit Boissinot.
author Peter Arrenbrecht <peter.arrenbrecht@gmail.com>
date Mon, 02 May 2011 19:21:30 +0200
parents mercurial/discovery.py@72c84f24b420
children e3dd3dcd6059
comparison
equal deleted inserted replaced
14163:38184a72d793 14164:cb98fed52495
1 # discovery.py - protocol changeset discovery functions
2 #
3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 #
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
7
8 from node import nullid, short
9 from i18n import _
10 import util, error
11
12 def findcommonincoming(repo, remote, heads=None, force=False):
13 """Return a tuple (common, fetch, heads) used to identify the common
14 subset of nodes between repo and remote.
15
16 "common" is a list of (at least) the heads of the common subset.
17 "fetch" is a list of roots of the nodes that would be incoming, to be
18 supplied to changegroupsubset.
19 "heads" is either the supplied heads, or else the remote's heads.
20 """
21
22 m = repo.changelog.nodemap
23 search = []
24 fetch = set()
25 seen = set()
26 seenbranch = set()
27 base = set()
28
29 if not heads:
30 heads = remote.heads()
31
32 if repo.changelog.tip() == nullid:
33 base.add(nullid)
34 if heads != [nullid]:
35 return [nullid], [nullid], list(heads)
36 return [nullid], [], []
37
38 # assume we're closer to the tip than the root
39 # and start by examining the heads
40 repo.ui.status(_("searching for changes\n"))
41
42 unknown = []
43 for h in heads:
44 if h not in m:
45 unknown.append(h)
46 else:
47 base.add(h)
48
49 heads = unknown
50 if not unknown:
51 return list(base), [], []
52
53 req = set(unknown)
54 reqcnt = 0
55
56 # search through remote branches
57 # a 'branch' here is a linear segment of history, with four parts:
58 # head, root, first parent, second parent
59 # (a branch always has two parents (or none) by definition)
60 unknown = remote.branches(unknown)
61 while unknown:
62 r = []
63 while unknown:
64 n = unknown.pop(0)
65 if n[0] in seen:
66 continue
67
68 repo.ui.debug("examining %s:%s\n"
69 % (short(n[0]), short(n[1])))
70 if n[0] == nullid: # found the end of the branch
71 pass
72 elif n in seenbranch:
73 repo.ui.debug("branch already found\n")
74 continue
75 elif n[1] and n[1] in m: # do we know the base?
76 repo.ui.debug("found incomplete branch %s:%s\n"
77 % (short(n[0]), short(n[1])))
78 search.append(n[0:2]) # schedule branch range for scanning
79 seenbranch.add(n)
80 else:
81 if n[1] not in seen and n[1] not in fetch:
82 if n[2] in m and n[3] in m:
83 repo.ui.debug("found new changeset %s\n" %
84 short(n[1]))
85 fetch.add(n[1]) # earliest unknown
86 for p in n[2:4]:
87 if p in m:
88 base.add(p) # latest known
89
90 for p in n[2:4]:
91 if p not in req and p not in m:
92 r.append(p)
93 req.add(p)
94 seen.add(n[0])
95
96 if r:
97 reqcnt += 1
98 repo.ui.progress(_('searching'), reqcnt, unit=_('queries'))
99 repo.ui.debug("request %d: %s\n" %
100 (reqcnt, " ".join(map(short, r))))
101 for p in xrange(0, len(r), 10):
102 for b in remote.branches(r[p:p + 10]):
103 repo.ui.debug("received %s:%s\n" %
104 (short(b[0]), short(b[1])))
105 unknown.append(b)
106
107 # do binary search on the branches we found
108 while search:
109 newsearch = []
110 reqcnt += 1
111 repo.ui.progress(_('searching'), reqcnt, unit=_('queries'))
112 for n, l in zip(search, remote.between(search)):
113 l.append(n[1])
114 p = n[0]
115 f = 1
116 for i in l:
117 repo.ui.debug("narrowing %d:%d %s\n" % (f, len(l), short(i)))
118 if i in m:
119 if f <= 2:
120 repo.ui.debug("found new branch changeset %s\n" %
121 short(p))
122 fetch.add(p)
123 base.add(i)
124 else:
125 repo.ui.debug("narrowed branch search to %s:%s\n"
126 % (short(p), short(i)))
127 newsearch.append((p, i))
128 break
129 p, f = i, f * 2
130 search = newsearch
131
132 # sanity check our fetch list
133 for f in fetch:
134 if f in m:
135 raise error.RepoError(_("already have changeset ")
136 + short(f[:4]))
137
138 base = list(base)
139 if base == [nullid]:
140 if force:
141 repo.ui.warn(_("warning: repository is unrelated\n"))
142 else:
143 raise util.Abort(_("repository is unrelated"))
144
145 repo.ui.debug("found new changesets starting at " +
146 " ".join([short(f) for f in fetch]) + "\n")
147
148 repo.ui.progress(_('searching'), None)
149 repo.ui.debug("%d total queries\n" % reqcnt)
150
151 return base, list(fetch), heads