comparison hgext/narrow/narrowbundle2.py @ 36079:a2a6e724d61a

narrow: import experimental extension from narrowhg revision cb51d673e9c5 Adjustments: * renamed src to hgext/narrow * marked extension experimental * added correct copyright header where it was missing * updated hgrc extension enable line in library.sh * renamed library.sh to narrow-library.sh * dropped all files from repo root as they're not interesting * dropped test-pyflakes.t, test-check-code.t and test-check-py3-compat.t * renamed remaining tests to all be test-narrow-* when they didn't already * fixed test-narrow-expanddirstate.t to refer to narrow and not narrowhg * fixed tests that wanted `update -C .` instead of `merge --abort` * corrected a two-space indent in narrowspec.py * added a missing _() in narrowcommands.py * fixed imports to pass the import checker * narrow only adds its --include and --exclude to clone if sparse isn't enabled to avoid breaking test-duplicateoptions.py. This is a kludge, and we'll need to come up with a better solution in the future. These were more or less the minimum to import something that would pass tests and not create a bunch of files we'll never use. Changes I intend to make as followups: * rework the test-narrow-*-tree.t tests to use the new testcases functionality in run-tests.py * remove lots of monkeypatches of core things Differential Revision: https://phab.mercurial-scm.org/D1974
author Augie Fackler <augie@google.com>
date Mon, 29 Jan 2018 16:19:33 -0500
parents
children 48c12b440b4a
comparison
equal deleted inserted replaced
36078:7f68235f23ff 36079:a2a6e724d61a
1 # narrowbundle2.py - bundle2 extensions for narrow repository support
2 #
3 # Copyright 2017 Google, Inc.
4 #
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
7
8 from __future__ import absolute_import
9
10 import collections
11 import errno
12 import struct
13
14 from mercurial.i18n import _
15 from mercurial.node import (
16 bin,
17 nullid,
18 nullrev,
19 )
20 from mercurial import (
21 bundle2,
22 changegroup,
23 dagutil,
24 error,
25 exchange,
26 extensions,
27 repair,
28 util,
29 wireproto,
30 )
31
32 from . import (
33 narrowrepo,
34 narrowspec,
35 )
36
# Capability name advertised over bundle2; it also prefixes the names of the
# narrow-specific bundle2 parts defined below.
narrowcap = 'narrow'
# Configuration section holding the per-user narrow ACL settings.
narrowacl_section = 'narrowhgacl'

# bundle2 part names.
specpart = narrowcap + ':spec'
changespecpart = narrowcap + ':changespec'
# Parameter names carried by the spec part.
specpart_include = 'include'
specpart_exclude = 'exclude'

# 4-byte chunk signals used in the payload of a changespec part.
killnodesignal = 'KILL'
donesignal = 'DONE'

# struct layouts of elided changeset/manifest headers and their packed sizes.
elidedcsheader = '>20s20s20sl' # cset id, p1, p2, len(text)
csheadersize = struct.Struct(elidedcsheader).size
elidedmfheader = '>20s20s20s20sl' # manifest id, p1, p2, link id, len(text)
mfheadersize = struct.Struct(elidedmfheader).size
49
# When advertising capabilities, always include narrow clone support.
def getrepocaps_narrow(orig, repo, **kwargs):
    """Wrapper for ``bundle2.getrepocaps`` that adds the narrow capability.

    Calls the wrapped function ``orig`` with the same arguments, sets the
    ``narrowcap`` entry of the mapping it returns to ``['v0']`` and returns
    that mapping.
    """
    capabilities = orig(repo, **kwargs)
    capabilities.update({narrowcap: ['v0']})
    return capabilities
55
def _computeellipsis(repo, common, heads, known, match, depth=None):
    """Compute the shape of a narrowed DAG.

    Args:
      repo: The repository we're transferring.
      common: The roots of the DAG range we're transferring.
              May be just [nullid], which means all ancestors of heads.
      heads: The heads of the DAG range we're transferring.
      known: A set of changelog revision numbers that are always treated as
             required (they are unioned with the head revisions), so they
             are kept as ellipsis nodes even when they do not touch the
             narrowspec.
      match: The narrowmatcher that allows us to identify relevant changes.
      depth: If not None, only consider nodes to be full nodes if they are at
             most depth changesets away from one of heads.

    Returns:
      A tuple of (visitnodes, relevant_nodes, ellipsisroots) where:

        visitnodes: The list of nodes (either full or ellipsis) which
                    need to be sent to the client.
        relevant_nodes: The set of changelog nodes which change a file inside
                        the narrowspec. The client needs these as non-ellipsis
                        nodes.
        ellipsisroots: A dict of {rev: parents} that is used in
                       narrowchangegroup to produce ellipsis nodes with the
                       correct parents.

    Raises:
      error.Abort: if an ellipsis head with three roots cannot be split.
    """
    cl = repo.changelog
    mfl = repo.manifestlog

    cldag = dagutil.revlogdag(cl)
    # dagutil does not like nullid/nullrev
    commonrevs = cldag.internalizeall(common - {nullid}) | {nullrev}
    headsrevs = cldag.internalizeall(heads)
    # Every later use of revdepth is guarded by "depth is not None", so the
    # initialisation must use the same test (a plain truthiness test would
    # leave revdepth undefined for depth == 0).
    if depth is not None:
        revdepth = {h: 0 for h in headsrevs}

    ellipsisheads = collections.defaultdict(set)
    ellipsisroots = collections.defaultdict(set)

    def addroot(head, curchange):
        """Add a root to an ellipsis head, splitting heads with 3 roots."""
        ellipsisroots[head].add(curchange)
        # Recursively split ellipsis heads with 3 roots by finding the
        # roots' youngest common descendant which is an elided merge commit.
        # That descendant takes 2 of the 3 roots as its own, and becomes a
        # root of the head.
        while len(ellipsisroots[head]) > 2:
            child, roots = splithead(head)
            splitroots(head, child, roots)
            head = child # Recurse in case we just added a 3rd root

    def splitroots(head, child, roots):
        """Move roots from head to child and make child a root of head."""
        ellipsisroots[head].difference_update(roots)
        ellipsisroots[head].add(child)
        ellipsisroots[child].update(roots)
        ellipsisroots[child].discard(child)

    def splithead(head):
        """Pick a rev that can take over two of the three roots of head.

        Returns a (child, (root1, root2)) tuple.
        """
        r1, r2, r3 = sorted(ellipsisroots[head])
        for nr1, nr2 in ((r2, r3), (r1, r3), (r1, r2)):
            mid = repo.revs('sort(merge() & %d::%d & %d::%d, -rev)',
                            nr1, head, nr2, head)
            for j in mid:
                if j == nr2:
                    return nr2, (nr1, nr2)
                if j not in ellipsisroots or len(ellipsisroots[j]) < 2:
                    return j, (nr1, nr2)
        raise error.Abort('Failed to split up ellipsis node! head: %d, '
                          'roots: %d %d %d' % (head, r1, r2, r3))

    missing = list(cl.findmissingrevs(common=commonrevs, heads=headsrevs))
    relevant_nodes = set()
    # Build a real list: callers test the result for emptiness, and a lazy
    # map object (Python 3) would always be truthy.
    visitnodes = [cl.node(r) for r in missing]
    required = set(headsrevs) | known
    # Walk from the newest missing revision to the oldest.
    for rev in reversed(missing):
        clrev = cl.changelogrevision(rev)
        ps = cldag.parents(rev)
        if depth is not None:
            curdepth = revdepth[rev]
            for p in ps:
                revdepth[p] = min(curdepth + 1, revdepth.get(p, depth + 1))
        needed = False
        shallow_enough = depth is None or revdepth[rev] <= depth
        if shallow_enough:
            curmf = mfl[clrev.manifest].read()
            if ps:
                # We choose to not trust the changed files list in
                # changesets because it's not always correct. TODO: could
                # we trust it for the non-merge case?
                p1mf = mfl[cl.changelogrevision(ps[0]).manifest].read()
                needed = any(match(f) for f in curmf.diff(p1mf))
                if not needed and len(ps) > 1:
                    # For merge changes, the list of changed files is not
                    # helpful, since we need to emit the merge if a file
                    # in the narrow spec has changed on either side of the
                    # merge. As a result, we do a manifest diff to check.
                    p2mf = mfl[cl.changelogrevision(ps[1]).manifest].read()
                    needed = any(match(f) for f in curmf.diff(p2mf))
            else:
                # For a root node, we need to include the node if any
                # files in the node match the narrowspec.
                needed = any(match(f) for f in curmf)

        if needed:
            for head in ellipsisheads[rev]:
                addroot(head, rev)
            for p in ps:
                required.add(p)
            relevant_nodes.add(cl.node(rev))
        else:
            if not ps:
                ps = [nullrev]
            if rev in required:
                for head in ellipsisheads[rev]:
                    addroot(head, rev)
                for p in ps:
                    ellipsisheads[p].add(rev)
            else:
                for p in ps:
                    ellipsisheads[p] |= ellipsisheads[rev]

    # add common changesets as roots of their reachable ellipsis heads
    for c in commonrevs:
        for head in ellipsisheads[c]:
            addroot(head, c)
    return visitnodes, relevant_nodes, ellipsisroots
180
def _packellipsischangegroup(repo, common, match, relevant_nodes,
                             ellipsisroots, visitnodes, depth, source, version):
    """Generate a changegroup in which non-relevant nodes are ellipsis nodes.

    A bundler for ``version`` is obtained from ``changegroup.getbundler`` and
    the narrowing state is attached to it as attributes, which act as a side
    channel into the wrapped packer code. The return value is whatever
    ``packer.generate(common, visitnodes, False, source)`` returns.

    Raises:
      error.Abort: if the negotiated changegroup version is '01' or '02';
                   ellipsis nodes need at least cg3.
    """
    if version in ('01', '02'):
        raise error.Abort(
            'ellipsis nodes require at least cg3 on client and server, '
            'but negotiated version %s' % version)

    # cg1packer.revchunk is wrapped elsewhere; the attributes set below are
    # how relevant_nodes and friends reach it. A linknode missing from the
    # full-node set marks an ellipsis node whose data is deferred, and the
    # overridden close() flushes the pending ellipsis nodes.
    packer = changegroup.getbundler(version, repo)

    # The narrow matcher lets the packer leave out filelogs and dirlogs.
    packer._narrow_matcher = lambda: match

    # Nodes that must NOT be ellipsis nodes. This set is stored instead of
    # the ellipsis set because it is expected to be much smaller for very
    # large histories.
    packer.full_nodes = relevant_nodes

    # Ellipsis revs -> their roots, at the changelog level.
    packer.precomputed_ellipsis = ellipsisroots

    # Changelog revs -> per-revlog revisions; cleared in close() at the end
    # of each group.
    packer.clrev_to_localrev = {}
    packer.next_clrev_to_localrev = {}

    # Changelog nodes -> changelog revs; filled once during the changelog
    # stage and left unmodified afterwards.
    packer.clnode_to_rev = {}
    packer.changelog_done = False

    # When true, the packer is serving shallow content and may have to pack
    # file contents not introduced by the changes being packed.
    packer.is_shallow = depth is not None

    return packer.generate(common, visitnodes, False, source)
216
def _addchangegrouppart(bundler, repo, cg, version):
    """Add a 'changegroup' part carrying cg to bundler and return the part.

    The part gets a 'version' parameter and, when the repository has the
    'treemanifest' requirement, a 'treemanifest' parameter set to '1'.
    """
    part = bundler.newpart('changegroup', data=cg)
    part.addparam('version', version)
    if 'treemanifest' in repo.requirements:
        part.addparam('treemanifest', '1')
    return part

# Serve a changegroup for a client with a narrow clone.
def getbundlechangegrouppart_narrow(bundler, repo, source,
                                    bundlecaps=None, b2caps=None, heads=None,
                                    common=None, **kwargs):
    """Add the bundle2 parts answering a getbundle request for a narrow clone.

    Args:
      bundler: The bundle2 bundler that receives the generated parts.
      repo: The repository being served.
      source: Passed through to the changegroup generation functions.
      bundlecaps: Accepted for signature compatibility; not used here.
      b2caps: bundle2 capabilities mapping; its 'changegroup' entry lists
              the changegroup versions to negotiate from.
      heads: Requested heads (defaults to repo.heads() on the ellipsis path).
      common: Common nodes (defaults to [nullid] on the ellipsis path).
      **kwargs: Narrow arguments read here: 'includepats', 'excludepats',
                'oldincludepats', 'oldexcludepats', 'known' and 'depth'.

    Raises:
      ValueError: if no changegroup version can be negotiated.
      error.Abort: if 'depth' is given but smaller than 1.
    """
    # b2caps defaults to None; treat that like a missing 'changegroup' entry
    # so the caller gets the negotiation error below.
    cgversions = (b2caps or {}).get('changegroup')
    if cgversions: # 3.1 and 3.2 ship with an empty value
        cgversions = [v for v in cgversions
                      if v in changegroup.supportedoutgoingversions(repo)]
        if not cgversions:
            raise ValueError(_('no common changegroup version'))
        version = max(cgversions)
    else:
        raise ValueError(_("server does not advertise changegroup version,"
                           " can't negotiate support for ellipsis nodes"))

    include = sorted(filter(bool, kwargs.get('includepats', [])))
    exclude = sorted(filter(bool, kwargs.get('excludepats', [])))
    newmatch = narrowspec.match(repo.root, include=include, exclude=exclude)
    if not repo.ui.configbool("experimental", "narrowservebrokenellipses"):
        # Ellipsis serving is disabled: send a regular changegroup that is
        # only filtered by the narrow matcher, followed by the narrowspec.
        outgoing = exchange._computeoutgoing(repo, heads, common)
        if not outgoing.missing:
            return
        if util.safehasattr(changegroup, 'getsubsetraw'):
            # getsubsetraw was replaced with makestream in hg in 92f1e2be8ab6
            # (2017/09/10).
            packer = changegroup.getbundler(version, repo)
            packer._narrow_matcher = lambda: newmatch
            cg = changegroup.getsubsetraw(repo, outgoing, packer, source)
        else:
            def wrappedgetbundler(orig, *bargs, **bkwargs):
                # Attach the matcher to whatever bundler makestream creates.
                newbundler = orig(*bargs, **bkwargs)
                newbundler._narrow_matcher = lambda: newmatch
                return newbundler
            with extensions.wrappedfunction(changegroup, 'getbundler',
                                            wrappedgetbundler):
                cg = changegroup.makestream(repo, outgoing, version, source)
        _addchangegrouppart(bundler, repo, cg, version)

        if include or exclude:
            narrowspecpart = bundler.newpart(specpart)
            if include:
                narrowspecpart.addparam(
                    specpart_include, '\n'.join(include), mandatory=True)
            if exclude:
                narrowspecpart.addparam(
                    specpart_exclude, '\n'.join(exclude), mandatory=True)

        return

    depth = kwargs.get('depth', None)
    if depth is not None:
        depth = int(depth)
        if depth < 1:
            raise error.Abort(_('depth must be positive, got %d') % depth)

    heads = set(heads or repo.heads())
    common = set(common or [nullid])
    oldinclude = sorted(filter(bool, kwargs.get('oldincludepats', [])))
    oldexclude = sorted(filter(bool, kwargs.get('oldexcludepats', [])))
    known = {bin(n) for n in kwargs.get('known', [])}
    if known and (oldinclude != include or oldexclude != exclude):
        # Steps:
        # 1. Send kill for "$known & ::common"
        #
        # 2. Send changegroup for ::common
        #
        # 3. Proceed.
        #
        # In the future, we can send kills for only the specific
        # nodes we know should go away or change shape, and then
        # send a data stream that tells the client something like this:
        #
        # a) apply this changegroup
        # b) apply nodes XXX, YYY, ZZZ that you already have
        # c) goto a
        #
        # until they've built up the full new state.
        # Convert to revnums and intersect with "common". The client should
        # have made it a subset of "common" already, but let's be safe.
        known = set(repo.revs("%ln & ::%ln", known, common))
        # TODO: we could send only roots() of this set, and the
        # list of nodes in common, and the client could work out
        # what to strip, instead of us explicitly sending every
        # single node.
        deadrevs = known
        def genkills():
            for r in deadrevs:
                yield killnodesignal
                yield repo.changelog.node(r)
            yield donesignal
        bundler.newpart(changespecpart, data=genkills())
        newvisit, newfull, newellipsis = _computeellipsis(
            repo, set(), common, known, newmatch)
        if newvisit:
            cg = _packellipsischangegroup(
                repo, common, newmatch, newfull, newellipsis,
                newvisit, depth, source, version)
            _addchangegrouppart(bundler, repo, cg, version)

    visitnodes, relevant_nodes, ellipsisroots = _computeellipsis(
        repo, common, heads, set(), newmatch, depth=depth)

    repo.ui.debug('Found %d relevant revs\n' % len(relevant_nodes))
    if visitnodes:
        cg = _packellipsischangegroup(
            repo, common, newmatch, relevant_nodes, ellipsisroots,
            visitnodes, depth, source, version)
        _addchangegrouppart(bundler, repo, cg, version)
335
def applyacl_narrow(repo, kwargs):
    """Restrict the requested narrow patterns to what the user's ACL allows.

    The include/exclude lists for the current user are read from the
    ``narrowacl_section`` config section, falling back to the ``default.*``
    entries. A copy of ``kwargs`` is returned in which 'includepats' (and
    'excludepats', when non-empty) hold the result of
    ``narrowspec.restrictpatterns``.

    Raises:
      error.Abort: if the user has no configured includes, or if
                   restrictpatterns reported invalid includes.
    """
    ui = repo.ui
    username = ui.shortuser(ui.username())

    default_includes = ui.configlist(narrowacl_section, 'default.includes')
    user_includes = ui.configlist(
        narrowacl_section, username + '.includes', default_includes)
    default_excludes = ui.configlist(narrowacl_section, 'default.excludes')
    user_excludes = ui.configlist(
        narrowacl_section, username + '.excludes', default_excludes)
    if not user_includes:
        raise error.Abort(_("{} configuration for user {} is empty")
                          .format(narrowacl_section, username))

    def aspathpattern(entry):
        # '*' stands for the whole repository; anything else is a path.
        if entry == '*':
            return 'path:.'
        return 'path:' + entry

    user_includes = [aspathpattern(entry) for entry in user_includes]
    user_excludes = [aspathpattern(entry) for entry in user_excludes]

    # restrictpatterns appends to this list as a side channel.
    invalid_includes = []
    req_includes, req_excludes = narrowspec.restrictpatterns(
        set(kwargs.get('includepats', [])),
        set(kwargs.get('excludepats', [])),
        user_includes, user_excludes, invalid_includes)

    if invalid_includes:
        raise error.Abort(
            _("The following includes are not accessible for {}: {}")
            .format(username, invalid_includes))

    new_args = dict(kwargs)
    new_args['includepats'] = req_includes
    if req_excludes:
        new_args['excludepats'] = req_excludes
    return new_args
372
@bundle2.parthandler(specpart, (specpart_include, specpart_exclude))
def _handlechangespec_2(op, inpart):
    """bundle2 handler for the narrow spec part.

    Saves the include/exclude patterns carried in the part parameters as the
    repository's narrowspec, adds the narrow requirement if it is missing,
    and invalidates the repository (including its file cache).
    """
    repo = op.repo
    params = inpart.params

    def patterns(key):
        # One pattern per line; an absent parameter yields an empty set.
        return set(params.get(key, '').splitlines())

    includepats = patterns(specpart_include)
    excludepats = patterns(specpart_exclude)
    narrowspec.save(repo, includepats, excludepats)
    if narrowrepo.requirement not in repo.requirements:
        repo.requirements.add(narrowrepo.requirement)
        repo._writerequirements()
    repo.invalidate(clearfilecache=True)
382
@bundle2.parthandler(changespecpart)
def _handlechangespec(op, inpart):
    """bundle2 handler for the narrow changespec part.

    Reads 4-byte chunk signals from the part until the done signal. Each
    kill signal is followed by a 20-byte changelog node; nodes present in
    the local changelog are stripped (with a backup bundle). When a backup
    bundle is produced its name is stored on ``op._widen_bundle`` so the
    changegroup handler can restore it later.

    Raises:
      error.Abort: if an unknown chunk signal is read.
    """
    repo = op.repo
    cl = repo.changelog

    # changesets which need to be stripped entirely. either they're no longer
    # needed in the new narrow spec, or the server is sending a replacement
    # in the changegroup part.
    clkills = set()

    # A changespec part contains all the updates to ellipsis nodes
    # that will happen as a result of widening or narrowing a
    # repo. All the changes that this block encounters are ellipsis
    # nodes or flags to kill an existing ellipsis.
    chunksignal = changegroup.readexactly(inpart, 4)
    while chunksignal != donesignal:
        if chunksignal == killnodesignal:
            # a node used to be an ellipsis but isn't anymore
            ck = changegroup.readexactly(inpart, 20)
            if cl.hasnode(ck):
                clkills.add(ck)
        else:
            raise error.Abort(
                _('unexpected changespec node chunk type: %s') % chunksignal)
        chunksignal = changegroup.readexactly(inpart, 4)

    if clkills:
        # preserve bookmarks that repair.strip() would otherwise strip
        bmstore = repo._bookmarks
        class dummybmstore(dict):
            def applychanges(self, repo, tr, changes):
                pass
            def recordchange(self, tr): # legacy version
                pass
        repo._bookmarks = dummybmstore()
        try:
            chgrpfile = repair.strip(op.ui, repo, list(clkills), backup=True,
                                     topic='widen')
        finally:
            # Put the real bookmark store back even if strip() fails, so the
            # repo object is never left with the dummy store installed.
            repo._bookmarks = bmstore
        if chgrpfile:
            # presence of _widen_bundle attribute activates widen handler later
            op._widen_bundle = chgrpfile
    # Set the new narrowspec if we're widening. The setnewnarrowpats() method
    # will currently always be there when using the core+narrowhg server, but
    # other servers may include a changespec part even when not widening (e.g.
    # because we're deepening a shallow repo).
    if util.safehasattr(repo, 'setnewnarrowpats'):
        repo.setnewnarrowpats()
430
def handlechangegroup_widen(op, inpart):
    """Changegroup exchange handler which restores temporarily-stripped nodes

    Re-applies the backup bundle whose name was stored on
    ``op._widen_bundle``, removes the repository's undo files and finally
    deletes the backup bundle. ``inpart`` is not used. If applying the
    bundle raises, the backup bundle is left in place.
    """
    # We saved a bundle with stripped node data we must now restore.
    # This approach is based on mercurial/repair.py@6ee26a53c111.
    repo = op.repo
    ui = op.ui

    chgrpfile = op._widen_bundle
    del op._widen_bundle
    vfs = repo.vfs

    ui.note(_("adding branch\n"))
    f = vfs.open(chgrpfile, "rb")
    try:
        gen = exchange.readbundle(ui, f, chgrpfile, vfs)
        # Decide once whether to buffer so that push and pop always match.
        silence = not ui.verbose
        if silence:
            # silence internal shuffling chatter
            ui.pushbuffer()
        try:
            if isinstance(gen, bundle2.unbundle20):
                with repo.transaction('strip') as tr:
                    bundle2.processbundle(repo, gen, lambda: tr)
            else:
                gen.apply(repo, 'strip', 'bundle:' + vfs.join(chgrpfile), True)
        finally:
            # Pop the buffer on failure too; otherwise later output would
            # keep going into a buffer nobody reads.
            if silence:
                ui.popbuffer()
    finally:
        f.close()

    # remove undo files
    for undovfs, undofile in repo.undofiles():
        try:
            undovfs.unlink(undofile)
        except OSError as e:
            # A missing undo file is fine; anything else is only reported.
            if e.errno != errno.ENOENT:
                ui.warn(_('error removing %s: %s\n') %
                        (undovfs.join(undofile), str(e)))

    # Remove partial backup only if there were no exceptions
    vfs.unlink(chgrpfile)
470
def setup():
    """Enable narrow repo support in bundle2-related extension points."""
    extensions.wrapfunction(bundle2, 'getrepocaps', getrepocaps_narrow)

    # Register the wire-protocol types of the extra getbundle arguments.
    wireproto.gboptsmap.update({
        'narrow': 'boolean',
        'depth': 'plain',
        'oldincludepats': 'csv',
        'oldexcludepats': 'csv',
        'includepats': 'csv',
        'excludepats': 'csv',
        'known': 'csv',
    })

    # Extend changegroup serving to handle requests from narrow clients.
    partsmapping = exchange.getbundle2partsmapping
    origcgfn = partsmapping['changegroup']

    def wrappedcgfn(*args, **kwargs):
        repo = args[1]
        if repo.ui.has_section(narrowacl_section):
            # An ACL section forces the narrow path, with the requested
            # patterns restricted by the ACL.
            restricted = applyacl_narrow(repo, kwargs)
            getbundlechangegrouppart_narrow(*args, **restricted)
            return
        if kwargs.get('narrow', False):
            getbundlechangegrouppart_narrow(*args, **kwargs)
            return
        origcgfn(*args, **kwargs)

    partsmapping['changegroup'] = wrappedcgfn

    # Extend changegroup receiver so client can fixup after widen requests.
    handlermapping = bundle2.parthandlermapping
    origcghandler = handlermapping['changegroup']

    def wrappedcghandler(op, inpart):
        origcghandler(op, inpart)
        # _widen_bundle is set by the changespec handler after a strip.
        if util.safehasattr(op, '_widen_bundle'):
            handlechangegroup_widen(op, inpart)

    # Carry over the params attribute of the handler being replaced.
    wrappedcghandler.params = origcghandler.params
    handlermapping['changegroup'] = wrappedcghandler