diff hgext/narrow/narrowbundle2.py @ 36079:a2a6e724d61a

narrow: import experimental extension from narrowhg revision cb51d673e9c5

Adjustments:
 * renamed src to hgext/narrow
 * marked extension experimental
 * added correct copyright header where it was missing
 * updated hgrc extension enable line in library.sh
 * renamed library.sh to narrow-library.sh
 * dropped all files from repo root as they're not interesting
 * dropped test-pyflakes.t, test-check-code.t and test-check-py3-compat.t
 * renamed remaining tests to all be test-narrow-* when they didn't already
 * fixed test-narrow-expanddirstate.t to refer to narrow and not narrowhg
 * fixed tests that wanted `update -C .` instead of `merge --abort`
 * corrected a two-space indent in narrowspec.py
 * added a missing _() in narrowcommands.py
 * fixed imports to pass the import checker
 * narrow only adds its --include and --exclude to clone if sparse isn't enabled to avoid breaking test-duplicateoptions.py. This is a kludge, and we'll need to come up with a better solution in the future.

These were more or less the minimum to import something that would pass tests and not create a bunch of files we'll never use.

Changes I intend to make as followups:
 * rework the test-narrow-*-tree.t tests to use the new testcases functionality in run-tests.py
 * remove lots of monkeypatches of core things

Differential Revision: https://phab.mercurial-scm.org/D1974
author Augie Fackler <augie@google.com>
date Mon, 29 Jan 2018 16:19:33 -0500
parents
children 48c12b440b4a
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/narrow/narrowbundle2.py	Mon Jan 29 16:19:33 2018 -0500
@@ -0,0 +1,503 @@
+# narrowbundle2.py - bundle2 extensions for narrow repository support
+#
+# Copyright 2017 Google, Inc.
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from __future__ import absolute_import
+
+import collections
+import errno
+import struct
+
+from mercurial.i18n import _
+from mercurial.node import (
+    bin,
+    nullid,
+    nullrev,
+)
+from mercurial import (
+    bundle2,
+    changegroup,
+    dagutil,
+    error,
+    exchange,
+    extensions,
+    repair,
+    util,
+    wireproto,
+)
+
+from . import (
+    narrowrepo,
+    narrowspec,
+)
+
+narrowcap = 'narrow'
+narrowacl_section = 'narrowhgacl'
+changespecpart = narrowcap + ':changespec'
+specpart = narrowcap + ':spec'
+specpart_include = 'include'
+specpart_exclude = 'exclude'
+killnodesignal = 'KILL'
+donesignal = 'DONE'
+elidedcsheader = '>20s20s20sl' # cset id, p1, p2, len(text)
+elidedmfheader = '>20s20s20s20sl' # manifest id, p1, p2, link id, len(text)
+csheadersize = struct.calcsize(elidedcsheader)
+mfheadersize = struct.calcsize(elidedmfheader)
+
+# When advertising capabilities, always include narrow clone support.
+def getrepocaps_narrow(orig, repo, **kwargs):
+    caps = orig(repo, **kwargs)
+    caps[narrowcap] = ['v0']
+    return caps
+
+def _computeellipsis(repo, common, heads, known, match, depth=None):
+    """Compute the shape of a narrowed DAG.
+
+    Args:
+      repo: The repository we're transferring.
+      common: The roots of the DAG range we're transferring.
+              May be just [nullid], which means all ancestors of heads.
+      heads: The heads of the DAG range we're transferring.
+      match: The narrowmatcher that allows us to identify relevant changes.
+      depth: If not None, only consider nodes to be full nodes if they are at
+             most depth changesets away from one of heads.
+
+    Returns:
+      A tuple of (visitnodes, relevant_nodes, ellipsisroots) where:
+
+        visitnodes: The list of nodes (either full or ellipsis) which
+                    need to be sent to the client.
+        relevant_nodes: The set of changelog nodes which change a file inside
+                 the narrowspec. The client needs these as non-ellipsis nodes.
+        ellipsisroots: A dict of {rev: parents} that is used in
+                       narrowchangegroup to produce ellipsis nodes with the
+                       correct parents.
+    """
+    cl = repo.changelog
+    mfl = repo.manifestlog
+
+    cldag = dagutil.revlogdag(cl)
+    # dagutil does not like nullid/nullrev
+    commonrevs = cldag.internalizeall(common - set([nullid])) | set([nullrev])
+    headsrevs = cldag.internalizeall(heads)
+    if depth:
+        revdepth = {h: 0 for h in headsrevs}
+
+    ellipsisheads = collections.defaultdict(set)
+    ellipsisroots = collections.defaultdict(set)
+
+    def addroot(head, curchange):
+        """Add a root to an ellipsis head, splitting heads with 3 roots."""
+        ellipsisroots[head].add(curchange)
+        # Recursively split ellipsis heads with 3 roots by finding the
+        # roots' youngest common descendant which is an elided merge commit.
+        # That descendant takes 2 of the 3 roots as its own, and becomes a
+        # root of the head.
+        while len(ellipsisroots[head]) > 2:
+            child, roots = splithead(head)
+            splitroots(head, child, roots)
+            head = child  # Recurse in case we just added a 3rd root
+
+    def splitroots(head, child, roots):
+        ellipsisroots[head].difference_update(roots)
+        ellipsisroots[head].add(child)
+        ellipsisroots[child].update(roots)
+        ellipsisroots[child].discard(child)
+
+    def splithead(head):
+        r1, r2, r3 = sorted(ellipsisroots[head])
+        for nr1, nr2 in ((r2, r3), (r1, r3), (r1, r2)):
+            mid = repo.revs('sort(merge() & %d::%d & %d::%d, -rev)',
+                            nr1, head, nr2, head)
+            for j in mid:
+                if j == nr2:
+                    return nr2, (nr1, nr2)
+                if j not in ellipsisroots or len(ellipsisroots[j]) < 2:
+                    return j, (nr1, nr2)
+        raise error.Abort('Failed to split up ellipsis node! head: %d, '
+                          'roots: %d %d %d' % (head, r1, r2, r3))
+
+    missing = list(cl.findmissingrevs(common=commonrevs, heads=headsrevs))
+    visit = reversed(missing)
+    relevant_nodes = set()
+    visitnodes = map(cl.node, missing)
+    required = set(headsrevs) | known
+    for rev in visit:
+        clrev = cl.changelogrevision(rev)
+        ps = cldag.parents(rev)
+        if depth is not None:
+            curdepth = revdepth[rev]
+            for p in ps:
+                revdepth[p] = min(curdepth + 1, revdepth.get(p, depth + 1))
+        needed = False
+        shallow_enough = depth is None or revdepth[rev] <= depth
+        if shallow_enough:
+            curmf = mfl[clrev.manifest].read()
+            if ps:
+                # We choose to not trust the changed files list in
+                # changesets because it's not always correct. TODO: could
+                # we trust it for the non-merge case?
+                p1mf = mfl[cl.changelogrevision(ps[0]).manifest].read()
+                needed = any(match(f) for f in curmf.diff(p1mf).iterkeys())
+                if not needed and len(ps) > 1:
+                    # For merge changes, the list of changed files is not
+                    # helpful, since we need to emit the merge if a file
+                    # in the narrow spec has changed on either side of the
+                    # merge. As a result, we do a manifest diff to check.
+                    p2mf = mfl[cl.changelogrevision(ps[1]).manifest].read()
+                    needed = any(match(f) for f in curmf.diff(p2mf).iterkeys())
+            else:
+                # For a root node, we need to include the node if any
+                # files in the node match the narrowspec.
+                needed = any(match(f) for f in curmf)
+
+        if needed:
+            for head in ellipsisheads[rev]:
+                addroot(head, rev)
+            for p in ps:
+                required.add(p)
+            relevant_nodes.add(cl.node(rev))
+        else:
+            if not ps:
+                ps = [nullrev]
+            if rev in required:
+                for head in ellipsisheads[rev]:
+                    addroot(head, rev)
+                for p in ps:
+                    ellipsisheads[p].add(rev)
+            else:
+                for p in ps:
+                    ellipsisheads[p] |= ellipsisheads[rev]
+
+    # add common changesets as roots of their reachable ellipsis heads
+    for c in commonrevs:
+        for head in ellipsisheads[c]:
+            addroot(head, c)
+    return visitnodes, relevant_nodes, ellipsisroots
+
+def _packellipsischangegroup(repo, common, match, relevant_nodes,
+                             ellipsisroots, visitnodes, depth, source, version):
+    if version in ('01', '02'):
+        raise error.Abort(
+            'ellipsis nodes require at least cg3 on client and server, '
+            'but negotiated version %s' % version)
+    # We wrap cg1packer.revchunk, using a side channel to pass
+    # relevant_nodes into that area. Then if linknode isn't in the
+    # set, we know we have an ellipsis node and we should defer
+    # sending that node's data. We override close() to detect
+    # pending ellipsis nodes and flush them.
+    packer = changegroup.getbundler(version, repo)
+    # Let the packer have access to the narrow matcher so it can
+    # omit filelogs and dirlogs as needed
+    packer._narrow_matcher = lambda : match
+    # Give the packer the list of nodes which should not be
+    # ellipsis nodes. We store this rather than the set of nodes
+    # that should be an ellipsis because for very large histories
+    # we expect this to be significantly smaller.
+    packer.full_nodes = relevant_nodes
+    # Maps ellipsis revs to their roots at the changelog level.
+    packer.precomputed_ellipsis = ellipsisroots
+    # Maps CL revs to per-revlog revisions. Cleared in close() at
+    # the end of each group.
+    packer.clrev_to_localrev = {}
+    packer.next_clrev_to_localrev = {}
+    # Maps changelog nodes to changelog revs. Filled in once
+    # during changelog stage and then left unmodified.
+    packer.clnode_to_rev = {}
+    packer.changelog_done = False
+    # If true, informs the packer that it is serving shallow content and might
+    # need to pack file contents not introduced by the changes being packed.
+    packer.is_shallow = depth is not None
+
+    return packer.generate(common, visitnodes, False, source)
+
+# Serve a changegroup for a client with a narrow clone.
+def getbundlechangegrouppart_narrow(bundler, repo, source,
+                                    bundlecaps=None, b2caps=None, heads=None,
+                                    common=None, **kwargs):
+    cgversions = b2caps.get('changegroup')
+    getcgkwargs = {}
+    if cgversions:  # 3.1 and 3.2 ship with an empty value
+        cgversions = [v for v in cgversions
+                      if v in changegroup.supportedoutgoingversions(repo)]
+        if not cgversions:
+            raise ValueError(_('no common changegroup version'))
+        version = getcgkwargs['version'] = max(cgversions)
+    else:
+        raise ValueError(_("server does not advertise changegroup version,"
+                           " can't negotiate support for ellipsis nodes"))
+
+    include = sorted(filter(bool, kwargs.get('includepats', [])))
+    exclude = sorted(filter(bool, kwargs.get('excludepats', [])))
+    newmatch = narrowspec.match(repo.root, include=include, exclude=exclude)
+    if not repo.ui.configbool("experimental", "narrowservebrokenellipses"):
+        outgoing = exchange._computeoutgoing(repo, heads, common)
+        if not outgoing.missing:
+            return
+        if util.safehasattr(changegroup, 'getsubsetraw'):
+            # getsubsetraw was replaced with makestream in hg in 92f1e2be8ab6
+            # (2017/09/10).
+            packer = changegroup.getbundler(version, repo)
+            packer._narrow_matcher = lambda : newmatch
+            cg = changegroup.getsubsetraw(repo, outgoing, packer, source)
+        else:
+            def wrappedgetbundler(orig, *args, **kwargs):
+                bundler = orig(*args, **kwargs)
+                bundler._narrow_matcher = lambda : newmatch
+                return bundler
+            with extensions.wrappedfunction(changegroup, 'getbundler',
+                                            wrappedgetbundler):
+                cg = changegroup.makestream(repo, outgoing, version, source)
+        part = bundler.newpart('changegroup', data=cg)
+        part.addparam('version', version)
+        if 'treemanifest' in repo.requirements:
+            part.addparam('treemanifest', '1')
+
+        if include or exclude:
+            narrowspecpart = bundler.newpart(specpart)
+            if include:
+                narrowspecpart.addparam(
+                    specpart_include, '\n'.join(include), mandatory=True)
+            if exclude:
+                narrowspecpart.addparam(
+                    specpart_exclude, '\n'.join(exclude), mandatory=True)
+
+        return
+
+    depth = kwargs.get('depth', None)
+    if depth is not None:
+        depth = int(depth)
+        if depth < 1:
+            raise error.Abort(_('depth must be positive, got %d') % depth)
+
+    heads = set(heads or repo.heads())
+    common = set(common or [nullid])
+    oldinclude = sorted(filter(bool, kwargs.get('oldincludepats', [])))
+    oldexclude = sorted(filter(bool, kwargs.get('oldexcludepats', [])))
+    known = {bin(n) for n in kwargs.get('known', [])}
+    if known and (oldinclude != include or oldexclude != exclude):
+        # Steps:
+        # 1. Send kill for "$known & ::common"
+        #
+        # 2. Send changegroup for ::common
+        #
+        # 3. Proceed.
+        #
+        # In the future, we can send kills for only the specific
+        # nodes we know should go away or change shape, and then
+        # send a data stream that tells the client something like this:
+        #
+        # a) apply this changegroup
+        # b) apply nodes XXX, YYY, ZZZ that you already have
+        # c) goto a
+        #
+        # until they've built up the full new state.
+        # Convert to revnums and intersect with "common". The client should
+        # have made it a subset of "common" already, but let's be safe.
+        known = set(repo.revs("%ln & ::%ln", known, common))
+        # TODO: we could send only roots() of this set, and the
+        # list of nodes in common, and the client could work out
+        # what to strip, instead of us explicitly sending every
+        # single node.
+        deadrevs = known
+        def genkills():
+            for r in deadrevs:
+                yield killnodesignal
+                yield repo.changelog.node(r)
+            yield donesignal
+        bundler.newpart(changespecpart, data=genkills())
+        newvisit, newfull, newellipsis = _computeellipsis(
+            repo, set(), common, known, newmatch)
+        if newvisit:
+            cg = _packellipsischangegroup(
+                repo, common, newmatch, newfull, newellipsis,
+                newvisit, depth, source, version)
+            part = bundler.newpart('changegroup', data=cg)
+            part.addparam('version', version)
+            if 'treemanifest' in repo.requirements:
+                part.addparam('treemanifest', '1')
+
+    visitnodes, relevant_nodes, ellipsisroots = _computeellipsis(
+        repo, common, heads, set(), newmatch, depth=depth)
+
+    repo.ui.debug('Found %d relevant revs\n' % len(relevant_nodes))
+    if visitnodes:
+        cg = _packellipsischangegroup(
+            repo, common, newmatch, relevant_nodes, ellipsisroots,
+            visitnodes, depth, source, version)
+        part = bundler.newpart('changegroup', data=cg)
+        part.addparam('version', version)
+        if 'treemanifest' in repo.requirements:
+            part.addparam('treemanifest', '1')
+
+def applyacl_narrow(repo, kwargs):
+    username = repo.ui.shortuser(repo.ui.username())
+    user_includes = repo.ui.configlist(
+        narrowacl_section, username + '.includes',
+        repo.ui.configlist(narrowacl_section, 'default.includes'))
+    user_excludes = repo.ui.configlist(
+        narrowacl_section, username + '.excludes',
+        repo.ui.configlist(narrowacl_section, 'default.excludes'))
+    if not user_includes:
+        raise error.Abort(_("{} configuration for user {} is empty")
+                          .format(narrowacl_section, username))
+
+    user_includes = [
+        'path:.' if p == '*' else 'path:' + p for p in user_includes]
+    user_excludes = [
+        'path:.' if p == '*' else 'path:' + p for p in user_excludes]
+
+    req_includes = set(kwargs.get('includepats', []))
+    req_excludes = set(kwargs.get('excludepats', []))
+
+    invalid_includes = []
+    req_includes, req_excludes = narrowspec.restrictpatterns(
+        req_includes, req_excludes,
+        user_includes, user_excludes, invalid_includes)
+
+    if invalid_includes:
+        raise error.Abort(
+            _("The following includes are not accessible for {}: {}")
+            .format(username, invalid_includes))
+
+    new_args = {}
+    new_args.update(kwargs)
+    new_args['includepats'] = req_includes
+    if req_excludes:
+        new_args['excludepats'] = req_excludes
+    return new_args
+
+@bundle2.parthandler(specpart, (specpart_include, specpart_exclude))
+def _handlechangespec_2(op, inpart):
+    includepats = set(inpart.params.get(specpart_include, '').splitlines())
+    excludepats = set(inpart.params.get(specpart_exclude, '').splitlines())
+    narrowspec.save(op.repo, includepats, excludepats)
+    if not narrowrepo.requirement in op.repo.requirements:
+        op.repo.requirements.add(narrowrepo.requirement)
+        op.repo._writerequirements()
+    op.repo.invalidate(clearfilecache=True)
+
+@bundle2.parthandler(changespecpart)
+def _handlechangespec(op, inpart):
+    repo = op.repo
+    cl = repo.changelog
+
+    # changesets which need to be stripped entirely. either they're no longer
+    # needed in the new narrow spec, or the server is sending a replacement
+    # in the changegroup part.
+    clkills = set()
+
+    # A changespec part contains all the updates to ellipsis nodes
+    # that will happen as a result of widening or narrowing a
+    # repo. All the changes that this block encounters are ellipsis
+    # nodes or flags to kill an existing ellipsis.
+    chunksignal = changegroup.readexactly(inpart, 4)
+    while chunksignal != donesignal:
+        if chunksignal == killnodesignal:
+            # a node used to be an ellipsis but isn't anymore
+            ck = changegroup.readexactly(inpart, 20)
+            if cl.hasnode(ck):
+                clkills.add(ck)
+        else:
+            raise error.Abort(
+                _('unexpected changespec node chunk type: %s') % chunksignal)
+        chunksignal = changegroup.readexactly(inpart, 4)
+
+    if clkills:
+        # preserve bookmarks that repair.strip() would otherwise strip
+        bmstore = repo._bookmarks
+        class dummybmstore(dict):
+            def applychanges(self, repo, tr, changes):
+                pass
+            def recordchange(self, tr): # legacy version
+                pass
+        repo._bookmarks = dummybmstore()
+        chgrpfile = repair.strip(op.ui, repo, list(clkills), backup=True,
+                                 topic='widen')
+        repo._bookmarks = bmstore
+        if chgrpfile:
+            # presence of _widen_bundle attribute activates widen handler later
+            op._widen_bundle = chgrpfile
+    # Set the new narrowspec if we're widening. The setnewnarrowpats() method
+    # will currently always be there when using the core+narrowhg server, but
+    # other servers may include a changespec part even when not widening (e.g.
+    # because we're deepening a shallow repo).
+    if util.safehasattr(repo, 'setnewnarrowpats'):
+        repo.setnewnarrowpats()
+
+def handlechangegroup_widen(op, inpart):
+    """Changegroup exchange handler which restores temporarily-stripped nodes"""
+    # We saved a bundle with stripped node data we must now restore.
+    # This approach is based on mercurial/repair.py@6ee26a53c111.
+    repo = op.repo
+    ui = op.ui
+
+    chgrpfile = op._widen_bundle
+    del op._widen_bundle
+    vfs = repo.vfs
+
+    ui.note(_("adding branch\n"))
+    f = vfs.open(chgrpfile, "rb")
+    try:
+        gen = exchange.readbundle(ui, f, chgrpfile, vfs)
+        if not ui.verbose:
+            # silence internal shuffling chatter
+            ui.pushbuffer()
+        if isinstance(gen, bundle2.unbundle20):
+            with repo.transaction('strip') as tr:
+                bundle2.processbundle(repo, gen, lambda: tr)
+        else:
+            gen.apply(repo, 'strip', 'bundle:' + vfs.join(chgrpfile), True)
+        if not ui.verbose:
+            ui.popbuffer()
+    finally:
+        f.close()
+
+    # remove undo files
+    for undovfs, undofile in repo.undofiles():
+        try:
+            undovfs.unlink(undofile)
+        except OSError as e:
+            if e.errno != errno.ENOENT:
+                ui.warn(_('error removing %s: %s\n') %
+                        (undovfs.join(undofile), str(e)))
+
+    # Remove partial backup only if there were no exceptions
+    vfs.unlink(chgrpfile)
+
+def setup():
+    """Enable narrow repo support in bundle2-related extension points."""
+    extensions.wrapfunction(bundle2, 'getrepocaps', getrepocaps_narrow)
+
+    wireproto.gboptsmap['narrow'] = 'boolean'
+    wireproto.gboptsmap['depth'] = 'plain'
+    wireproto.gboptsmap['oldincludepats'] = 'csv'
+    wireproto.gboptsmap['oldexcludepats'] = 'csv'
+    wireproto.gboptsmap['includepats'] = 'csv'
+    wireproto.gboptsmap['excludepats'] = 'csv'
+    wireproto.gboptsmap['known'] = 'csv'
+
+    # Extend changegroup serving to handle requests from narrow clients.
+    origcgfn = exchange.getbundle2partsmapping['changegroup']
+    def wrappedcgfn(*args, **kwargs):
+        repo = args[1]
+        if repo.ui.has_section(narrowacl_section):
+            getbundlechangegrouppart_narrow(
+                *args, **applyacl_narrow(repo, kwargs))
+        elif kwargs.get('narrow', False):
+            getbundlechangegrouppart_narrow(*args, **kwargs)
+        else:
+            origcgfn(*args, **kwargs)
+    exchange.getbundle2partsmapping['changegroup'] = wrappedcgfn
+
+    # Extend changegroup receiver so client can fixup after widen requests.
+    origcghandler = bundle2.parthandlermapping['changegroup']
+    def wrappedcghandler(op, inpart):
+        origcghandler(op, inpart)
+        if util.safehasattr(op, '_widen_bundle'):
+            handlechangegroup_widen(op, inpart)
+    wrappedcghandler.params = origcghandler.params
+    bundle2.parthandlermapping['changegroup'] = wrappedcghandler