exchangev2: recognize narrow patterns when pulling
authorGregory Szorc <gregory.szorc@gmail.com>
Wed, 26 Sep 2018 14:38:43 -0700
changeset 40327 55836a34f41b
parent 40326 fed697fa1734
child 40328 2c55716f8a1c
exchangev2: recognize narrow patterns when pulling pulloperation instances were recently taught to record file include and exclude patterns to facilitate narrow file transfer. Teaching the exchangev2 code to transfer a subset of files is as simple as constructing a narrow matcher from these patterns and filtering all seen file paths through it. Keep in mind that this change only influences file data: we're still fetching all changeset and manifest data. So, there's still a ton of "partial clone" to implement in exchangev2. On a personal note, I derive gratification that this feature requires very few lines of new code to implement. To test this, we implemented a minimal extension which allows us to specify --include/--exclude to clone. While the narrow extension provides these arguments, I explicitly wanted to test this functionality without the narrow extension enabled, as that extension monkeypatches various things and I want to isolate the behavior of core Mercurial. Differential Revision: https://phab.mercurial-scm.org/D5132
mercurial/exchangev2.py
tests/pullext.py
tests/test-wireproto-exchangev2.t
--- a/mercurial/exchangev2.py	Tue Oct 09 08:50:13 2018 -0700
+++ b/mercurial/exchangev2.py	Wed Sep 26 14:38:43 2018 -0700
@@ -19,6 +19,7 @@
     bookmarks,
     error,
     mdiff,
+    narrowspec,
     phases,
     pycompat,
     setdiscovery,
@@ -30,6 +31,23 @@
     remote = pullop.remote
     tr = pullop.trmanager.transaction()
 
+    # We don't use the repo's narrow matcher here because the patterns passed
+    # to exchange.pull() could be different.
+    narrowmatcher = narrowspec.match(repo.root,
+                                     # Empty maps to nevermatcher. So always
+                                     # set includes if missing.
+                                     pullop.includepats or {'path:.'},
+                                     pullop.excludepats)
+
+    if pullop.includepats or pullop.excludepats:
+        pathfilter = {}
+        if pullop.includepats:
+            pathfilter[b'include'] = sorted(pullop.includepats)
+        if pullop.excludepats:
+            pathfilter[b'exclude'] = sorted(pullop.excludepats)
+    else:
+        pathfilter = None
+
     # Figure out what needs to be fetched.
     common, fetch, remoteheads = _pullchangesetdiscovery(
         repo, remote, pullop.heads, abortwhenunrelated=pullop.force)
@@ -63,8 +81,8 @@
 
     # Find all file nodes referenced by added manifests and fetch those
     # revisions.
-    fnodes = _derivefilesfrommanifests(repo, manres['added'])
-    _fetchfilesfromcsets(repo, tr, remote, fnodes, csetres['added'],
+    fnodes = _derivefilesfrommanifests(repo, narrowmatcher, manres['added'])
+    _fetchfilesfromcsets(repo, tr, remote, pathfilter, fnodes, csetres['added'],
                          manres['linkrevs'])
 
 def _pullchangesetdiscovery(repo, remote, heads, abortwhenunrelated=True):
@@ -315,7 +333,7 @@
         'linkrevs': linkrevs,
     }
 
-def _derivefilesfrommanifests(repo, manifestnodes):
+def _derivefilesfrommanifests(repo, matcher, manifestnodes):
     """Determine what file nodes are relevant given a set of manifest nodes.
 
     Returns a dict mapping file paths to dicts of file node to first manifest
@@ -340,7 +358,8 @@
             md = m.readfast()
 
             for path, fnode in md.items():
-                fnodes[path].setdefault(fnode, manifestnode)
+                if matcher(path):
+                    fnodes[path].setdefault(fnode, manifestnode)
 
             progress.increment()
 
@@ -421,7 +440,8 @@
                     locallinkrevs[path].__getitem__,
                     weakref.proxy(tr))
 
-def _fetchfilesfromcsets(repo, tr, remote, fnodes, csets, manlinkrevs):
+def _fetchfilesfromcsets(repo, tr, remote, pathfilter, fnodes, csets,
+                         manlinkrevs):
     """Fetch file data from explicit changeset revisions."""
 
     def iterrevisions(objs, remaining, progress):
@@ -481,6 +501,9 @@
                 b'haveparents': True,
             }
 
+            if pathfilter:
+                args[b'pathfilter'] = pathfilter
+
             objs = e.callcommand(b'filesdata', args).result()
 
             # First object is an overall header.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/pullext.py	Wed Sep 26 14:38:43 2018 -0700
@@ -0,0 +1,38 @@
+# pullext.py - Simple extension to test pulling
+#
+# Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from __future__ import absolute_import
+
+from mercurial.i18n import _
+from mercurial import (
+    commands,
+    extensions,
+    localrepo,
+    repository,
+)
+
+def clonecommand(orig, ui, repo, *args, **kwargs):
+    if kwargs.get(r'include') or kwargs.get(r'exclude'):
+        kwargs[r'narrow'] = True
+
+    return orig(ui, repo, *args, **kwargs)
+
+def featuresetup(ui, features):
+    features.add(repository.NARROW_REQUIREMENT)
+
+def extsetup(ui):
+    entry = extensions.wrapcommand(commands.table, 'clone', clonecommand)
+
+    hasinclude = any(x[1] == 'include' for x in entry[1])
+
+    if not hasinclude:
+        entry[1].append(('', 'include', [],
+                         _('pattern of file/directory to clone')))
+        entry[1].append(('', 'exclude', [],
+                         _('pattern of file/directory to not clone')))
+
+    localrepo.featuresetupfuncs.add(featuresetup)
--- a/tests/test-wireproto-exchangev2.t	Tue Oct 09 08:50:13 2018 -0700
+++ b/tests/test-wireproto-exchangev2.t	Wed Sep 26 14:38:43 2018 -0700
@@ -619,3 +619,347 @@
   $ hg -R client-bookmarks bookmarks
      book-1                    2:cd2534766bec
      book-2                    2:cd2534766bec
+
+  $ killdaemons.py
+
+Let's set up a slightly more complicated server
+
+  $ hg init server-2
+  $ enablehttpv2 server-2
+  $ cd server-2
+  $ mkdir dir0 dir1
+  $ echo a0 > a
+  $ echo b0 > b
+  $ hg -q commit -A -m 'commit 0'
+  $ echo c0 > dir0/c
+  $ echo d0 > dir0/d
+  $ hg -q commit -A -m 'commit 1'
+  $ echo e0 > dir1/e
+  $ echo f0 > dir1/f
+  $ hg -q commit -A -m 'commit 2'
+  $ echo c1 > dir0/c
+  $ echo e1 > dir1/e
+  $ hg commit -m 'commit 3'
+  $ hg serve -p $HGPORT -d --pid-file hg.pid -E error.log
+  $ cat hg.pid > $DAEMON_PIDS
+
+  $ cd ..
+
+Narrow clone only fetches some files
+
+  $ hg --config extensions.pullext=$TESTDIR/pullext.py --debug clone -U --include dir0/ http://localhost:$HGPORT/ client-narrow-0
+  using http://localhost:$HGPORT/
+  sending capabilities command
+  query 1; heads
+  sending 2 commands
+  sending command heads: {}
+  sending command known: {
+    'nodes': []
+  }
+  received frame(size=9; request=1; stream=2; streamflags=stream-begin; type=stream-settings; flags=eos)
+  received frame(size=11; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+  received frame(size=22; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+  received frame(size=0; request=1; stream=2; streamflags=; type=command-response; flags=eos)
+  received frame(size=11; request=3; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+  received frame(size=1; request=3; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+  received frame(size=0; request=3; stream=2; streamflags=; type=command-response; flags=eos)
+  sending 1 commands
+  sending command changesetdata: {
+    'fields': set([
+      'bookmarks',
+      'parents',
+      'phase',
+      'revision'
+    ]),
+    'revisions': [
+      {
+        'heads': [
+          '\x97v_\xc3\xcdbO\xd1\xfa\x01v\x93,!\xff\xd1j\xdfC.'
+        ],
+        'roots': [],
+        'type': 'changesetdagrange'
+      }
+    ]
+  }
+  received frame(size=9; request=1; stream=2; streamflags=stream-begin; type=stream-settings; flags=eos)
+  received frame(size=11; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+  received frame(size=783; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+  received frame(size=0; request=1; stream=2; streamflags=; type=command-response; flags=eos)
+  add changeset 3390ef850073
+  add changeset b709380892b1
+  add changeset 47fe012ab237
+  add changeset 97765fc3cd62
+  checking for updated bookmarks
+  sending 1 commands
+  sending command manifestdata: {
+    'fields': set([
+      'parents',
+      'revision'
+    ]),
+    'haveparents': True,
+    'nodes': [
+      '\x99/Gy\x02\x9a=\xf8\xd0fm\x00\xbb\x92OicN&A',
+      '|2 \x1a\xa3\xa1R\xa9\xe6\xa9"+?\xa8\xd0\xe3\x0f\xc2V\xe8',
+      '\x8d\xd0W<\x7f\xaf\xe2\x04F\xcc\xea\xac\x05N\xea\xa4x\x91M\xdb',
+      '113\x85\xf2!\x8b\x08^\xb2Z\x821\x1e*\xdd\x0e\xeb\x8c3'
+    ],
+    'tree': ''
+  }
+  received frame(size=9; request=1; stream=2; streamflags=stream-begin; type=stream-settings; flags=eos)
+  received frame(size=11; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+  received frame(size=967; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+  received frame(size=0; request=1; stream=2; streamflags=; type=command-response; flags=eos)
+  sending 1 commands
+  sending command filesdata: {
+    'fields': set([
+      'parents',
+      'revision'
+    ]),
+    'haveparents': True,
+    'pathfilter': {
+      'include': [
+        'path:dir0'
+      ]
+    },
+    'revisions': [
+      {
+        'nodes': [
+          '3\x90\xef\x85\x00s\xfb\xc2\xf0\xdf\xff"D4,\x8e\x92)\x01:',
+          '\xb7\t8\x08\x92\xb1\x93\xc1\t\x1d:\x81\x7fp`R\xe3F\x82\x1b',
+          'G\xfe\x01*\xb27\xa8\xc7\xfc\x0cx\xf9\xf2mXf\xee\xf3\xf8%',
+          '\x97v_\xc3\xcdbO\xd1\xfa\x01v\x93,!\xff\xd1j\xdfC.'
+        ],
+        'type': 'changesetexplicit'
+      }
+    ]
+  }
+  received frame(size=9; request=1; stream=2; streamflags=stream-begin; type=stream-settings; flags=eos)
+  received frame(size=11; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+  received frame(size=449; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+  received frame(size=0; request=1; stream=2; streamflags=; type=command-response; flags=eos)
+  updating the branch cache
+  new changesets 3390ef850073:97765fc3cd62
+  (sent 5 HTTP requests and * bytes; received * bytes in responses) (glob)
+
+#if reporevlogstore
+  $ find client-narrow-0/.hg/store -type f -name '*.i' | sort
+  client-narrow-0/.hg/store/00changelog.i
+  client-narrow-0/.hg/store/00manifest.i
+  client-narrow-0/.hg/store/data/dir0/c.i
+  client-narrow-0/.hg/store/data/dir0/d.i
+#endif
+
+--exclude by itself works
+
+  $ hg --config extensions.pullext=$TESTDIR/pullext.py --debug clone -U --exclude dir0/ http://localhost:$HGPORT/ client-narrow-1
+  using http://localhost:$HGPORT/
+  sending capabilities command
+  query 1; heads
+  sending 2 commands
+  sending command heads: {}
+  sending command known: {
+    'nodes': []
+  }
+  received frame(size=9; request=1; stream=2; streamflags=stream-begin; type=stream-settings; flags=eos)
+  received frame(size=11; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+  received frame(size=22; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+  received frame(size=0; request=1; stream=2; streamflags=; type=command-response; flags=eos)
+  received frame(size=11; request=3; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+  received frame(size=1; request=3; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+  received frame(size=0; request=3; stream=2; streamflags=; type=command-response; flags=eos)
+  sending 1 commands
+  sending command changesetdata: {
+    'fields': set([
+      'bookmarks',
+      'parents',
+      'phase',
+      'revision'
+    ]),
+    'revisions': [
+      {
+        'heads': [
+          '\x97v_\xc3\xcdbO\xd1\xfa\x01v\x93,!\xff\xd1j\xdfC.'
+        ],
+        'roots': [],
+        'type': 'changesetdagrange'
+      }
+    ]
+  }
+  received frame(size=9; request=1; stream=2; streamflags=stream-begin; type=stream-settings; flags=eos)
+  received frame(size=11; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+  received frame(size=783; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+  received frame(size=0; request=1; stream=2; streamflags=; type=command-response; flags=eos)
+  add changeset 3390ef850073
+  add changeset b709380892b1
+  add changeset 47fe012ab237
+  add changeset 97765fc3cd62
+  checking for updated bookmarks
+  sending 1 commands
+  sending command manifestdata: {
+    'fields': set([
+      'parents',
+      'revision'
+    ]),
+    'haveparents': True,
+    'nodes': [
+      '\x99/Gy\x02\x9a=\xf8\xd0fm\x00\xbb\x92OicN&A',
+      '|2 \x1a\xa3\xa1R\xa9\xe6\xa9"+?\xa8\xd0\xe3\x0f\xc2V\xe8',
+      '\x8d\xd0W<\x7f\xaf\xe2\x04F\xcc\xea\xac\x05N\xea\xa4x\x91M\xdb',
+      '113\x85\xf2!\x8b\x08^\xb2Z\x821\x1e*\xdd\x0e\xeb\x8c3'
+    ],
+    'tree': ''
+  }
+  received frame(size=9; request=1; stream=2; streamflags=stream-begin; type=stream-settings; flags=eos)
+  received frame(size=11; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+  received frame(size=967; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+  received frame(size=0; request=1; stream=2; streamflags=; type=command-response; flags=eos)
+  sending 1 commands
+  sending command filesdata: {
+    'fields': set([
+      'parents',
+      'revision'
+    ]),
+    'haveparents': True,
+    'pathfilter': {
+      'exclude': [
+        'path:dir0'
+      ],
+      'include': [
+        'path:.'
+      ]
+    },
+    'revisions': [
+      {
+        'nodes': [
+          '3\x90\xef\x85\x00s\xfb\xc2\xf0\xdf\xff"D4,\x8e\x92)\x01:',
+          '\xb7\t8\x08\x92\xb1\x93\xc1\t\x1d:\x81\x7fp`R\xe3F\x82\x1b',
+          'G\xfe\x01*\xb27\xa8\xc7\xfc\x0cx\xf9\xf2mXf\xee\xf3\xf8%',
+          '\x97v_\xc3\xcdbO\xd1\xfa\x01v\x93,!\xff\xd1j\xdfC.'
+        ],
+        'type': 'changesetexplicit'
+      }
+    ]
+  }
+  received frame(size=9; request=1; stream=2; streamflags=stream-begin; type=stream-settings; flags=eos)
+  received frame(size=11; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+  received frame(size=709; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+  received frame(size=0; request=1; stream=2; streamflags=; type=command-response; flags=eos)
+  updating the branch cache
+  new changesets 3390ef850073:97765fc3cd62
+  (sent 5 HTTP requests and * bytes; received * bytes in responses) (glob)
+
+#if reporevlogstore
+  $ find client-narrow-1/.hg/store -type f -name '*.i' | sort
+  client-narrow-1/.hg/store/00changelog.i
+  client-narrow-1/.hg/store/00manifest.i
+  client-narrow-1/.hg/store/data/a.i
+  client-narrow-1/.hg/store/data/b.i
+  client-narrow-1/.hg/store/data/dir1/e.i
+  client-narrow-1/.hg/store/data/dir1/f.i
+#endif
+
+Mixing --include and --exclude works
+
+  $ hg --config extensions.pullext=$TESTDIR/pullext.py --debug clone -U --include dir0/ --exclude dir0/c http://localhost:$HGPORT/ client-narrow-2
+  using http://localhost:$HGPORT/
+  sending capabilities command
+  query 1; heads
+  sending 2 commands
+  sending command heads: {}
+  sending command known: {
+    'nodes': []
+  }
+  received frame(size=9; request=1; stream=2; streamflags=stream-begin; type=stream-settings; flags=eos)
+  received frame(size=11; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+  received frame(size=22; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+  received frame(size=0; request=1; stream=2; streamflags=; type=command-response; flags=eos)
+  received frame(size=11; request=3; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+  received frame(size=1; request=3; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+  received frame(size=0; request=3; stream=2; streamflags=; type=command-response; flags=eos)
+  sending 1 commands
+  sending command changesetdata: {
+    'fields': set([
+      'bookmarks',
+      'parents',
+      'phase',
+      'revision'
+    ]),
+    'revisions': [
+      {
+        'heads': [
+          '\x97v_\xc3\xcdbO\xd1\xfa\x01v\x93,!\xff\xd1j\xdfC.'
+        ],
+        'roots': [],
+        'type': 'changesetdagrange'
+      }
+    ]
+  }
+  received frame(size=9; request=1; stream=2; streamflags=stream-begin; type=stream-settings; flags=eos)
+  received frame(size=11; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+  received frame(size=783; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+  received frame(size=0; request=1; stream=2; streamflags=; type=command-response; flags=eos)
+  add changeset 3390ef850073
+  add changeset b709380892b1
+  add changeset 47fe012ab237
+  add changeset 97765fc3cd62
+  checking for updated bookmarks
+  sending 1 commands
+  sending command manifestdata: {
+    'fields': set([
+      'parents',
+      'revision'
+    ]),
+    'haveparents': True,
+    'nodes': [
+      '\x99/Gy\x02\x9a=\xf8\xd0fm\x00\xbb\x92OicN&A',
+      '|2 \x1a\xa3\xa1R\xa9\xe6\xa9"+?\xa8\xd0\xe3\x0f\xc2V\xe8',
+      '\x8d\xd0W<\x7f\xaf\xe2\x04F\xcc\xea\xac\x05N\xea\xa4x\x91M\xdb',
+      '113\x85\xf2!\x8b\x08^\xb2Z\x821\x1e*\xdd\x0e\xeb\x8c3'
+    ],
+    'tree': ''
+  }
+  received frame(size=9; request=1; stream=2; streamflags=stream-begin; type=stream-settings; flags=eos)
+  received frame(size=11; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+  received frame(size=967; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+  received frame(size=0; request=1; stream=2; streamflags=; type=command-response; flags=eos)
+  sending 1 commands
+  sending command filesdata: {
+    'fields': set([
+      'parents',
+      'revision'
+    ]),
+    'haveparents': True,
+    'pathfilter': {
+      'exclude': [
+        'path:dir0/c'
+      ],
+      'include': [
+        'path:dir0'
+      ]
+    },
+    'revisions': [
+      {
+        'nodes': [
+          '3\x90\xef\x85\x00s\xfb\xc2\xf0\xdf\xff"D4,\x8e\x92)\x01:',
+          '\xb7\t8\x08\x92\xb1\x93\xc1\t\x1d:\x81\x7fp`R\xe3F\x82\x1b',
+          'G\xfe\x01*\xb27\xa8\xc7\xfc\x0cx\xf9\xf2mXf\xee\xf3\xf8%',
+          '\x97v_\xc3\xcdbO\xd1\xfa\x01v\x93,!\xff\xd1j\xdfC.'
+        ],
+        'type': 'changesetexplicit'
+      }
+    ]
+  }
+  received frame(size=9; request=1; stream=2; streamflags=stream-begin; type=stream-settings; flags=eos)
+  received frame(size=11; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+  received frame(size=160; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+  received frame(size=0; request=1; stream=2; streamflags=; type=command-response; flags=eos)
+  updating the branch cache
+  new changesets 3390ef850073:97765fc3cd62
+  (sent 5 HTTP requests and * bytes; received * bytes in responses) (glob)
+
+#if reporevlogstore
+  $ find client-narrow-2/.hg/store -type f -name '*.i' | sort
+  client-narrow-2/.hg/store/00changelog.i
+  client-narrow-2/.hg/store/00manifest.i
+  client-narrow-2/.hg/store/data/dir0/d.i
+#endif