exchangev2: recognize narrow patterns when pulling
pulloperation instances were recently taught to record file
include and exclude patterns to facilitate narrow file transfer.
Teaching the exchangev2 code to transfer a subset of files is
as simple as constructing a narrow matcher from these patterns and
filtering all seen file paths through it.
Keep in mind that this change only influences file data: we're
still fetching all changeset and manifest data. So, there's still
a lot of "partial clone" functionality left to implement in exchangev2.
On a personal note, I find it gratifying that this feature requires
very few lines of new code to implement.
To test this, we implemented a minimal extension which adds
--include/--exclude arguments to the clone command. While the narrow
extension provides these arguments, I explicitly wanted to test this
functionality without the narrow extension enabled, as that extension
monkeypatches various things and I wanted to isolate the behavior of
core Mercurial.
Differential Revision: https://phab.mercurial-scm.org/D5132
--- a/mercurial/exchangev2.py Tue Oct 09 08:50:13 2018 -0700
+++ b/mercurial/exchangev2.py Wed Sep 26 14:38:43 2018 -0700
@@ -19,6 +19,7 @@
bookmarks,
error,
mdiff,
+ narrowspec,
phases,
pycompat,
setdiscovery,
@@ -30,6 +31,23 @@
remote = pullop.remote
tr = pullop.trmanager.transaction()
+ # We don't use the repo's narrow matcher here because the patterns passed
+ # to exchange.pull() could be different.
+ narrowmatcher = narrowspec.match(repo.root,
+ # Empty includes map to nevermatcher, so
+ # always set includes if missing.
+ pullop.includepats or {'path:.'},
+ pullop.excludepats)
+
+ if pullop.includepats or pullop.excludepats:
+ pathfilter = {}
+ if pullop.includepats:
+ pathfilter[b'include'] = sorted(pullop.includepats)
+ if pullop.excludepats:
+ pathfilter[b'exclude'] = sorted(pullop.excludepats)
+ else:
+ pathfilter = None
+
# Figure out what needs to be fetched.
common, fetch, remoteheads = _pullchangesetdiscovery(
repo, remote, pullop.heads, abortwhenunrelated=pullop.force)
@@ -63,8 +81,8 @@
# Find all file nodes referenced by added manifests and fetch those
# revisions.
- fnodes = _derivefilesfrommanifests(repo, manres['added'])
- _fetchfilesfromcsets(repo, tr, remote, fnodes, csetres['added'],
+ fnodes = _derivefilesfrommanifests(repo, narrowmatcher, manres['added'])
+ _fetchfilesfromcsets(repo, tr, remote, pathfilter, fnodes, csetres['added'],
manres['linkrevs'])
def _pullchangesetdiscovery(repo, remote, heads, abortwhenunrelated=True):
@@ -315,7 +333,7 @@
'linkrevs': linkrevs,
}
-def _derivefilesfrommanifests(repo, manifestnodes):
+def _derivefilesfrommanifests(repo, matcher, manifestnodes):
"""Determine what file nodes are relevant given a set of manifest nodes.
Returns a dict mapping file paths to dicts of file node to first manifest
@@ -340,7 +358,8 @@
md = m.readfast()
for path, fnode in md.items():
- fnodes[path].setdefault(fnode, manifestnode)
+ if matcher(path):
+ fnodes[path].setdefault(fnode, manifestnode)
progress.increment()
@@ -421,7 +440,8 @@
locallinkrevs[path].__getitem__,
weakref.proxy(tr))
-def _fetchfilesfromcsets(repo, tr, remote, fnodes, csets, manlinkrevs):
+def _fetchfilesfromcsets(repo, tr, remote, pathfilter, fnodes, csets,
+ manlinkrevs):
"""Fetch file data from explicit changeset revisions."""
def iterrevisions(objs, remaining, progress):
@@ -481,6 +501,9 @@
b'haveparents': True,
}
+ if pathfilter:
+ args[b'pathfilter'] = pathfilter
+
objs = e.callcommand(b'filesdata', args).result()
# First object is an overall header.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/pullext.py Wed Sep 26 14:38:43 2018 -0700
@@ -0,0 +1,38 @@
+# pullext.py - Simple extension to test pulling
+#
+# Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from __future__ import absolute_import
+
+from mercurial.i18n import _
+from mercurial import (
+ commands,
+ extensions,
+ localrepo,
+ repository,
+)
+
+def clonecommand(orig, ui, repo, *args, **kwargs):
+ if kwargs.get(r'include') or kwargs.get(r'exclude'):
+ kwargs[r'narrow'] = True
+
+ return orig(ui, repo, *args, **kwargs)
+
+def featuresetup(ui, features):
+ features.add(repository.NARROW_REQUIREMENT)
+
+def extsetup(ui):
+ entry = extensions.wrapcommand(commands.table, 'clone', clonecommand)
+
+ hasinclude = any(x[1] == 'include' for x in entry[1])
+
+ if not hasinclude:
+ entry[1].append(('', 'include', [],
+ _('pattern of file/directory to clone')))
+ entry[1].append(('', 'exclude', [],
+ _('pattern of file/directory to not clone')))
+
+ localrepo.featuresetupfuncs.add(featuresetup)
--- a/tests/test-wireproto-exchangev2.t Tue Oct 09 08:50:13 2018 -0700
+++ b/tests/test-wireproto-exchangev2.t Wed Sep 26 14:38:43 2018 -0700
@@ -619,3 +619,347 @@
$ hg -R client-bookmarks bookmarks
book-1 2:cd2534766bec
book-2 2:cd2534766bec
+
+ $ killdaemons.py
+
+Let's set up a slightly more complicated server
+
+ $ hg init server-2
+ $ enablehttpv2 server-2
+ $ cd server-2
+ $ mkdir dir0 dir1
+ $ echo a0 > a
+ $ echo b0 > b
+ $ hg -q commit -A -m 'commit 0'
+ $ echo c0 > dir0/c
+ $ echo d0 > dir0/d
+ $ hg -q commit -A -m 'commit 1'
+ $ echo e0 > dir1/e
+ $ echo f0 > dir1/f
+ $ hg -q commit -A -m 'commit 2'
+ $ echo c1 > dir0/c
+ $ echo e1 > dir1/e
+ $ hg commit -m 'commit 3'
+ $ hg serve -p $HGPORT -d --pid-file hg.pid -E error.log
+ $ cat hg.pid > $DAEMON_PIDS
+
+ $ cd ..
+
+Narrow clone only fetches some files
+
+ $ hg --config extensions.pullext=$TESTDIR/pullext.py --debug clone -U --include dir0/ http://localhost:$HGPORT/ client-narrow-0
+ using http://localhost:$HGPORT/
+ sending capabilities command
+ query 1; heads
+ sending 2 commands
+ sending command heads: {}
+ sending command known: {
+ 'nodes': []
+ }
+ received frame(size=9; request=1; stream=2; streamflags=stream-begin; type=stream-settings; flags=eos)
+ received frame(size=11; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+ received frame(size=22; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+ received frame(size=0; request=1; stream=2; streamflags=; type=command-response; flags=eos)
+ received frame(size=11; request=3; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+ received frame(size=1; request=3; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+ received frame(size=0; request=3; stream=2; streamflags=; type=command-response; flags=eos)
+ sending 1 commands
+ sending command changesetdata: {
+ 'fields': set([
+ 'bookmarks',
+ 'parents',
+ 'phase',
+ 'revision'
+ ]),
+ 'revisions': [
+ {
+ 'heads': [
+ '\x97v_\xc3\xcdbO\xd1\xfa\x01v\x93,!\xff\xd1j\xdfC.'
+ ],
+ 'roots': [],
+ 'type': 'changesetdagrange'
+ }
+ ]
+ }
+ received frame(size=9; request=1; stream=2; streamflags=stream-begin; type=stream-settings; flags=eos)
+ received frame(size=11; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+ received frame(size=783; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+ received frame(size=0; request=1; stream=2; streamflags=; type=command-response; flags=eos)
+ add changeset 3390ef850073
+ add changeset b709380892b1
+ add changeset 47fe012ab237
+ add changeset 97765fc3cd62
+ checking for updated bookmarks
+ sending 1 commands
+ sending command manifestdata: {
+ 'fields': set([
+ 'parents',
+ 'revision'
+ ]),
+ 'haveparents': True,
+ 'nodes': [
+ '\x99/Gy\x02\x9a=\xf8\xd0fm\x00\xbb\x92OicN&A',
+ '|2 \x1a\xa3\xa1R\xa9\xe6\xa9"+?\xa8\xd0\xe3\x0f\xc2V\xe8',
+ '\x8d\xd0W<\x7f\xaf\xe2\x04F\xcc\xea\xac\x05N\xea\xa4x\x91M\xdb',
+ '113\x85\xf2!\x8b\x08^\xb2Z\x821\x1e*\xdd\x0e\xeb\x8c3'
+ ],
+ 'tree': ''
+ }
+ received frame(size=9; request=1; stream=2; streamflags=stream-begin; type=stream-settings; flags=eos)
+ received frame(size=11; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+ received frame(size=967; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+ received frame(size=0; request=1; stream=2; streamflags=; type=command-response; flags=eos)
+ sending 1 commands
+ sending command filesdata: {
+ 'fields': set([
+ 'parents',
+ 'revision'
+ ]),
+ 'haveparents': True,
+ 'pathfilter': {
+ 'include': [
+ 'path:dir0'
+ ]
+ },
+ 'revisions': [
+ {
+ 'nodes': [
+ '3\x90\xef\x85\x00s\xfb\xc2\xf0\xdf\xff"D4,\x8e\x92)\x01:',
+ '\xb7\t8\x08\x92\xb1\x93\xc1\t\x1d:\x81\x7fp`R\xe3F\x82\x1b',
+ 'G\xfe\x01*\xb27\xa8\xc7\xfc\x0cx\xf9\xf2mXf\xee\xf3\xf8%',
+ '\x97v_\xc3\xcdbO\xd1\xfa\x01v\x93,!\xff\xd1j\xdfC.'
+ ],
+ 'type': 'changesetexplicit'
+ }
+ ]
+ }
+ received frame(size=9; request=1; stream=2; streamflags=stream-begin; type=stream-settings; flags=eos)
+ received frame(size=11; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+ received frame(size=449; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+ received frame(size=0; request=1; stream=2; streamflags=; type=command-response; flags=eos)
+ updating the branch cache
+ new changesets 3390ef850073:97765fc3cd62
+ (sent 5 HTTP requests and * bytes; received * bytes in responses) (glob)
+
+#if reporevlogstore
+ $ find client-narrow-0/.hg/store -type f -name '*.i' | sort
+ client-narrow-0/.hg/store/00changelog.i
+ client-narrow-0/.hg/store/00manifest.i
+ client-narrow-0/.hg/store/data/dir0/c.i
+ client-narrow-0/.hg/store/data/dir0/d.i
+#endif
+
+--exclude by itself works
+
+ $ hg --config extensions.pullext=$TESTDIR/pullext.py --debug clone -U --exclude dir0/ http://localhost:$HGPORT/ client-narrow-1
+ using http://localhost:$HGPORT/
+ sending capabilities command
+ query 1; heads
+ sending 2 commands
+ sending command heads: {}
+ sending command known: {
+ 'nodes': []
+ }
+ received frame(size=9; request=1; stream=2; streamflags=stream-begin; type=stream-settings; flags=eos)
+ received frame(size=11; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+ received frame(size=22; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+ received frame(size=0; request=1; stream=2; streamflags=; type=command-response; flags=eos)
+ received frame(size=11; request=3; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+ received frame(size=1; request=3; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+ received frame(size=0; request=3; stream=2; streamflags=; type=command-response; flags=eos)
+ sending 1 commands
+ sending command changesetdata: {
+ 'fields': set([
+ 'bookmarks',
+ 'parents',
+ 'phase',
+ 'revision'
+ ]),
+ 'revisions': [
+ {
+ 'heads': [
+ '\x97v_\xc3\xcdbO\xd1\xfa\x01v\x93,!\xff\xd1j\xdfC.'
+ ],
+ 'roots': [],
+ 'type': 'changesetdagrange'
+ }
+ ]
+ }
+ received frame(size=9; request=1; stream=2; streamflags=stream-begin; type=stream-settings; flags=eos)
+ received frame(size=11; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+ received frame(size=783; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+ received frame(size=0; request=1; stream=2; streamflags=; type=command-response; flags=eos)
+ add changeset 3390ef850073
+ add changeset b709380892b1
+ add changeset 47fe012ab237
+ add changeset 97765fc3cd62
+ checking for updated bookmarks
+ sending 1 commands
+ sending command manifestdata: {
+ 'fields': set([
+ 'parents',
+ 'revision'
+ ]),
+ 'haveparents': True,
+ 'nodes': [
+ '\x99/Gy\x02\x9a=\xf8\xd0fm\x00\xbb\x92OicN&A',
+ '|2 \x1a\xa3\xa1R\xa9\xe6\xa9"+?\xa8\xd0\xe3\x0f\xc2V\xe8',
+ '\x8d\xd0W<\x7f\xaf\xe2\x04F\xcc\xea\xac\x05N\xea\xa4x\x91M\xdb',
+ '113\x85\xf2!\x8b\x08^\xb2Z\x821\x1e*\xdd\x0e\xeb\x8c3'
+ ],
+ 'tree': ''
+ }
+ received frame(size=9; request=1; stream=2; streamflags=stream-begin; type=stream-settings; flags=eos)
+ received frame(size=11; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+ received frame(size=967; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+ received frame(size=0; request=1; stream=2; streamflags=; type=command-response; flags=eos)
+ sending 1 commands
+ sending command filesdata: {
+ 'fields': set([
+ 'parents',
+ 'revision'
+ ]),
+ 'haveparents': True,
+ 'pathfilter': {
+ 'exclude': [
+ 'path:dir0'
+ ],
+ 'include': [
+ 'path:.'
+ ]
+ },
+ 'revisions': [
+ {
+ 'nodes': [
+ '3\x90\xef\x85\x00s\xfb\xc2\xf0\xdf\xff"D4,\x8e\x92)\x01:',
+ '\xb7\t8\x08\x92\xb1\x93\xc1\t\x1d:\x81\x7fp`R\xe3F\x82\x1b',
+ 'G\xfe\x01*\xb27\xa8\xc7\xfc\x0cx\xf9\xf2mXf\xee\xf3\xf8%',
+ '\x97v_\xc3\xcdbO\xd1\xfa\x01v\x93,!\xff\xd1j\xdfC.'
+ ],
+ 'type': 'changesetexplicit'
+ }
+ ]
+ }
+ received frame(size=9; request=1; stream=2; streamflags=stream-begin; type=stream-settings; flags=eos)
+ received frame(size=11; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+ received frame(size=709; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+ received frame(size=0; request=1; stream=2; streamflags=; type=command-response; flags=eos)
+ updating the branch cache
+ new changesets 3390ef850073:97765fc3cd62
+ (sent 5 HTTP requests and * bytes; received * bytes in responses) (glob)
+
+#if reporevlogstore
+ $ find client-narrow-1/.hg/store -type f -name '*.i' | sort
+ client-narrow-1/.hg/store/00changelog.i
+ client-narrow-1/.hg/store/00manifest.i
+ client-narrow-1/.hg/store/data/a.i
+ client-narrow-1/.hg/store/data/b.i
+ client-narrow-1/.hg/store/data/dir1/e.i
+ client-narrow-1/.hg/store/data/dir1/f.i
+#endif
+
+Mixing --include and --exclude works
+
+ $ hg --config extensions.pullext=$TESTDIR/pullext.py --debug clone -U --include dir0/ --exclude dir0/c http://localhost:$HGPORT/ client-narrow-2
+ using http://localhost:$HGPORT/
+ sending capabilities command
+ query 1; heads
+ sending 2 commands
+ sending command heads: {}
+ sending command known: {
+ 'nodes': []
+ }
+ received frame(size=9; request=1; stream=2; streamflags=stream-begin; type=stream-settings; flags=eos)
+ received frame(size=11; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+ received frame(size=22; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+ received frame(size=0; request=1; stream=2; streamflags=; type=command-response; flags=eos)
+ received frame(size=11; request=3; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+ received frame(size=1; request=3; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+ received frame(size=0; request=3; stream=2; streamflags=; type=command-response; flags=eos)
+ sending 1 commands
+ sending command changesetdata: {
+ 'fields': set([
+ 'bookmarks',
+ 'parents',
+ 'phase',
+ 'revision'
+ ]),
+ 'revisions': [
+ {
+ 'heads': [
+ '\x97v_\xc3\xcdbO\xd1\xfa\x01v\x93,!\xff\xd1j\xdfC.'
+ ],
+ 'roots': [],
+ 'type': 'changesetdagrange'
+ }
+ ]
+ }
+ received frame(size=9; request=1; stream=2; streamflags=stream-begin; type=stream-settings; flags=eos)
+ received frame(size=11; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+ received frame(size=783; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+ received frame(size=0; request=1; stream=2; streamflags=; type=command-response; flags=eos)
+ add changeset 3390ef850073
+ add changeset b709380892b1
+ add changeset 47fe012ab237
+ add changeset 97765fc3cd62
+ checking for updated bookmarks
+ sending 1 commands
+ sending command manifestdata: {
+ 'fields': set([
+ 'parents',
+ 'revision'
+ ]),
+ 'haveparents': True,
+ 'nodes': [
+ '\x99/Gy\x02\x9a=\xf8\xd0fm\x00\xbb\x92OicN&A',
+ '|2 \x1a\xa3\xa1R\xa9\xe6\xa9"+?\xa8\xd0\xe3\x0f\xc2V\xe8',
+ '\x8d\xd0W<\x7f\xaf\xe2\x04F\xcc\xea\xac\x05N\xea\xa4x\x91M\xdb',
+ '113\x85\xf2!\x8b\x08^\xb2Z\x821\x1e*\xdd\x0e\xeb\x8c3'
+ ],
+ 'tree': ''
+ }
+ received frame(size=9; request=1; stream=2; streamflags=stream-begin; type=stream-settings; flags=eos)
+ received frame(size=11; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+ received frame(size=967; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+ received frame(size=0; request=1; stream=2; streamflags=; type=command-response; flags=eos)
+ sending 1 commands
+ sending command filesdata: {
+ 'fields': set([
+ 'parents',
+ 'revision'
+ ]),
+ 'haveparents': True,
+ 'pathfilter': {
+ 'exclude': [
+ 'path:dir0/c'
+ ],
+ 'include': [
+ 'path:dir0'
+ ]
+ },
+ 'revisions': [
+ {
+ 'nodes': [
+ '3\x90\xef\x85\x00s\xfb\xc2\xf0\xdf\xff"D4,\x8e\x92)\x01:',
+ '\xb7\t8\x08\x92\xb1\x93\xc1\t\x1d:\x81\x7fp`R\xe3F\x82\x1b',
+ 'G\xfe\x01*\xb27\xa8\xc7\xfc\x0cx\xf9\xf2mXf\xee\xf3\xf8%',
+ '\x97v_\xc3\xcdbO\xd1\xfa\x01v\x93,!\xff\xd1j\xdfC.'
+ ],
+ 'type': 'changesetexplicit'
+ }
+ ]
+ }
+ received frame(size=9; request=1; stream=2; streamflags=stream-begin; type=stream-settings; flags=eos)
+ received frame(size=11; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+ received frame(size=160; request=1; stream=2; streamflags=encoded; type=command-response; flags=continuation)
+ received frame(size=0; request=1; stream=2; streamflags=; type=command-response; flags=eos)
+ updating the branch cache
+ new changesets 3390ef850073:97765fc3cd62
+ (sent 5 HTTP requests and * bytes; received * bytes in responses) (glob)
+
+#if reporevlogstore
+ $ find client-narrow-2/.hg/store -type f -name '*.i' | sort
+ client-narrow-2/.hg/store/00changelog.i
+ client-narrow-2/.hg/store/00manifest.i
+ client-narrow-2/.hg/store/data/dir0/d.i
+#endif