comparison mercurial/wireprotov2server.py @ 40178:46a40bce3ae0

wireprotov2: define and implement "filesdata" command. Previously, the only way to access file revision data was the "filedata" command. That command is useful to have, but it only allowed resolving revision data for a single file. This meant that clients needed to send 1 command for each tracked path they were seeking data on. Furthermore, those commands would need to enumerate the exact file nodes they wanted data for. This approach meant that clients were sending a lot of data to remotes in order to request file data. For example, if there were 1M file revisions, we'd need at least 20,000,000 bytes just to encode file nodes! Many clients on the internet don't have that kind of upload capacity. In order to limit the amount of data that clients must send, we'll need more efficient ways to request repository data. This commit defines and implements a new "filesdata" command. This command allows the retrieval of data for multiple files by specifying changeset revisions and optional file patterns. The command figures out what file revisions are "relevant" and sends them in bulk. The logic around choosing which file revisions to send in the case of haveparents not being set is overly simple and will over-send files. We will need more smarts here eventually. (Specifically, the client will need to tell the server which revisions it knows about.) This work is deferred until a later time. Differential Revision: https://phab.mercurial-scm.org/D4981
author Gregory Szorc <gregory.szorc@gmail.com>
date Wed, 03 Oct 2018 12:54:39 -0700
parents 41e2633bcd00
children ed55a0077490
comparison
equal deleted inserted replaced
40177:41e2633bcd00 40178:46a40bce3ae0
4 # This software may be used and distributed according to the terms of the 4 # This software may be used and distributed according to the terms of the
5 # GNU General Public License version 2 or any later version. 5 # GNU General Public License version 2 or any later version.
6 6
7 from __future__ import absolute_import 7 from __future__ import absolute_import
8 8
9 import collections
9 import contextlib 10 import contextlib
10 import hashlib 11 import hashlib
11 12
12 from .i18n import _ 13 from .i18n import _
13 from .node import ( 14 from .node import (
16 ) 17 )
17 from . import ( 18 from . import (
18 discovery, 19 discovery,
19 encoding, 20 encoding,
20 error, 21 error,
22 match as matchmod,
21 narrowspec, 23 narrowspec,
22 pycompat, 24 pycompat,
23 wireprotoframing, 25 wireprotoframing,
24 wireprototypes, 26 wireprototypes,
25 ) 27 )
1002 yield d 1004 yield d
1003 1005
1004 for extra in followingdata: 1006 for extra in followingdata:
1005 yield extra 1007 yield extra
1006 1008
def makefilematcher(repo, pathfilter):
    """Build a file matcher from an optional path filter dict.

    ``pathfilter`` is either falsy, in which case a matcher is built from
    the repo root with no include/exclude patterns, or a dict that may carry
    ``include`` and ``exclude`` lists of patterns.

    Every supplied pattern must start with ``path:`` or ``rootfilesin:``;
    anything else raises ``error.WireprotoCommandError``.

    The returned matcher is the intersection of the requested matcher and
    ``repo.narrowmatch()``.
    """
    if not pathfilter:
        requested = matchmod.match(repo.root, b'')
    else:
        allowedprefixes = (b'path:', b'rootfilesin:')
        includes = pathfilter.get(b'include', [])
        excludes = pathfilter.get(b'exclude', [])

        # Reject unsupported pattern kinds before building any matcher.
        for key, patterns in ((b'include', includes), (b'exclude', excludes)):
            for pattern in patterns:
                if pattern.startswith(allowedprefixes):
                    continue

                raise error.WireprotoCommandError(
                    '%s pattern must begin with `path:` or `rootfilesin:`; '
                    'got %s', (key, pattern))

        requested = matchmod.match(repo.root, b'',
                                   include=includes,
                                   exclude=excludes)

    # The request may name files that are outside the local store, so
    # restrict the result to what the narrow matcher allows.
    return matchmod.intersectmatchers(repo.narrowmatch(), requested)
1031
1007 @wireprotocommand( 1032 @wireprotocommand(
1008 'filedata', 1033 'filedata',
1009 args={ 1034 args={
1010 'haveparents': { 1035 'haveparents': {
1011 'type': 'bool', 1036 'type': 'bool',
1031 # TODO censoring a file revision won't invalidate the cache. 1056 # TODO censoring a file revision won't invalidate the cache.
1032 # Figure out a way to take censoring into account when deriving 1057 # Figure out a way to take censoring into account when deriving
1033 # the cache key. 1058 # the cache key.
1034 cachekeyfn=makecommandcachekeyfn('filedata', 1, allargs=True)) 1059 cachekeyfn=makecommandcachekeyfn('filedata', 1, allargs=True))
1035 def filedata(repo, proto, haveparents, nodes, fields, path): 1060 def filedata(repo, proto, haveparents, nodes, fields, path):
1061 # TODO this API allows access to file revisions that are attached to
1062 # secret changesets. filesdata does not have this problem. Maybe this
1063 # API should be deleted?
1064
1036 try: 1065 try:
1037 # Extensions may wish to access the protocol handler. 1066 # Extensions may wish to access the protocol handler.
1038 store = getfilestore(repo, proto, path) 1067 store = getfilestore(repo, proto, path)
1039 except FileAccessError as e: 1068 except FileAccessError as e:
1040 raise error.WireprotoCommandError(e.msg, e.args) 1069 raise error.WireprotoCommandError(e.msg, e.args)
1055 b'totalitems': len(nodes), 1084 b'totalitems': len(nodes),
1056 } 1085 }
1057 1086
1058 for o in emitfilerevisions(revisions, fields): 1087 for o in emitfilerevisions(revisions, fields):
1059 yield o 1088 yield o
1089
def filesdatacapabilities(repo, proto):
    """Return the extra capabilities dict for the ``filesdata`` command.

    The dict has a single ``recommendedbatchsize`` entry whose value is the
    integer config option
    ``experimental.server.filesdata.recommended-batch-size``.

    ``proto`` is accepted but not used.
    """
    return {
        b'recommendedbatchsize': repo.ui.configint(
            b'experimental', b'server.filesdata.recommended-batch-size'),
    }
1096
@wireprotocommand(
    'filesdata',
    args={
        'haveparents': {
            'type': 'bool',
            'default': lambda: False,
            'example': True,
        },
        'fields': {
            'type': 'set',
            'default': set,
            'example': {b'parents', b'revision'},
            'validvalues': {b'firstchangeset', b'parents', b'revision'},
        },
        'pathfilter': {
            'type': 'dict',
            'default': lambda: None,
            'example': {b'include': [b'path:tests']},
        },
        'revisions': {
            'type': 'list',
            'example': [{
                b'type': b'changesetexplicit',
                b'nodes': [b'abcdef...'],
            }],
        },
    },
    permission='pull',
    # TODO censoring a file revision won't invalidate the cache.
    # Figure out a way to take censoring into account when deriving
    # the cache key.
    cachekeyfn=makecommandcachekeyfn('filesdata', 1, allargs=True),
    extracapabilitiesfn=filesdatacapabilities)
def filesdata(repo, proto, haveparents, fields, pathfilter, revisions):
    """Emit file revision data for multiple paths in one command.

    ``revisions`` is resolved to a collection of changeset nodes via
    ``resolvenodes()`` and ``pathfilter`` is turned into a matcher via
    ``makefilematcher()``. The file revisions to send are then selected as
    follows:

    * ``haveparents`` true: for every matched path listed in ``ctx.files()``
      of an outgoing changeset, send the file revisions whose linkrev is one
      of the outgoing changelog revisions.
    * ``haveparents`` false: send every matched file node found in the
      manifest of each outgoing changeset.

    Yields, in order: one dict with ``totalpaths`` and ``totalitems``, then
    for each path (sorted) a dict with ``path`` and ``totalitems`` followed
    by the objects produced by ``emitfilerevisions()`` for that path.

    Raises ``error.WireprotoCommandError`` if ``getfilestore()`` raises
    ``FileAccessError`` for a path.
    """
    # TODO This should operate on a repo that exposes obsolete changesets. There
    # is a race between a client making a push that obsoletes a changeset and
    # another client fetching files data for that changeset. If a client has a
    # changeset, it should probably be allowed to access files data for that
    # changeset.

    outgoing = resolvenodes(repo, revisions)
    filematcher = makefilematcher(repo, pathfilter)

    # Figure out what needs to be emitted: path -> set of file nodes.
    fnodes = collections.defaultdict(set)

    # If ancestors are known, we send file revisions having a linkrev in the
    # outgoing set of changeset revisions.
    if haveparents:
        # The set of changed paths is only needed on this branch, so it is
        # computed here rather than unconditionally.
        changedpaths = set()

        for node in outgoing:
            changedpaths.update(repo[node].files())

        cl = repo.changelog
        outgoingclrevs = {cl.rev(n) for n in outgoing}

        for path in sorted(p for p in changedpaths if filematcher(p)):
            try:
                store = getfilestore(repo, proto, path)
            except FileAccessError as e:
                raise error.WireprotoCommandError(e.msg, e.args)

            for rev in store:
                if store.linkrev(rev) in outgoingclrevs:
                    fnodes[path].add(store.node(rev))

    # If ancestors aren't known, we walk the manifests and send all
    # encountered file revisions.
    else:
        for node in outgoing:
            mctx = repo[node].manifestctx()

            for path, fnode in mctx.read().items():
                if filematcher(path):
                    fnodes[path].add(fnode)

    yield {
        b'totalpaths': len(fnodes),
        b'totalitems': sum(len(v) for v in fnodes.values())
    }

    for path, filenodes in sorted(fnodes.items()):
        try:
            store = getfilestore(repo, proto, path)
        except FileAccessError as e:
            raise error.WireprotoCommandError(e.msg, e.args)

        yield {
            b'path': path,
            b'totalitems': len(filenodes),
        }

        # Named distinctly so the ``revisions`` argument is not shadowed.
        filerevisions = store.emitrevisions(
            filenodes,
            revisiondata=b'revision' in fields,
            assumehaveparentrevisions=haveparents)

        for o in emitfilerevisions(filerevisions, fields):
            yield o
1060 1200
1061 @wireprotocommand( 1201 @wireprotocommand(
1062 'heads', 1202 'heads',
1063 args={ 1203 args={
1064 'publiconly': { 1204 'publiconly': {