comparison hgext/remotefilelog/shallowbundle.py @ 40495:3a333a582d7b

remotefilelog: import pruned-down remotefilelog extension from hg-experimental This is remotefilelog as of my recent patches for compatibility with current tip of hg, minus support for old versions of Mercurial and some FB-specific features like their treemanifest extension and fetching linkrev data from a patched phabricator. The file extutil.py moved from hgext3rd to remotefilelog. This is not yet ready to be landed, consider it a preview for now. Planned changes include: * replace lz4 with zstd * rename some capabilities, requirements and wireproto commands to mark them as experimental * consolidate bits of shallowutil with related functions (eg readfile) I'm certainly open to other (small) changes, but my rough mission is to land this largely as-is so we can use it as a model of the functionality we need going forward for lazy-fetching of file contents from a server. # no-check-commit because of a few foo_bar functions Differential Revision: https://phab.mercurial-scm.org/D4782
author Augie Fackler <augie@google.com>
date Thu, 27 Sep 2018 13:03:19 -0400
parents
children ed19958dbf5d
comparison
equal deleted inserted replaced
40494:9aeb9e2d28a7 40495:3a333a582d7b
1 # shallowbundle.py - bundle10 implementation for use with shallow repositories
2 #
3 # Copyright 2013 Facebook, Inc.
4 #
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
7 from __future__ import absolute_import
8
9 from mercurial.i18n import _
10 from mercurial.node import bin, hex, nullid
11 from mercurial import (
12 bundlerepo,
13 changegroup,
14 error,
15 match,
16 mdiff,
17 pycompat,
18 )
19 from . import (
20 remotefilelog,
21 shallowutil,
22 )
23
# Return values of shallowcg1packer.shouldaddfilegroups, controlling which
# file groups a generated changegroup includes.
NoFiles = 0     # include no file revisions
LocalFiles = 1  # include only files matching the repo's shallow pattern
AllFiles = 2    # include every changed file

# Requirements-file entry that marks a repository as shallow.
requirement = "remotefilelog"
29
def shallowgroup(cls, self, nodelist, rlog, lookup, units=None, reorder=None):
    """Yield changegroup chunks for ``nodelist`` read from ``rlog``.

    Anything that is not a remotefilelog is delegated to the stock
    ``group()`` on ``cls``'s superclass. For remotefilelog storage the
    nodes are emitted in topological order, each one delta'd against its
    predecessor (the first against the first node's first parent).
    """
    if not isinstance(rlog, remotefilelog.remotefilelog):
        # Plain revlog-backed storage: the default packer handles it.
        for chunk in super(cls, self).group(nodelist, rlog, lookup,
                                            units=units):
            yield chunk
        return

    if not nodelist:
        yield self.close()
        return

    ordered = shallowutil.sortnodes(nodelist, rlog.parents)

    # Seed the delta chain with the first requested node's first parent.
    ordered.insert(0, rlog.parents(ordered[0])[0])

    # Emit each node as a delta against the node preceding it.
    for base, node in zip(ordered, ordered[1:]):
        for chunk in self.nodechunk(rlog, node, base, lookup(node)):
            yield chunk

    yield self.close()
55
class shallowcg1packer(changegroup.cgpacker):
    """Changegroup packer that understands shallow (remotefilelog) repos."""

    def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
        """Generate a changegroup, disabling the linkrev fastpath for
        shallow repos (their filelog data is not locally complete, so
        linkrevs cannot be read off local revlogs)."""
        # Use the module-level constant rather than a repeated literal for
        # consistency with the rest of this file.
        if requirement in self._repo.requirements:
            fastpathlinkrev = False

        return super(shallowcg1packer, self).generate(commonrevs, clnodes,
                                                      fastpathlinkrev, source)

    def group(self, nodelist, rlog, lookup, units=None, reorder=None):
        return shallowgroup(shallowcg1packer, self, nodelist, rlog, lookup,
                            units=units)

    def generatefiles(self, changedfiles, *args):
        """Emit file groups, honoring shallow semantics.

        ``*args`` unpacking below supports both known upstream signatures
        of cgpacker.generatefiles.
        """
        try:
            linknodes, commonrevs, source = args
        except ValueError:
            commonrevs, source, mfdicts, fastpathlinkrev, fnodes, clrevs = args
        if requirement in self._repo.requirements:
            repo = self._repo
            if isinstance(repo, bundlerepo.bundlerepository):
                # If the bundle contains filelogs, we can't pull from it, since
                # bundlerepo is heavily tied to revlogs. Instead require that
                # the user use unbundle instead.
                # Force load the filelog data.
                bundlerepo.bundlerepository.file(repo, 'foo')
                if repo._cgfilespos:
                    raise error.Abort("cannot pull from full bundles",
                                      hint="use `hg unbundle` instead")
                return []
            filestosend = self.shouldaddfilegroups(source)
            if filestosend == NoFiles:
                # Drop shallow-tracked files; the client fetches those lazily.
                changedfiles = [f for f in changedfiles
                                if not repo.shallowmatch(f)]

        return super(shallowcg1packer, self).generatefiles(
            changedfiles, *args)

    def shouldaddfilegroups(self, source):
        """Return AllFiles, LocalFiles or NoFiles depending on the
        operation (``source``) and the peer's bundle capabilities."""
        repo = self._repo
        if requirement not in repo.requirements:
            return AllFiles

        if source == "push" or source == "bundle":
            return AllFiles

        caps = self._bundlecaps or []
        if source == "serve" or source == "pull":
            if 'remotefilelog' in caps:
                return LocalFiles
            else:
                # Serving to a full repo requires us to serve everything
                repo.ui.warn(_("pulling from a shallow repo\n"))
                return AllFiles

        return NoFiles

    def prune(self, rlog, missing, commonrevs):
        """Filter ``missing`` down to nodes whose linkrev is outside
        ``commonrevs``; delegate for non-remotefilelog storage."""
        if not isinstance(rlog, remotefilelog.remotefilelog):
            return super(shallowcg1packer, self).prune(rlog, missing,
                                                       commonrevs)

        repo = self._repo
        results = []
        for fnode in missing:
            fctx = repo.filectx(rlog.filename, fileid=fnode)
            if fctx.linkrev() not in commonrevs:
                results.append(fnode)
        return results

    def nodechunk(self, revlog, node, prevnode, linknode):
        """Yield the changegroup chunk (header, meta, delta) for ``node``
        delta'd against ``prevnode``."""
        prefix = ''
        if prevnode == nullid:
            # No base: send the full raw text as a trivial diff.
            delta = revlog.revision(node, raw=True)
            prefix = mdiff.trivialdiffheader(len(delta))
        else:
            # Actually uses remotefilelog.revdiff which works on nodes, not revs
            delta = revlog.revdiff(prevnode, node)
        p1, p2 = revlog.parents(node)
        flags = revlog.flags(node)
        meta = self.builddeltaheader(node, p1, p2, prevnode, linknode, flags)
        meta += prefix
        l = len(meta) + len(delta)
        yield changegroup.chunkheader(l)
        yield meta
        yield delta
141
def makechangegroup(orig, repo, outgoing, version, source, *args, **kwargs):
    """Wrap changegroup.makechangegroup to narrow ``repo.shallowmatch``
    while serving, so only files matching the client's include/exclude
    patterns (passed via bundlecaps) are sent.

    The original matcher is always restored, even on error.
    """
    if requirement not in repo.requirements:
        return orig(repo, outgoing, version, source, *args, **kwargs)

    original = repo.shallowmatch
    try:
        # if serving, only send files the client has patterns for
        if source == 'serve':
            bundlecaps = kwargs.get('bundlecaps')
            includepattern = None
            excludepattern = None
            # Patterns arrive NUL-separated inside capability strings.
            for cap in (bundlecaps or []):
                if cap.startswith("includepattern="):
                    raw = cap[len("includepattern="):]
                    if raw:
                        includepattern = raw.split('\0')
                elif cap.startswith("excludepattern="):
                    raw = cap[len("excludepattern="):]
                    if raw:
                        excludepattern = raw.split('\0')
            if includepattern or excludepattern:
                repo.shallowmatch = match.match(repo.root, '', None,
                                                includepattern, excludepattern)
            else:
                repo.shallowmatch = match.always(repo.root, '')
        return orig(repo, outgoing, version, source, *args, **kwargs)
    finally:
        repo.shallowmatch = original
170
def addchangegroupfiles(orig, repo, source, revmap, trp, expectedfiles, *args):
    """Apply incoming filelog chunks to a shallow repository.

    Unlike stock Mercurial, which lays down all revisions of one file at a
    time, a remotefilelog revision may depend on a revision of a *different*
    file (rename/copy metadata), so all chunks are read first and then
    applied in dependency (topological) order.

    Returns a ``(revision count, new file count)`` tuple.
    """
    if requirement not in repo.requirements:
        return orig(repo, source, revmap, trp, expectedfiles, *args)

    files = 0
    newfiles = 0
    visited = set()
    revisiondatas = {}
    queue = []

    # Normal Mercurial processes each file one at a time, adding all
    # the new revisions for that file at once. In remotefilelog a file
    # revision may depend on a different file's revision (in the case
    # of a rename/copy), so we must lay all revisions down across all
    # files in topological order.

    # read all the file chunks but don't add them
    while True:
        chunkdata = source.filelogheader()
        if not chunkdata:
            break
        files += 1
        f = chunkdata["filename"]
        repo.ui.debug("adding %s revisions\n" % f)
        repo.ui.progress(_('files'), files, total=expectedfiles)

        if not repo.shallowmatch(f):
            # File is outside the shallow pattern: store it via the
            # normal revlog path.
            fl = repo.file(f)
            deltas = source.deltaiter()
            fl.addgroup(deltas, revmap, trp)
            continue

        chain = None
        while True:
            # returns: (node, p1, p2, cs, deltabase, delta, flags) or None
            revisiondata = source.deltachunk(chain)
            if not revisiondata:
                break

            chain = revisiondata[0]

            revisiondatas[(f, chain)] = revisiondata
            queue.append((f, chain))

            if f not in visited:
                newfiles += 1
                visited.add(f)

        if chain is None:
            raise error.Abort(_("received file revlog group is empty"))

    processed = set()
    def available(f, node, depf, depnode):
        # True when (depf, depnode) is usable as a dependency of (f, node);
        # otherwise requeue both, dependency first, and return False.
        if depnode != nullid and (depf, depnode) not in processed:
            if not (depf, depnode) in revisiondatas:
                # It's not in the changegroup, assume it's already
                # in the repo
                return True
            # re-add self to queue
            queue.insert(0, (f, node))
            # add dependency in front
            queue.insert(0, (depf, depnode))
            return False
        return True

    skipcount = 0

    # Prefetch the non-bundled revisions that we will need
    prefetchfiles = []
    for f, node in queue:
        revisiondata = revisiondatas[(f, node)]
        # revisiondata: (node, p1, p2, cs, deltabase, delta, flags)
        dependents = [revisiondata[1], revisiondata[2], revisiondata[4]]

        for dependent in dependents:
            if dependent == nullid or (f, dependent) in revisiondatas:
                continue
            prefetchfiles.append((f, hex(dependent)))

    repo.fileservice.prefetch(prefetchfiles)

    # Apply the revisions in topological order such that a revision
    # is only written once its deltabase and parents have been written.
    while queue:
        f, node = queue.pop(0)
        if (f, node) in processed:
            continue

        skipcount += 1
        if skipcount > len(queue) + 1:
            raise error.Abort(_("circular node dependency"))

        fl = repo.file(f)

        revisiondata = revisiondatas[(f, node)]
        # revisiondata: (node, p1, p2, cs, deltabase, delta, flags)
        node, p1, p2, linknode, deltabase, delta, flags = revisiondata

        if not available(f, node, f, deltabase):
            continue

        base = fl.revision(deltabase, raw=True)
        text = mdiff.patch(base, delta)
        if not isinstance(text, bytes):
            # mdiff.patch can return a buffer-like object; downstream code
            # wants real bytes. (The original checked py2's `buffer` type,
            # which is a NameError on Python 3; bytes() covers both.)
            text = bytes(text)

        meta, text = shallowutil.parsemeta(text)
        if 'copy' in meta:
            copyfrom = meta['copy']
            copynode = bin(meta['copyrev'])
            if not available(f, node, copyfrom, copynode):
                continue

        # NOTE(review): the `continue` below only skips to the next parent;
        # unlike the deltabase/copy checks above it does not defer this
        # revision when a parent is unavailable -- confirm intentional.
        for p in [p1, p2]:
            if p != nullid:
                if not available(f, node, f, p):
                    continue

        fl.add(text, meta, trp, linknode, p1, p2)
        processed.add((f, node))
        skipcount = 0

    repo.ui.progress(_('files'), None)

    return len(revisiondatas), newfiles