comparison hgext/remotefilelog/shallowrepo.py @ 40495:3a333a582d7b

remotefilelog: import pruned-down remotefilelog extension from hg-experimental This is remotefilelog as of my recent patches for compatibility with current tip of hg, minus support for old versions of Mercurial and some FB-specific features like their treemanifest extension and fetching linkrev data from a patched phabricator. The file extutil.py moved from hgext3rd to remotefilelog. This is not yet ready to be landed, consider it a preview for now. Planned changes include: * replace lz4 with zstd * rename some capabilities, requirements and wireproto commands to mark them as experimental * consolidate bits of shallowutil with related functions (eg readfile) I'm certainly open to other (small) changes, but my rough mission is to land this largely as-is so we can use it as a model of the functionality we need going forward for lazy-fetching of file contents from a server. # no-check-commit because of a few foo_bar functions Differential Revision: https://phab.mercurial-scm.org/D4782
author Augie Fackler <augie@google.com>
date Thu, 27 Sep 2018 13:03:19 -0400
parents
children 60eb35b0c11c
comparison
equal deleted inserted replaced
40494:9aeb9e2d28a7 40495:3a333a582d7b
1 # shallowrepo.py - shallow repository that uses remote filelogs
2 #
3 # Copyright 2013 Facebook, Inc.
4 #
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
7 from __future__ import absolute_import
8
9 import os
10
11 from mercurial.i18n import _
12 from mercurial.node import hex, nullid, nullrev
13 from mercurial import (
14 encoding,
15 error,
16 localrepo,
17 match,
18 scmutil,
19 sparse,
20 util,
21 )
22 from mercurial.utils import procutil
23 from . import (
24 connectionpool,
25 constants,
26 contentstore,
27 datapack,
28 extutil,
29 fileserverclient,
30 historypack,
31 metadatastore,
32 remotefilectx,
33 remotefilelog,
34 shallowutil,
35 )
36
# Compatibility shim: changeset 5be286db moved hgexecutable() from
# mercurial.util to mercurial.utils.procutil. Probe for the old private
# attribute to decide which location this Mercurial provides.
if util.safehasattr(util, '_hgexecutable'):
    # Before 5be286db
    _hgexecutable = util.hgexecutable
else:
    # procutil is already imported at module level; no re-import needed.
    _hgexecutable = procutil.hgexecutable

# Repository requirement string that marks a repo as shallow.
requirement = "remotefilelog"
# Progress topic used by ui.progress() during prefetch.
_prefetching = _('prefetching')
46
47 # These make*stores functions are global so that other extensions can replace
48 # them.
def makelocalstores(repo):
    """Build the per-repository stores under .hg/store/data.

    Unlike the shared cache stores these live inside the repository and
    can not be discarded. Returns a (content, metadata) store pair.
    """
    storepath = os.path.join(repo.svfs.vfs.base, 'data')
    if not os.path.exists(storepath):
        os.makedirs(storepath)

    # shared=False marks these as authoritative repository data rather
    # than a discardable machine-wide cache.
    content = contentstore.remotefilelogcontentstore(
        repo, storepath, repo.name, shared=False)
    metadata = metadatastore.remotefilelogmetadatastore(
        repo, storepath, repo.name, shared=False)
    return content, metadata
61
def makecachestores(repo):
    """Build the shared cache stores (typically machine-wide).

    These hold copies of remote data and may be discarded at any time.
    Appends the new stores to repo.shareddatastores /
    repo.sharedhistorystores and returns a (content, metadata) pair.
    """
    cachepath = shallowutil.getcachepath(repo.ui)
    content = contentstore.remotefilelogcontentstore(
        repo, cachepath, repo.name, shared=True)
    metadata = metadatastore.remotefilelogmetadatastore(
        repo, cachepath, repo.name, shared=True)

    # Expose the cache content store directly on the repo as well.
    repo.sharedstore = content
    repo.shareddatastores.append(content)
    repo.sharedhistorystores.append(metadata)

    return content, metadata
76
def makeremotestores(repo, cachecontent, cachemetadata):
    """Build stores that fetch missing data from a remote server.

    Results fetched over the wire are written through to the given
    cache stores. Also instantiates repo.fileservice as a side effect.
    Returns a (content, metadata) store pair.
    """
    repo.fileservice = fileserverclient.fileserverclient(repo)
    content = contentstore.remotecontentstore(
        repo.ui, repo.fileservice, cachecontent)
    metadata = metadatastore.remotemetadatastore(
        repo.ui, repo.fileservice, cachemetadata)
    return content, metadata
86
def makepackstores(repo):
    """Build pack-based cache stores.

    Packs are a more efficient on-disk format to read from than the
    loose-file cache. Appends the stores to the repo's shared store
    lists, reports pack metrics, and returns a (data, history) pair.
    """
    packpath = shallowutil.getcachepackpath(repo,
                                            constants.FILEPACK_CATEGORY)
    datastore = datapack.datapackstore(repo.ui, packpath)
    historystore = historypack.historypackstore(repo.ui, packpath)

    repo.shareddatastores.append(datastore)
    repo.sharedhistorystores.append(historystore)
    shallowutil.reportpackmetrics(repo.ui, 'filestore', datastore,
                                  historystore)
    return datastore, historystore
100
def makeunionstores(repo):
    """Wire the layered (union) stores onto ``repo``.

    A union store consults each underlying store in order and returns
    the first result: packs, then shared cache, then local, then
    remote. Writes go to the local store.
    """
    repo.shareddatastores = []
    repo.sharedhistorystores = []

    # Construction order matters: the pack/cache constructors append to
    # the shared lists created above, and makeremotestores needs the
    # cache stores to write fetched data through.
    packdata, packhistory = makepackstores(repo)
    cachedata, cachehistory = makecachestores(repo)
    localdata, localhistory = makelocalstores(repo)
    remotedata, remotehistory = makeremotestores(repo, cachedata,
                                                cachehistory)

    repo.contentstore = contentstore.unioncontentstore(
        packdata, cachedata,
        localdata, remotedata, writestore=localdata)
    repo.metadatastore = metadatastore.unionmetadatastore(
        packhistory, cachehistory, localhistory, remotehistory,
        writestore=localhistory)

    # The file service writes fetched data into the cache by default,
    # or directly into packs when remotefilelog.fetchpacks is set.
    if repo.ui.configbool('remotefilelog', 'fetchpacks'):
        datawrite = packdata
        historywrite = packhistory
    else:
        datawrite = cachedata
        historywrite = cachehistory
    repo.fileservice.setstore(repo.contentstore, repo.metadatastore,
                              datawrite, historywrite)
    shallowutil.reportpackmetrics(repo.ui, 'filestore',
                                  packdata, packhistory)
129
def wraprepo(repo):
    """Dynamically mix shallow-repository behavior into ``repo``.

    Replaces ``repo.__class__`` with a subclass that serves file data
    through remotefilelog, wires up the union stores, and installs the
    shallow match from the remotefilelog include/exclude config.
    Mutates ``repo`` in place; returns nothing.

    NOTE: the methods below deliberately mix ``self`` with the
    closed-over ``repo`` from this function's scope.
    """
    class shallowrepository(repo.__class__):
        @util.propertycache
        def name(self):
            # Repo name used to key the shared cache; cached per instance.
            return self.ui.config('remotefilelog', 'reponame')

        @util.propertycache
        def fallbackpath(self):
            # Server to fetch missing file data from; defaults to the
            # 'default' path when remotefilelog.fallbackpath is unset.
            path = repo.ui.config("remotefilelog", "fallbackpath",
                                  repo.ui.config('paths', 'default'))
            if not path:
                raise error.Abort("no remotefilelog server "
                                  "configured - is your .hg/hgrc trusted?")

            return path

        def maybesparsematch(self, *revs, **kwargs):
            '''
            A wrapper that allows the remotefilelog to invoke
            sparsematch() for the given revs. The result for a
            non-sparse repository depends on sparse.matcher()
            (presumably an always-matching or None result — confirm
            against the sparse extension).
            '''
            if revs:
                return sparse.matcher(repo, revs=revs)
            return sparse.matcher(repo)

        def file(self, f):
            # Normalize away a leading slash before matching.
            if f[0] == '/':
                f = f[1:]

            # Shallow-matched paths get a remote filelog; everything
            # else falls back to the normal revlog-backed filelog.
            if self.shallowmatch(f):
                return remotefilelog.remotefilelog(self.svfs, f, self)
            else:
                return super(shallowrepository, self).file(f)

        def filectx(self, path, *args, **kwargs):
            # Same dispatch as file(): remote context for shallow paths.
            if self.shallowmatch(path):
                return remotefilectx.remotefilectx(self, path, *args, **kwargs)
            else:
                return super(shallowrepository, self).filectx(path, *args,
                                                              **kwargs)

        @localrepo.unfilteredmethod
        def commitctx(self, ctx, error=False):
            """Add a new revision to current repository.
            Revision information is passed via the context argument.
            """

            # some contexts already have manifest nodes, they don't need any
            # prefetching (for example if we're just editing a commit message
            # we can reuse the manifest)
            if not ctx.manifestnode():
                # prefetch files that will likely be compared
                m1 = ctx.p1().manifest()
                files = []
                for f in ctx.modified() + ctx.added():
                    fparent1 = m1.get(f, nullid)
                    if fparent1 != nullid:
                        files.append((f, hex(fparent1)))
                self.fileservice.prefetch(files)
            return super(shallowrepository, self).commitctx(ctx,
                                                            error=error)

        def backgroundprefetch(self, revs, base=None, repack=False, pats=None,
                               opts=None):
            """Runs prefetch in background with optional repack

            Spawns a detached ``hg prefetch`` child process via
            extutil.runshellcommand rather than prefetching inline.
            ``base``, ``pats`` and ``opts`` are accepted for interface
            parity with prefetch() but are not forwarded to the child.
            """
            cmd = [_hgexecutable(), '-R', repo.origroot, 'prefetch']
            if repack:
                cmd.append('--repack')
            if revs:
                cmd += ['-r', revs]
            cmd = ' '.join(map(procutil.shellquote, cmd))

            extutil.runshellcommand(cmd, encoding.environ)

        def prefetch(self, revs, base=None, pats=None, opts=None):
            """Prefetches all the necessary file revisions for the given revs
            Optionally runs repack in background
            """
            # Serialize prefetches per repo via a dedicated lock file.
            with repo._lock(repo.svfs, 'prefetchlock', True, None, None,
                            _('prefetching in %s') % repo.origroot):
                self._prefetch(revs, base, pats, opts)

        def _prefetch(self, revs, base=None, pats=None, opts=None):
            # Collect (path, filenode) pairs for the requested revs and
            # hand them to the file service, separating files known to
            # live on the server from those that may only exist locally.
            fallbackpath = self.fallbackpath
            if fallbackpath:
                # If we know a rev is on the server, we should fetch the server
                # version of those files, since our local file versions might
                # become obsolete if the local commits are stripped.
                localrevs = repo.revs('outgoing(%s)', fallbackpath)
                if base is not None and base != nullrev:
                    serverbase = list(repo.revs('first(reverse(::%s) - %ld)',
                                                base, localrevs))
                    if serverbase:
                        base = serverbase[0]
            else:
                # No server configured: the repo itself serves as the
                # membership test, so every rev counts as local below.
                localrevs = repo

            mfl = repo.manifestlog
            mfrevlog = mfl.getstorage('')
            if base is not None:
                # Files already present at ``base`` need no fetching.
                mfdict = mfl[repo[base].manifestnode()].read()
                skip = set(mfdict.iteritems())
            else:
                skip = set()

            # Copy the skip set to start large and avoid constant resizing,
            # and since it's likely to be very similar to the prefetch set.
            files = skip.copy()
            serverfiles = skip.copy()
            visited = set()
            visited.add(nullrev)
            revnum = 0
            revcount = len(revs)
            self.ui.progress(_prefetching, revnum, total=revcount)
            for rev in sorted(revs):
                ctx = repo[rev]
                if pats:
                    m = scmutil.match(ctx, pats, opts)
                sparsematch = repo.maybesparsematch(rev)

                mfnode = ctx.manifestnode()
                mfrev = mfrevlog.rev(mfnode)

                # Decompressing manifests is expensive.
                # When possible, only read the deltas.
                p1, p2 = mfrevlog.parentrevs(mfrev)
                if p1 in visited and p2 in visited:
                    mfdict = mfl[mfnode].readfast()
                else:
                    mfdict = mfl[mfnode].read()

                # Lazily filter the manifest entries by the user's
                # patterns and the sparse profile before bucketing.
                diff = mfdict.iteritems()
                if pats:
                    diff = (pf for pf in diff if m(pf[0]))
                if sparsematch:
                    diff = (pf for pf in diff if sparsematch(pf[0]))
                if rev not in localrevs:
                    serverfiles.update(diff)
                else:
                    files.update(diff)

                visited.add(mfrev)
                revnum += 1
                self.ui.progress(_prefetching, revnum, total=revcount)

            # Drop the entries that were pre-seeded from ``base``.
            files.difference_update(skip)
            serverfiles.difference_update(skip)
            self.ui.progress(_prefetching, None)

            # Fetch files known to be on the server
            if serverfiles:
                results = [(path, hex(fnode)) for (path, fnode) in serverfiles]
                repo.fileservice.prefetch(results, force=True)

            # Fetch files that may or may not be on the server
            if files:
                results = [(path, hex(fnode)) for (path, fnode) in files]
                repo.fileservice.prefetch(results)

        def close(self):
            super(shallowrepository, self).close()
            self.connectionpool.close()

    repo.__class__ = shallowrepository

    # Default: every file is shallow; narrowed below if include/exclude
    # patterns are configured.
    repo.shallowmatch = match.always(repo.root, '')

    makeunionstores(repo)

    repo.includepattern = repo.ui.configlist("remotefilelog", "includepattern",
                                             None)
    repo.excludepattern = repo.ui.configlist("remotefilelog", "excludepattern",
                                             None)
    # Reuse an existing connection pool if another extension set one up.
    if not util.safehasattr(repo, 'connectionpool'):
        repo.connectionpool = connectionpool.connectionpool(repo)

    if repo.includepattern or repo.excludepattern:
        repo.shallowmatch = match.match(repo.root, '', None,
                                        repo.includepattern,
                                        repo.excludepattern)