Mercurial > hg
comparison hgext/remotefilelog/shallowrepo.py @ 40495:3a333a582d7b
remotefilelog: import pruned-down remotefilelog extension from hg-experimental
This is remotefilelog as of my recent patches for compatibility with
current tip of hg, minus support for old versions of Mercurial and
some FB-specific features like their treemanifest extension and
fetching linkrev data from a patched phabricator. The file extutil.py
moved from hgext3rd to remotefilelog.
This is not yet ready to be landed, consider it a preview for
now. Planned changes include:
* replace lz4 with zstd
* rename some capabilities, requirements and wireproto commands to mark
them as experimental
* consolidate bits of shallowutil with related functions (e.g. readfile)
I'm certainly open to other (small) changes, but my rough mission is
to land this largely as-is so we can use it as a model of the
functionality we need going forward for lazy-fetching of file contents
from a server.
# no-check-commit because of a few foo_bar functions
Differential Revision: https://phab.mercurial-scm.org/D4782
author | Augie Fackler <augie@google.com> |
---|---|
date | Thu, 27 Sep 2018 13:03:19 -0400 |
parents | |
children | 60eb35b0c11c |
comparison
equal
deleted
inserted
replaced
40494:9aeb9e2d28a7 | 40495:3a333a582d7b |
---|---|
1 # shallowrepo.py - shallow repository that uses remote filelogs | |
2 # | |
3 # Copyright 2013 Facebook, Inc. | |
4 # | |
5 # This software may be used and distributed according to the terms of the | |
6 # GNU General Public License version 2 or any later version. | |
7 from __future__ import absolute_import | |
8 | |
9 import os | |
10 | |
11 from mercurial.i18n import _ | |
12 from mercurial.node import hex, nullid, nullrev | |
13 from mercurial import ( | |
14 encoding, | |
15 error, | |
16 localrepo, | |
17 match, | |
18 scmutil, | |
19 sparse, | |
20 util, | |
21 ) | |
22 from mercurial.utils import procutil | |
23 from . import ( | |
24 connectionpool, | |
25 constants, | |
26 contentstore, | |
27 datapack, | |
28 extutil, | |
29 fileserverclient, | |
30 historypack, | |
31 metadatastore, | |
32 remotefilectx, | |
33 remotefilelog, | |
34 shallowutil, | |
35 ) | |
36 | |
# Locate the hg executable in a way that works both before and after the
# 5be286db rename of util.hgexecutable to procutil.hgexecutable.  Old
# versions expose the private util._hgexecutable attribute, which we use
# as the version probe.
if util.safehasattr(util, '_hgexecutable'):
    # Before 5be286db
    _hgexecutable = util.hgexecutable
else:
    # procutil is already imported unconditionally at the top of this
    # module, so no local re-import is needed here.
    _hgexecutable = procutil.hgexecutable

# Repository requirement string marking a repo as shallow/remotefilelog.
requirement = "remotefilelog"
# Translated progress topic, computed once since it is used per-rev below.
_prefetching = _('prefetching')
46 | |
47 # These make*stores functions are global so that other extensions can replace | |
48 # them. | |
def makelocalstores(repo):
    """In-repo stores, like .hg/store/data; can not be discarded.

    Returns a (contentstore, metadatastore) pair rooted in the repo's
    own store directory (shared=False).
    """
    localpath = os.path.join(repo.svfs.vfs.base, 'data')
    # Create the directory EAFP-style: a bare exists()/makedirs() pair is
    # racy when several hg processes initialize the same repo concurrently.
    try:
        os.makedirs(localpath)
    except OSError:
        # Only swallow the error if the directory is actually there.
        if not os.path.isdir(localpath):
            raise

    # Instantiate local data stores
    localcontent = contentstore.remotefilelogcontentstore(
        repo, localpath, repo.name, shared=False)
    localmetadata = metadatastore.remotefilelogmetadatastore(
        repo, localpath, repo.name, shared=False)
    return localcontent, localmetadata
61 | |
def makecachestores(repo):
    """Typically machine-wide, cache of remote data; can be discarded.

    Builds the shared (cross-repo) cache stores, registers them on the
    repo, and returns the (content, metadata) pair.
    """
    cachepath = shallowutil.getcachepath(repo.ui)
    content = contentstore.remotefilelogcontentstore(
        repo, cachepath, repo.name, shared=True)
    metadata = metadatastore.remotefilelogmetadatastore(
        repo, cachepath, repo.name, shared=True)

    # Expose the cache stores on the repo so other components can reach them.
    repo.sharedstore = content
    repo.shareddatastores.append(content)
    repo.sharedhistorystores.append(metadata)

    return content, metadata
76 | |
def makeremotestores(repo, cachecontent, cachemetadata):
    """These stores fetch data from a remote server.

    The given cache stores receive whatever the remote stores fetch, so
    later lookups can be satisfied locally.
    """
    # The file service is the transport every remote fetch goes through.
    fileservice = fileserverclient.fileserverclient(repo)
    repo.fileservice = fileservice

    remotecontent = contentstore.remotecontentstore(
        repo.ui, fileservice, cachecontent)
    remotemetadata = metadatastore.remotemetadatastore(
        repo.ui, fileservice, cachemetadata)
    return remotecontent, remotemetadata
86 | |
def makepackstores(repo):
    """Packs are more efficient (to read from) cache stores.

    Creates the pack-file backed content/history stores, registers them
    as shared stores, and reports their metrics.
    """
    packpath = shallowutil.getcachepackpath(repo, constants.FILEPACK_CATEGORY)

    cstore = datapack.datapackstore(repo.ui, packpath)
    hstore = historypack.historypackstore(repo.ui, packpath)
    repo.shareddatastores.append(cstore)
    repo.sharedhistorystores.append(hstore)

    shallowutil.reportpackmetrics(repo.ui, 'filestore', cstore, hstore)
    return cstore, hstore
100 | |
def makeunionstores(repo):
    """Union stores iterate the other stores and return the first result."""
    repo.shareddatastores = []
    repo.sharedhistorystores = []

    # Build every concrete store first; the remote stores feed the caches.
    packcontent, packmetadata = makepackstores(repo)
    cachecontent, cachemetadata = makecachestores(repo)
    localcontent, localmetadata = makelocalstores(repo)
    remotecontent, remotemetadata = makeremotestores(repo, cachecontent,
                                                    cachemetadata)

    # Lookup order is pack, cache, local, then remote; writes always land
    # in the local store.
    repo.contentstore = contentstore.unioncontentstore(
        packcontent, cachecontent, localcontent, remotecontent,
        writestore=localcontent)
    repo.metadatastore = metadatastore.unionmetadatastore(
        packmetadata, cachemetadata, localmetadata, remotemetadata,
        writestore=localmetadata)

    # Fetched data is written either into packs or into the loose cache,
    # depending on configuration.
    if repo.ui.configbool('remotefilelog', 'fetchpacks'):
        datawrite = packcontent
        historywrite = packmetadata
    else:
        datawrite = cachecontent
        historywrite = cachemetadata
    repo.fileservice.setstore(repo.contentstore, repo.metadatastore,
                              datawrite, historywrite)

    shallowutil.reportpackmetrics(repo.ui, 'filestore',
                                  packcontent, packmetadata)
129 | |
def wraprepo(repo):
    """Dynamically subclass *repo* to make it a shallow repository.

    Replaces repo.__class__ with a shallowrepository subclass, installs
    the union stores, and configures the shallow file matcher from the
    remotefilelog include/exclude patterns.
    """
    class shallowrepository(repo.__class__):
        @util.propertycache
        def name(self):
            # Repository name used to key the shared cache directories.
            return self.ui.config('remotefilelog', 'reponame')

        @util.propertycache
        def fallbackpath(self):
            # Server to fetch file contents from; falls back to the
            # default push/pull path when not configured explicitly.
            path = repo.ui.config("remotefilelog", "fallbackpath",
                                  repo.ui.config('paths', 'default'))
            if not path:
                raise error.Abort("no remotefilelog server "
                                  "configured - is your .hg/hgrc trusted?")

            return path

        def maybesparsematch(self, *revs, **kwargs):
            '''
            A wrapper that allows the remotefilelog to invoke sparsematch() if
            this is a sparse repository, or returns None if this is not a
            sparse repository.

            NOTE(review): both branches below return sparse.matcher(...),
            which may be an always-matcher rather than None for non-sparse
            repos - confirm against the sparse module before relying on a
            None return.
            '''
            if revs:
                return sparse.matcher(repo, revs=revs)
            return sparse.matcher(repo)

        def file(self, f):
            # Normalize away a leading '/' so matcher and filelog paths agree.
            if f[0] == '/':
                f = f[1:]

            # Shallow-tracked paths get remote filelogs; everything else
            # uses the normal local filelog implementation.
            if self.shallowmatch(f):
                return remotefilelog.remotefilelog(self.svfs, f, self)
            else:
                return super(shallowrepository, self).file(f)

        def filectx(self, path, *args, **kwargs):
            # Mirror file(): shallow paths get the remote filectx variant.
            if self.shallowmatch(path):
                return remotefilectx.remotefilectx(self, path, *args, **kwargs)
            else:
                return super(shallowrepository, self).filectx(path, *args,
                                                              **kwargs)

        @localrepo.unfilteredmethod
        def commitctx(self, ctx, error=False):
            """Add a new revision to current repository.
            Revision information is passed via the context argument.
            """

            # some contexts already have manifest nodes, they don't need any
            # prefetching (for example if we're just editing a commit message
            # we can reuse the manifest)
            if not ctx.manifestnode():
                # prefetch files that will likely be compared
                m1 = ctx.p1().manifest()
                files = []
                for f in ctx.modified() + ctx.added():
                    fparent1 = m1.get(f, nullid)
                    if fparent1 != nullid:
                        files.append((f, hex(fparent1)))
                self.fileservice.prefetch(files)
            return super(shallowrepository, self).commitctx(ctx,
                                                            error=error)

        def backgroundprefetch(self, revs, base=None, repack=False, pats=None,
                               opts=None):
            """Runs prefetch in background with optional repack

            Spawns a detached 'hg prefetch' child process.  NOTE(review):
            base, pats and opts are accepted but not forwarded to the
            child command - confirm whether that is intentional.
            """
            cmd = [_hgexecutable(), '-R', repo.origroot, 'prefetch']
            if repack:
                cmd.append('--repack')
            if revs:
                cmd += ['-r', revs]
            # Quote for the shell since extutil runs a shell command line.
            cmd = ' '.join(map(procutil.shellquote, cmd))

            extutil.runshellcommand(cmd, encoding.environ)

        def prefetch(self, revs, base=None, pats=None, opts=None):
            """Prefetches all the necessary file revisions for the given revs
            Optionally runs repack in background
            """
            # Serialize prefetches per-repo via a dedicated store lock.
            with repo._lock(repo.svfs, 'prefetchlock', True, None, None,
                            _('prefetching in %s') % repo.origroot):
                self._prefetch(revs, base, pats, opts)

        def _prefetch(self, revs, base=None, pats=None, opts=None):
            # Walk the manifests of *revs*, split the wanted (path, node)
            # pairs into server-known vs. possibly-local sets, and hand
            # them to the file service.
            fallbackpath = self.fallbackpath
            if fallbackpath:
                # If we know a rev is on the server, we should fetch the server
                # version of those files, since our local file versions might
                # become obsolete if the local commits are stripped.
                localrevs = repo.revs('outgoing(%s)', fallbackpath)
                if base is not None and base != nullrev:
                    serverbase = list(repo.revs('first(reverse(::%s) - %ld)',
                                                base, localrevs))
                    if serverbase:
                        base = serverbase[0]
            else:
                # No server: treat every rev as local ("rev in repo" is
                # always true below).
                localrevs = repo

            mfl = repo.manifestlog
            mfrevlog = mfl.getstorage('')
            if base is not None:
                # Files unchanged since *base* do not need prefetching.
                mfdict = mfl[repo[base].manifestnode()].read()
                skip = set(mfdict.iteritems())
            else:
                skip = set()

            # Copy the skip set to start large and avoid constant resizing,
            # and since it's likely to be very similar to the prefetch set.
            files = skip.copy()
            serverfiles = skip.copy()
            visited = set()
            visited.add(nullrev)
            revnum = 0
            revcount = len(revs)
            self.ui.progress(_prefetching, revnum, total=revcount)
            for rev in sorted(revs):
                ctx = repo[rev]
                if pats:
                    m = scmutil.match(ctx, pats, opts)
                sparsematch = repo.maybesparsematch(rev)

                mfnode = ctx.manifestnode()
                mfrev = mfrevlog.rev(mfnode)

                # Decompressing manifests is expensive.
                # When possible, only read the deltas.
                p1, p2 = mfrevlog.parentrevs(mfrev)
                if p1 in visited and p2 in visited:
                    mfdict = mfl[mfnode].readfast()
                else:
                    mfdict = mfl[mfnode].read()

                # Lazily narrow the (path, node) pairs by -I/-X patterns
                # and by the sparse profile before accumulating them.
                diff = mfdict.iteritems()
                if pats:
                    diff = (pf for pf in diff if m(pf[0]))
                if sparsematch:
                    diff = (pf for pf in diff if sparsematch(pf[0]))
                if rev not in localrevs:
                    serverfiles.update(diff)
                else:
                    files.update(diff)

                visited.add(mfrev)
                revnum += 1
                self.ui.progress(_prefetching, revnum, total=revcount)

            # Drop the entries pre-seeded from the base manifest.
            files.difference_update(skip)
            serverfiles.difference_update(skip)
            self.ui.progress(_prefetching, None)

            # Fetch files known to be on the server
            if serverfiles:
                results = [(path, hex(fnode)) for (path, fnode) in serverfiles]
                repo.fileservice.prefetch(results, force=True)

            # Fetch files that may or may not be on the server
            if files:
                results = [(path, hex(fnode)) for (path, fnode) in files]
                repo.fileservice.prefetch(results)

        def close(self):
            super(shallowrepository, self).close()
            # Shut down any pooled server connections along with the repo.
            self.connectionpool.close()

    repo.__class__ = shallowrepository

    # Default: every file is shallow until include/exclude patterns say
    # otherwise (narrowed below).
    repo.shallowmatch = match.always(repo.root, '')

    makeunionstores(repo)

    repo.includepattern = repo.ui.configlist("remotefilelog", "includepattern",
                                             None)
    repo.excludepattern = repo.ui.configlist("remotefilelog", "excludepattern",
                                             None)
    # Another extension may have installed a pool already; do not clobber it.
    if not util.safehasattr(repo, 'connectionpool'):
        repo.connectionpool = connectionpool.connectionpool(repo)

    if repo.includepattern or repo.excludepattern:
        repo.shallowmatch = match.match(repo.root, '', None,
                                        repo.includepattern, repo.excludepattern)