comparison hgext/largefiles/lfcommands.py @ 15168:cfccd3bee7b3

hgext: add largefiles extension This code has a number of contributors and a complicated history prior to its introduction that can be seen by visiting: https://developers.kilnhg.com/Repo/Kiln/largefiles/largefiles http://hg.gerg.ca/hg-bfiles and looking at the included copyright notices and contributors list.
author various
date Sat, 24 Sep 2011 17:35:45 +0200
parents
children c1a4a3220711
comparison
equal deleted inserted replaced
15167:8df4166b6f63 15168:cfccd3bee7b3
1 # Copyright 2009-2010 Gregory P. Ward
2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 # Copyright 2010-2011 Fog Creek Software
4 # Copyright 2010-2011 Unity Technologies
5 #
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
8
9 '''High-level command functions: lfadd() et al., plus the cmdtable.'''
10
11 import os
12 import shutil
13
14 from mercurial import util, match as match_, hg, node, context, error
15 from mercurial.i18n import _
16
17 import lfutil
18 import basestore
19
20 # -- Commands ----------------------------------------------------------
21
def lfconvert(ui, src, dest, *pats, **opts):
    '''Convert a normal repository to a largefiles repository

    Convert source repository creating an identical repository, except that all
    files that match the patterns given, or are over the given size will be
    added as largefiles. The size used to determine whether or not to track a
    file as a largefile is the size of the first version of the file. After
    running this command you will need to make sure that largefiles is enabled
    anywhere you intend to push the new repository.'''
    # Implementation notes are kept in comments rather than in the docstring
    # above (presumably the docstring is what users see as command help).
    #
    # ui   -- ui object used for configuration, status and progress output
    # src  -- path of the existing local repository to read from
    # dest -- path of the repository to create; must not exist or be an
    #         empty directory
    # pats -- file patterns forcing matching files to become largefiles;
    #         falls back to the space-separated 'patterns' config value
    # opts -- reads opts['tonormal'] (convert back to normal files) and
    #         opts['size'] (size threshold in megabytes)
    #
    # Raises util.Abort on invalid size, bad source/destination, or any
    # error reported by the per-changeset helpers.
    import sys

    tolfile = not opts['tonormal']

    # The size threshold only matters when creating largefiles; converting
    # back to normal files never looks at it, so don't demand it there.
    size = None
    if tolfile:
        size = opts['size']
        if not size:
            size = ui.config(lfutil.longname, 'size', default=None)
        try:
            size = int(size)
        except ValueError:
            raise util.Abort(
                _('largefiles.size must be integer, was %s\n') % size)
        except TypeError:
            # int(None): neither the option nor the config supplied a size
            raise util.Abort(_('size must be specified'))

    try:
        rsrc = hg.repository(ui, src)
        if not rsrc.local():
            raise util.Abort(_('%s is not a local Mercurial repo') % src)
    except error.RepoError:
        # fetched via sys.exc_info() so the handler needs no
        # version-specific except-binding syntax
        err = sys.exc_info()[1]
        ui.traceback()
        raise util.Abort(err.args[0])
    if os.path.exists(dest):
        if not os.path.isdir(dest):
            raise util.Abort(_('destination %s already exists') % dest)
        elif os.listdir(dest):
            raise util.Abort(_('destination %s is not empty') % dest)
    try:
        ui.status(_('initializing destination %s\n') % dest)
        rdst = hg.repository(ui, dest, create=True)
        if not rdst.local():
            raise util.Abort(_('%s is not a local Mercurial repo') % dest)
    except error.RepoError:
        ui.traceback()
        raise util.Abort(_('%s is not a repo') % dest)

    # Pre-bind the lock so the finally clause below cannot hit a NameError
    # (and mask the real error) when acquiring the lock itself fails.
    dst_lock = None
    try:
        # Lock destination to prevent modification while it is converted to.
        # Don't need to lock src because we are just reading from its history
        # which can't change.
        dst_lock = rdst.lock()

        # Get a list of all changesets in the source.  The easy way to do this
        # is to simply walk the changelog, using changelog.nodesbetween().
        # Use a generator instead of a list to decrease memory usage
        ctxs = (rsrc[ctx] for ctx in
                rsrc.changelog.nodesbetween(None, rsrc.heads())[0])
        revmap = {node.nullid: node.nullid}
        # src is only read, so the progress total can be computed once
        total = rsrc['tip'].rev()
        if tolfile:
            lfiles = set()
            normalfiles = set()
            if not pats:
                pats = ui.config(lfutil.longname, 'patterns', default=())
                if pats:
                    pats = pats.split(' ')
            if pats:
                matcher = match_.match(rsrc.root, '', list(pats))
            else:
                matcher = None

            lfiletohash = {}
            for ctx in ctxs:
                ui.progress(_('converting revisions'), ctx.rev(),
                    unit=_('revision'), total=total)
                _lfconvert_addchangeset(rsrc, rdst, ctx, revmap,
                    lfiles, normalfiles, matcher, size, lfiletohash)
            ui.progress(_('converting revisions'), None)

            # Remove the standin directory and the largefiles that the
            # conversion wrote into the destination working directory.
            if os.path.exists(rdst.wjoin(lfutil.shortname)):
                shutil.rmtree(rdst.wjoin(lfutil.shortname))

            for f in lfiletohash:
                if os.path.isfile(rdst.wjoin(f)):
                    os.unlink(rdst.wjoin(f))
                try:
                    os.removedirs(os.path.dirname(rdst.wjoin(f)))
                except OSError:
                    # best effort: the directory may be non-empty or
                    # already gone
                    pass

        else:
            for ctx in ctxs:
                ui.progress(_('converting revisions'), ctx.rev(),
                    unit=_('revision'), total=total)
                _addchangeset(ui, rsrc, rdst, ctx, revmap)

            ui.progress(_('converting revisions'), None)
    except:
        # we failed (for whatever reason, including interruption), remove
        # the new directory and let the original error propagate
        shutil.rmtree(rdst.root)
        raise
    finally:
        if dst_lock is not None:
            dst_lock.release()
125
def _addchangeset(ui, rsrc, rdst, ctx, revmap):
    '''Commit a copy of changeset ctx (from rsrc) into rdst, replacing every
    standin with the contents of the largefile it refers to.

    revmap maps source changeset nodes to destination nodes; it is read to
    translate parents and .hgtags entries and is updated with the node
    created for ctx.

    Raises util.Abort if a referenced largefile cannot be found.'''
    # Convert src parents to dst parents
    parents = [revmap[p.node()] for p in ctx.parents()]
    while len(parents) < 2:
        parents.append(node.nullid)

    # Generate list of changed files
    files = set(ctx.files())
    if node.nullid not in parents:
        # Merge: also include files missing from the merge result and files
        # whose manifest entry differs from either parent.
        mc = ctx.manifest()
        mp1 = ctx.parents()[0].manifest()
        mp2 = ctx.parents()[1].manifest()
        files |= (set(mp1) | set(mp2)) - set(mc)
        for f in mc:
            if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
                files.add(f)

    def getfilectx(repo, memctx, f):
        if lfutil.standin(f) in files:
            # if the file isn't in the manifest then it was removed
            # or renamed, raise IOError to indicate this
            try:
                fctx = ctx.filectx(lfutil.standin(f))
            except error.LookupError:
                raise IOError()
            renamed = fctx.renamed()
            if renamed:
                renamed = lfutil.splitstandin(renamed[0])

            # the standin's content is the hash naming the largefile
            lfhash = fctx.data().strip()
            path = lfutil.findfile(rsrc, lfhash)
            if not path:
                # fail with a clear message instead of crashing in open()
                raise util.Abort(
                    _('largefile %s (hash %s) is missing from the cache')
                    % (f, lfhash))
            fd = open(path, 'rb')
            try:
                data = fd.read()
            finally:
                fd.close()
            return context.memfilectx(f, data, 'l' in fctx.flags(),
                                      'x' in fctx.flags(), renamed)
        else:
            try:
                fctx = ctx.filectx(f)
            except error.LookupError:
                raise IOError()
            renamed = fctx.renamed()
            if renamed:
                renamed = renamed[0]
            data = fctx.data()
            if f == '.hgtags':
                # tags name source changesets; point them at the
                # corresponding destination changesets instead
                newdata = []
                for line in data.splitlines():
                    tagnode, name = line.split(' ', 1)
                    newdata.append('%s %s\n' %
                        (node.hex(revmap[node.bin(tagnode)]), name))
                data = ''.join(newdata)
            return context.memfilectx(f, data, 'l' in fctx.flags(),
                                      'x' in fctx.flags(), renamed)

    # The destination tracks the largefiles themselves, not their standins
    dstfiles = []
    for fname in files:
        if lfutil.isstandin(fname):
            dstfiles.append(lfutil.splitstandin(fname))
        else:
            dstfiles.append(fname)
    # Commit
    mctx = context.memctx(rdst, parents, ctx.description(), dstfiles,
                          getfilectx, ctx.user(), ctx.date(), ctx.extra())
    ret = rdst.commitctx(mctx)
    rdst.dirstate.setparents(ret)
    revmap[ctx.node()] = rdst.changelog.tip()
200
def _lfconvert_addchangeset(rsrc, rdst, ctx, revmap, lfiles, normalfiles,
        matcher, size, lfiletohash):
    '''Commit a copy of changeset ctx (from rsrc) into rdst, storing every
    file classified as a largefile as a standin.

    lfiles and normalfiles are sets remembering how each file name was
    classified; they are consulted and extended here.  lfiletohash maps a
    largefile name to the hash last written for it and is updated as
    largefiles change.  revmap maps source nodes to destination nodes and
    receives the node created for ctx.

    Raises util.Abort when a largefile becomes a symlink.'''
    # Convert src parents to dst parents
    parents = [revmap[p.node()] for p in ctx.parents()]
    while len(parents) < 2:
        parents.append(node.nullid)

    # Generate list of changed files
    files = set(ctx.files())
    if node.nullid not in parents:
        # Merge: also include files missing from the merge result and files
        # whose manifest entry differs from either parent.
        mc = ctx.manifest()
        mp1 = ctx.parents()[0].manifest()
        mp2 = ctx.parents()[1].manifest()
        files |= (set(mp1) | set(mp2)) - set(mc)
        for f in mc:
            if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
                files.add(f)

    dstfiles = []
    for f in files:
        if f not in lfiles and f not in normalfiles:
            # First time this name is seen: decide once whether it is a
            # largefile and remember the decision.
            islfile = _islfile(f, ctx, matcher, size)
            # If this file was renamed or copied then copy
            # the lfileness of its predecessor
            if f in ctx.manifest():
                fctx = ctx.filectx(f)
                renamed = fctx.renamed()
                # bool() because renamed may be any falsy value
                renamedlfile = bool(renamed and renamed[0] in lfiles)
                islfile = islfile or renamedlfile
                if 'l' in fctx.flags():
                    if renamedlfile:
                        raise util.Abort(
                            _('Renamed/copied largefile %s becomes symlink')
                            % f)
                    islfile = False
            if islfile:
                lfiles.add(f)
            else:
                normalfiles.add(f)

        if f in lfiles:
            dstfiles.append(lfutil.standin(f))
            # lfile in manifest if it has not been removed/renamed
            if f in ctx.manifest():
                fctx = ctx.filectx(f)
                if 'l' in fctx.flags():
                    # Look the rename up here: f may have been classified
                    # in an earlier changeset, in which case nothing above
                    # computed it for this one.
                    renamed = fctx.renamed()
                    if renamed and renamed[0] in lfiles:
                        raise util.Abort(
                            _('largefile %s becomes symlink') % f)

                # lfile was modified, update standins
                fullpath = rdst.wjoin(f)
                lfutil.createdir(os.path.dirname(fullpath))
                filedata = ctx[f].data()
                m = util.sha1('')
                m.update(filedata)
                newhash = m.hexdigest()
                if f not in lfiletohash or lfiletohash[f] != newhash:
                    # open outside the try so a failing open() is reported
                    # as such rather than as an unbound-name error
                    fd = open(fullpath, 'wb')
                    try:
                        fd.write(filedata)
                    finally:
                        fd.close()
                    executable = 'x' in ctx[f].flags()
                    os.chmod(fullpath, lfutil.getmode(executable))
                    lfutil.writestandin(rdst, lfutil.standin(f), newhash,
                        executable)
                    lfiletohash[f] = newhash
        else:
            # normal file
            dstfiles.append(f)

    def getfilectx(repo, memctx, f):
        if lfutil.isstandin(f):
            # if the file isn't in the manifest then it was removed
            # or renamed, raise IOError to indicate this
            srcfname = lfutil.splitstandin(f)
            try:
                fctx = ctx.filectx(srcfname)
            except error.LookupError:
                raise IOError()
            renamed = fctx.renamed()
            if renamed:
                # standin is always a lfile because lfileness
                # doesn't change after rename or copy
                renamed = lfutil.standin(renamed[0])

            # the standin's content is the hash recorded for the largefile
            return context.memfilectx(f, lfiletohash[srcfname],
                                      'l' in fctx.flags(),
                                      'x' in fctx.flags(), renamed)
        else:
            try:
                fctx = ctx.filectx(f)
            except error.LookupError:
                raise IOError()
            renamed = fctx.renamed()
            if renamed:
                renamed = renamed[0]

            data = fctx.data()
            if f == '.hgtags':
                # tags name source changesets; point them at the
                # corresponding destination changesets instead
                newdata = []
                for line in data.splitlines():
                    tagnode, name = line.split(' ', 1)
                    newdata.append('%s %s\n' %
                        (node.hex(revmap[node.bin(tagnode)]), name))
                data = ''.join(newdata)
            return context.memfilectx(f, data, 'l' in fctx.flags(),
                                      'x' in fctx.flags(), renamed)

    # Commit
    mctx = context.memctx(rdst, parents, ctx.description(), dstfiles,
                          getfilectx, ctx.user(), ctx.date(), ctx.extra())
    ret = rdst.commitctx(mctx)
    rdst.dirstate.setparents(ret)
    revmap[ctx.node()] = rdst.changelog.tip()
315
316 def _islfile(file, ctx, matcher, size):
317 '''
318 A file is a lfile if it matches a pattern or is over
319 the given size.
320 '''
321 # Never store hgtags or hgignore as lfiles
322 if file == '.hgtags' or file == '.hgignore' or file == '.hgsigs':
323 return False
324 if matcher and matcher(file):
325 return True
326 try:
327 return ctx.filectx(file).size() >= size * 1024 * 1024
328 except error.LookupError:
329 return False
330
def uploadlfiles(ui, rsrc, rdst, files):
    '''upload largefiles to the central store

    files is a sequence of largefile hashes.  Hashes the store opened for
    rdst already has are skipped; the others are located through rsrc and
    put into the store.

    Raises util.Abort if a hash that needs uploading cannot be found.'''

    # Don't upload locally. All largefiles are in the system wide cache
    # so the other repo can just get them from there.
    if not files or rdst.local():
        return

    store = basestore._openstore(rsrc, rdst, put=True)

    # Use one (translated) topic for every progress call: the closing call
    # must name the same topic as the updates.
    topic = _('uploading largefiles')
    files = [h for h in files if not store.exists(h)]
    total = len(files)
    for at, lfhash in enumerate(files):
        ui.progress(topic, at, unit='largefile', total=total)
        source = lfutil.findfile(rsrc, lfhash)
        if not source:
            raise util.Abort(
                _('Missing largefile %s needs to be uploaded') % lfhash)
        # XXX check for errors here
        store.put(source, lfhash)
    ui.progress(topic, None)
352
def verifylfiles(ui, repo, all=False, contents=False):
    '''Ask the store opened for repo to verify largefile revisions.

    By default only the current changeset ('.') is checked; with all set,
    every revision number of the repository is passed instead.  The
    contents flag is forwarded to the store unchanged.  Returns whatever
    the store's verify() returns.'''
    revs = ['.']
    if all:
        # A real list rather than an iterator, because a list is known to
        # work.
        revs = range(len(repo))

    return basestore._openstore(repo).verify(revs, contents=contents)
368
def cachelfiles(ui, repo, node):
    '''Make sure the largefiles of revision node can be found locally.

    Every largefile listed for the revision whose working copy is absent or
    has a different hash, and which lfutil.findfile() cannot locate, is
    requested from the store.

    Returns the store's result for that request, described as a tuple
    (cached, missing), or ([], []) when nothing had to be requested.'''
    wanted = []

    for lfile in lfutil.listlfiles(repo, node):
        expectedhash = repo[node][lfutil.standin(lfile)].data().strip()
        # if it exists and its hash matches, it might have been locally
        # modified before updating and the user chose 'local'.  in this case,
        # it will not be in any store, so don't look for it.
        if os.path.exists(repo.wjoin(lfile)):
            if expectedhash == lfutil.hashfile(repo.wjoin(lfile)):
                continue
        if lfutil.findfile(repo, expectedhash):
            continue
        wanted.append((lfile, expectedhash))

    if not wanted:
        return ([], [])

    return basestore._openstore(repo).get(wanted)
395
def updatelfiles(ui, repo, filelist=None, printmessage=True):
    '''Bring the largefiles in the working directory in line with their
    standins, holding the working-directory lock while doing so.

    filelist, when not None, restricts the update to the largefiles it
    contains.  When printmessage is true and there is something to look at,
    the needed largefiles are cached first and a summary of how many files
    were updated and removed is printed.'''
    wlock = repo.wlock()
    try:
        lfdirstate = lfutil.openlfdirstate(ui, repo)
        lfiles = set(lfutil.listlfiles(repo)) | set(lfdirstate)

        if filelist is not None:
            lfiles = [f for f in lfiles if f in filelist]

        printed = False
        if printmessage and lfiles:
            ui.status(_('getting changed largefiles\n'))
            printed = True
            cachelfiles(ui, repo, '.')

        updated, removed = 0, 0
        for lfile in lfiles:
            result = _updatelfile(repo, lfdirstate, lfile)
            # _updatelfile returns 1 (modified), -1 (removed), 0 (unchanged)
            # or None (largefile missing from the cache); handle None
            # explicitly instead of relying on how it compares to ints.
            if result is None:
                continue
            if result > 0:
                updated += result
            elif result < 0:
                removed -= result

        lfdirstate.write()
        # printed can only be set when printmessage is true
        if printed:
            ui.status(_('%d largefiles updated, %d removed\n') % (updated,
                removed))
    finally:
        wlock.release()
427
def _updatelfile(repo, lfdirstate, lfile):
    '''Synchronise one largefile with its standin and mirror the standin's
    dirstate entry into lfdirstate.

    The result is 1 when the largefile was modified, -1 when it was removed,
    0 when nothing changed and None when the required largefile could not
    be copied from the cache.'''
    status = 0
    lfpath = repo.wjoin(lfile)
    standinpath = repo.wjoin(lfutil.standin(lfile))
    if not os.path.exists(standinpath):
        # no standin any more: the largefile has to go as well
        if os.path.exists(lfpath):
            os.unlink(lfpath)
            status = -1
    else:
        if os.path.exists(standinpath + '.orig'):
            shutil.copyfile(lfpath, lfpath + '.orig')
        wanted = lfutil.readstandin(repo, lfile)
        if wanted != '':
            outdated = (not os.path.exists(lfpath)
                        or wanted != lfutil.hashfile(lfpath))
            if outdated:
                if not lfutil.copyfromcache(repo, wanted, lfile):
                    # don't try to set the mode or update the dirstate
                    return None
                status = 1
        # keep the largefile's mode identical to the standin's
        standinmode = os.stat(standinpath).st_mode
        if standinmode != os.stat(lfpath).st_mode:
            os.chmod(lfpath, standinmode)
            status = 1
    standinstate = repo.dirstate[lfutil.standin(lfile)]
    if standinstate == 'n':
        lfdirstate.normal(lfile)
    elif standinstate == 'r':
        lfdirstate.remove(lfile)
    elif standinstate == 'a':
        lfdirstate.add(lfile)
    elif standinstate == '?':
        try:
            # Mercurial >= 1.9
            lfdirstate.drop(lfile)
        except AttributeError:
            # Mercurial <= 1.8
            lfdirstate.forget(lfile)
    return status
471
472 # -- hg commands declarations ------------------------------------------------
473
474
# Maps each command name to a tuple (function, options, synopsis).  Each
# option appears to be (short name, long name, default, help text); the
# help texts are marked for translation like the synopsis.
cmdtable = {
    'lfconvert': (lfconvert,
                  [('s', 'size', 0,
                    _('All files over this size (in megabytes) '
                      'will be considered largefiles. This can also be specified in '
                      'your hgrc as [largefiles].size.')),
                   ('', 'tonormal', False,
                    _('Convert from a largefiles repo to a normal repo'))],
                  _('hg lfconvert SOURCE DEST [FILE ...]')),
    }