comparison hgext/largefiles/lfutil.py @ 15168:cfccd3bee7b3

hgext: add largefiles extension. This code has a number of contributors and a complicated history prior to its introduction; that history can be seen by visiting https://developers.kilnhg.com/Repo/Kiln/largefiles/largefiles and http://hg.gerg.ca/hg-bfiles, and by looking at the included copyright notices and contributors list.
author various
date Sat, 24 Sep 2011 17:35:45 +0200
parents
children aa262fff87ac
comparison
equal deleted inserted replaced
15167:8df4166b6f63 15168:cfccd3bee7b3
1 # Copyright 2009-2010 Gregory P. Ward
2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 # Copyright 2010-2011 Fog Creek Software
4 # Copyright 2010-2011 Unity Technologies
5 #
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
8
9 '''largefiles utility code: must not import other modules in this package.'''
10
11 import os
12 import errno
13 import inspect
14 import shutil
15 import stat
16 import hashlib
17
18 from mercurial import cmdutil, dirstate, httpconnection, match as match_, \
19 url as url_, util
20 from mercurial.i18n import _
21
22 try:
23 from mercurial import scmutil
24 except ImportError:
25 pass
26
# Name of the directory that holds the standin files; standin paths are
# built as shortname + '/' + <largefile path>.
shortname = '.hglf'
# Name used for the config section, for the admin directory under the
# repository's metadata directory, and for the default cache directories.
longname = 'largefiles'
29
30
31 # -- Portability wrappers ----------------------------------------------
32
# Probe the signature of dirstate.status() once, at import time: when it
# has a 'subrepos' argument (Mercurial >= 1.5), walk() is called with an
# extra list argument as well.
_walktakessubrepos = 'subrepos' in inspect.getargspec(
    dirstate.dirstate.status)[0]

def dirstate_walk(dirstate, matcher, unknown=False, ignored=False):
    '''Call dirstate.walk() with the argument list that matches the
    running Mercurial version and return its result.'''
    if _walktakessubrepos:
        # for Mercurial >= 1.5: pass an empty list in the extra slot
        return dirstate.walk(matcher, [], unknown, ignored)
    # for Mercurial <= 1.4
    return dirstate.walk(matcher, unknown, ignored)
41
def repo_add(repo, list):
    '''Schedule the files in list for addition, using repo.add when the
    repository object has it and the working context's add otherwise.
    Returns whatever the underlying add() returns.'''
    try:
        legacyadd = repo.add
    except AttributeError:
        # Mercurial >= 1.6: add() lives on the working context
        return repo[None].add(list)
    # Mercurial <= 1.5
    return legacyadd(list)
50
51 def repo_remove(repo, list, unlink=False):
52 try:
53 # Mercurial <= 1.5
54 remove = repo.remove
55 except AttributeError:
56 # Mercurial >= 1.6
57 try:
58 # Mercurial <= 1.8
59 remove = repo[None].remove
60 except AttributeError:
61 # Mercurial >= 1.9
62 def remove(list, unlink):
63 wlock = repo.wlock()
64 try:
65 if unlink:
66 for f in list:
67 try:
68 util.unlinkpath(repo.wjoin(f))
69 except OSError, inst:
70 if inst.errno != errno.ENOENT:
71 raise
72 repo[None].forget(list)
73 finally:
74 wlock.release()
75
76 return remove(list, unlink=unlink)
77
def repo_forget(repo, list):
    '''Forget the files in list, using repo.forget when the repository
    object has it and the working context's forget otherwise. Returns
    whatever the underlying forget() returns.'''
    try:
        legacyforget = repo.forget
    except AttributeError:
        # Mercurial >= 1.6: forget() lives on the working context
        return repo[None].forget(list)
    # Mercurial <= 1.5
    return legacyforget(list)
86
def findoutgoing(repo, remote, force):
    '''Compute what is outgoing from repo to remote with whichever
    discovery API the running Mercurial provides.

    Three APIs are tried in turn, falling through on AttributeError.
    Note that force is not passed on the oldest code path.'''
    try:
        # Mercurial <= 1.5: the repository does the discovery itself
        return repo.findoutgoing(remote)
    except AttributeError:
        pass

    from mercurial import discovery
    try:
        # Mercurial 1.6 - 1.8
        return discovery.findoutgoing(repo, remote, force=force)
    except AttributeError:
        pass

    # Mercurial >= 1.9: derive the outgoing set from the common set
    common, _anyinc, _heads = discovery.findcommonincoming(repo, remote,
                                                           force=force)
    return repo.changelog.findmissing(common)
101
102 # -- Private worker functions ------------------------------------------
103
# Pick the hardlink primitive for this platform; link() below calls it.
if os.name == 'nt':
    from mercurial import win32
    try:
        linkfn = win32.oslink
    except AttributeError:
        # The function is spelled os_link when oslink does not exist.
        # Only a missing attribute should trigger this fallback; a bare
        # except would also hide unrelated errors and KeyboardInterrupt.
        linkfn = win32.os_link
else:
    linkfn = os.link
112
def link(src, dest):
    '''Make dest a hardlink to src; when hardlinking fails with OSError,
    copy the file contents and the permission bits instead.'''
    try:
        linkfn(src, dest)
    except OSError:
        hardlinked = False
    else:
        hardlinked = True

    if not hardlinked:
        # If hardlinks fail fall back on copy
        shutil.copyfile(src, dest)
        os.chmod(dest, os.stat(src).st_mode)
120
def systemcachepath(ui, hash):
    '''Return the path of the file named hash inside the system cache.

    The cache directory is taken from the 'systemcache' option of the
    largefiles config section when it is set. Otherwise a default is
    used: %LOCALAPPDATA% (or %APPDATA%) plus 'largefiles' on Windows,
    and $HOME/.largefiles on POSIX systems.

    Raises util.Abort on any other operating system, or when the
    environment variable needed for the default location is not set.'''
    path = ui.config(longname, 'systemcache', None)
    if path:
        return os.path.join(path, hash)

    if os.name == 'nt':
        base = os.getenv('LOCALAPPDATA') or os.getenv('APPDATA')
        subdir = longname
        needed = 'LOCALAPPDATA or APPDATA'
    elif os.name == 'posix':
        base = os.getenv('HOME')
        subdir = '.' + longname
        needed = 'HOME'
    else:
        raise util.Abort(_('Unknown operating system: %s\n') % os.name)

    if not base:
        # os.path.join(None, ...) would die with an unhelpful traceback
        raise util.Abort(_('cannot locate the system cache: %s is not set')
                         % needed)
    return os.path.join(base, subdir, hash)
134
def insystemcache(ui, hash):
    '''Return True if a file named hash exists in the system cache.'''
    candidate = systemcachepath(ui, hash)
    return os.path.exists(candidate)
137
def findfile(repo, hash):
    '''Return the path of the cached copy of the largefile named hash,
    looking in the repository cache first and in the system cache second.
    Returns None when neither cache has it.'''
    if incache(repo, hash):
        repo.ui.note(_('Found %s in cache\n') % hash)
        found = cachepath(repo, hash)
    elif insystemcache(repo.ui, hash):
        repo.ui.note(_('Found %s in system cache\n') % hash)
        found = systemcachepath(repo.ui, hash)
    else:
        found = None
    return found
146
class largefiles_dirstate(dirstate.dirstate):
    '''dirstate subclass that converts every file name to its unixpath()
    form before handing it to the base class, so that entries are always
    stored and looked up under one canonical spelling.'''

    def __getitem__(self, key):
        base = super(largefiles_dirstate, self)
        return base.__getitem__(unixpath(key))

    def normal(self, f):
        base = super(largefiles_dirstate, self)
        return base.normal(unixpath(f))

    def remove(self, f):
        base = super(largefiles_dirstate, self)
        return base.remove(unixpath(f))

    def add(self, f):
        base = super(largefiles_dirstate, self)
        return base.add(unixpath(f))

    def drop(self, f):
        base = super(largefiles_dirstate, self)
        return base.drop(unixpath(f))

    def forget(self, f):
        base = super(largefiles_dirstate, self)
        return base.forget(unixpath(f))
160
161 def openlfdirstate(ui, repo):
162 '''
163 Return a dirstate object that tracks big files: i.e. its root is the
164 repo root, but it is saved in .hg/largefiles/dirstate.
165 '''
166 admin = repo.join(longname)
167 try:
168 # Mercurial >= 1.9
169 opener = scmutil.opener(admin)
170 except ImportError:
171 # Mercurial <= 1.8
172 opener = util.opener(admin)
173 if hasattr(repo.dirstate, '_validate'):
174 lfdirstate = largefiles_dirstate(opener, ui, repo.root,
175 repo.dirstate._validate)
176 else:
177 lfdirstate = largefiles_dirstate(opener, ui, repo.root)
178
179 # If the largefiles dirstate does not exist, populate and create it. This
180 # ensures that we create it on the first meaningful largefiles operation in
181 # a new clone. It also gives us an easy way to forcibly rebuild largefiles
182 # state:
183 # rm .hg/largefiles/dirstate && hg status
184 # Or even, if things are really messed up:
185 # rm -rf .hg/largefiles && hg status
186 if not os.path.exists(os.path.join(admin, 'dirstate')):
187 util.makedirs(admin)
188 matcher = getstandinmatcher(repo)
189 for standin in dirstate_walk(repo.dirstate, matcher):
190 lfile = splitstandin(standin)
191 hash = readstandin(repo, lfile)
192 lfdirstate.normallookup(lfile)
193 try:
194 if hash == hashfile(lfile):
195 lfdirstate.normal(lfile)
196 except IOError, err:
197 if err.errno != errno.ENOENT:
198 raise
199
200 lfdirstate.write()
201
202 return lfdirstate
203
def lfdirstate_status(lfdirstate, repo, rev):
    '''Return (modified, added, removed, missing, unknown, ignored, clean)
    for the largefiles tracked by lfdirstate.

    Files the dirstate is unsure about are settled by comparing the hash
    stored in their standin at rev with the hash of the working copy
    file; the ones found clean are recorded as such and the dirstate is
    written back. The working-directory lock is held throughout.'''
    wlock = repo.wlock()
    try:
        everything = match_.always(repo.root, repo.getcwd())
        (unsure, modified, added, removed,
         missing, unknown, ignored, clean) = lfdirstate.status(
            everything, [], False, False, False)
        for lfile in unsure:
            recorded = repo[rev][standin(lfile)].data().strip()
            if recorded == hashfile(repo.wjoin(lfile)):
                clean.append(lfile)
                lfdirstate.normal(lfile)
            else:
                modified.append(lfile)
        lfdirstate.write()
    finally:
        wlock.release()
    return (modified, added, removed, missing, unknown, ignored, clean)
221
def listlfiles(repo, rev=None, matcher=None):
    '''Return the names of the largefiles in the working copy (rev is
    None) or in the given changeset. matcher selects the standins to
    consider and defaults to the standin matcher for the whole repo.'''
    if matcher is None:
        matcher = getstandinmatcher(repo)

    lfiles = []
    for f in repo[rev].walk(matcher):
        # ignore unknown files in working directory
        if rev is None and repo.dirstate[f] == '?':
            continue
        lfiles.append(splitstandin(f))
    return lfiles
231
def incache(repo, hash):
    '''Return True if a file named hash exists in the repository cache.'''
    candidate = cachepath(repo, hash)
    return os.path.exists(candidate)
234
def createdir(dir):
    '''Create dir, including missing parents, unless the path already
    exists.'''
    if os.path.exists(dir):
        return
    os.makedirs(dir)
238
def cachepath(repo, hash):
    '''Return the path of the file named hash in the repository cache,
    i.e. inside the largefiles directory that repo.join() resolves.'''
    relative = os.path.join(longname, hash)
    return repo.join(relative)
241
def copyfromcache(repo, hash, filename):
    '''Copy the largefile named hash from the repository cache or the
    system cache to filename in the working directory, creating the
    destination directory if needed.

    Returns False, without raising, when the file is in neither cache;
    it is meant to be called only after ensuring that the needed
    largefile exists in a cache. Returns True after copying.'''
    source = findfile(repo, hash)
    if source is None:
        return False
    destination = repo.wjoin(filename)
    util.makedirs(os.path.dirname(destination))
    shutil.copy(source, destination)
    return True
253
def copytocache(repo, rev, file, uploaded=False):
    '''Put the working copy of the largefile file into the repository
    cache under the hash read from its standin, unless the cache already
    has that hash. rev and uploaded are accepted but not used here.'''
    hash = readstandin(repo, file)
    if not incache(repo, hash):
        copytocacheabsolute(repo, repo.wjoin(file), hash)
259
def copytocacheabsolute(repo, file, hash):
    '''Store file in the repository cache under the name hash.

    When the system cache already holds that hash, the repository cache
    entry is linked from it. Otherwise file is copied in together with
    its permission bits and the new entry is then linked into the system
    cache.'''
    createdir(os.path.dirname(cachepath(repo, hash)))
    if not insystemcache(repo.ui, hash):
        destination = cachepath(repo, hash)
        shutil.copyfile(file, destination)
        os.chmod(destination, os.stat(file).st_mode)
        linktosystemcache(repo, hash)
    else:
        link(systemcachepath(repo.ui, hash), cachepath(repo, hash))
268
def linktosystemcache(repo, hash):
    '''Link the repository cache entry named hash into the system cache,
    creating the system cache directory first if necessary.'''
    target = systemcachepath(repo.ui, hash)
    createdir(os.path.dirname(target))
    link(cachepath(repo, hash), systemcachepath(repo.ui, hash))
272
def getstandinmatcher(repo, pats=[], opts={}):
    '''Return a match object that applies pats to the standin directory.

    Without patterns the matcher covers the whole standin directory, or
    matches nothing at all if that directory does not exist. The default
    pats and opts objects are never modified here.'''
    standindir = repo.pathto(shortname)
    if not pats:
        if not os.path.isdir(standindir):
            # no patterns and no standin dir: return matcher that matches
            # nothing
            match = match_.match(repo.root, None, [], exact=True)
            match.matchfn = lambda f: False
            return match
        # no patterns: relative to repo root
        pats = [standindir]
    else:
        # patterns supplied: search standin directory relative to current dir
        cwd = repo.getcwd()
        if os.path.isabs(cwd):
            # cwd is an absolute path for hg -R <reponame>
            # work relative to the repository root in this case
            cwd = ''
        pats = [os.path.join(standindir, cwd, pat) for pat in pats]
    return getmatcher(repo, pats, opts, showbad=False)
293
def getmatcher(repo, pats=[], opts={}, showbad=True):
    '''Wrapper around scmutil.match() that adds showbad: if false, neuter
    the match object\'s bad() method so it does not print any warnings
    about missing files or directories.

    Falls back to cmdutil.match() when scmutil is not available.'''
    try:
        # Mercurial >= 1.9
        match = scmutil.match(repo[None], pats, opts)
    except (ImportError, NameError):
        # Mercurial <= 1.8. The guarded import of scmutil at the top of
        # this module leaves the name unbound when it fails eagerly, which
        # shows up here as NameError; ImportError is kept for a lazily
        # imported module (presumably demandimport -- confirm).
        match = cmdutil.match(repo, pats, opts)

    if not showbad:
        match.bad = lambda f, msg: None
    return match
308
def composestandinmatcher(repo, rmatcher):
    '''Return a matcher that accepts a standin only if rmatcher accepts
    the largefile that standin stands for. The files listed in rmatcher
    are used as the patterns of the standin matcher.'''
    smatcher = getstandinmatcher(repo, rmatcher.files())
    standinmatch = smatcher.matchfn

    def composed_matchfn(f):
        accepted = standinmatch(f)
        if accepted:
            accepted = rmatcher.matchfn(splitstandin(f))
        return accepted

    smatcher.matchfn = composed_matchfn
    return smatcher
320
def standin(filename):
    '''Return the repo-relative, slash-separated path of the standin for
    the largefile filename.'''
    # Notes:
    # 1) The result is deliberately repo-relative; callers that need an
    #    absolute path are expected to apply repo.wjoin() themselves.
    # 2) Local separators are turned into '/' first, in case the name came
    #    from an external source (like the command line), and the pieces
    #    are joined with '/' because that is what dirstate uses, even on
    #    Windows.
    normalized = filename.replace(os.sep, '/')
    return '/'.join([shortname, normalized])
332
def isstandin(filename):
    '''Return True if filename lies inside the standin directory.
    filename must be in Mercurial\'s internal form (slash-separated).'''
    prefix = shortname + '/'
    return filename[:len(prefix)] == prefix
337
def splitstandin(filename):
    '''Return the largefile path that the standin filename stands for, or
    None if filename is not inside the standin directory.'''
    # Local separators are turned into '/' first, in case the name came
    # from an external source (like the command line); the split is on '/'
    # because that is what dirstate uses, even on Windows.
    head, slash, tail = filename.replace(os.sep, '/').partition('/')
    if slash and head == shortname:
        return tail
    return None
347
def updatestandin(repo, standin):
    '''Rewrite the given standin from the current content and executable
    bits of its largefile in the working directory. Nothing happens when
    the largefile does not exist.'''
    lfilepath = repo.wjoin(splitstandin(standin))
    if not os.path.exists(lfilepath):
        return
    writestandin(repo, standin, hashfile(lfilepath),
                 getexecutable(lfilepath))
354
def readstandin(repo, filename, node=None):
    '''Return the content of the standin for filename, stripped of
    surrounding whitespace, as found at the given node (the working
    directory if no node is given).'''
    ctx = repo[node]
    fctx = ctx[standin(filename)]
    return fctx.data().strip()
359
def writestandin(repo, standin, hash, executable):
    '''Write hash to <repo.root>/<standin>, with file permissions chosen
    according to executable.'''
    target = repo.wjoin(standin)
    writehash(hash, target, executable)
363
def copyandhash(instream, outfile):
    '''Write every chunk yielded by instream (an iterable) to outfile
    while feeding it to a SHA-1 hasher. outfile is closed once the
    stream is exhausted; the binary digest is returned.'''
    sha = util.sha1('')
    for chunk in instream:
        sha.update(chunk)
        outfile.write(chunk)

    # Closing a file this function did not open is questionable, but
    # outfile only exists to receive this copy, so it is done here for
    # convenience.
    outfile.close()

    digest = sha.digest()
    return digest
379
def hashrepofile(repo, file):
    '''Return hashfile() of file, resolved inside the working directory
    of repo.'''
    absolute = repo.wjoin(file)
    return hashfile(absolute)
382
def hashfile(file):
    '''Return the hex SHA-1 of the content of file, or the empty string
    if file does not exist.'''
    if not os.path.exists(file):
        return ''
    hasher = util.sha1('')
    fd = open(file, 'rb')
    try:
        for data in blockstream(fd):
            hasher.update(data)
    finally:
        # blockstream() closes fd only after a complete read; make sure
        # the handle is released when reading or hashing fails as well.
        fd.close()
    return hasher.hexdigest()
392
class limitreader(object):
    '''File-like wrapper that lets at most limit bytes be read from f.'''

    def __init__(self, f, limit):
        # f: the wrapped object; only its read(length) method is used
        # limit: number of bytes that may still be handed out
        self.f = f
        self.limit = limit

    def read(self, length):
        '''Read up to length bytes, never exceeding the remaining limit.
        Returns the empty string once the limit is used up.'''
        if self.limit == 0:
            return ''
        length = min(length, self.limit)
        data = self.f.read(length)
        # Charge only what was actually returned: the wrapped object may
        # hand back fewer bytes than requested, and charging the full
        # request would cut the stream short.
        self.limit -= len(data)
        return data

    def close(self):
        '''Do nothing; the wrapped object is left open.'''
        pass
407
def blockstream(infile, blocksize=128 * 1024):
    """Generator that yields infile's data in pieces of at most blocksize,
    then closes infile once an empty read signals the end."""
    chunk = infile.read(blocksize)
    while chunk:
        yield chunk
        chunk = infile.read(blocksize)
    # Closing a file this function did not open is questionable, but it
    # is done here for convenience.
    infile.close()
417
def readhash(filename):
    '''Return the first 40 bytes of filename, which are expected to be a
    hex hash. Raises util.Abort if the file is shorter than that.'''
    fp = open(filename, 'rb')
    digest = fp.read(40)
    fp.close()
    if len(digest) >= 40:
        return digest
    raise util.Abort(_('bad hash in \'%s\' (only %d bytes long)')
                     % (filename, len(digest)))
426
def writehash(hash, filename, executable):
    '''Replace filename with a file holding hash followed by a newline,
    creating parent directories as needed, then set its permission bits
    according to executable.'''
    util.makedirs(os.path.dirname(filename))
    if os.path.exists(filename):
        os.unlink(filename)
    wfile = open(filename, 'wb')

    try:
        for piece in (hash, '\n'):
            wfile.write(piece)
    finally:
        wfile.close()

    if not os.path.exists(filename):
        return
    os.chmod(filename, getmode(executable))
440
def getexecutable(filename):
    '''Return a true value only if filename is executable by user, group
    and others alike. The value is the masked mode bits, not a bool.'''
    mode = os.stat(filename).st_mode
    if not mode & stat.S_IXUSR:
        return 0
    if not mode & stat.S_IXGRP:
        return 0
    return mode & stat.S_IXOTH
445
def getmode(executable):
    '''Return the permission bits for a written file: rwxr-xr-x if
    executable is true, rw-r--r-- otherwise.'''
    mode = stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH
    if executable:
        mode |= stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
    return mode
451
def urljoin(first, second, *arg):
    '''Join the given URL pieces from left to right so that exactly one
    '/' is inserted at each seam: a '/' is appended to the left side if
    it lacks one, and a single leading '/' is dropped from the right.'''
    url = first
    for piece in (second,) + arg:
        if not url.endswith('/'):
            url += '/'
        if piece.startswith('/'):
            piece = piece[1:]
        url += piece
    return url
464
def hexsha1(data):
    """Return the hex-encoded SHA-1 of everything util.filechunkiter()
    yields for the file-like object data."""
    sha = hashlib.sha1()
    feed = sha.update
    for piece in util.filechunkiter(data):
        feed(piece)
    return sha.hexdigest()
472
def httpsendfile(ui, filename):
    '''Return an httpsendfile object for filename opened in 'rb' mode,
    built with whichever constructor the running Mercurial provides.'''
    try:
        # Mercurial >= 1.9
        return httpconnection.httpsendfile(ui, filename, 'rb')
    except ImportError:
        pass

    takesui = 'ui' in inspect.getargspec(url_.httpsendfile.__init__)[0]
    if takesui:
        # Mercurial == 1.8
        return url_.httpsendfile(ui, filename, 'rb')
    # Mercurial <= 1.7
    return url_.httpsendfile(filename, 'rb')
484
# Normalize a path and spell it with '/' separators. This gives the
# lfdirstate one canonical form for every file name.
def unixpath(path):
    normalized = os.path.normpath(path)
    return '/'.join(normalized.split(os.sep))
489
def islfilesrepo(repo):
    '''Return True if repo lists 'largefiles' among its requirements and
    at least one of its store data files has the standin directory in
    its name.'''
    if 'largefiles' not in repo.requirements:
        return False
    marker = shortname + '/'
    return any_(marker in f[0] for f in repo.store.datafiles())
493
def any_(gen):
    '''Return True as soon as an item of gen is true, without consuming
    the rest; return False if there is no such item.'''
    for item in gen:
        if not item:
            continue
        return True
    return False
499
class storeprotonotcapable(Exception):
    '''Error carrying a collection of store types in its storetypes
    attribute.

    Derives from Exception rather than BaseException: it is an ordinary
    error, and BaseException is meant for interpreter-level exits that
    generic "except Exception" handlers must not intercept.'''

    def __init__(self, storetypes):
        self.storetypes = storetypes