Mercurial > hg
comparison hgext/largefiles/lfutil.py @ 15168:cfccd3bee7b3
hgext: add largefiles extension
This code has a number of contributors and a complicated history prior to its
introduction that can be seen by visiting:
https://developers.kilnhg.com/Repo/Kiln/largefiles/largefiles
http://hg.gerg.ca/hg-bfiles
and looking at the included copyright notices and contributors list.
author | various |
---|---|
date | Sat, 24 Sep 2011 17:35:45 +0200 |
parents | |
children | aa262fff87ac |
comparison
equal
deleted
inserted
replaced
15167:8df4166b6f63 | 15168:cfccd3bee7b3 |
---|---|
1 # Copyright 2009-2010 Gregory P. Ward | |
2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated | |
3 # Copyright 2010-2011 Fog Creek Software | |
4 # Copyright 2010-2011 Unity Technologies | |
5 # | |
6 # This software may be used and distributed according to the terms of the | |
7 # GNU General Public License version 2 or any later version. | |
8 | |
9 '''largefiles utility code: must not import other modules in this package.''' | |
10 | |
11 import os | |
12 import errno | |
13 import inspect | |
14 import shutil | |
15 import stat | |
16 import hashlib | |
17 | |
18 from mercurial import cmdutil, dirstate, httpconnection, match as match_, \ | |
19 url as url_, util | |
20 from mercurial.i18n import _ | |
21 | |
22 try: | |
23 from mercurial import scmutil | |
24 except ImportError: | |
25 pass | |
26 | |
# Directory (relative to the repo root) that holds the standin files.
shortname = '.hglf'
# Name used for the repo requirement, the cache directory under .hg/,
# and the config section.
longname = 'largefiles'
29 | |
30 | |
31 # -- Portability wrappers ---------------------------------------------- | |
32 | |
# Probe the installed Mercurial's dirstate API: 1.5 added a 'subrepos'
# argument.  NOTE(review): the witness inspected is status(), presumably
# as a proxy for walk() growing the same argument -- confirm against the
# targeted Mercurial versions.
if 'subrepos' in inspect.getargspec(dirstate.dirstate.status)[0]:
    # for Mercurial >= 1.5
    def dirstate_walk(dirstate, matcher, unknown=False, ignored=False):
        # walk(match, subrepos, unknown, ignored)
        return dirstate.walk(matcher, [], unknown, ignored)
else:
    # for Mercurial <= 1.4
    def dirstate_walk(dirstate, matcher, unknown=False, ignored=False):
        # walk(match, unknown, ignored)
        return dirstate.walk(matcher, unknown, ignored)
41 | |
def repo_add(repo, list):
    '''Schedule the given files for addition, dispatching on the API of
    the Mercurial version in use.'''
    if hasattr(repo, 'add'):
        # Mercurial <= 1.5: add() lives on the repository object
        return repo.add(list)
    # Mercurial >= 1.6: add() moved to the working-directory context
    return repo[None].add(list)
50 | |
def repo_remove(repo, list, unlink=False):
    '''Remove the given files from the repository, dispatching on the
    API of the Mercurial version in use.  If unlink is true, also
    delete them from the working directory.'''
    try:
        # Mercurial <= 1.5
        remove = repo.remove
    except AttributeError:
        # Mercurial >= 1.6
        try:
            # Mercurial <= 1.8
            remove = repo[None].remove
        except AttributeError:
            # Mercurial >= 1.9: no remove() helper; emulate it with
            # unlink + forget under the working-directory lock
            def remove(list, unlink):
                wlock = repo.wlock()
                try:
                    if unlink:
                        for f in list:
                            try:
                                util.unlinkpath(repo.wjoin(f))
                            except OSError, inst:
                                # an already-missing file is fine;
                                # re-raise anything else
                                if inst.errno != errno.ENOENT:
                                    raise
                    repo[None].forget(list)
                finally:
                    wlock.release()

    return remove(list, unlink=unlink)
77 | |
def repo_forget(repo, list):
    '''Forget the given files, dispatching on the API of the Mercurial
    version in use.'''
    if hasattr(repo, 'forget'):
        # Mercurial <= 1.5
        return repo.forget(list)
    # Mercurial >= 1.6
    return repo[None].forget(list)
86 | |
def findoutgoing(repo, remote, force):
    '''Return the changesets present locally but missing from remote,
    dispatching on the discovery API of the Mercurial version in use.'''
    # First attempt is for Mercurial <= 1.5 second is for >= 1.6
    try:
        return repo.findoutgoing(remote)
    except AttributeError:
        # discovery module appeared in 1.6; import lazily so older
        # versions never touch it
        from mercurial import discovery
        try:
            # Mercurial <= 1.8
            return discovery.findoutgoing(repo, remote, force=force)
        except AttributeError:
            # Mercurial >= 1.9: compute the common set, then ask the
            # changelog for everything not reachable from it
            common, _anyinc, _heads = discovery.findcommonincoming(repo,
                remote, force=force)
            return repo.changelog.findmissing(common)
101 | |
102 # -- Private worker functions ------------------------------------------ | |
103 | |
# Select the platform hardlink function once at import time.
if os.name == 'nt':
    from mercurial import win32
    try:
        # newer Mercurial name
        linkfn = win32.oslink
    except AttributeError:
        # older Mercurial name.  Catch only AttributeError: the original
        # bare "except:" would also have swallowed KeyboardInterrupt and
        # unrelated errors.
        linkfn = win32.os_link
else:
    linkfn = os.link
112 | |
def link(src, dest):
    '''Hardlink src to dest; degrade to a plain copy (preserving the
    source's mode bits) when the filesystem refuses the link.'''
    try:
        linkfn(src, dest)
        return
    except OSError:
        # If hardlinks fail fall back on copy
        pass
    shutil.copyfile(src, dest)
    os.chmod(dest, os.stat(src).st_mode)
120 | |
def systemcachepath(ui, hash):
    '''Return the path of the largefile with the given hash inside the
    per-user system cache, honouring the largefiles.systemcache config
    option when set.'''
    configured = ui.config(longname, 'systemcache', None)
    if configured:
        return os.path.join(configured, hash)
    if os.name == 'nt':
        appdata = os.getenv('LOCALAPPDATA') or os.getenv('APPDATA')
        return os.path.join(appdata, longname, hash)
    if os.name == 'posix':
        return os.path.join(os.getenv('HOME'), '.' + longname, hash)
    raise util.Abort(_('Unknown operating system: %s\n') % os.name)
134 | |
def insystemcache(ui, hash):
    '''True if the largefile with this hash is already present in the
    per-user system cache.'''
    path = systemcachepath(ui, hash)
    return os.path.exists(path)
137 | |
def findfile(repo, hash):
    '''Locate the largefile with the given hash: prefer the repo-local
    cache, then the system cache; return None when it is in neither.'''
    if incache(repo, hash):
        path = cachepath(repo, hash)
        repo.ui.note(_('Found %s in cache\n') % hash)
        return path
    if insystemcache(repo.ui, hash):
        path = systemcachepath(repo.ui, hash)
        repo.ui.note(_('Found %s in system cache\n') % hash)
        return path
    return None
146 | |
class largefiles_dirstate(dirstate.dirstate):
    '''dirstate subclass that canonicalizes every incoming path to the
    slash-separated (unix) form before delegating to dirstate proper.'''
    def __getitem__(self, key):
        key = unixpath(key)
        return super(largefiles_dirstate, self).__getitem__(key)
    def normal(self, f):
        f = unixpath(f)
        return super(largefiles_dirstate, self).normal(f)
    def remove(self, f):
        f = unixpath(f)
        return super(largefiles_dirstate, self).remove(f)
    def add(self, f):
        f = unixpath(f)
        return super(largefiles_dirstate, self).add(f)
    def drop(self, f):
        f = unixpath(f)
        return super(largefiles_dirstate, self).drop(f)
    def forget(self, f):
        f = unixpath(f)
        return super(largefiles_dirstate, self).forget(f)
160 | |
def openlfdirstate(ui, repo):
    '''
    Return a dirstate object that tracks big files: i.e. its root is the
    repo root, but it is saved in .hg/largefiles/dirstate.
    '''
    admin = repo.join(longname)
    try:
        # Mercurial >= 1.9
        opener = scmutil.opener(admin)
    except ImportError:
        # Mercurial <= 1.8
        # NOTE(review): if the top-level "from mercurial import scmutil"
        # failed, the line above raises NameError (the name is unbound),
        # not ImportError, so this fallback may be unreachable -- verify
        # against Mercurial <= 1.8.
        opener = util.opener(admin)
    # 1.7 added a path validator argument to dirstate; pass it through
    # when the installed version has one
    if hasattr(repo.dirstate, '_validate'):
        lfdirstate = largefiles_dirstate(opener, ui, repo.root,
            repo.dirstate._validate)
    else:
        lfdirstate = largefiles_dirstate(opener, ui, repo.root)

    # If the largefiles dirstate does not exist, populate and create it. This
    # ensures that we create it on the first meaningful largefiles operation in
    # a new clone. It also gives us an easy way to forcibly rebuild largefiles
    # state:
    #   rm .hg/largefiles/dirstate && hg status
    # Or even, if things are really messed up:
    #   rm -rf .hg/largefiles && hg status
    if not os.path.exists(os.path.join(admin, 'dirstate')):
        util.makedirs(admin)
        matcher = getstandinmatcher(repo)
        for standin in dirstate_walk(repo.dirstate, matcher):
            lfile = splitstandin(standin)
            hash = readstandin(repo, lfile)
            # track the largefile; promote to clean ('normal') only when
            # its working-copy content matches the standin's hash
            lfdirstate.normallookup(lfile)
            try:
                if hash == hashfile(lfile):
                    lfdirstate.normal(lfile)
            except IOError, err:
                # a largefile missing from the working copy simply stays
                # in the normallookup state
                if err.errno != errno.ENOENT:
                    raise

        lfdirstate.write()

    return lfdirstate
203 | |
def lfdirstate_status(lfdirstate, repo, rev):
    '''Run status on lfdirstate, resolving each file of uncertain state
    by comparing its working-copy hash with the standin's hash at rev.
    Returns (modified, added, removed, missing, unknown, ignored, clean).'''
    wlock = repo.wlock()
    try:
        match = match_.always(repo.root, repo.getcwd())
        (unsure, modified, added, removed,
         missing, unknown, ignored, clean) = lfdirstate.status(
            match, [], False, False, False)
        for lfile in unsure:
            standinhash = repo[rev][standin(lfile)].data().strip()
            if standinhash != hashfile(repo.wjoin(lfile)):
                modified.append(lfile)
            else:
                clean.append(lfile)
                lfdirstate.normal(lfile)
        lfdirstate.write()
    finally:
        wlock.release()
    return (modified, added, removed, missing, unknown, ignored, clean)
221 | |
def listlfiles(repo, rev=None, matcher=None):
    '''list largefiles in the working copy or specified changeset'''
    if matcher is None:
        matcher = getstandinmatcher(repo)

    lfiles = []
    for f in repo[rev].walk(matcher):
        # ignore unknown files in working directory
        if rev is not None or repo.dirstate[f] != '?':
            lfiles.append(splitstandin(f))
    return lfiles
231 | |
def incache(repo, hash):
    '''True if the largefile with this hash is in the repo-local cache.'''
    path = cachepath(repo, hash)
    return os.path.exists(path)
234 | |
def createdir(dir):
    '''Create dir (and missing parents); an already-existing path is not
    an error.

    Uses EAFP instead of the previous exists()+makedirs() sequence, which
    could raise if another process created the directory between the two
    calls.'''
    try:
        os.makedirs(dir)
    except OSError as err:
        # already existing (as before, whether file or directory) is
        # silently accepted; anything else is a real error
        if err.errno != errno.EEXIST:
            raise
238 | |
def cachepath(repo, hash):
    '''Path of the largefile with this hash in the repo-local cache
    (<repo>/.hg/largefiles/<hash>).'''
    relpath = os.path.join(longname, hash)
    return repo.join(relpath)
241 | |
def copyfromcache(repo, hash, filename):
    '''copyfromcache copies the specified largefile from the repo or system
    cache to the specified location in the repository. It will not throw an
    exception on failure, as it is meant to be called only after ensuring that
    the needed largefile exists in the cache.'''
    path = findfile(repo, hash)
    if path is None:
        return False
    wdest = repo.wjoin(filename)
    util.makedirs(os.path.dirname(wdest))
    shutil.copy(path, wdest)
    return True
253 | |
def copytocache(repo, rev, file, uploaded=False):
    '''Copy the largefile behind the given standin into the repo-local
    cache unless it is already there.  (rev and uploaded are accepted
    for interface compatibility and not used here.)'''
    hash = readstandin(repo, file)
    if not incache(repo, hash):
        copytocacheabsolute(repo, repo.wjoin(file), hash)
259 | |
def copytocacheabsolute(repo, file, hash):
    '''Store the file at the given absolute path in the repo-local cache
    under its hash, hardlinking from the system cache when possible and
    populating the system cache otherwise.'''
    dest = cachepath(repo, hash)
    createdir(os.path.dirname(dest))
    if insystemcache(repo.ui, hash):
        link(systemcachepath(repo.ui, hash), dest)
    else:
        shutil.copyfile(file, dest)
        os.chmod(dest, os.stat(file).st_mode)
        linktosystemcache(repo, hash)
268 | |
def linktosystemcache(repo, hash):
    '''Hardlink (or copy, via link()) the cached largefile into the
    per-user system cache.'''
    syspath = systemcachepath(repo.ui, hash)
    createdir(os.path.dirname(syspath))
    link(cachepath(repo, hash), syspath)
272 | |
def getstandinmatcher(repo, pats=None, opts=None):
    '''Return a match object that applies pats to the standin directory.

    pats and opts now default to None instead of [] and {}: mutable
    default arguments are shared between calls.  Passing [] / {}
    explicitly behaves exactly as before.
    '''
    if pats is None:
        pats = []
    if opts is None:
        opts = {}
    standindir = repo.pathto(shortname)
    if pats:
        # patterns supplied: search standin directory relative to current dir
        cwd = repo.getcwd()
        if os.path.isabs(cwd):
            # cwd is an absolute path for hg -R <reponame>
            # work relative to the repository root in this case
            cwd = ''
        pats = [os.path.join(standindir, cwd, pat) for pat in pats]
    elif os.path.isdir(standindir):
        # no patterns: relative to repo root
        pats = [standindir]
    else:
        # no patterns and no standin dir: return matcher that matches nothing
        match = match_.match(repo.root, None, [], exact=True)
        match.matchfn = lambda f: False
        return match
    return getmatcher(repo, pats, opts, showbad=False)
293 | |
def getmatcher(repo, pats=None, opts=None, showbad=True):
    '''Wrapper around scmutil.match() that adds showbad: if false, neuter
    the match object\'s bad() method so it does not print any warnings
    about missing files or directories.

    pats and opts now default to None instead of the shared mutable
    [] / {} defaults; behavior for explicit arguments is unchanged.
    '''
    if pats is None:
        pats = []
    if opts is None:
        opts = {}
    try:
        # Mercurial >= 1.9
        match = scmutil.match(repo[None], pats, opts)
    except (ImportError, NameError):
        # Mercurial <= 1.8.  NameError is what actually occurs when the
        # guarded top-level "from mercurial import scmutil" failed (the
        # name is simply unbound), so catching ImportError alone could
        # never reach this fallback.
        match = cmdutil.match(repo, pats, opts)

    if not showbad:
        match.bad = lambda f, msg: None
    return match
308 | |
def composestandinmatcher(repo, rmatcher):
    '''Return a matcher that accepts standins corresponding to the files
    accepted by rmatcher. Pass the list of files in the matcher as the
    paths specified by the user.'''
    smatcher = getstandinmatcher(repo, rmatcher.files())
    isstandin = smatcher.matchfn
    smatcher.matchfn = (
        lambda f: isstandin(f) and rmatcher.matchfn(splitstandin(f)))
    return smatcher
320 | |
def standin(filename):
    '''Return the repo-relative path to the standin for the specified big
    file.'''
    # Callers that need an absolute path wrap this in repo.wjoin();
    # _create_standin() needs the repo-relative form for repo_add().
    # dirstate always uses '/' as the separator, even on Windows, so
    # normalize the local separator first in case the name came from an
    # external source such as the command line.
    return '%s/%s' % (shortname, filename.replace(os.sep, '/'))
332 | |
def isstandin(filename):
    '''Return true if filename is a big file standin. filename must
    be in Mercurial\'s internal form (slash-separated).'''
    prefix = shortname + '/'
    return filename.startswith(prefix)
337 | |
def splitstandin(filename):
    '''Return the largefile name behind the given standin path, or None
    when the path is not under the standin directory.'''
    # dirstate always uses '/', even on Windows; normalize the local
    # separator first in case the name came from an external source
    # (like the command line).
    normalized = filename.replace(os.sep, '/')
    prefix, sep, rest = normalized.partition('/')
    if sep and prefix == shortname:
        return rest
    return None
347 | |
def updatestandin(repo, standin):
    '''Refresh the given standin from the current contents of its
    largefile in the working directory (no-op if the largefile is
    absent).'''
    lfile = repo.wjoin(splitstandin(standin))
    if not os.path.exists(lfile):
        return
    writestandin(repo, standin, hashfile(lfile), getexecutable(lfile))
354 | |
def readstandin(repo, filename, node=None):
    '''read hex hash from standin for filename at given node, or working
    directory if no node is given'''
    fctx = repo[node][standin(filename)]
    return fctx.data().strip()
359 | |
def writestandin(repo, standin, hash, executable):
    '''Write hash to the standin at <repo.root>/<standin>, marking it
    executable when requested.'''
    abspath = repo.wjoin(standin)
    writehash(hash, abspath, executable)
363 | |
def copyandhash(instream, outfile):
    '''Read bytes from instream (iterable) and write them to outfile,
    computing the SHA-1 hash of the data along the way. Close outfile
    when done and return the binary hash.'''
    sha = util.sha1('')
    write = outfile.write
    for chunk in instream:
        sha.update(chunk)
        write(chunk)
    # Closing a file somebody else opened is impolite, but outfile
    # exists only to receive this copy, and callers rely on it.
    outfile.close()
    return sha.digest()
379 | |
def hashrepofile(repo, file):
    '''Hex SHA-1 hash of the given repo-relative file in the working
    directory.'''
    abspath = repo.wjoin(file)
    return hashfile(abspath)
382 | |
def hashfile(file):
    '''Return the hex SHA-1 hash of the named file, or the empty string
    if the file does not exist.'''
    if not os.path.exists(file):
        return ''
    hasher = util.sha1('')
    fd = open(file, 'rb')
    try:
        # blockstream() closes fd on normal completion, but guarantee the
        # descriptor is released even if reading or hashing raises
        # part-way through (a second close() is harmless).
        for data in blockstream(fd):
            hasher.update(data)
    finally:
        fd.close()
    return hasher.hexdigest()
392 | |
class limitreader(object):
    '''Wrap a file object so that no more than 'limit' bytes total can be
    read through it.'''
    def __init__(self, f, limit):
        self.f = f
        self.limit = limit

    def read(self, length):
        '''Read up to length bytes, never going past the limit; returns
        the empty string once the limit is exhausted.'''
        if self.limit == 0:
            return ''
        length = min(length, self.limit)
        self.limit -= length
        return self.f.read(length)

    def close(self):
        '''Deliberately a no-op: the underlying file belongs to the
        caller.'''
        pass
407 | |
def blockstream(infile, blocksize=128 * 1024):
    """Generator that yields blocks of data from infile and closes infile."""
    data = infile.read(blocksize)
    while data:
        yield data
        data = infile.read(blocksize)
    # Same blecch as in copyandhash: we close a file we did not open,
    # because callers hand the file over entirely.
    infile.close()
417 | |
def readhash(filename):
    '''Read and return the 40-byte hex hash stored in filename.

    Raises util.Abort when the file holds fewer than 40 bytes.  The file
    is now closed in a finally block so the descriptor is not leaked if
    read() raises.'''
    rfile = open(filename, 'rb')
    try:
        hash = rfile.read(40)
    finally:
        rfile.close()
    if len(hash) < 40:
        raise util.Abort(_('bad hash in \'%s\' (only %d bytes long)')
                         % (filename, len(hash)))
    return hash
426 | |
def writehash(hash, filename, executable):
    '''Write hash plus a trailing newline to filename, creating parent
    directories as needed and setting the mode per 'executable'.'''
    util.makedirs(os.path.dirname(filename))
    if os.path.exists(filename):
        os.unlink(filename)
    out = open(filename, 'wb')
    try:
        out.write(hash)
        out.write('\n')
    finally:
        out.close()
    if os.path.exists(filename):
        os.chmod(filename, getmode(executable))
440 | |
def getexecutable(filename):
    '''Truthy when filename is executable by owner, group and other.'''
    mode = os.stat(filename).st_mode
    result = mode & stat.S_IXUSR
    if result:
        result = mode & stat.S_IXGRP
    if result:
        result = mode & stat.S_IXOTH
    return result
445 | |
def getmode(executable):
    '''Return the file mode to give a written largefile/standin:
    rwxr-xr-x when executable, rw-r--r-- otherwise.'''
    # 0o notation (accepted since Python 2.6, required by Python 3)
    # replaces the legacy 0755/0644 literals.
    if executable:
        return 0o755
    else:
        return 0o644
451 | |
def urljoin(first, second, *arg):
    '''Join URL path components so that exactly one slash separates each
    adjacent pair (only a single leading slash is stripped from each
    component).'''
    def join(left, right):
        if not left.endswith('/'):
            left += '/'
        if right.startswith('/'):
            right = right[1:]
        return left + right

    url = first
    for piece in (second,) + arg:
        url = join(url, piece)
    return url
464 | |
def hexsha1(data):
    """hexsha1 returns the hex-encoded sha1 sum of the data in the file-like
    object data"""
    hasher = hashlib.sha1()
    for chunk in util.filechunkiter(data):
        hasher.update(chunk)
    return hasher.hexdigest()
472 | |
def httpsendfile(ui, filename):
    '''Open filename for HTTP upload, papering over the three different
    httpsendfile APIs Mercurial has shipped.'''
    try:
        # Mercurial >= 1.9
        return httpconnection.httpsendfile(ui, filename, 'rb')
    except ImportError:
        # NOTE(review): httpconnection is imported unconditionally at the
        # top of this module, so a missing httpsendfile attribute would
        # raise AttributeError, not ImportError -- confirm this fallback
        # is actually reachable on Mercurial <= 1.8.
        if 'ui' in inspect.getargspec(url_.httpsendfile.__init__)[0]:
            # Mercurial == 1.8: httpsendfile grew a 'ui' parameter
            return url_.httpsendfile(ui, filename, 'rb')
        else:
            # Mercurial <= 1.7
            return url_.httpsendfile(filename, 'rb')
484 | |
def unixpath(path):
    '''Normalize path and convert it to forward-slash (unix) form; used
    to give a canonical path to the lfdirstate.'''
    normalized = os.path.normpath(path)
    return normalized.replace(os.sep, '/')
489 | |
def islfilesrepo(repo):
    '''True when repo carries the largefiles requirement and its store
    actually contains at least one standin file.'''
    if 'largefiles' not in repo.requirements:
        return False
    return any_(shortname + '/' in f[0] for f in repo.store.datafiles())
493 | |
def any_(gen):
    '''Portability stand-in for the builtin any() (absent before Python
    2.5): True as soon as gen yields a truthy value, else False.'''
    result = False
    for item in gen:
        if item:
            result = True
            break
    return result
499 | |
class storeprotonotcapable(BaseException):
    '''Raised when a remote store supports none of the protocols we can
    speak; storetypes lists the protocols it does support.

    NOTE(review): derives from BaseException rather than Exception,
    presumably so generic "except Exception" handlers do not swallow it
    -- confirm that this is intentional before changing the base class.
    '''
    def __init__(self, storetypes):
        # protocol types the remote side advertises
        self.storetypes = storetypes