annotate hgext/largefiles/lfutil.py @ 15226:2223ea21c98f

largefiles: cleanup import, now that we can assume > 1.9 for bundled extension

author:   Na'Tosha Bard <natosha@unity3d.com>
date:     Tue, 11 Oct 2011 14:01:24 +0200
parents:  7c604d8c7e83
children: a7686abf73a6
# Copyright 2009-2010 Gregory P. Ward
# Copyright 2009-2010 Intelerad Medical Systems Incorporated
# Copyright 2010-2011 Fog Creek Software
# Copyright 2010-2011 Unity Technologies
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

'''largefiles utility code: must not import other modules in this package.'''

import os
import errno
import shutil
import stat
import hashlib

from mercurial import dirstate, httpconnection, match as match_, util, scmutil
from mercurial.i18n import _

shortname = '.hglf'
longname = 'largefiles'
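# Note: shortname is the working-copy directory that holds the standin files
# (the standin for 'foo.bin' lives at '.hglf/foo.bin'), while longname names
# both the per-repository cache directory under .hg/ and the config section
# read by this extension (see systemcachepath below).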


# -- Portability wrappers ----------------------------------------------

def dirstate_walk(dirstate, matcher, unknown=False, ignored=False):
    return dirstate.walk(matcher, [], unknown, ignored)

def repo_add(repo, list):
    add = repo[None].add
    return add(list)

def repo_remove(repo, list, unlink=False):
    def remove(list, unlink):
        wlock = repo.wlock()
        try:
            if unlink:
                for f in list:
                    try:
                        util.unlinkpath(repo.wjoin(f))
                    except OSError, inst:
                        if inst.errno != errno.ENOENT:
                            raise
            repo[None].forget(list)
        finally:
            wlock.release()
    return remove(list, unlink=unlink)

def repo_forget(repo, list):
    forget = repo[None].forget
    return forget(list)

def findoutgoing(repo, remote, force):
    from mercurial import discovery
    common, _anyinc, _heads = discovery.findcommonincoming(repo,
        remote, force=force)
    return repo.changelog.findmissing(common)

# -- Private worker functions ------------------------------------------

def link(src, dest):
    try:
        util.oslink(src, dest)
    except OSError:
        # If hardlinks fail fall back on copy
        shutil.copyfile(src, dest)
        os.chmod(dest, os.stat(src).st_mode)

def systemcachepath(ui, hash):
    path = ui.config(longname, 'systemcache', None)
    if path:
        path = os.path.join(path, hash)
    else:
        if os.name == 'nt':
            path = os.path.join(os.getenv('LOCALAPPDATA') or \
                os.getenv('APPDATA'), longname, hash)
        elif os.name == 'posix':
            path = os.path.join(os.getenv('HOME'), '.' + longname, hash)
        else:
            raise util.Abort(_('Unknown operating system: %s\n') % os.name)
    return path
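# For example, with no [largefiles] systemcache setting, systemcachepath()
# resolves to $HOME/.largefiles/<hash> on POSIX systems and to
# %LOCALAPPDATA%\largefiles\<hash> (or %APPDATA%\largefiles\<hash>) on
# Windows; a configured systemcache path is simply joined with <hash>.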

def insystemcache(ui, hash):
    return os.path.exists(systemcachepath(ui, hash))

def findfile(repo, hash):
    if incache(repo, hash):
        repo.ui.note(_('Found %s in cache\n') % hash)
        return cachepath(repo, hash)
    if insystemcache(repo.ui, hash):
        repo.ui.note(_('Found %s in system cache\n') % hash)
        return systemcachepath(repo.ui, hash)
    return None

class largefiles_dirstate(dirstate.dirstate):
    def __getitem__(self, key):
        return super(largefiles_dirstate, self).__getitem__(unixpath(key))
    def normal(self, f):
        return super(largefiles_dirstate, self).normal(unixpath(f))
    def remove(self, f):
        return super(largefiles_dirstate, self).remove(unixpath(f))
    def add(self, f):
        return super(largefiles_dirstate, self).add(unixpath(f))
    def drop(self, f):
        return super(largefiles_dirstate, self).drop(unixpath(f))
    def forget(self, f):
        return super(largefiles_dirstate, self).forget(unixpath(f))

def openlfdirstate(ui, repo):
    '''
    Return a dirstate object that tracks big files: i.e. its root is the
    repo root, but it is saved in .hg/largefiles/dirstate.
    '''
    admin = repo.join(longname)
    opener = scmutil.opener(admin)
    if util.safehasattr(repo.dirstate, '_validate'):
        lfdirstate = largefiles_dirstate(opener, ui, repo.root,
            repo.dirstate._validate)
    else:
        lfdirstate = largefiles_dirstate(opener, ui, repo.root)

    # If the largefiles dirstate does not exist, populate and create it. This
    # ensures that we create it on the first meaningful largefiles operation in
    # a new clone. It also gives us an easy way to forcibly rebuild largefiles
    # state:
    #   rm .hg/largefiles/dirstate && hg status
    # Or even, if things are really messed up:
    #   rm -rf .hg/largefiles && hg status
    if not os.path.exists(os.path.join(admin, 'dirstate')):
        util.makedirs(admin)
        matcher = getstandinmatcher(repo)
        for standin in dirstate_walk(repo.dirstate, matcher):
            lfile = splitstandin(standin)
            hash = readstandin(repo, lfile)
            lfdirstate.normallookup(lfile)
            try:
                if hash == hashfile(lfile):
                    lfdirstate.normal(lfile)
            except IOError, err:
                if err.errno != errno.ENOENT:
                    raise

        lfdirstate.write()

    return lfdirstate

def lfdirstate_status(lfdirstate, repo, rev):
    wlock = repo.wlock()
    try:
        match = match_.always(repo.root, repo.getcwd())
        s = lfdirstate.status(match, [], False, False, False)
        unsure, modified, added, removed, missing, unknown, ignored, clean = s
        for lfile in unsure:
            if repo[rev][standin(lfile)].data().strip() != \
                    hashfile(repo.wjoin(lfile)):
                modified.append(lfile)
            else:
                clean.append(lfile)
                lfdirstate.normal(lfile)
        lfdirstate.write()
    finally:
        wlock.release()
    return (modified, added, removed, missing, unknown, ignored, clean)

def listlfiles(repo, rev=None, matcher=None):
    '''list largefiles in the working copy or specified changeset'''

    if matcher is None:
        matcher = getstandinmatcher(repo)

    # ignore unknown files in working directory
    return [splitstandin(f) for f in repo[rev].walk(matcher) \
            if rev is not None or repo.dirstate[f] != '?']

def incache(repo, hash):
    return os.path.exists(cachepath(repo, hash))

def createdir(dir):
    if not os.path.exists(dir):
        os.makedirs(dir)

def cachepath(repo, hash):
    return repo.join(os.path.join(longname, hash))

def copyfromcache(repo, hash, filename):
    '''copyfromcache copies the specified largefile from the repo or system
    cache to the specified location in the repository. It will not throw an
    exception on failure, as it is meant to be called only after ensuring that
    the needed largefile exists in the cache.'''
    path = findfile(repo, hash)
    if path is None:
        return False
    util.makedirs(os.path.dirname(repo.wjoin(filename)))
    shutil.copy(path, repo.wjoin(filename))
    return True

def copytocache(repo, rev, file, uploaded=False):
    hash = readstandin(repo, file)
    if incache(repo, hash):
        return
    copytocacheabsolute(repo, repo.wjoin(file), hash)

def copytocacheabsolute(repo, file, hash):
    createdir(os.path.dirname(cachepath(repo, hash)))
    if insystemcache(repo.ui, hash):
        link(systemcachepath(repo.ui, hash), cachepath(repo, hash))
    else:
        shutil.copyfile(file, cachepath(repo, hash))
        os.chmod(cachepath(repo, hash), os.stat(file).st_mode)
        linktosystemcache(repo, hash)

def linktosystemcache(repo, hash):
    createdir(os.path.dirname(systemcachepath(repo.ui, hash)))
    link(cachepath(repo, hash), systemcachepath(repo.ui, hash))

def getstandinmatcher(repo, pats=[], opts={}):
    '''Return a match object that applies pats to the standin directory'''
    standindir = repo.pathto(shortname)
    if pats:
        # patterns supplied: search standin directory relative to current dir
        cwd = repo.getcwd()
        if os.path.isabs(cwd):
            # cwd is an absolute path for hg -R <reponame>
            # work relative to the repository root in this case
            cwd = ''
        pats = [os.path.join(standindir, cwd, pat) for pat in pats]
    elif os.path.isdir(standindir):
        # no patterns: relative to repo root
        pats = [standindir]
    else:
        # no patterns and no standin dir: return matcher that matches nothing
        match = match_.match(repo.root, None, [], exact=True)
        match.matchfn = lambda f: False
        return match
    return getmatcher(repo, pats, opts, showbad=False)

def getmatcher(repo, pats=[], opts={}, showbad=True):
    '''Wrapper around scmutil.match() that adds showbad: if false, neuter
    the match object\'s bad() method so it does not print any warnings
    about missing files or directories.'''
    match = scmutil.match(repo[None], pats, opts)

    if not showbad:
        match.bad = lambda f, msg: None
    return match

def composestandinmatcher(repo, rmatcher):
    '''Return a matcher that accepts standins corresponding to the files
    accepted by rmatcher. Pass the list of files in the matcher as the
    paths specified by the user.'''
    smatcher = getstandinmatcher(repo, rmatcher.files())
    isstandin = smatcher.matchfn
    def composed_matchfn(f):
        return isstandin(f) and rmatcher.matchfn(splitstandin(f))
    smatcher.matchfn = composed_matchfn

    return smatcher

def standin(filename):
    '''Return the repo-relative path to the standin for the specified big
    file.'''
    # Notes:
    # 1) Most callers want an absolute path, but _create_standin() needs
    #    it repo-relative so lfadd() can pass it to repo_add(). So leave
    #    it up to the caller to use repo.wjoin() to get an absolute path.
    # 2) Join with '/' because that's what dirstate always uses, even on
    #    Windows. Change existing separator to '/' first in case we are
    #    passed filenames from an external source (like the command line).
    return shortname + '/' + filename.replace(os.sep, '/')
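# For example, standin('foo/bar.bin') (or r'foo\bar.bin' on Windows) yields
# '.hglf/foo/bar.bin'.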

def isstandin(filename):
    '''Return true if filename is a big file standin. filename must
    be in Mercurial\'s internal form (slash-separated).'''
    return filename.startswith(shortname + '/')

def splitstandin(filename):
    # Split on / because that's what dirstate always uses, even on Windows.
    # Change local separator to / first just in case we are passed filenames
    # from an external source (like the command line).
    bits = filename.replace(os.sep, '/').split('/', 1)
    if len(bits) == 2 and bits[0] == shortname:
        return bits[1]
    else:
        return None
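# For example, splitstandin('.hglf/foo/bar.bin') yields 'foo/bar.bin', while a
# path outside the standin directory, such as 'foo/bar.bin', yields None.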

def updatestandin(repo, standin):
    file = repo.wjoin(splitstandin(standin))
    if os.path.exists(file):
        hash = hashfile(file)
        executable = getexecutable(file)
        writestandin(repo, standin, hash, executable)

def readstandin(repo, filename, node=None):
    '''read hex hash from standin for filename at given node, or working
    directory if no node is given'''
    return repo[node][standin(filename)].data().strip()
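# The standin's content is the 40-character hex SHA-1 of the largefile
# followed by a newline (see writehash below), hence the strip() here.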

def writestandin(repo, standin, hash, executable):
    '''write hash to <repo.root>/<standin>'''
    writehash(hash, repo.wjoin(standin), executable)

def copyandhash(instream, outfile):
    '''Read bytes from instream (iterable) and write them to outfile,
    computing the SHA-1 hash of the data along the way. Close outfile
    when done and return the binary hash.'''
    hasher = util.sha1('')
    for data in instream:
        hasher.update(data)
        outfile.write(data)

    # Blecch: closing a file that somebody else opened is rude and
    # wrong. But it's so darn convenient and practical! After all,
    # outfile was opened just to copy and hash.
    outfile.close()

    return hasher.digest()

def hashrepofile(repo, file):
    return hashfile(repo.wjoin(file))

def hashfile(file):
    if not os.path.exists(file):
        return ''
    hasher = util.sha1('')
    fd = open(file, 'rb')
    for data in blockstream(fd):
        hasher.update(data)
    fd.close()
    return hasher.hexdigest()

class limitreader(object):
    def __init__(self, f, limit):
        self.f = f
        self.limit = limit

    def read(self, length):
        if self.limit == 0:
            return ''
        length = length > self.limit and self.limit or length
        self.limit -= length
        return self.f.read(length)

    def close(self):
        pass
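# limitreader caps how much can be read from the wrapped file object; e.g.
# limitreader(f, 1024).read(4096) returns at most 1024 bytes.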

def blockstream(infile, blocksize=128 * 1024):
    """Generator that yields blocks of data from infile and closes infile."""
    while True:
        data = infile.read(blocksize)
        if not data:
            break
        yield data
    # Same blecch as above.
    infile.close()

def readhash(filename):
    rfile = open(filename, 'rb')
    hash = rfile.read(40)
    rfile.close()
    if len(hash) < 40:
        raise util.Abort(_('bad hash in \'%s\' (only %d bytes long)')
                         % (filename, len(hash)))
    return hash

def writehash(hash, filename, executable):
    util.makedirs(os.path.dirname(filename))
    if os.path.exists(filename):
        os.unlink(filename)
    wfile = open(filename, 'wb')

    try:
        wfile.write(hash)
        wfile.write('\n')
    finally:
        wfile.close()
    if os.path.exists(filename):
        os.chmod(filename, getmode(executable))

def getexecutable(filename):
    mode = os.stat(filename).st_mode
    return (mode & stat.S_IXUSR) and (mode & stat.S_IXGRP) and (mode & \
        stat.S_IXOTH)

def getmode(executable):
    if executable:
        return 0755
    else:
        return 0644
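# getmode() returns a Unix permission value: 0755 (rwxr-xr-x) for executable
# largefiles, 0644 (rw-r--r--) otherwise.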

def urljoin(first, second, *arg):
    def join(left, right):
        if not left.endswith('/'):
            left += '/'
        if right.startswith('/'):
            right = right[1:]
        return left + right

    url = join(first, second)
    for a in arg:
        url = join(url, a)
    return url
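# For example, urljoin('http://example.com/', '/largefiles', 'abc123') yields
# 'http://example.com/largefiles/abc123'.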

def hexsha1(data):
    """hexsha1 returns the hex-encoded sha1 sum of the data in the file-like
    object data"""
    h = hashlib.sha1()
    for chunk in util.filechunkiter(data):
        h.update(chunk)
    return h.hexdigest()
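# Unlike copyandhash() above, which returns the raw binary digest, hexsha1()
# and hashfile() return the 40-character hexadecimal form that is stored in
# standin files.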

def httpsendfile(ui, filename):
    return httpconnection.httpsendfile(ui, filename, 'rb')

# Convert a path to a unix style path. This is used to give a
# canonical path to the lfdirstate.
def unixpath(path):
    return os.path.normpath(path).replace(os.sep, '/')
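# For example, unixpath('./foo/bar') yields 'foo/bar', and on Windows
# unixpath(r'foo\bar') also yields 'foo/bar'.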

def islfilesrepo(repo):
    return ('largefiles' in repo.requirements and
            any_(shortname + '/' in f[0] for f in repo.store.datafiles()))

def any_(gen):
    for x in gen:
        if x:
            return True
    return False

class storeprotonotcapable(BaseException):
    def __init__(self, storetypes):
        self.storetypes = storetypes