Mercurial > hg
annotate hgext/largefiles/proto.py @ 18488:a977b42df8b3 stable
largefiles: don't verify largefile hashes on servers when processing statlfile
When changesets referencing largefiles are pushed then the corresponding
largefiles will be pushed too - unless the target already has them. The client
will use statlfile to make sure it only sends largefiles that the target
doesn't have. The server would however on every statlfile check that the
content of the largefile had the expected hash. What should be cheap thus
became an expensive operation that thrashed the disk and the cache.
Largefile hashes are already checked by putlfile before being stored on the
server. A server should thus be able to keep its largefile store free of
errors - even more than it can keep revlogs free of errors. Verification should
happen when running 'hg verify' locally on the server. Rehashing every
largefile on every remote stat is too expensive.
Clients will also stat lfiles before downloading them. When the server verified
the hash in stat it meant that it had to read the file twice to serve it.
With this change the server will assume its own hashes are ok without checking
them on every statlfile.
Some consequences of this change:
- in case of server side corruption the problem will be detected by the
existing check on the client side - not on server side
- clients that could upload an uncorrupted largefile when pushing will no
longer magically heal the server (and break hardlinks) - a client will now
only upload its uncorrupted files after the corrupted file has been removed
on the server side
- client side verify will no longer report corruption in files it doesn't have
(Issue3123 discussed related problems - and how they have been fixed.)
author | Mads Kiilerich <madski@unity3d.com> |
---|---|
date | Mon, 28 Jan 2013 15:19:44 +0100 |
parents | 3598c585e464 |
children | d2c4d37f7db5 |
rev | line source |
---|---|
15168 | 1 # Copyright 2011 Fog Creek Software |
2 # | |
3 # This software may be used and distributed according to the terms of the | |
4 # GNU General Public License version 2 or any later version. | |
5 | |
6 import os | |
7 import urllib2 | |
8 | |
17192
1ac628cd7113
peer: introduce real peer classes
Peter Arrenbrecht <peter.arrenbrecht@gmail.com>
parents:
17127
diff
changeset
|
9 from mercurial import error, httppeer, util, wireproto |
17127
9e1616307c4c
largefiles: batch statlfile requests when pushing a largefiles repo (issue3386)
Na'Tosha Bard <natosha@unity3d.com>
parents:
16594
diff
changeset
|
10 from mercurial.wireproto import batchable, future |
15168 | 11 from mercurial.i18n import _ |
12 | |
13 import lfutil | |
14 | |
15255
7ab05d752405
largefiles: cosmetics, whitespace, code style
Greg Ward <greg@gerg.ca>
parents:
15252
diff
changeset
|
15 LARGEFILES_REQUIRED_MSG = ('\nThis repository uses the largefiles extension.' |
7ab05d752405
largefiles: cosmetics, whitespace, code style
Greg Ward <greg@gerg.ca>
parents:
15252
diff
changeset
|
16 '\n\nPlease enable it in your Mercurial config ' |
7ab05d752405
largefiles: cosmetics, whitespace, code style
Greg Ward <greg@gerg.ca>
parents:
15252
diff
changeset
|
17 'file.\n') |
15168 | 18 |
19 def putlfile(repo, proto, sha): | |
15317
41f371150ccb
largefiles: make the store primary, and the user cache secondary
Benjamin Pollack <benjamin@bitquabit.com>
parents:
15316
diff
changeset
|
20 '''Put a largefile into a repository's local store and into the |
41f371150ccb
largefiles: make the store primary, and the user cache secondary
Benjamin Pollack <benjamin@bitquabit.com>
parents:
15316
diff
changeset
|
21 user cache.''' |
15168 | 22 proto.redirect() |
15391
a5a6a9b7f3b9
largefiles: replace tempfile.NamedTemporaryFile with tempfile.mkstemp
Hao Lian <hao@fogcreek.com>
parents:
15317
diff
changeset
|
23 |
16594
5516fdf3fe24
largefiles: in putlfile, ensure tempfile's directory exists prior to creation
hlian
parents:
16247
diff
changeset
|
24 path = lfutil.storepath(repo, sha) |
5516fdf3fe24
largefiles: in putlfile, ensure tempfile's directory exists prior to creation
hlian
parents:
16247
diff
changeset
|
25 util.makedirs(os.path.dirname(path)) |
5516fdf3fe24
largefiles: in putlfile, ensure tempfile's directory exists prior to creation
hlian
parents:
16247
diff
changeset
|
26 tmpfp = util.atomictempfile(path, createmode=repo.store.createmode) |
5516fdf3fe24
largefiles: in putlfile, ensure tempfile's directory exists prior to creation
hlian
parents:
16247
diff
changeset
|
27 |
15168 | 28 try: |
29 try: | |
15391
a5a6a9b7f3b9
largefiles: replace tempfile.NamedTemporaryFile with tempfile.mkstemp
Hao Lian <hao@fogcreek.com>
parents:
15317
diff
changeset
|
30 proto.getfile(tmpfp) |
16155
1b2b42e866be
largefiles: respect store.createmode and avoid extra file copy
Martin Geisler <mg@aragost.com>
parents:
15778
diff
changeset
|
31 tmpfp._fp.seek(0) |
1b2b42e866be
largefiles: respect store.createmode and avoid extra file copy
Martin Geisler <mg@aragost.com>
parents:
15778
diff
changeset
|
32 if sha != lfutil.hexsha1(tmpfp._fp): |
15778
f15c646bffc7
largefiles: display remote errors from putlfile (issue3123) (issue3149)
Kevin Gessner <kevin@fogcreek.com>
parents:
15391
diff
changeset
|
33 raise IOError(0, _('largefile contents do not match hash')) |
15391
a5a6a9b7f3b9
largefiles: replace tempfile.NamedTemporaryFile with tempfile.mkstemp
Hao Lian <hao@fogcreek.com>
parents:
15317
diff
changeset
|
34 tmpfp.close() |
16155
1b2b42e866be
largefiles: respect store.createmode and avoid extra file copy
Martin Geisler <mg@aragost.com>
parents:
15778
diff
changeset
|
35 lfutil.linktousercache(repo, sha) |
15391
a5a6a9b7f3b9
largefiles: replace tempfile.NamedTemporaryFile with tempfile.mkstemp
Hao Lian <hao@fogcreek.com>
parents:
15317
diff
changeset
|
36 except IOError, e: |
15778
f15c646bffc7
largefiles: display remote errors from putlfile (issue3123) (issue3149)
Kevin Gessner <kevin@fogcreek.com>
parents:
15391
diff
changeset
|
37 repo.ui.warn(_('largefiles: failed to put %s into store: %s') % |
f15c646bffc7
largefiles: display remote errors from putlfile (issue3123) (issue3149)
Kevin Gessner <kevin@fogcreek.com>
parents:
15391
diff
changeset
|
38 (sha, e.strerror)) |
15168 | 39 return wireproto.pushres(1) |
40 finally: | |
16155
1b2b42e866be
largefiles: respect store.createmode and avoid extra file copy
Martin Geisler <mg@aragost.com>
parents:
15778
diff
changeset
|
41 tmpfp.discard() |
15168 | 42 |
43 return wireproto.pushres(0) | |
44 | |
45 def getlfile(repo, proto, sha): | |
15252
6e809bb4f969
largefiles: improve comments, internal docstrings
Greg Ward <greg@gerg.ca>
parents:
15224
diff
changeset
|
46 '''Retrieve a largefile from the repository-local cache or system |
6e809bb4f969
largefiles: improve comments, internal docstrings
Greg Ward <greg@gerg.ca>
parents:
15224
diff
changeset
|
47 cache.''' |
15168 | 48 filename = lfutil.findfile(repo, sha) |
49 if not filename: | |
50 raise util.Abort(_('requested largefile %s not present in cache') % sha) | |
51 f = open(filename, 'rb') | |
52 length = os.fstat(f.fileno())[6] | |
15252
6e809bb4f969
largefiles: improve comments, internal docstrings
Greg Ward <greg@gerg.ca>
parents:
15224
diff
changeset
|
53 |
6e809bb4f969
largefiles: improve comments, internal docstrings
Greg Ward <greg@gerg.ca>
parents:
15224
diff
changeset
|
54 # Since we can't set an HTTP content-length header here, and |
6e809bb4f969
largefiles: improve comments, internal docstrings
Greg Ward <greg@gerg.ca>
parents:
15224
diff
changeset
|
55 # Mercurial core provides no way to give the length of a streamres |
6e809bb4f969
largefiles: improve comments, internal docstrings
Greg Ward <greg@gerg.ca>
parents:
15224
diff
changeset
|
56 # (and reading the entire file into RAM would be ill-advised), we |
6e809bb4f969
largefiles: improve comments, internal docstrings
Greg Ward <greg@gerg.ca>
parents:
15224
diff
changeset
|
57 # just send the length on the first line of the response, like the |
6e809bb4f969
largefiles: improve comments, internal docstrings
Greg Ward <greg@gerg.ca>
parents:
15224
diff
changeset
|
58 # ssh proto does for string responses. |
15168 | 59 def generator(): |
60 yield '%d\n' % length | |
61 for chunk in f: | |
62 yield chunk | |
63 return wireproto.streamres(generator()) | |
64 | |
65 def statlfile(repo, proto, sha): | |
18488
a977b42df8b3
largefiles: don't verify largefile hashes on servers when processing statlfile
Mads Kiilerich <madski@unity3d.com>
parents:
18298
diff
changeset
|
66 '''Return '2\n' if the largefile is missing, '0\n' if it seems to be in |
a977b42df8b3
largefiles: don't verify largefile hashes on servers when processing statlfile
Mads Kiilerich <madski@unity3d.com>
parents:
18298
diff
changeset
|
67 good condition. |
a977b42df8b3
largefiles: don't verify largefile hashes on servers when processing statlfile
Mads Kiilerich <madski@unity3d.com>
parents:
18298
diff
changeset
|
68 |
a977b42df8b3
largefiles: don't verify largefile hashes on servers when processing statlfile
Mads Kiilerich <madski@unity3d.com>
parents:
18298
diff
changeset
|
69 The value 1 is reserved for mismatched checksum, but that is too expensive |
a977b42df8b3
largefiles: don't verify largefile hashes on servers when processing statlfile
Mads Kiilerich <madski@unity3d.com>
parents:
18298
diff
changeset
|
70 to be verified on every stat and must be caught by running 'hg verify' |
a977b42df8b3
largefiles: don't verify largefile hashes on servers when processing statlfile
Mads Kiilerich <madski@unity3d.com>
parents:
18298
diff
changeset
|
71 server side.''' |
15168 | 72 filename = lfutil.findfile(repo, sha) |
73 if not filename: | |
74 return '2\n' | |
18488
a977b42df8b3
largefiles: don't verify largefile hashes on servers when processing statlfile
Mads Kiilerich <madski@unity3d.com>
parents:
18298
diff
changeset
|
75 return '0\n' |
15168 | 76 |
77 def wirereposetup(ui, repo): | |
78 class lfileswirerepository(repo.__class__): | |
79 def putlfile(self, sha, fd): | |
80 # unfortunately, httprepository._callpush tries to convert its | |
81 # input file-like into a bundle before sending it, so we can't use | |
82 # it ... | |
17192
1ac628cd7113
peer: introduce real peer classes
Peter Arrenbrecht <peter.arrenbrecht@gmail.com>
parents:
17127
diff
changeset
|
83 if issubclass(self.__class__, httppeer.httppeer): |
15778
f15c646bffc7
largefiles: display remote errors from putlfile (issue3123) (issue3149)
Kevin Gessner <kevin@fogcreek.com>
parents:
15391
diff
changeset
|
84 res = None |
15168 | 85 try: |
15778
f15c646bffc7
largefiles: display remote errors from putlfile (issue3123) (issue3149)
Kevin Gessner <kevin@fogcreek.com>
parents:
15391
diff
changeset
|
86 res = self._call('putlfile', data=fd, sha=sha, |
f15c646bffc7
largefiles: display remote errors from putlfile (issue3123) (issue3149)
Kevin Gessner <kevin@fogcreek.com>
parents:
15391
diff
changeset
|
87 headers={'content-type':'application/mercurial-0.1'}) |
f15c646bffc7
largefiles: display remote errors from putlfile (issue3123) (issue3149)
Kevin Gessner <kevin@fogcreek.com>
parents:
15391
diff
changeset
|
88 d, output = res.split('\n', 1) |
f15c646bffc7
largefiles: display remote errors from putlfile (issue3123) (issue3149)
Kevin Gessner <kevin@fogcreek.com>
parents:
15391
diff
changeset
|
89 for l in output.splitlines(True): |
f15c646bffc7
largefiles: display remote errors from putlfile (issue3123) (issue3149)
Kevin Gessner <kevin@fogcreek.com>
parents:
15391
diff
changeset
|
90 self.ui.warn(_('remote: '), l, '\n') |
f15c646bffc7
largefiles: display remote errors from putlfile (issue3123) (issue3149)
Kevin Gessner <kevin@fogcreek.com>
parents:
15391
diff
changeset
|
91 return int(d) |
15168 | 92 except (ValueError, urllib2.HTTPError): |
15778
f15c646bffc7
largefiles: display remote errors from putlfile (issue3123) (issue3149)
Kevin Gessner <kevin@fogcreek.com>
parents:
15391
diff
changeset
|
93 self.ui.warn(_('unexpected putlfile response: %s') % res) |
15168 | 94 return 1 |
95 # ... but we can't use sshrepository._call because the data= | |
96 # argument won't get sent, and _callpush does exactly what we want | |
97 # in this case: send the data straight through | |
98 else: | |
99 try: | |
100 ret, output = self._callpush("putlfile", fd, sha=sha) | |
101 if ret == "": | |
102 raise error.ResponseError(_('putlfile failed:'), | |
103 output) | |
104 return int(ret) | |
105 except IOError: | |
106 return 1 | |
107 except ValueError: | |
108 raise error.ResponseError( | |
109 _('putlfile failed (unexpected response):'), ret) | |
110 | |
111 def getlfile(self, sha): | |
112 stream = self._callstream("getlfile", sha=sha) | |
113 length = stream.readline() | |
114 try: | |
115 length = int(length) | |
116 except ValueError: | |
15170
c1a4a3220711
largefiles: fix over-long lines
Matt Mackall <mpm@selenic.com>
parents:
15168
diff
changeset
|
117 self._abort(error.ResponseError(_("unexpected response:"), |
c1a4a3220711
largefiles: fix over-long lines
Matt Mackall <mpm@selenic.com>
parents:
15168
diff
changeset
|
118 length)) |
15168 | 119 return (length, stream) |
120 | |
17127
9e1616307c4c
largefiles: batch statlfile requests when pushing a largefiles repo (issue3386)
Na'Tosha Bard <natosha@unity3d.com>
parents:
16594
diff
changeset
|
121 @batchable |
15168 | 122 def statlfile(self, sha): |
17127
9e1616307c4c
largefiles: batch statlfile requests when pushing a largefiles repo (issue3386)
Na'Tosha Bard <natosha@unity3d.com>
parents:
16594
diff
changeset
|
123 f = future() |
9e1616307c4c
largefiles: batch statlfile requests when pushing a largefiles repo (issue3386)
Na'Tosha Bard <natosha@unity3d.com>
parents:
16594
diff
changeset
|
124 result = {'sha': sha} |
9e1616307c4c
largefiles: batch statlfile requests when pushing a largefiles repo (issue3386)
Na'Tosha Bard <natosha@unity3d.com>
parents:
16594
diff
changeset
|
125 yield result, f |
15168 | 126 try: |
17127
9e1616307c4c
largefiles: batch statlfile requests when pushing a largefiles repo (issue3386)
Na'Tosha Bard <natosha@unity3d.com>
parents:
16594
diff
changeset
|
127 yield int(f.value) |
15168 | 128 except (ValueError, urllib2.HTTPError): |
15252
6e809bb4f969
largefiles: improve comments, internal docstrings
Greg Ward <greg@gerg.ca>
parents:
15224
diff
changeset
|
129 # If the server returns anything but an integer followed by a |
15168 | 130 # newline, it's not speaking our language; if we get |
131 # an HTTP error, we can't be sure the largefile is present; | |
15252
6e809bb4f969
largefiles: improve comments, internal docstrings
Greg Ward <greg@gerg.ca>
parents:
15224
diff
changeset
|
132 # either way, consider it missing. |
17127
9e1616307c4c
largefiles: batch statlfile requests when pushing a largefiles repo (issue3386)
Na'Tosha Bard <natosha@unity3d.com>
parents:
16594
diff
changeset
|
133 yield 2 |
15168 | 134 |
135 repo.__class__ = lfileswirerepository | |
136 | |
137 # advertise the largefiles=serve capability | |
138 def capabilities(repo, proto): | |
16247
d87d9d8a8e03
largefiles: remove use of underscores that breaks coding convention
Na'Tosha Bard <natosha@unity3d.com>
parents:
16155
diff
changeset
|
139 return capabilitiesorig(repo, proto) + ' largefiles=serve' |
15168 | 140 |
141 def heads(repo, proto): | |
142 if lfutil.islfilesrepo(repo): | |
15224
7c604d8c7e83
largefiles: remove pre-1.9 code from extension first bundled with 1.9
Na'Tosha Bard <natosha@unity3d.com>
parents:
15170
diff
changeset
|
143 return wireproto.ooberror(LARGEFILES_REQUIRED_MSG) |
15168 | 144 return wireproto.heads(repo, proto) |
145 | |
16247
d87d9d8a8e03
largefiles: remove use of underscores that breaks coding convention
Na'Tosha Bard <natosha@unity3d.com>
parents:
16155
diff
changeset
|
146 def sshrepocallstream(self, cmd, **args): |
15168 | 147 if cmd == 'heads' and self.capable('largefiles'): |
148 cmd = 'lheads' | |
149 if cmd == 'batch' and self.capable('largefiles'): | |
150 args['cmds'] = args['cmds'].replace('heads ', 'lheads ') | |
16247
d87d9d8a8e03
largefiles: remove use of underscores that breaks coding convention
Na'Tosha Bard <natosha@unity3d.com>
parents:
16155
diff
changeset
|
151 return ssholdcallstream(self, cmd, **args) |
15168 | 152 |
16247
d87d9d8a8e03
largefiles: remove use of underscores that breaks coding convention
Na'Tosha Bard <natosha@unity3d.com>
parents:
16155
diff
changeset
|
153 def httprepocallstream(self, cmd, **args): |
15168 | 154 if cmd == 'heads' and self.capable('largefiles'): |
155 cmd = 'lheads' | |
156 if cmd == 'batch' and self.capable('largefiles'): | |
157 args['cmds'] = args['cmds'].replace('heads ', 'lheads ') | |
16247
d87d9d8a8e03
largefiles: remove use of underscores that breaks coding convention
Na'Tosha Bard <natosha@unity3d.com>
parents:
16155
diff
changeset
|
158 return httpoldcallstream(self, cmd, **args) |