comparison mercurial/httpconnection.py @ 14244:e7525a555a64

url: use new http support if requested by the user. The new http library is wired in via an extra module (httpconnection.py), as it requires similar but different plumbing to connect the library to Mercurial's internals and urllib2. Eventually we should be able to remove all of keepalive.py and its associated tangle in url.py and replace it all with the code in httpconnection.py. To use the new library, set 'ui.usehttp2' to true. The underlying http library uses the logging module liberally, so if things break you can use 'ui.http2debuglevel' to set the log level to INFO or DEBUG to get that logging information (for example, ui.http2debuglevel=info).
author Augie Fackler <durin42@gmail.com>
date Fri, 06 May 2011 10:22:08 -0500
parents
children 84256ba2fbf7
comparison
equal deleted inserted replaced
14243:861f28212398 14244:e7525a555a64
1 # httpconnection.py - urllib2 handler for new http support
2 #
3 # Copyright 2005, 2006, 2007, 2008 Matt Mackall <mpm@selenic.com>
4 # Copyright 2006, 2007 Alexis S. L. Carvalho <alexis@cecm.usp.br>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 # Copyright 2011 Google, Inc.
7 #
8 # This software may be used and distributed according to the terms of the
9 # GNU General Public License version 2 or any later version.
10 import logging
11 import socket
12 import urllib
13 import urllib2
14 import os
15
16 from mercurial import httpclient
17 from mercurial import sslutil
18 from mercurial import util
19 from mercurial.i18n import _
20
21 # moved here from url.py to avoid a cycle
class httpsendfile(object):
    """This is a wrapper around the objects returned by python's "open".

    Its purpose is to send file-like objects via HTTP and, to do so, it
    defines a __len__ attribute to feed the Content-Length header.

    Every read is reported to ui.progress() under the 'sending' topic.
    """

    def __init__(self, ui, *args, **kwargs):
        """Open the file and record its size.

        ui is kept for progress reporting; all remaining positional and
        keyword arguments are passed unchanged to the builtin open().
        """
        self.ui = ui
        self._data = open(*args, **kwargs)
        # Expose the underlying file's methods directly.
        self.seek = self._data.seek
        self.close = self._data.close
        self.write = self._data.write
        # Size is taken once, at open time, from the file descriptor.
        self._len = os.fstat(self._data.fileno()).st_size
        self._pos = 0
        # Progress total in kb, doubled on purpose (see read()).
        self._total = len(self) // 1024 * 2

    def read(self, *args, **kwargs):
        """Read from the wrapped file and update the 'sending' progress.

        Arguments are forwarded to the underlying file's read(). When no
        more data is available the progress topic is closed and the empty
        result is returned.
        """
        try:
            ret = self._data.read(*args, **kwargs)
        except EOFError:
            # Treat an explicit EOFError like an empty read instead of
            # falling through with 'ret' unbound.
            ret = ''
        if not ret:
            # Nothing left to send: close the progress topic.
            self.ui.progress(_('sending'), None)
            return ret
        self._pos += len(ret)
        # We pass double the max for total because we currently have
        # to send the bundle twice in the case of a server that
        # requires authentication. Since we can't know until we try
        # once whether authentication will be required, just lie to
        # the user and maybe the push succeeds suddenly at 50%.
        self.ui.progress(_('sending'), self._pos // 1024,
                         unit=_('kb'), total=self._total)
        return ret

    def __len__(self):
        """Return the file size in bytes as measured when it was opened."""
        return self._len
59
60 # moved here from url.py to avoid a cycle
def readauthforuri(ui, uri):
    """Return the [auth] group that best matches uri, or None.

    The [auth] section is read as 'group.setting' keys. Keys without a
    '.' are reported with ui.warn() and skipped. 'username', 'cert' and
    'key' values are passed through util.expandpath().

    A group matches when its 'prefix' is '*' or a leading part of the
    URI (scheme stripped) and the URI's scheme is allowed. The allowed
    schemes come from the prefix itself when it contains '://',
    otherwise from the 'schemes' setting (default 'https'). The longest
    matching prefix wins. The result is a (group, settings-dict) tuple.
    """
    # Collect settings per group
    groups = {}
    for key, val in ui.configitems('auth'):
        if '.' not in key:
            ui.warn(_("ignoring invalid [auth] key '%s'\n") % key)
            continue
        group, setting = key.rsplit('.', 1)
        settings = groups.setdefault(group, {})
        if setting in ('username', 'cert', 'key'):
            val = util.expandpath(val)
        settings[setting] = val

    # Pick the group with the longest matching prefix
    scheme, hostpath = uri.split('://', 1)
    winner = None
    winnerlen = 0
    for group, auth in groups.items():
        prefix = auth.get('prefix')
        if not prefix:
            continue
        parts = prefix.split('://', 1)
        if len(parts) > 1:
            # A scheme embedded in the prefix overrides 'schemes'.
            schemes = [parts[0]]
            prefix = parts[1]
        else:
            schemes = (auth.get('schemes') or 'https').split()
        if scheme not in schemes:
            continue
        if len(prefix) <= winnerlen:
            continue
        if prefix == '*' or hostpath.startswith(prefix):
            winnerlen = len(prefix)
            winner = group, auth
    return winner
92
93 # Mercurial (at least until we can remove the old codepath) requires
94 # that the http response object be sufficiently file-like, so we
95 # provide a close() method here.
class HTTPResponse(httpclient.HTTPResponse):
    """httpclient response with the close() method Mercurial expects."""
    def close(self):
        """Do nothing; present only so the response looks file-like."""
99
class HTTPConnection(httpclient.HTTPConnection):
    """httpclient connection whose responses are the file-like HTTPResponse."""
    response_class = HTTPResponse
    def request(self, method, uri, body=None, headers=None):
        """Send a request, rewinding httpsendfile bodies first.

        headers defaults to a fresh empty dict on every call, so no dict
        is shared between calls. All arguments are otherwise forwarded
        unchanged to httpclient.HTTPConnection.request().
        """
        if headers is None:
            headers = {}
        if isinstance(body, httpsendfile):
            # Start from the beginning of the file; the same body object
            # may already have been read by an earlier attempt.
            body.seek(0)
        httpclient.HTTPConnection.request(self, method, uri, body=body,
                                          headers=headers)
107
108
109 _configuredlogging = False
110 # Subclass BOTH of these because otherwise urllib2 "helpfully"
111 # reinserts them since it notices we don't include any subclasses of
112 # them.
113 class http2handler(urllib2.HTTPHandler, urllib2.HTTPSHandler):
114 def __init__(self, ui, pwmgr):
115 global _configuredlogging
116 urllib2.AbstractHTTPHandler.__init__(self)
117 self.ui = ui
118 self.pwmgr = pwmgr
119 self._connections = {}
120 loglevel = ui.config('ui', 'http2debuglevel', default=None)
121 if loglevel and not _configuredlogging:
122 _configuredlogging = True
123 logger = logging.getLogger('mercurial.http')
124 logger.setLevel(getattr(logging, loglevel.upper()))
125 logger.addHandler(logging.StreamHandler())
126
127 def close_all(self):
128 """Close and remove all connection objects being kept for reuse."""
129 for openconns in self._connections.values():
130 for conn in openconns:
131 conn.close()
132 self._connections = {}
133
134 # shamelessly borrowed from urllib2.AbstractHTTPHandler
135 def do_open(self, http_class, req):
136 """Return an addinfourl object for the request, using http_class.
137
138 http_class must implement the HTTPConnection API from httplib.
139 The addinfourl return value is a file-like object. It also
140 has methods and attributes including:
141 - info(): return a mimetools.Message object for the headers
142 - geturl(): return the original request URL
143 - code: HTTP status code
144 """
145 # If using a proxy, the host returned by get_host() is
146 # actually the proxy. On Python 2.6.1, the real destination
147 # hostname is encoded in the URI in the urllib2 request
148 # object. On Python 2.6.5, it's stored in the _tunnel_host
149 # attribute which has no accessor.
150 tunhost = getattr(req, '_tunnel_host', None)
151 host = req.get_host()
152 if tunhost:
153 proxyhost = host
154 host = tunhost
155 elif req.has_proxy():
156 proxyhost = req.get_host()
157 host = req.get_selector().split('://', 1)[1].split('/', 1)[0]
158 else:
159 proxyhost = None
160
161 if proxyhost:
162 if ':' in proxyhost:
163 # Note: this means we'll explode if we try and use an
164 # IPv6 http proxy. This isn't a regression, so we
165 # won't worry about it for now.
166 proxyhost, proxyport = proxyhost.rsplit(':', 1)
167 else:
168 proxyport = 3128 # squid default
169 proxy = (proxyhost, proxyport)
170 else:
171 proxy = None
172
173 if not host:
174 raise urllib2.URLError('no host given')
175
176 allconns = self._connections.get((host, proxy), [])
177 conns = [c for c in allconns if not c.busy()]
178 if conns:
179 h = conns[0]
180 else:
181 if allconns:
182 self.ui.debug('all connections for %s busy, making a new '
183 'one\n' % host)
184 timeout = None
185 if req.timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
186 timeout = req.timeout
187 h = http_class(host, timeout=timeout, proxy_hostport=proxy)
188 self._connections.setdefault((host, proxy), []).append(h)
189
190 headers = dict(req.headers)
191 headers.update(req.unredirected_hdrs)
192 headers = dict(
193 (name.title(), val) for name, val in headers.items())
194 try:
195 path = req.get_selector()
196 if '://' in path:
197 path = path.split('://', 1)[1].split('/', 1)[1]
198 if path[0] != '/':
199 path = '/' + path
200 h.request(req.get_method(), path, req.data, headers)
201 r = h.getresponse()
202 except socket.error, err: # XXX what error?
203 raise urllib2.URLError(err)
204
205 # Pick apart the HTTPResponse object to get the addinfourl
206 # object initialized properly.
207 r.recv = r.read
208
209 resp = urllib.addinfourl(r, r.headers, req.get_full_url())
210 resp.code = r.status
211 resp.msg = r.reason
212 return resp
213
214 # httplib always uses the given host/port as the socket connect
215 # target, and then allows full URIs in the request path, which it
216 # then observes and treats as a signal to do proxying instead.
217 def http_open(self, req):
218 if req.get_full_url().startswith('https'):
219 return self.https_open(req)
220 return self.do_open(HTTPConnection, req)
221
222 def https_open(self, req):
223 res = readauthforuri(self.ui, req.get_full_url())
224 if res:
225 group, auth = res
226 self.auth = auth
227 self.ui.debug("using auth.%s.* for authentication\n" % group)
228 else:
229 self.auth = None
230 return self.do_open(self._makesslconnection, req)
231
232 def _makesslconnection(self, host, port=443, *args, **kwargs):
233 keyfile = None
234 certfile = None
235
236 if args: # key_file
237 keyfile = args.pop(0)
238 if args: # cert_file
239 certfile = args.pop(0)
240
241 # if the user has specified different key/cert files in
242 # hgrc, we prefer these
243 if self.auth and 'key' in self.auth and 'cert' in self.auth:
244 keyfile = self.auth['key']
245 certfile = self.auth['cert']
246
247 # let host port take precedence
248 if ':' in host and '[' not in host or ']:' in host:
249 host, port = host.rsplit(':', 1)
250 port = int(port)
251 if '[' in host:
252 host = host[1:-1]
253
254 if keyfile:
255 kwargs['keyfile'] = keyfile
256 if certfile:
257 kwargs['certfile'] = certfile
258
259 kwargs.update(sslutil.sslkwargs(self.ui, host))
260
261 con = HTTPConnection(host, port, use_ssl=True,
262 ssl_validator=sslutil.validator(self.ui, host),
263 **kwargs)
264 return con