mercurial/httpconnection.py
changeset 14244 e7525a555a64
child 14294 84256ba2fbf7
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/httpconnection.py	Fri May 06 10:22:08 2011 -0500
@@ -0,0 +1,264 @@
+# httpconnection.py - urllib2 handler for new http support
+#
+# Copyright 2005, 2006, 2007, 2008 Matt Mackall <mpm@selenic.com>
+# Copyright 2006, 2007 Alexis S. L. Carvalho <alexis@cecm.usp.br>
+# Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
+# Copyright 2011 Google, Inc.
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+import logging
+import socket
+import urllib
+import urllib2
+import os
+
+from mercurial import httpclient
+from mercurial import sslutil
+from mercurial import util
+from mercurial.i18n import _
+
+# moved here from url.py to avoid a cycle
+class httpsendfile(object):
+    """This is a wrapper around the objects returned by python's "open".
+
+    Its purpose is to send file-like objects via HTTP and, to do so, it
+    defines a __len__ attribute to feed the Content-Length header.
+    """
+
+    def __init__(self, ui, *args, **kwargs):
+        # We can't just "self._data = open(*args, **kwargs)" here because there
+        # is an "open" function defined in this module that shadows the global
+        # one
+        self.ui = ui
+        self._data = open(*args, **kwargs)
+        self.seek = self._data.seek
+        self.close = self._data.close
+        self.write = self._data.write
+        self._len = os.fstat(self._data.fileno()).st_size
+        self._pos = 0
+        self._total = len(self) / 1024 * 2
+
+    def read(self, *args, **kwargs):
+        try:
+            ret = self._data.read(*args, **kwargs)
+        except EOFError:
+            self.ui.progress(_('sending'), None)
+        self._pos += len(ret)
+        # We pass double the max for total because we currently have
+        # to send the bundle twice in the case of a server that
+        # requires authentication. Since we can't know until we try
+        # once whether authentication will be required, just lie to
+        # the user and maybe the push succeeds suddenly at 50%.
+        self.ui.progress(_('sending'), self._pos / 1024,
+                         unit=_('kb'), total=self._total)
+        return ret
+
+    def __len__(self):
+        return self._len
+
+# moved here from url.py to avoid a cycle
+def readauthforuri(ui, uri):
+    # Read configuration
+    config = dict()
+    for key, val in ui.configitems('auth'):
+        if '.' not in key:
+            ui.warn(_("ignoring invalid [auth] key '%s'\n") % key)
+            continue
+        group, setting = key.rsplit('.', 1)
+        gdict = config.setdefault(group, dict())
+        if setting in ('username', 'cert', 'key'):
+            val = util.expandpath(val)
+        gdict[setting] = val
+
+    # Find the best match
+    scheme, hostpath = uri.split('://', 1)
+    bestlen = 0
+    bestauth = None
+    for group, auth in config.iteritems():
+        prefix = auth.get('prefix')
+        if not prefix:
+            continue
+        p = prefix.split('://', 1)
+        if len(p) > 1:
+            schemes, prefix = [p[0]], p[1]
+        else:
+            schemes = (auth.get('schemes') or 'https').split()
+        if (prefix == '*' or hostpath.startswith(prefix)) and \
+            len(prefix) > bestlen and scheme in schemes:
+            bestlen = len(prefix)
+            bestauth = group, auth
+    return bestauth
+
+# Mercurial (at least until we can remove the old codepath) requires
+# that the http response object be sufficiently file-like, so we
+# provide a close() method here.
+class HTTPResponse(httpclient.HTTPResponse):
+    def close(self):
+        pass
+
+class HTTPConnection(httpclient.HTTPConnection):
+    response_class = HTTPResponse
+    def request(self, method, uri, body=None, headers={}):
+        if isinstance(body, httpsendfile):
+            body.seek(0)
+        httpclient.HTTPConnection.request(self, method, uri, body=body,
+                                          headers=headers)
+
+
+_configuredlogging = False
+# Subclass BOTH of these because otherwise urllib2 "helpfully"
+# reinserts them since it notices we don't include any subclasses of
+# them.
+class http2handler(urllib2.HTTPHandler, urllib2.HTTPSHandler):
+    def __init__(self, ui, pwmgr):
+        global _configuredlogging
+        urllib2.AbstractHTTPHandler.__init__(self)
+        self.ui = ui
+        self.pwmgr = pwmgr
+        self._connections = {}
+        loglevel = ui.config('ui', 'http2debuglevel', default=None)
+        if loglevel and not _configuredlogging:
+            _configuredlogging = True
+            logger = logging.getLogger('mercurial.http')
+            logger.setLevel(getattr(logging, loglevel.upper()))
+            logger.addHandler(logging.StreamHandler())
+
+    def close_all(self):
+        """Close and remove all connection objects being kept for reuse."""
+        for openconns in self._connections.values():
+            for conn in openconns:
+                conn.close()
+        self._connections = {}
+
+    # shamelessly borrowed from urllib2.AbstractHTTPHandler
+    def do_open(self, http_class, req):
+        """Return an addinfourl object for the request, using http_class.
+
+        http_class must implement the HTTPConnection API from httplib.
+        The addinfourl return value is a file-like object.  It also
+        has methods and attributes including:
+            - info(): return a mimetools.Message object for the headers
+            - geturl(): return the original request URL
+            - code: HTTP status code
+        """
+        # If using a proxy, the host returned by get_host() is
+        # actually the proxy. On Python 2.6.1, the real destination
+        # hostname is encoded in the URI in the urllib2 request
+        # object. On Python 2.6.5, it's stored in the _tunnel_host
+        # attribute which has no accessor.
+        tunhost = getattr(req, '_tunnel_host', None)
+        host = req.get_host()
+        if tunhost:
+            proxyhost = host
+            host = tunhost
+        elif req.has_proxy():
+            proxyhost = req.get_host()
+            host = req.get_selector().split('://', 1)[1].split('/', 1)[0]
+        else:
+            proxyhost = None
+
+        if proxyhost:
+            if ':' in proxyhost:
+                # Note: this means we'll explode if we try and use an
+                # IPv6 http proxy. This isn't a regression, so we
+                # won't worry about it for now.
+                proxyhost, proxyport = proxyhost.rsplit(':', 1)
+            else:
+                proxyport = 3128 # squid default
+            proxy = (proxyhost, proxyport)
+        else:
+            proxy = None
+
+        if not host:
+            raise urllib2.URLError('no host given')
+
+        allconns = self._connections.get((host, proxy), [])
+        conns = [c for c in allconns if not c.busy()]
+        if conns:
+            h = conns[0]
+        else:
+            if allconns:
+                self.ui.debug('all connections for %s busy, making a new '
+                              'one\n' % host)
+            timeout = None
+            if req.timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
+                timeout = req.timeout
+            h = http_class(host, timeout=timeout, proxy_hostport=proxy)
+            self._connections.setdefault((host, proxy), []).append(h)
+
+        headers = dict(req.headers)
+        headers.update(req.unredirected_hdrs)
+        headers = dict(
+            (name.title(), val) for name, val in headers.items())
+        try:
+            path = req.get_selector()
+            if '://' in path:
+                path = path.split('://', 1)[1].split('/', 1)[1]
+            if path[0] != '/':
+                path = '/' + path
+            h.request(req.get_method(), path, req.data, headers)
+            r = h.getresponse()
+        except socket.error, err: # XXX what error?
+            raise urllib2.URLError(err)
+
+        # Pick apart the HTTPResponse object to get the addinfourl
+        # object initialized properly.
+        r.recv = r.read
+
+        resp = urllib.addinfourl(r, r.headers, req.get_full_url())
+        resp.code = r.status
+        resp.msg = r.reason
+        return resp
+
+    # httplib always uses the given host/port as the socket connect
+    # target, and then allows full URIs in the request path, which it
+    # then observes and treats as a signal to do proxying instead.
+    def http_open(self, req):
+        if req.get_full_url().startswith('https'):
+            return self.https_open(req)
+        return self.do_open(HTTPConnection, req)
+
+    def https_open(self, req):
+        res = readauthforuri(self.ui, req.get_full_url())
+        if res:
+            group, auth = res
+            self.auth = auth
+            self.ui.debug("using auth.%s.* for authentication\n" % group)
+        else:
+            self.auth = None
+        return self.do_open(self._makesslconnection, req)
+
+    def _makesslconnection(self, host, port=443, *args, **kwargs):
+        keyfile = None
+        certfile = None
+
+        if args: # key_file
+            keyfile = args.pop(0)
+        if args: # cert_file
+            certfile = args.pop(0)
+
+        # if the user has specified different key/cert files in
+        # hgrc, we prefer these
+        if self.auth and 'key' in self.auth and 'cert' in self.auth:
+            keyfile = self.auth['key']
+            certfile = self.auth['cert']
+
+        # let host port take precedence
+        if ':' in host and '[' not in host or ']:' in host:
+            host, port = host.rsplit(':', 1)
+            port = int(port)
+            if '[' in host:
+                host = host[1:-1]
+
+        if keyfile:
+            kwargs['keyfile'] = keyfile
+        if certfile:
+            kwargs['certfile'] = certfile
+
+        kwargs.update(sslutil.sslkwargs(self.ui, host))
+
+        con = HTTPConnection(host, port, use_ssl=True,
+                             ssl_validator=sslutil.validator(self.ui, host),
+                             **kwargs)
+        return con