changeset 14244:e7525a555a64

url: use new http support if requested by the user. The new http library is wired in via an extra module (httpconnection.py), as it requires similar but different plumbing to connect the library to Mercurial's internals and urllib2. Eventually we should be able to remove all of keepalive.py and its associated tangle in url.py and replace it all with the code in httpconnection.py. To use the new library, set 'ui.usehttp2' to true. The underlying http library uses the logging module liberally, so if things break you can use 'ui.http2debuglevel' to set the log level to INFO or DEBUG to get that logging information (for example, ui.http2debuglevel=info).
author Augie Fackler <durin42@gmail.com>
date Fri, 06 May 2011 10:22:08 -0500
parents 861f28212398
children 13d44e4235f8
files mercurial/httpconnection.py mercurial/httprepo.py mercurial/url.py
diffstat 3 files changed, 276 insertions(+), 79 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/httpconnection.py	Fri May 06 10:22:08 2011 -0500
@@ -0,0 +1,264 @@
+# httpconnection.py - urllib2 handler for new http support
+#
+# Copyright 2005, 2006, 2007, 2008 Matt Mackall <mpm@selenic.com>
+# Copyright 2006, 2007 Alexis S. L. Carvalho <alexis@cecm.usp.br>
+# Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
+# Copyright 2011 Google, Inc.
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+import logging
+import socket
+import urllib
+import urllib2
+import os
+
+from mercurial import httpclient
+from mercurial import sslutil
+from mercurial import util
+from mercurial.i18n import _
+
+# moved here from url.py to avoid a cycle
+class httpsendfile(object):
+    """This is a wrapper around the objects returned by python's "open".
+
+    Its purpose is to send file-like objects via HTTP and, to do so, it
+    defines a __len__ attribute to feed the Content-Length header.
+    """
+
+    def __init__(self, ui, *args, **kwargs):
+        # The builtin "open" can be called directly here: unlike url.py,
+        # where this class used to live, this module defines no "open"
+        # function that would shadow it
+        self.ui = ui
+        self._data = open(*args, **kwargs)
+        self.seek = self._data.seek
+        self.close = self._data.close
+        self.write = self._data.write
+        self._len = os.fstat(self._data.fileno()).st_size
+        self._pos = 0
+        self._total = len(self) / 1024 * 2 # kb, doubled: see read()
+
+    def read(self, *args, **kwargs):
+        try:
+            ret = self._data.read(*args, **kwargs)
+        except EOFError: # NOTE(review): 'ret' is left unbound on this path
+            self.ui.progress(_('sending'), None)
+        self._pos += len(ret)
+        # We pass double the max for total because we currently have
+        # to send the bundle twice in the case of a server that
+        # requires authentication. Since we can't know until we try
+        # once whether authentication will be required, just lie to
+        # the user and maybe the push succeeds suddenly at 50%.
+        self.ui.progress(_('sending'), self._pos / 1024,
+                         unit=_('kb'), total=self._total)
+        return ret
+
+    def __len__(self):
+        return self._len
+
+# moved here from url.py to avoid a cycle
+def readauthforuri(ui, uri):
+    # Read configuration: collect [auth] items into one dict per group
+    config = dict()
+    for key, val in ui.configitems('auth'):
+        if '.' not in key:
+            ui.warn(_("ignoring invalid [auth] key '%s'\n") % key)
+            continue
+        group, setting = key.rsplit('.', 1)
+        gdict = config.setdefault(group, dict())
+        if setting in ('username', 'cert', 'key'):
+            val = util.expandpath(val)
+        gdict[setting] = val
+
+    # Find the best match: the group with the longest matching prefix
+    scheme, hostpath = uri.split('://', 1)
+    bestlen = 0
+    bestauth = None
+    for group, auth in config.iteritems():
+        prefix = auth.get('prefix')
+        if not prefix:
+            continue
+        p = prefix.split('://', 1)
+        if len(p) > 1:
+            schemes, prefix = [p[0]], p[1] # scheme given in the prefix itself
+        else:
+            schemes = (auth.get('schemes') or 'https').split()
+        if (prefix == '*' or hostpath.startswith(prefix)) and \
+            len(prefix) > bestlen and scheme in schemes:
+            bestlen = len(prefix)
+            bestauth = group, auth
+    return bestauth # (group, auth) tuple, or None if nothing matched
+
+# Mercurial (at least until we can remove the old codepath) requires
+# that the http response object be sufficiently file-like, so we
+# provide a close() method here.
+class HTTPResponse(httpclient.HTTPResponse):
+    def close(self):
+        pass # deliberate no-op: present only so the object looks file-like
+
+class HTTPConnection(httpclient.HTTPConnection):
+    response_class = HTTPResponse
+    def request(self, method, uri, body=None, headers={}):
+        if isinstance(body, httpsendfile):
+            body.seek(0) # rewind: with auth the bundle can be sent twice
+        httpclient.HTTPConnection.request(self, method, uri, body=body,
+                                          headers=headers)
+
+
+_configuredlogging = False
+# Subclass BOTH of these because otherwise urllib2 "helpfully"
+# reinserts them since it notices we don't include any subclasses of
+# them.
+class http2handler(urllib2.HTTPHandler, urllib2.HTTPSHandler):
+    def __init__(self, ui, pwmgr):
+        global _configuredlogging
+        urllib2.AbstractHTTPHandler.__init__(self)
+        self.ui = ui
+        self.pwmgr = pwmgr
+        self._connections = {} # (host, proxy) -> list of connections
+        loglevel = ui.config('ui', 'http2debuglevel', default=None)
+        if loglevel and not _configuredlogging:
+            _configuredlogging = True
+            logger = logging.getLogger('mercurial.http')
+            logger.setLevel(getattr(logging, loglevel.upper()))
+            logger.addHandler(logging.StreamHandler())
+
+    def close_all(self):
+        """Close and remove all connection objects being kept for reuse."""
+        for openconns in self._connections.values():
+            for conn in openconns:
+                conn.close()
+        self._connections = {}
+
+    # shamelessly borrowed from urllib2.AbstractHTTPHandler
+    def do_open(self, http_class, req):
+        """Return an addinfourl object for the request, using http_class.
+
+        http_class must implement the HTTPConnection API from httplib.
+        The addinfourl return value is a file-like object.  It also
+        has methods and attributes including:
+            - info(): return a mimetools.Message object for the headers
+            - geturl(): return the original request URL
+            - code: HTTP status code
+        """
+        # If using a proxy, the host returned by get_host() is
+        # actually the proxy. On Python 2.6.1, the real destination
+        # hostname is encoded in the URI in the urllib2 request
+        # object. On Python 2.6.5, it's stored in the _tunnel_host
+        # attribute which has no accessor.
+        tunhost = getattr(req, '_tunnel_host', None)
+        host = req.get_host()
+        if tunhost:
+            proxyhost = host
+            host = tunhost
+        elif req.has_proxy():
+            proxyhost = req.get_host()
+            host = req.get_selector().split('://', 1)[1].split('/', 1)[0]
+        else:
+            proxyhost = None
+
+        if proxyhost:
+            if ':' in proxyhost:
+                # Note: this means we'll explode if we try and use an
+                # IPv6 http proxy. This isn't a regression, so we
+                # won't worry about it for now.
+                proxyhost, proxyport = proxyhost.rsplit(':', 1)
+            else:
+                proxyport = 3128 # squid default
+            proxy = (proxyhost, proxyport)
+        else:
+            proxy = None
+
+        if not host:
+            raise urllib2.URLError('no host given')
+
+        allconns = self._connections.get((host, proxy), [])
+        conns = [c for c in allconns if not c.busy()]
+        if conns:
+            h = conns[0]
+        else:
+            if allconns:
+                self.ui.debug('all connections for %s busy, making a new '
+                              'one\n' % host)
+            timeout = None
+            if req.timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
+                timeout = req.timeout
+            h = http_class(host, timeout=timeout, proxy_hostport=proxy)
+            self._connections.setdefault((host, proxy), []).append(h)
+
+        headers = dict(req.headers)
+        headers.update(req.unredirected_hdrs)
+        headers = dict(
+            (name.title(), val) for name, val in headers.items())
+        try:
+            path = req.get_selector()
+            if '://' in path:
+                path = path.split('://', 1)[1].split('/', 1)[1]
+            if path[0] != '/':
+                path = '/' + path
+            h.request(req.get_method(), path, req.data, headers)
+            r = h.getresponse()
+        except socket.error, err: # XXX what error?
+            raise urllib2.URLError(err)
+
+        # Pick apart the HTTPResponse object to get the addinfourl
+        # object initialized properly.
+        r.recv = r.read
+
+        resp = urllib.addinfourl(r, r.headers, req.get_full_url())
+        resp.code = r.status
+        resp.msg = r.reason
+        return resp
+
+    # httplib always uses the given host/port as the socket connect
+    # target, and then allows full URIs in the request path, which it
+    # then observes and treats as a signal to do proxying instead.
+    def http_open(self, req):
+        if req.get_full_url().startswith('https'):
+            return self.https_open(req)
+        return self.do_open(HTTPConnection, req)
+
+    def https_open(self, req):
+        res = readauthforuri(self.ui, req.get_full_url())
+        if res:
+            group, auth = res
+            self.auth = auth # read back by _makesslconnection()
+            self.ui.debug("using auth.%s.* for authentication\n" % group)
+        else:
+            self.auth = None
+        return self.do_open(self._makesslconnection, req)
+
+    def _makesslconnection(self, host, port=443, *args, **kwargs):
+        """Return an SSL HTTPConnection to host (http_class for do_open)."""
+        keyfile = certfile = None
+        # *args is a tuple and has no pop(), so index into it instead
+        if args: # key_file
+            keyfile = args[0]
+        if len(args) > 1: # cert_file
+            certfile = args[1]
+
+        # if the user has specified different key/cert files in
+        # hgrc, we prefer these
+        if self.auth and 'key' in self.auth and 'cert' in self.auth:
+            keyfile = self.auth['key']
+            certfile = self.auth['cert']
+
+        # let host port take precedence
+        if ':' in host and '[' not in host or ']:' in host:
+            host, port = host.rsplit(':', 1)
+            port = int(port)
+            if '[' in host:
+                host = host[1:-1]
+
+        if keyfile:
+            kwargs['keyfile'] = keyfile
+        if certfile:
+            kwargs['certfile'] = certfile
+
+        kwargs.update(sslutil.sslkwargs(self.ui, host))
+
+        con = HTTPConnection(host, port, use_ssl=True,
+                             ssl_validator=sslutil.validator(self.ui, host),
+                             **kwargs)
+        return con
--- a/mercurial/httprepo.py	Fri May 06 09:57:55 2011 -0500
+++ b/mercurial/httprepo.py	Fri May 06 10:22:08 2011 -0500
@@ -8,7 +8,7 @@
 
 from node import nullid
 from i18n import _
-import changegroup, statichttprepo, error, url, util, wireproto
+import changegroup, statichttprepo, error, httpconnection, url, util, wireproto
 import os, urllib, urllib2, zlib, httplib
 import errno, socket
 
@@ -180,7 +180,7 @@
                 break
 
         tempname = changegroup.writebundle(cg, None, type)
-        fp = url.httpsendfile(self.ui, tempname, "rb")
+        fp = httpconnection.httpsendfile(self.ui, tempname, "rb")
         headers = {'Content-Type': 'application/mercurial-0.1'}
 
         try:
--- a/mercurial/url.py	Fri May 06 09:57:55 2011 -0500
+++ b/mercurial/url.py	Fri May 06 10:22:08 2011 -0500
@@ -8,41 +8,9 @@
 # GNU General Public License version 2 or any later version.
 
 import urllib, urllib2, httplib, os, socket, cStringIO
-import __builtin__
 from i18n import _
 import keepalive, util, sslutil
-
-def readauthforuri(ui, uri):
-    # Read configuration
-    config = dict()
-    for key, val in ui.configitems('auth'):
-        if '.' not in key:
-            ui.warn(_("ignoring invalid [auth] key '%s'\n") % key)
-            continue
-        group, setting = key.rsplit('.', 1)
-        gdict = config.setdefault(group, dict())
-        if setting in ('username', 'cert', 'key'):
-            val = util.expandpath(val)
-        gdict[setting] = val
-
-    # Find the best match
-    scheme, hostpath = uri.split('://', 1)
-    bestlen = 0
-    bestauth = None
-    for group, auth in config.iteritems():
-        prefix = auth.get('prefix')
-        if not prefix:
-            continue
-        p = prefix.split('://', 1)
-        if len(p) > 1:
-            schemes, prefix = [p[0]], p[1]
-        else:
-            schemes = (auth.get('schemes') or 'https').split()
-        if (prefix == '*' or hostpath.startswith(prefix)) and \
-            len(prefix) > bestlen and scheme in schemes:
-            bestlen = len(prefix)
-            bestauth = group, auth
-    return bestauth
+import httpconnection as httpconnectionmod
 
 class passwordmgr(urllib2.HTTPPasswordMgrWithDefaultRealm):
     def __init__(self, ui):
@@ -58,7 +26,7 @@
             return (user, passwd)
 
         if not user:
-            res = readauthforuri(self.ui, authuri)
+            res = httpconnectionmod.readauthforuri(self.ui, authuri)
             if res:
                 group, auth = res
                 user, passwd = auth.get('username'), auth.get('password')
@@ -149,48 +117,10 @@
 
         return urllib2.ProxyHandler.proxy_open(self, req, proxy, type_)
 
-class httpsendfile(object):
-    """This is a wrapper around the objects returned by python's "open".
-
-    Its purpose is to send file-like objects via HTTP and, to do so, it
-    defines a __len__ attribute to feed the Content-Length header.
-    """
-
-    def __init__(self, ui, *args, **kwargs):
-        # We can't just "self._data = open(*args, **kwargs)" here because there
-        # is an "open" function defined in this module that shadows the global
-        # one
-        self.ui = ui
-        self._data = __builtin__.open(*args, **kwargs)
-        self.seek = self._data.seek
-        self.close = self._data.close
-        self.write = self._data.write
-        self._len = os.fstat(self._data.fileno()).st_size
-        self._pos = 0
-        self._total = len(self) / 1024 * 2
-
-    def read(self, *args, **kwargs):
-        try:
-            ret = self._data.read(*args, **kwargs)
-        except EOFError:
-            self.ui.progress(_('sending'), None)
-        self._pos += len(ret)
-        # We pass double the max for total because we currently have
-        # to send the bundle twice in the case of a server that
-        # requires authentication. Since we can't know until we try
-        # once whether authentication will be required, just lie to
-        # the user and maybe the push succeeds suddenly at 50%.
-        self.ui.progress(_('sending'), self._pos / 1024,
-                         unit=_('kb'), total=self._total)
-        return ret
-
-    def __len__(self):
-        return self._len
-
 def _gen_sendfile(orgsend):
     def _sendfile(self, data):
         # send a file
-        if isinstance(data, httpsendfile):
+        if isinstance(data, httpconnectionmod.httpsendfile):
             # if auth required, some data sent twice, so rewind here
             data.seek(0)
             for chunk in util.filechunkiter(data):
@@ -412,7 +342,7 @@
             return keepalive.KeepAliveHandler._start_transaction(self, h, req)
 
         def https_open(self, req):
-            res = readauthforuri(self.ui, req.get_full_url())
+            res = httpconnectionmod.readauthforuri(self.ui, req.get_full_url())
             if res:
                 group, auth = res
                 self.auth = auth
@@ -495,9 +425,12 @@
     construct an opener suitable for urllib2
     authinfo will be added to the password manager
     '''
-    handlers = [httphandler()]
-    if has_https:
-        handlers.append(httpshandler(ui))
+    if ui.configbool('ui', 'usehttp2', False):
+        handlers = [httpconnectionmod.http2handler(ui, passwordmgr(ui))]
+    else:
+        handlers = [httphandler()]
+        if has_https:
+            handlers.append(httpshandler(ui))
 
     handlers.append(proxyhandler(ui))