http: fix many problems with url parsing and auth. added proxy test.
author Vadim Gelfer <vadim.gelfer@gmail.com>
Mon, 22 May 2006 15:42:49 -0700
changeset 2337 3f24bc5dee81
parent 2336 f77edcffb837
child 2338 391c5d0f9ef3
child 2339 11422943cf72
http: fix many problems with url parsing and auth. added proxy test.

problems fixed:
- https scheme handled properly for real and proxy urls.
- url of form "http://user:password@host:port/path" now ok.
- no-proxy check uses proper host names.
mercurial/httprepo.py
tests/test-http-proxy
tests/test-http-proxy.out
tests/tinyproxy.py
--- a/mercurial/httprepo.py	Mon May 22 09:00:24 2006 -0700
+++ b/mercurial/httprepo.py	Mon May 22 15:42:49 2006 -0700
@@ -22,6 +22,9 @@
         if authinfo != (None, None):
             return authinfo
 
+        if not ui.interactive:
+            raise util.Abort(_('http authorization required'))
+
         self.ui.write(_("http authorization required\n"))
         self.ui.status(_("realm: %s\n") % realm)
         user = self.ui.prompt(_("user:"), default=None)
@@ -30,37 +33,95 @@
         self.add_password(realm, authuri, user, passwd)
         return (user, passwd)
 
+def netlocsplit(netloc):
+    '''split [user[:passwd]@]host[:port] into 4-tuple.'''
+
+    a = netloc.find('@')
+    if a == -1:
+        user, passwd = None, None
+    else:
+        userpass, netloc = netloc[:a], netloc[a+1:]
+        c = userpass.find(':')
+        if c == -1:
+            user, passwd = urllib.unquote(userpass), None
+        else:
+            user = urllib.unquote(userpass[:c])
+            passwd = urllib.unquote(userpass[c+1:])
+    c = netloc.find(':')
+    if c == -1:
+        host, port = netloc, None
+    else:
+        host, port = netloc[:c], netloc[c+1:]
+    return host, port, user, passwd
+
+def netlocunsplit(host, port, user=None, passwd=None):
+    '''turn host, port, user, passwd into [user[:passwd]@]host[:port].'''
+    if port:
+        hostport = host + ':' + port
+    else:
+        hostport = host
+    if user:
+        if passwd:
+            userpass = urllib.quote(user) + ':' + urllib.quote(passwd)
+        else:
+            userpass = urllib.quote(user)
+        return userpass + '@' + hostport
+    return hostport
+
 class httprepository(remoterepository):
     def __init__(self, ui, path):
-        # fix missing / after hostname
-        s = urlparse.urlsplit(path)
-        partial = s[2]
-        if not partial: partial = "/"
-        self.url = urlparse.urlunsplit((s[0], s[1], partial, '', ''))
+        scheme, netloc, urlpath, query, frag = urlparse.urlsplit(path)
+        if query or frag:
+            raise util.Abort(_('unsupported URL component: "%s"') %
+                             (query or frag))
+        if not urlpath: urlpath = '/'
+        host, port, user, passwd = netlocsplit(netloc)
+
+        # urllib cannot handle URLs with embedded user or passwd
+        self.url = urlparse.urlunsplit((scheme, netlocunsplit(host, port),
+                                        urlpath, '', ''))
         self.ui = ui
-        no_list = [ "localhost", "127.0.0.1" ]
-        host = ui.config("http_proxy", "host")
-        if host is None:
-            host = os.environ.get("http_proxy")
-        if host and host.startswith('http://'):
-            host = host[7:]
-        user = ui.config("http_proxy", "user")
-        passwd = ui.config("http_proxy", "passwd")
-        no = ui.config("http_proxy", "no")
-        if no is None:
-            no = os.environ.get("no_proxy")
-        if no:
-            no_list = no_list + no.split(",")
+
+        proxyurl = ui.config("http_proxy", "host") or os.getenv('http_proxy')
+        proxyauthinfo = None
+        handler = urllib2.BaseHandler()
+
+        if proxyurl:
+            # proxy can be proper url or host[:port]
+            if not (proxyurl.startswith('http:') or
+                    proxyurl.startswith('https:')):
+                proxyurl = 'http://' + proxyurl + '/'
+            snpqf = urlparse.urlsplit(proxyurl)
+            proxyscheme, proxynetloc, proxypath, proxyquery, proxyfrag = snpqf
+            hpup = netlocsplit(proxynetloc)
+
+            proxyhost, proxyport, proxyuser, proxypasswd = hpup
+            if not proxyuser:
+                proxyuser = ui.config("http_proxy", "user")
+                proxypasswd = ui.config("http_proxy", "passwd")
 
-        no_proxy = 0
-        for h in no_list:
-            if (path.startswith("http://" + h + "/") or
-                path.startswith("http://" + h + ":") or
-                path == "http://" + h):
-                no_proxy = 1
+            # see if we should use a proxy for this url
+            no_list = [ "localhost", "127.0.0.1" ]
+            no_list.extend([p.strip().lower() for
+                            p in ui.config("http_proxy", "no", '').split(',')
+                            if p.strip()])
+            no_list.extend([p.strip().lower() for
+                            p in os.getenv("no_proxy", '').split(',')
+                            if p.strip()])
+            # "http_proxy.always" config is for running tests on localhost
+            if (not ui.configbool("http_proxy", "always") and
+                host.lower() in no_list):
+                ui.debug(_('disabling proxy for %s\n') % host)
+            else:
+                proxyurl = urlparse.urlunsplit((
+                    proxyscheme, netlocunsplit(proxyhost, proxyport,
+                                               proxyuser, proxypasswd or ''),
+                    proxypath, proxyquery, proxyfrag))
+                handler = urllib2.ProxyHandler({scheme: proxyurl})
+                ui.debug(_('proxying through %s\n') % proxyurl)
 
-        # Note: urllib2 takes proxy values from the environment and those will
-        # take precedence
+        # urllib2 takes proxy values from the environment and those
+        # will take precedence if found, so drop them
         for env in ["HTTP_PROXY", "http_proxy", "no_proxy"]:
             try:
                 if os.environ.has_key(env):
@@ -68,24 +129,15 @@
             except OSError:
                 pass
 
-        proxy_handler = urllib2.BaseHandler()
-        if host and not no_proxy:
-            proxy_handler = urllib2.ProxyHandler({"http" : "http://" + host})
+        passmgr = passwordmgr(ui)
+        if user:
+            ui.debug(_('will use user %s for http auth\n') % user)
+            passmgr.add_password(None, host, user, passwd or '')
 
-        proxyauthinfo = None
-        if user and passwd:
-            passmgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
-            passmgr.add_password(None, host, user, passwd)
-            proxyauthinfo = urllib2.ProxyBasicAuthHandler(passmgr)
-
-        if ui.interactive:
-            passmgr = passwordmgr(ui)
-            opener = urllib2.build_opener(
-                proxy_handler, proxyauthinfo,
-                urllib2.HTTPBasicAuthHandler(passmgr),
-                urllib2.HTTPDigestAuthHandler(passmgr))
-        else:
-            opener = urllib2.build_opener(proxy_handler, proxyauthinfo)
+        opener = urllib2.build_opener(
+            handler,
+            urllib2.HTTPBasicAuthHandler(passmgr),
+            urllib2.HTTPDigestAuthHandler(passmgr))
 
         # 1.0 here is the _protocol_ version
         opener.addheaders = [('User-agent', 'mercurial/proto-1.0')]
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-http-proxy	Mon May 22 15:42:49 2006 -0700
@@ -0,0 +1,30 @@
+#!/bin/sh
+
+hg init a
+cd a
+echo a > a
+hg ci -Ama -d '1123456789 0'
+hg serve -p 20059 -d --pid-file=hg.pid
+
+cd ..
+("$TESTDIR/tinyproxy.py" 20060 localhost >/dev/null 2>&1 </dev/null &
+echo $! > proxy.pid)
+sleep 2
+
+echo %% url for proxy
+http_proxy=http://localhost:20060/ hg --config http_proxy.always=True clone http://localhost:20059/ b
+
+echo %% host:port for proxy
+http_proxy=localhost:20060 hg clone --config http_proxy.always=True http://localhost:20059/ c
+
+echo %% proxy url with user name and password
+http_proxy=http://user:passwd@localhost:20060 hg clone --config http_proxy.always=True http://localhost:20059/ d
+
+echo %% url with user name and password
+http_proxy=http://user:passwd@localhost:20060 hg clone --config http_proxy.always=True http://user:passwd@localhost:20059/ e
+
+echo %% bad host:port for proxy
+http_proxy=localhost:20061 hg clone --config http_proxy.always=True http://localhost:20059/ f
+
+kill $(cat proxy.pid a/hg.pid)
+exit 0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-http-proxy.out	Mon May 22 15:42:49 2006 -0700
@@ -0,0 +1,31 @@
+adding a
+%% url for proxy
+requesting all changes
+adding changesets
+adding manifests
+adding file changes
+added 1 changesets with 1 changes to 1 files
+1 files updated, 0 files merged, 0 files removed, 0 files unresolved
+%% host:port for proxy
+requesting all changes
+adding changesets
+adding manifests
+adding file changes
+added 1 changesets with 1 changes to 1 files
+1 files updated, 0 files merged, 0 files removed, 0 files unresolved
+%% proxy url with user name and password
+requesting all changes
+adding changesets
+adding manifests
+adding file changes
+added 1 changesets with 1 changes to 1 files
+1 files updated, 0 files merged, 0 files removed, 0 files unresolved
+%% url with user name and password
+requesting all changes
+adding changesets
+adding manifests
+adding file changes
+added 1 changesets with 1 changes to 1 files
+1 files updated, 0 files merged, 0 files removed, 0 files unresolved
+%% bad host:port for proxy
+abort: error: Connection refused
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/tinyproxy.py	Mon May 22 15:42:49 2006 -0700
@@ -0,0 +1,132 @@
+#!/usr/bin/env python
+
+__doc__ = """Tiny HTTP Proxy.
+
+This module implements GET, HEAD, POST, PUT and DELETE methods
+on BaseHTTPServer, and behaves as an HTTP proxy.  The CONNECT
+method is also implemented experimentally, but has not been
+tested yet.
+
+Any help will be greatly appreciated.           SUZUKI Hisao
+"""
+
+__version__ = "0.2.1"
+
+import BaseHTTPServer, select, socket, SocketServer, urlparse
+
+class ProxyHandler (BaseHTTPServer.BaseHTTPRequestHandler):
+    __base = BaseHTTPServer.BaseHTTPRequestHandler
+    __base_handle = __base.handle
+
+    server_version = "TinyHTTPProxy/" + __version__
+    rbufsize = 0                        # self.rfile Be unbuffered
+
+    def handle(self):
+        (ip, port) =  self.client_address
+        if hasattr(self, 'allowed_clients') and ip not in self.allowed_clients:
+            self.raw_requestline = self.rfile.readline()
+            if self.parse_request(): self.send_error(403)
+        else:
+            self.__base_handle()
+
+    def _connect_to(self, netloc, soc):
+        i = netloc.find(':')
+        if i >= 0:
+            host_port = netloc[:i], int(netloc[i+1:])
+        else:
+            host_port = netloc, 80
+        print "\t" "connect to %s:%d" % host_port
+        try: soc.connect(host_port)
+        except socket.error, arg:
+            try: msg = arg[1]
+            except: msg = arg
+            self.send_error(404, msg)
+            return 0
+        return 1
+
+    def do_CONNECT(self):
+        soc = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+        try:
+            if self._connect_to(self.path, soc):
+                self.log_request(200)
+                self.wfile.write(self.protocol_version +
+                                 " 200 Connection established\r\n")
+                self.wfile.write("Proxy-agent: %s\r\n" % self.version_string())
+                self.wfile.write("\r\n")
+                self._read_write(soc, 300)
+        finally:
+            print "\t" "bye"
+            soc.close()
+            self.connection.close()
+
+    def do_GET(self):
+        (scm, netloc, path, params, query, fragment) = urlparse.urlparse(
+            self.path, 'http')
+        if scm != 'http' or fragment or not netloc:
+            self.send_error(400, "bad url %s" % self.path)
+            return
+        soc = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+        try:
+            if self._connect_to(netloc, soc):
+                self.log_request()
+                soc.send("%s %s %s\r\n" % (
+                    self.command,
+                    urlparse.urlunparse(('', '', path, params, query, '')),
+                    self.request_version))
+                self.headers['Connection'] = 'close'
+                del self.headers['Proxy-Connection']
+                for key_val in self.headers.items():
+                    soc.send("%s: %s\r\n" % key_val)
+                soc.send("\r\n")
+                self._read_write(soc)
+        finally:
+            print "\t" "bye"
+            soc.close()
+            self.connection.close()
+
+    def _read_write(self, soc, max_idling=20):
+        iw = [self.connection, soc]
+        ow = []
+        count = 0
+        while 1:
+            count += 1
+            (ins, _, exs) = select.select(iw, ow, iw, 3)
+            if exs: break
+            if ins:
+                for i in ins:
+                    if i is soc:
+                        out = self.connection
+                    else:
+                        out = soc
+                    data = i.recv(8192)
+                    if data:
+                        out.send(data)
+                        count = 0
+            else:
+                print "\t" "idle", count
+            if count == max_idling: break
+
+    do_HEAD = do_GET
+    do_POST = do_GET
+    do_PUT  = do_GET
+    do_DELETE=do_GET
+
+class ThreadingHTTPServer (SocketServer.ThreadingMixIn,
+                           BaseHTTPServer.HTTPServer): pass
+
+if __name__ == '__main__':
+    from sys import argv
+    if argv[1:] and argv[1] in ('-h', '--help'):
+        print argv[0], "[port [allowed_client_name ...]]"
+    else:
+        if argv[2:]:
+            allowed = []
+            for name in argv[2:]:
+                client = socket.gethostbyname(name)
+                allowed.append(client)
+                print "Accept: %s (%s)" % (client, name)
+            ProxyHandler.allowed_clients = allowed
+            del argv[2:]
+        else:
+            print "Any clients will be served..."
+        BaseHTTPServer.test(ProxyHandler, ThreadingHTTPServer)