changeset 5066:167c422c745f

httprepo: quote the path part of the URL. This should fix 'hg clone "http://hg.example.org/path with spaces/"'. The code tries to do the right thing when the user passes a path that's already escaped in part (e.g. "http://hg.example.org/path%20with spaces/"). If we're redirected, urllib2 will happily follow the URL it's given without escaping anything. I'm not sure what we would have to hook to work around that.
author Alexis S. L. Carvalho <alexis@cecm.usp.br>
date Mon, 06 Aug 2007 00:35:06 -0300
parents b304c2496f52
children 73169e5d2732 79373ec3f27d
files mercurial/httprepo.py tests/test-doctest.py
diffstat 2 files changed, 43 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/httprepo.py	Sun Aug 05 23:04:56 2007 -0300
+++ b/mercurial/httprepo.py	Mon Aug 06 00:35:06 2007 -0300
@@ -144,6 +144,43 @@
         raise IOError(None, _('connection ended unexpectedly'))
     yield zd.flush()
 
+_safe = ('abcdefghijklmnopqrstuvwxyz'  # characters quotepath leaves as-is
+         'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+         '0123456789' '_.-/')
+_safeset = None  # set built from _safe on the first quotepath call
+_hex = None  # set of hex digit characters, built on the first call
+def quotepath(path):
+    '''quote the path part of a URL
+
+    This is similar to urllib.quote, but it also tries to avoid
+    quoting things twice (inspired by wget):
+
+    >>> quotepath('abc def')
+    'abc%20def'
+    >>> quotepath('abc%20def')
+    'abc%20def'
+    >>> quotepath('abc%20 def')
+    'abc%20%20def'
+    >>> quotepath('abc def%20')
+    'abc%20def%20'
+    >>> quotepath('abc def%2')
+    'abc%20def%252'
+    >>> quotepath('abc def%')
+    'abc%20def%25'
+    '''
+    global _safeset, _hex
+    if _safeset is None:  # first call: build both lookup sets
+        _safeset = util.set(_safe)
+        _hex = util.set('abcdefABCDEF0123456789')
+    l = list(path)  # per-character list so entries can be replaced in place
+    for i in xrange(len(l)):
+        c = l[i]
+        if c == '%' and i + 2 < len(l) and (l[i+1] in _hex and l[i+2] in _hex):
+            pass  # '%' followed by two hex digits: keep the existing escape
+        elif c not in _safeset:
+            l[i] = '%%%02X' % ord(c)  # '%' plus two uppercase hex digits
+    return ''.join(l)
+
 class httprepository(remoterepository):
     def __init__(self, ui, path):
         self.path = path
@@ -153,13 +190,16 @@
         if query or frag:
             raise util.Abort(_('unsupported URL component: "%s"') %
                              (query or frag))
-        if not urlpath: urlpath = '/'
+        if not urlpath:
+            urlpath = '/'
+        urlpath = quotepath(urlpath)
         host, port, user, passwd = netlocsplit(netloc)
 
         # urllib cannot handle URLs with embedded user or passwd
         self._url = urlparse.urlunsplit((scheme, netlocunsplit(host, port),
                                          urlpath, '', ''))
         self.ui = ui
+        self.ui.debug(_('using %s\n') % self._url)
 
         proxyurl = ui.config("http_proxy", "host") or os.getenv('http_proxy')
         # XXX proxyauthinfo = None
--- a/tests/test-doctest.py	Sun Aug 05 23:04:56 2007 -0300
+++ b/tests/test-doctest.py	Mon Aug 06 00:35:06 2007 -0300
@@ -5,3 +5,5 @@
 
 doctest.testmod(mercurial.changelog)
 
+import mercurial.httprepo
+doctest.testmod(mercurial.httprepo)  # run the doctests in mercurial.httprepo