mercurial/url.py
changeset 13770 4e8f2310f310
parent 13544 66d65bccbf06
child 13772 463aca32a937
equal deleted inserted replaced
13769:8796fb6af67e 13770:4e8f2310f310
    20         not result.startswith(scheme + '://') and
    20         not result.startswith(scheme + '://') and
    21         url.startswith(scheme + '://')
    21         url.startswith(scheme + '://')
    22        ):
    22        ):
    23         result = scheme + '://' + result[len(scheme + ':'):]
    23         result = scheme + '://' + result[len(scheme + ':'):]
    24     return result
    24     return result
       
    25 
       
    26 class url(object):
       
    27     """Reliable URL parser.
       
    28 
       
    29     This parses URLs and provides attributes for the following
       
    30     components:
       
    31 
       
    32     <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
       
    33 
       
    34     Missing components are set to None. The only exception is
       
    35     fragment, which is set to '' if present but empty.
       
    36 
       
    37     If parse_fragment is False, fragment is included in query. If
       
    38     parse_query is False, query is included in path. If both are
       
    39     False, both fragment and query are included in path.
       
    40 
       
    41     See http://www.ietf.org/rfc/rfc2396.txt for more information.
       
    42 
       
    43     Examples:
       
    44 
       
    45     >>> url('http://www.ietf.org/rfc/rfc2396.txt')
       
    46     <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
       
    47     >>> url('ssh://[::1]:2200//home/joe/repo')
       
    48     <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
       
    49     >>> url('file:///home/joe/repo')
       
    50     <url scheme: 'file', path: '/home/joe/repo'>
       
    51     >>> url('bundle:foo')
       
    52     <url scheme: 'bundle', path: 'foo'>
       
    53 
       
    54     Authentication credentials:
       
    55 
       
    56     >>> url('ssh://joe:xyz@x/repo')
       
    57     <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
       
    58     >>> url('ssh://joe@x/repo')
       
    59     <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
       
    60 
       
    61     Query strings and fragments:
       
    62 
       
    63     >>> url('http://host/a?b#c')
       
    64     <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
       
    65     >>> url('http://host/a?b#c', parse_query=False, parse_fragment=False)
       
    66     <url scheme: 'http', host: 'host', path: 'a?b#c'>
       
    67     """
       
    68 
       
    69     _safechars = "!~*'()+"
       
    70     _safepchars = "/!~*'()+"
       
    71 
       
    72     def __init__(self, path, parse_query=True, parse_fragment=True):
       
    73         # We slowly chomp away at path until we have only the path left
       
    74         self.scheme = self.user = self.passwd = self.host = None
       
    75         self.port = self.path = self.query = self.fragment = None
       
    76         self._localpath = True
       
    77 
       
    78         if not path.startswith('/') and ':' in path:
       
    79             parts = path.split(':', 1)
       
    80             if parts[0]:
       
    81                 self.scheme, path = parts
       
    82                 self._localpath = False
       
    83 
       
    84         if not path:
       
    85             path = None
       
    86             if self._localpath:
       
    87                 self.path = ''
       
    88                 return
       
    89         else:
       
    90             if parse_fragment and '#' in path:
       
    91                 path, self.fragment = path.split('#', 1)
       
    92                 if not path:
       
    93                     path = None
       
    94             if self._localpath:
       
    95                 self.path = path
       
    96                 return
       
    97 
       
    98             if parse_query and '?' in path:
       
    99                 path, self.query = path.split('?', 1)
       
   100                 if not path:
       
   101                     path = None
       
   102                 if not self.query:
       
   103                     self.query = None
       
   104 
       
   105             # // is required to specify a host/authority
       
   106             if path and path.startswith('//'):
       
   107                 parts = path[2:].split('/', 1)
       
   108                 if len(parts) > 1:
       
   109                     self.host, path = parts
       
   110                     path = path
       
   111                 else:
       
   112                     self.host = parts[0]
       
   113                     path = None
       
   114                 if not self.host:
       
   115                     self.host = None
       
   116                     if path:
       
   117                         path = '/' + path
       
   118 
       
   119             if self.host and '@' in self.host:
       
   120                 self.user, self.host = self.host.rsplit('@', 1)
       
   121                 if ':' in self.user:
       
   122                     self.user, self.passwd = self.user.split(':', 1)
       
   123                 if not self.host:
       
   124                     self.host = None
       
   125 
       
   126             # Don't split on colons in IPv6 addresses without ports
       
   127             if (self.host and ':' in self.host and
       
   128                 not (self.host.startswith('[') and self.host.endswith(']'))):
       
   129                 self.host, self.port = self.host.rsplit(':', 1)
       
   130                 if not self.host:
       
   131                     self.host = None
       
   132         self.path = path
       
   133 
       
   134         for a in ('user', 'passwd', 'host', 'port',
       
   135                   'path', 'query', 'fragment'):
       
   136             v = getattr(self, a)
       
   137             if v is not None:
       
   138                 setattr(self, a, urllib.unquote(v))
       
   139 
       
   140     def __repr__(self):
       
   141         attrs = []
       
   142         for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
       
   143                   'query', 'fragment'):
       
   144             v = getattr(self, a)
       
   145             if v is not None:
       
   146                 attrs.append('%s: %r' % (a, v))
       
   147         return '<url %s>' % ', '.join(attrs)
       
   148 
       
   149     def __str__(self):
       
   150         """Join the URL's components back into a URL string.
       
   151 
       
   152         Examples:
       
   153 
       
   154         >>> str(url('http://user:pw@host:80/?foo#bar'))
       
   155         'http://user:pw@host:80/?foo#bar'
       
   156         >>> str(url('ssh://user:pw@[::1]:2200//home/joe#'))
       
   157         'ssh://user:pw@[::1]:2200//home/joe#'
       
   158         >>> str(url('http://localhost:80//'))
       
   159         'http://localhost:80//'
       
   160         >>> str(url('http://localhost:80/'))
       
   161         'http://localhost:80/'
       
   162         >>> str(url('http://localhost:80'))
       
   163         'http://localhost:80'
       
   164         >>> str(url('bundle:foo'))
       
   165         'bundle:foo'
       
   166         >>> str(url('path'))
       
   167         'path'
       
   168         """
       
   169         if self._localpath:
       
   170             s = self.path
       
   171             if self.fragment:
       
   172                 s += '#' + self.fragment
       
   173             return s
       
   174 
       
   175         s = self.scheme + ':'
       
   176         if (self.user or self.passwd or self.host or
       
   177             self.scheme and not self.path):
       
   178             s += '//'
       
   179         if self.user:
       
   180             s += urllib.quote(self.user, safe=self._safechars)
       
   181         if self.passwd:
       
   182             s += ':' + urllib.quote(self.passwd, safe=self._safechars)
       
   183         if self.user or self.passwd:
       
   184             s += '@'
       
   185         if self.host:
       
   186             if not (self.host.startswith('[') and self.host.endswith(']')):
       
   187                 s += urllib.quote(self.host)
       
   188             else:
       
   189                 s += self.host
       
   190         if self.port:
       
   191             s += ':' + urllib.quote(self.port)
       
   192         if ((self.host and self.path is not None) or
       
   193             (self.host and self.query or self.fragment)):
       
   194             s += '/'
       
   195         if self.path:
       
   196             s += urllib.quote(self.path, safe=self._safepchars)
       
   197         if self.query:
       
   198             s += '?' + urllib.quote(self.query, safe=self._safepchars)
       
   199         if self.fragment is not None:
       
   200             s += '#' + urllib.quote(self.fragment, safe=self._safepchars)
       
   201         return s
       
   202 
       
   203     def authinfo(self):
       
   204         user, passwd = self.user, self.passwd
       
   205         try:
       
   206             self.user, self.passwd = None, None
       
   207             s = str(self)
       
   208         finally:
       
   209             self.user, self.passwd = user, passwd
       
   210         if not self.user:
       
   211             return (s, None)
       
   212         return (s, (None, (str(self), self.host),
       
   213                     self.user, self.passwd or ''))
       
   214 
       
   215 def has_scheme(path):
       
   216     return bool(url(path).scheme)
    25 
   217 
    26 def hidepassword(url):
   218 def hidepassword(url):
    27     '''hide user credential in a url string'''
   219     '''hide user credential in a url string'''
    28     scheme, netloc, path, params, query, fragment = urlparse.urlparse(url)
   220     scheme, netloc, path, params, query, fragment = urlparse.urlparse(url)
    29     netloc = re.sub('([^:]*):([^@]*)@(.*)', r'\1:***@\3', netloc)
   221     netloc = re.sub('([^:]*):([^@]*)@(.*)', r'\1:***@\3', netloc)