comparison mercurial/url.py @ 13770:4e8f2310f310

url: provide url object. This adds a url object that re-implements urlsplit() and unsplit(). The implementation splits out usernames, passwords, and ports. The implementation is based on the behavior specified by RFC 2396[1]. However, it is much more forgiving than the RFC's specification; it places no specific restrictions on what characters are allowed in each segment of the URL other than what is necessary to split the URL into its constituent parts. [1]: http://www.ietf.org/rfc/rfc2396.txt
author Brodie Rao <brodie@bitheap.org>
date Fri, 25 Mar 2011 22:58:56 -0700
parents 66d65bccbf06
children 463aca32a937
comparison
equal deleted inserted replaced
13769:8796fb6af67e 13770:4e8f2310f310
20 not result.startswith(scheme + '://') and 20 not result.startswith(scheme + '://') and
21 url.startswith(scheme + '://') 21 url.startswith(scheme + '://')
22 ): 22 ):
23 result = scheme + '://' + result[len(scheme + ':'):] 23 result = scheme + '://' + result[len(scheme + ':'):]
24 return result 24 return result
25
class url(object):
    """Reliable URL parser.

    This parses URLs and provides attributes for the following
    components:

    <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>

    Missing components are set to None. The only exceptions are
    fragment, which is set to '' if present but empty, and path, which
    is set to '' when an empty string is parsed as a local path.

    A string without a scheme (or one starting with '/') is treated as
    a local path: only the fragment is split off, nothing is
    percent-decoded, and str() returns the path plus any fragment.

    If parse_fragment is False, fragment is included in query. If
    parse_query is False, query is included in path. If both are
    False, both fragment and query are included in path.

    See http://www.ietf.org/rfc/rfc2396.txt for more information.

    Examples:

    >>> url('http://www.ietf.org/rfc/rfc2396.txt')
    <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
    >>> url('ssh://[::1]:2200//home/joe/repo')
    <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
    >>> url('file:///home/joe/repo')
    <url scheme: 'file', path: '/home/joe/repo'>
    >>> url('bundle:foo')
    <url scheme: 'bundle', path: 'foo'>

    Authentication credentials:

    >>> url('ssh://joe:xyz@x/repo')
    <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
    >>> url('ssh://joe@x/repo')
    <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>

    Query strings and fragments:

    >>> url('http://host/a?b#c')
    <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
    >>> url('http://host/a?b#c', parse_query=False, parse_fragment=False)
    <url scheme: 'http', host: 'host', path: 'a?b#c'>
    >>> url('bundle:#frag')
    <url scheme: 'bundle', fragment: 'frag'>
    """

    # Characters left unescaped when quoting user and passwd in __str__.
    _safechars = "!~*'()+"
    # Same set plus '/', used for path, query and fragment.
    _safepchars = "/!~*'()+"

    def __init__(self, path, parse_query=True, parse_fragment=True):
        """Split ``path`` into URL components stored as attributes.

        ``parse_query`` and ``parse_fragment`` control whether '?' and
        '#' are treated as delimiters or left inside the preceding
        component.
        """
        # We slowly chomp away at path until we have only the path left
        self.scheme = self.user = self.passwd = self.host = None
        self.port = self.path = self.query = self.fragment = None
        self._localpath = True

        # A leading '/' or an empty prefix before ':' means "no scheme".
        if not path.startswith('/') and ':' in path:
            parts = path.split(':', 1)
            if parts[0]:
                self.scheme, path = parts
                self._localpath = False

        if not path:
            path = None
            if self._localpath:
                self.path = ''
                return
        else:
            if parse_fragment and '#' in path:
                path, self.fragment = path.split('#', 1)
                if not path:
                    path = None
            if self._localpath:
                # Local paths keep everything but the fragment verbatim.
                self.path = path
                return

            # path may be None here when the input was only a fragment
            # after the scheme (e.g. 'bundle:#frag'); guard before
            # testing membership.
            if parse_query and path and '?' in path:
                path, self.query = path.split('?', 1)
                if not path:
                    path = None
                if not self.query:
                    self.query = None

            # // is required to specify a host/authority
            if path and path.startswith('//'):
                parts = path[2:].split('/', 1)
                if len(parts) > 1:
                    self.host, path = parts
                else:
                    self.host = parts[0]
                    path = None
                if not self.host:
                    self.host = None
                    # With an empty authority ('file:///x') the slash
                    # consumed by the split belongs to the path.
                    if path:
                        path = '/' + path

            if self.host and '@' in self.host:
                self.user, self.host = self.host.rsplit('@', 1)
                if ':' in self.user:
                    self.user, self.passwd = self.user.split(':', 1)
                if not self.host:
                    self.host = None

            # Don't split on colons in IPv6 addresses without ports
            if (self.host and ':' in self.host and
                not (self.host.startswith('[') and self.host.endswith(']'))):
                self.host, self.port = self.host.rsplit(':', 1)
                if not self.host:
                    self.host = None
            self.path = path

        # Components are stored percent-decoded; __str__ re-quotes them.
        for a in ('user', 'passwd', 'host', 'port',
                  'path', 'query', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                setattr(self, a, urllib.unquote(v))

    def __repr__(self):
        """Return '<url name: value, ...>' listing the non-None components."""
        attrs = []
        for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
                  'query', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                attrs.append('%s: %r' % (a, v))
        return '<url %s>' % ', '.join(attrs)

    def __str__(self):
        """Join the URL's components back into a URL string.

        Examples:

        >>> str(url('http://user:pw@host:80/?foo#bar'))
        'http://user:pw@host:80/?foo#bar'
        >>> str(url('ssh://user:pw@[::1]:2200//home/joe#'))
        'ssh://user:pw@[::1]:2200//home/joe#'
        >>> str(url('http://localhost:80//'))
        'http://localhost:80//'
        >>> str(url('http://localhost:80/'))
        'http://localhost:80/'
        >>> str(url('http://localhost:80'))
        'http://localhost:80'
        >>> str(url('bundle:foo'))
        'bundle:foo'
        >>> str(url('bundle:foo#bar'))
        'bundle:foo#bar'
        >>> str(url('path'))
        'path'
        >>> str(url('#frag'))
        '#frag'
        """
        if self._localpath:
            # path is None when the input consisted of a fragment only.
            s = self.path or ''
            if self.fragment:
                s += '#' + self.fragment
            return s

        s = self.scheme + ':'
        if (self.user or self.passwd or self.host or
            self.scheme and not self.path):
            s += '//'
        if self.user:
            s += urllib.quote(self.user, safe=self._safechars)
        if self.passwd:
            s += ':' + urllib.quote(self.passwd, safe=self._safechars)
        if self.user or self.passwd:
            s += '@'
        if self.host:
            # Bracketed IPv6 literals are emitted as-is so that '[', ']'
            # and ':' are not percent-encoded.
            if not (self.host.startswith('[') and self.host.endswith(']')):
                s += urllib.quote(self.host)
            else:
                s += self.host
        if self.port:
            s += ':' + urllib.quote(self.port)
        # The separating '/' only makes sense after an authority; without
        # a host it would change the meaning of the URL.
        if self.host and (self.path is not None or
                          self.query or self.fragment):
            s += '/'
        if self.path:
            s += urllib.quote(self.path, safe=self._safepchars)
        if self.query:
            # NOTE(review): '=' and '&' are not in _safepchars, so a
            # query such as 'a=b&c=d' is percent-encoded here - confirm
            # whether callers rely on that.
            s += '?' + urllib.quote(self.query, safe=self._safepchars)
        if self.fragment is not None:
            s += '#' + urllib.quote(self.fragment, safe=self._safepchars)
        return s

    def authinfo(self):
        """Return the URL without credentials plus the credentials.

        The result is ``(s, None)`` when no user is set, otherwise
        ``(s, (None, (s, host), user, passwd or ''))`` where ``s`` is
        the string form of this URL with user and password removed.
        The leading None is presumably the realm slot expected by a
        urllib2-style password manager - confirm against callers.
        """
        user, passwd = self.user, self.passwd
        try:
            # Temporarily blank the credentials so str() omits them.
            self.user, self.passwd = None, None
            s = str(self)
        finally:
            self.user, self.passwd = user, passwd
        if not self.user:
            return (s, None)
        # Use the credential-free string in the URI tuple as well;
        # calling str(self) again here would embed user and password.
        return (s, (None, (s, self.host),
                    self.user, self.passwd or ''))
214
def has_scheme(path):
    """Return True when parsing ``path`` as a url yields a non-empty scheme."""
    parsed = url(path)
    if parsed.scheme:
        return True
    return False
25 217
26 def hidepassword(url): 218 def hidepassword(url):
27 '''hide user credential in a url string''' 219 '''hide user credential in a url string'''
28 scheme, netloc, path, params, query, fragment = urlparse.urlparse(url) 220 scheme, netloc, path, params, query, fragment = urlparse.urlparse(url)
29 netloc = re.sub('([^:]*):([^@]*)@(.*)', r'\1:***@\3', netloc) 221 netloc = re.sub('([^:]*):([^@]*)@(.*)', r'\1:***@\3', netloc)