20 not result.startswith(scheme + '://') and |
20 not result.startswith(scheme + '://') and |
21 url.startswith(scheme + '://') |
21 url.startswith(scheme + '://') |
22 ): |
22 ): |
23 result = scheme + '://' + result[len(scheme + ':'):] |
23 result = scheme + '://' + result[len(scheme + ':'):] |
24 return result |
24 return result |
|
25 |
|
class url(object):
    """Reliable URL parser.

    This parses URLs and provides attributes for the following
    components:

    <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>

    Missing components are set to None. The only exception is
    fragment, which is set to '' if present but empty.

    If parse_fragment is False, fragment is included in query. If
    parse_query is False, query is included in path. If both are
    False, both fragment and query are included in path.

    A string without a scheme (or one starting with '/') is treated as
    a local path: only the fragment is split off and nothing is
    unquoted.

    See http://www.ietf.org/rfc/rfc2396.txt for more information.

    Examples:

    >>> url('http://www.ietf.org/rfc/rfc2396.txt')
    <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
    >>> url('ssh://[::1]:2200//home/joe/repo')
    <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
    >>> url('file:///home/joe/repo')
    <url scheme: 'file', path: '/home/joe/repo'>
    >>> url('bundle:foo')
    <url scheme: 'bundle', path: 'foo'>

    Authentication credentials:

    >>> url('ssh://joe:xyz@x/repo')
    <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
    >>> url('ssh://joe@x/repo')
    <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>

    Query strings and fragments:

    >>> url('http://host/a?b#c')
    <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
    >>> url('http://host/a?b#c', parse_query=False, parse_fragment=False)
    <url scheme: 'http', host: 'host', path: 'a?b#c'>
    """

    # Characters left unescaped when quoting user/passwd ...
    _safechars = "!~*'()+"
    # ... and when quoting path/query/fragment ('/' is additionally safe).
    _safepchars = "/!~*'()+"

    def __init__(self, path, parse_query=True, parse_fragment=True):
        """Parse the string path into URL components.

        parse_query and parse_fragment control whether '?' and '#' are
        treated as delimiters (see the class docstring).
        """
        # We slowly chomp away at path until we have only the path left
        self.scheme = self.user = self.passwd = self.host = None
        self.port = self.path = self.query = self.fragment = None
        self._localpath = True

        # Anything before the first ':' is the scheme, unless the string
        # starts with '/' or the part before ':' is empty.
        if not path.startswith('/') and ':' in path:
            parts = path.split(':', 1)
            if parts[0]:
                self.scheme, path = parts
                self._localpath = False

        if not path:
            path = None
            if self._localpath:
                self.path = ''
                return
        else:
            if parse_fragment and '#' in path:
                path, self.fragment = path.split('#', 1)
                if not path:
                    path = None
            if self._localpath:
                # Local paths are stored verbatim (no query/host parsing,
                # no unquoting).
                self.path = path
                return

            if parse_query and '?' in path:
                path, self.query = path.split('?', 1)
                if not path:
                    path = None
                if not self.query:
                    self.query = None

            # // is required to specify a host/authority
            if path and path.startswith('//'):
                parts = path[2:].split('/', 1)
                if len(parts) > 1:
                    self.host, path = parts
                else:
                    self.host = parts[0]
                    path = None
                if not self.host:
                    self.host = None
                    # scheme:///abs/path has an empty authority; the
                    # slash consumed by the split belongs to the path.
                    if path:
                        path = '/' + path

            if self.host and '@' in self.host:
                self.user, self.host = self.host.rsplit('@', 1)
                if ':' in self.user:
                    self.user, self.passwd = self.user.split(':', 1)
                if not self.host:
                    self.host = None

            # Don't split on colons in IPv6 addresses without ports
            if (self.host and ':' in self.host and
                not (self.host.startswith('[') and self.host.endswith(']'))):
                self.host, self.port = self.host.rsplit(':', 1)
                if not self.host:
                    self.host = None

        self.path = path

        # Components are stored unquoted; __str__ quotes them again.
        for a in ('user', 'passwd', 'host', 'port',
                  'path', 'query', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                setattr(self, a, urllib.unquote(v))

    def __repr__(self):
        """Return '<url name: value, ...>' listing the non-None components."""
        attrs = []
        for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
                  'query', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                attrs.append('%s: %r' % (a, v))
        return '<url %s>' % ', '.join(attrs)

    def __str__(self):
        """Join the URL's components back into a URL string.

        Examples:

        >>> str(url('http://user:pw@host:80/?foo#bar'))
        'http://user:pw@host:80/?foo#bar'
        >>> str(url('ssh://user:pw@[::1]:2200//home/joe#'))
        'ssh://user:pw@[::1]:2200//home/joe#'
        >>> str(url('http://localhost:80//'))
        'http://localhost:80//'
        >>> str(url('http://localhost:80/'))
        'http://localhost:80/'
        >>> str(url('http://localhost:80'))
        'http://localhost:80'
        >>> str(url('bundle:foo'))
        'bundle:foo'
        >>> str(url('bundle:foo#bar'))
        'bundle:foo#bar'
        >>> str(url('path'))
        'path'
        """
        if self._localpath:
            # path is None when the input was only a fragment ('#frag');
            # fall back to '' so the concatenation below cannot fail.
            s = self.path or ''
            if self.fragment:
                s += '#' + self.fragment
            return s

        s = self.scheme + ':'
        if (self.user or self.passwd or self.host or
            (self.scheme and not self.path)):
            s += '//'
        if self.user:
            s += urllib.quote(self.user, safe=self._safechars)
        if self.passwd:
            s += ':' + urllib.quote(self.passwd, safe=self._safechars)
        if self.user or self.passwd:
            s += '@'
        if self.host:
            # Bracketed IPv6 literals must not be percent-encoded.
            if not (self.host.startswith('[') and self.host.endswith(']')):
                s += urllib.quote(self.host)
            else:
                s += self.host
        if self.port:
            s += ':' + urllib.quote(self.port)
        # The separator between authority and path/query/fragment only
        # makes sense when there is an authority; without a host,
        # emitting it would turn 'bundle:foo#bar' into 'bundle:/foo#bar'.
        if self.host and (self.path is not None or
                          self.query or self.fragment):
            s += '/'
        if self.path:
            s += urllib.quote(self.path, safe=self._safepchars)
        if self.query:
            s += '?' + urllib.quote(self.query, safe=self._safepchars)
        if self.fragment is not None:
            s += '#' + urllib.quote(self.fragment, safe=self._safepchars)
        return s

    def authinfo(self):
        """Return (url_without_credentials, authdata).

        authdata is None when no user is set; otherwise it is the tuple
        (None, (url_without_credentials, host), user, passwd or '').
        NOTE(review): the tuple shape looks like the arguments of a
        urllib2 password manager's add_password() -- confirm with callers.
        """
        user, passwd = self.user, self.passwd
        try:
            # Temporarily drop the credentials so str() omits them.
            self.user, self.passwd = None, None
            s = str(self)
        finally:
            self.user, self.passwd = user, passwd
        if not self.user:
            return (s, None)
        # The URI list must not contain credentials, so use the stripped
        # string rather than str(self), which would embed user:passwd@.
        return (s, (None, (s, self.host),
                    self.user, self.passwd or ''))
|
def has_scheme(path):
    """Return True if path parses to a URL with a non-empty scheme."""
    parsed = url(path)
    return bool(parsed.scheme)
25 |
217 |
26 def hidepassword(url): |
218 def hidepassword(url): |
27 '''hide user credential in a url string''' |
219 '''hide user credential in a url string''' |
28 scheme, netloc, path, params, query, fragment = urlparse.urlparse(url) |
220 scheme, netloc, path, params, query, fragment = urlparse.urlparse(url) |
29 netloc = re.sub('([^:]*):([^@]*)@(.*)', r'\1:***@\3', netloc) |
221 netloc = re.sub('([^:]*):([^@]*)@(.*)', r'\1:***@\3', netloc) |