comparison mercurial/util.py @ 14076:924c82157d46

url: move URL parsing functions into util to improve startup time. The introduction of the new URL parsing code has created a startup time regression. This is mainly due to the use of url.hasscheme() in the ui class. It ends up importing many libraries that the url module requires. This fix helps marginally, but if we can get rid of the urllib import in the URL parser altogether, startup time will go back to normal. perfstartup time before the URL refactoring (8796fb6af67e): ! wall 0.050692 comb 0.000000 user 0.000000 sys 0.000000 (best of 100) current startup time (139fb11210bb): ! wall 0.070685 comb 0.000000 user 0.000000 sys 0.000000 (best of 100) after this change: ! wall 0.064667 comb 0.000000 user 0.000000 sys 0.000000 (best of 100)
author Brodie Rao <brodie@bitheap.org>
date Sat, 30 Apr 2011 09:43:20 -0700
parents e4bfb9c337f3
children c285bdb0572a
comparison
equal deleted inserted replaced
14075:bc101902a68d 14076:924c82157d46
15 15
16 from i18n import _ 16 from i18n import _
17 import error, osutil, encoding 17 import error, osutil, encoding
18 import errno, re, shutil, sys, tempfile, traceback 18 import errno, re, shutil, sys, tempfile, traceback
19 import os, time, calendar, textwrap, unicodedata, signal 19 import os, time, calendar, textwrap, unicodedata, signal
20 import imp, socket 20 import imp, socket, urllib
21 21
22 # Python compatibility 22 # Python compatibility
23 23
24 def sha1(s): 24 def sha1(s):
25 return _fastsha1(s) 25 return _fastsha1(s)
1281 """Parse s into a boolean. 1281 """Parse s into a boolean.
1282 1282
1283 If s is not a valid boolean, returns None. 1283 If s is not a valid boolean, returns None.
1284 """ 1284 """
1285 return _booleans.get(s.lower(), None) 1285 return _booleans.get(s.lower(), None)
1286
class url(object):
    """Reliable URL parser.

    This parses URLs and provides attributes for the following
    components:

    <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>

    Missing components are set to None. The only exception is
    fragment, which is set to '' if present but empty.

    If parsefragment is False, fragment is included in query. If
    parsequery is False, query is included in path. If both are
    False, both fragment and query are included in path.

    Strings without a scheme are treated as local paths: at most the
    fragment is split off, and nothing is unquoted. Strings starting
    with a Windows drive letter are stored verbatim as the path.

    See http://www.ietf.org/rfc/rfc2396.txt for more information.

    Note that for backward compatibility reasons, bundle URLs do not
    take host names. That means 'bundle://../' has a path of '../'.

    Examples:

    >>> url('http://www.ietf.org/rfc/rfc2396.txt')
    <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
    >>> url('ssh://[::1]:2200//home/joe/repo')
    <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
    >>> url('file:///home/joe/repo')
    <url scheme: 'file', path: '/home/joe/repo'>
    >>> url('bundle:foo')
    <url scheme: 'bundle', path: 'foo'>
    >>> url('bundle://../foo')
    <url scheme: 'bundle', path: '../foo'>
    >>> url('c:\\\\foo\\\\bar')
    <url path: 'c:\\\\foo\\\\bar'>

    Authentication credentials:

    >>> url('ssh://joe:xyz@x/repo')
    <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
    >>> url('ssh://joe@x/repo')
    <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>

    Query strings and fragments:

    >>> url('http://host/a?b#c')
    <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
    >>> url('http://host/a?b#c', parsequery=False, parsefragment=False)
    <url scheme: 'http', host: 'host', path: 'a?b#c'>
    """

    # Characters __str__ leaves unquoted in the user and password.
    _safechars = "!~*'()+"
    # Same set plus '/', used for the path, query and fragment.
    _safepchars = "/!~*'()+"
    _matchscheme = re.compile(r'^[a-zA-Z0-9+.\-]+:').match

    def __init__(self, path, parsequery=True, parsefragment=True):
        """Parse path into URL components.

        Raises Abort for a file:// URL whose host is not one of
        'localhost', '127.0.0.1' or '[::1]'.
        """
        # We slowly chomp away at path until we have only the path left
        self.scheme = self.user = self.passwd = self.host = None
        self.port = self.path = self.query = self.fragment = None
        self._localpath = True
        # Unsplit "host:port" text, kept so localpath() can recognize a
        # drive letter that was parsed as a host.
        self._hostport = ''
        self._origpath = path

        # special case for Windows drive letters
        if hasdriveletter(path):
            self.path = path
            return

        # For compatibility reasons, we can't handle bundle paths as
        # normal URLS
        if path.startswith('bundle:'):
            self.scheme = 'bundle'
            path = path[7:]
            if path.startswith('//'):
                path = path[2:]
            self.path = path
            return

        if self._matchscheme(path):
            parts = path.split(':', 1)
            if parts[0]:
                self.scheme, path = parts
                self._localpath = False

        if not path:
            path = None
            if self._localpath:
                self.path = ''
                return
        else:
            if parsefragment and '#' in path:
                path, self.fragment = path.split('#', 1)
                if not path:
                    path = None
            if self._localpath:
                # Local paths keep '?' and '//' as literal path text.
                self.path = path
                return

            if parsequery and '?' in path:
                path, self.query = path.split('?', 1)
                if not path:
                    path = None
                if not self.query:
                    self.query = None

            # // is required to specify a host/authority
            if path and path.startswith('//'):
                parts = path[2:].split('/', 1)
                if len(parts) > 1:
                    self.host, path = parts
                else:
                    self.host = parts[0]
                    path = None
                if not self.host:
                    self.host = None
                    # With an empty authority (scheme:///x) the slash
                    # consumed by the split belongs to the path.
                    if path:
                        path = '/' + path

            if self.host and '@' in self.host:
                self.user, self.host = self.host.rsplit('@', 1)
                if ':' in self.user:
                    self.user, self.passwd = self.user.split(':', 1)
                if not self.host:
                    self.host = None

            # Don't split on colons in IPv6 addresses without ports
            if (self.host and ':' in self.host and
                not (self.host.startswith('[') and self.host.endswith(']'))):
                self._hostport = self.host
                self.host, self.port = self.host.rsplit(':', 1)
                if not self.host:
                    self.host = None

            if (self.host and self.scheme == 'file' and
                self.host not in ('localhost', '127.0.0.1', '[::1]')):
                raise Abort(_('file:// URLs can only refer to localhost'))

        self.path = path

        # Components are stored unquoted; __str__ quotes them again.
        for a in ('user', 'passwd', 'host', 'port',
                  'path', 'query', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                setattr(self, a, urllib.unquote(v))

    def __repr__(self):
        attrs = []
        for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
                  'query', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                attrs.append('%s: %r' % (a, v))
        return '<url %s>' % ', '.join(attrs)

    def __str__(self):
        """Join the URL's components back into a URL string.

        Examples:

        >>> str(url('http://user:pw@host:80/?foo#bar'))
        'http://user:pw@host:80/?foo#bar'
        >>> str(url('ssh://user:pw@[::1]:2200//home/joe#'))
        'ssh://user:pw@[::1]:2200//home/joe#'
        >>> str(url('http://localhost:80//'))
        'http://localhost:80//'
        >>> str(url('http://localhost:80/'))
        'http://localhost:80/'
        >>> str(url('http://localhost:80'))
        'http://localhost:80/'
        >>> str(url('bundle:foo'))
        'bundle:foo'
        >>> str(url('bundle://../foo'))
        'bundle:../foo'
        >>> str(url('path'))
        'path'
        """
        if self._localpath:
            # Local paths are emitted verbatim, without quoting. The
            # path is None when the input was only a fragment ('#x');
            # fall back to '' so the concatenation below cannot fail.
            s = self.path or ''
            if self.scheme == 'bundle':
                s = 'bundle:' + s
            if self.fragment:
                s += '#' + self.fragment
            return s

        s = self.scheme + ':'
        if (self.user or self.passwd or self.host or
            self.scheme and not self.path):
            s += '//'
        if self.user:
            s += urllib.quote(self.user, safe=self._safechars)
        if self.passwd:
            s += ':' + urllib.quote(self.passwd, safe=self._safechars)
        if self.user or self.passwd:
            s += '@'
        if self.host:
            # Bracketed IPv6 literals must not have their ':' quoted.
            if not (self.host.startswith('[') and self.host.endswith(']')):
                s += urllib.quote(self.host)
            else:
                s += self.host
        if self.port:
            s += ':' + urllib.quote(self.port)
        if self.host:
            s += '/'
        if self.path:
            s += urllib.quote(self.path, safe=self._safepchars)
        if self.query:
            s += '?' + urllib.quote(self.query, safe=self._safepchars)
        # An empty fragment ('') is still emitted as a trailing '#'.
        if self.fragment is not None:
            s += '#' + urllib.quote(self.fragment, safe=self._safepchars)
        return s

    def authinfo(self):
        """Return (url, authdata) with the credentials split out.

        url is this URL rendered without user and password. authdata
        is None when no user is set; otherwise it is the tuple
        (None, (url, host), user, passwd), where passwd is '' if unset.

        NOTE(review): the tuple looks like the arguments of a urllib2
        password manager's add_password(); confirm against callers.
        """
        user, passwd = self.user, self.passwd
        try:
            # Temporarily blank the credentials so str() omits them.
            self.user, self.passwd = None, None
            s = str(self)
        finally:
            self.user, self.passwd = user, passwd
        if not self.user:
            return (s, None)
        # The URI entry must be the credential-free URL: embedding
        # user:passwd in it would leak the password into the lookup key
        # and make it differ from the URL returned alongside it.
        return (s, (None, (s, self.host),
                    self.user, self.passwd or ''))

    def localpath(self):
        """Return the filesystem path this URL refers to.

        For file: and bundle: URLs this is derived from the parsed
        components; for anything else the original string is returned
        unchanged.
        """
        if self.scheme == 'file' or self.scheme == 'bundle':
            path = self.path or '/'
            # For Windows, we need to promote hosts containing drive
            # letters to paths with drive letters.
            if hasdriveletter(self._hostport):
                path = self._hostport + '/' + self.path
            elif self.host is not None and self.path:
                path = '/' + path
            # We also need to handle the case of file:///C:/, which
            # should return C:/, not /C:/.
            elif path.startswith('/') and hasdriveletter(path[1:]):
                # Strip leading slash from paths with drive names.
                # The drive letter follows the slash, so the test has
                # to look at path[1:]; testing path itself would chop
                # the letter off a slash-less 'c:/foo' instead.
                return path[1:]
            return path
        return self._origpath
1526
def hasscheme(path):
    '''report whether path parses as a URL that carries a scheme'''
    parsed = url(path)
    if parsed.scheme:
        return True
    return False
1529
def hasdriveletter(path):
    '''report whether path starts with a Windows drive letter ("c:")

    Only the ASCII letters A-Z and a-z count as drive letters. This
    is checked by explicit range comparison rather than isalpha(),
    which is locale-dependent for byte strings and accepts any
    Unicode letter for unicode strings.
    '''
    if path[1:2] != ':':
        return False
    # A ':' at index 1 guarantees that path[0:1] is exactly one character.
    letter = path[0:1]
    return 'a' <= letter <= 'z' or 'A' <= letter <= 'Z'
1532
def localpath(path):
    '''return the local path for a path or URL string

    The string is parsed with query and fragment parsing disabled and
    the result of url.localpath() is returned.
    '''
    parsed = url(path, parsequery=False, parsefragment=False)
    return parsed.localpath()
1535
def hidepassword(u):
    '''hide user credential in a url string'''
    parsed = url(u)
    if not parsed.passwd:
        # No password to mask; just return the re-rendered URL.
        return str(parsed)
    parsed.passwd = '***'
    return str(parsed)
1542
def removeauth(u):
    '''remove all authentication information from a url string'''
    parsed = url(u)
    # Drop both credential fields before rendering the URL again.
    parsed.user = None
    parsed.passwd = None
    return str(parsed)