comparison mercurial/url.py @ 13848:b2798c1defff

url: be stricter about detecting schemes. While the URL parser is very forgiving about what characters are allowed in each component, it's useful to be strict about the scheme so we don't accidentally interpret local paths with colons as URLs. This restricts schemes to alphanumeric characters, dashes, pluses, and dots (as specified in RFC 2396).
author Brodie Rao <brodie@bitheap.org>
date Thu, 31 Mar 2011 17:37:33 -0700
parents f1823b9f073b
children fab10e7cacd6
comparison
equal deleted inserted replaced
13845:ddcb57a2eaeb 13848:b2798c1defff
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com> 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 # 6 #
7 # This software may be used and distributed according to the terms of the 7 # This software may be used and distributed according to the terms of the
8 # GNU General Public License version 2 or any later version. 8 # GNU General Public License version 2 or any later version.
9 9
10 import urllib, urllib2, httplib, os, socket, cStringIO 10 import urllib, urllib2, httplib, os, socket, cStringIO, re
11 import __builtin__ 11 import __builtin__
12 from i18n import _ 12 from i18n import _
13 import keepalive, util 13 import keepalive, util
14 14
15 class url(object): 15 class url(object):
62 <url scheme: 'http', host: 'host', path: 'a?b#c'> 62 <url scheme: 'http', host: 'host', path: 'a?b#c'>
63 """ 63 """
64 64
65 _safechars = "!~*'()+" 65 _safechars = "!~*'()+"
66 _safepchars = "/!~*'()+" 66 _safepchars = "/!~*'()+"
67 _matchscheme = re.compile(r'^[a-zA-Z0-9+.\-]+:').match
67 68
68 def __init__(self, path, parsequery=True, parsefragment=True): 69 def __init__(self, path, parsequery=True, parsefragment=True):
69 # We slowly chomp away at path until we have only the path left 70 # We slowly chomp away at path until we have only the path left
70 self.scheme = self.user = self.passwd = self.host = None 71 self.scheme = self.user = self.passwd = self.host = None
71 self.port = self.path = self.query = self.fragment = None 72 self.port = self.path = self.query = self.fragment = None
86 if path.startswith('//'): 87 if path.startswith('//'):
87 path = path[2:] 88 path = path[2:]
88 self.path = path 89 self.path = path
89 return 90 return
90 91
91 if not path.startswith('/') and ':' in path: 92 if self._matchscheme(path):
92 parts = path.split(':', 1) 93 parts = path.split(':', 1)
93 if parts[0]: 94 if parts[0]:
94 self.scheme, path = parts 95 self.scheme, path = parts
95 self._localpath = False 96 self._localpath = False
96 97