# HG changeset patch # User Brodie Rao # Date 1301618253 25200 # Node ID b2798c1deffffe4fff4034e0b3e6c56a28aa76de # Parent ddcb57a2eaeb50c068a20d420f562dec52b388b5 url: be stricter about detecting schemes While the URL parser is very forgiving about what characters are allowed in each component, it's useful to be strict about the scheme so we don't accidentally interpret local paths with colons as URLs. This restricts schemes to containing alphanumeric characters, dashes, pluses, and dots (as specified in RFC 2396). diff -r ddcb57a2eaeb -r b2798c1defff mercurial/url.py --- a/mercurial/url.py Fri Apr 01 11:45:29 2011 -0500 +++ b/mercurial/url.py Thu Mar 31 17:37:33 2011 -0700 @@ -7,7 +7,7 @@ # This software may be used and distributed according to the terms of the # GNU General Public License version 2 or any later version. -import urllib, urllib2, httplib, os, socket, cStringIO +import urllib, urllib2, httplib, os, socket, cStringIO, re import __builtin__ from i18n import _ import keepalive, util @@ -64,6 +64,7 @@ _safechars = "!~*'()+" _safepchars = "/!~*'()+" + _matchscheme = re.compile(r'^[a-zA-Z0-9+.\-]+:').match def __init__(self, path, parsequery=True, parsefragment=True): # We slowly chomp away at path until we have only the path left @@ -88,7 +89,7 @@ self.path = path return - if not path.startswith('/') and ':' in path: + if self._matchscheme(path): parts = path.split(':', 1) if parts[0]: self.scheme, path = parts diff -r ddcb57a2eaeb -r b2798c1defff tests/test-url.py --- a/tests/test-url.py Fri Apr 01 11:45:29 2011 -0500 +++ b/tests/test-url.py Thu Mar 31 17:37:33 2011 -0700 @@ -157,6 +157,12 @@ >>> url('/x///z/y/') <url path: '/x///z/y/'> + >>> url('/foo:bar') + <url path: '/foo:bar'> + >>> url('\\\\foo:bar') + <url path: '\\\\foo:bar'> + >>> url('./foo:bar') + <url path: './foo:bar'> Non-localhost file URL: