--- a/.hgsigs Thu Jun 18 15:13:38 2020 +0200
+++ b/.hgsigs Wed Jul 01 14:28:12 2020 -0400
@@ -197,3 +197,4 @@
26ce8e7515036d3431a03aaeb7bc72dd96cb1112 0 iQJJBAABCgAzFiEE64UTlbQiPuL3ugso2lR0C/CHMroFAl6YlRUVHDc4OTVwdWxraXRAZ21haWwuY29tAAoJENpUdAvwhzK6Z3YP/iOqphn99v0z2OupCl0q8CepbcdZMJWW3j00OAHYSO43M0FULpMpzC2o+kZDeqeLyzN7DsjoGts2cUnAOe9WX73sPkX1n1dbiDcUSsRqNND+tCkEZMtTn4DaGNIq1zSkkm8Q7O/1uwZPnX6FaIRMBs9qGbdfmMPNEvzny2tgrKc3ra1+AA8RCdtsbpqhjy+xf+EKVB/SMsQVVSJEgPkUkW6PwpaspdrxQKgZrb7C7Jx/gRVzMTUmCQe1sVCSnZNO3I/woAqDY2UNg7/hBubeRh/EjoH1o4ONTXgBQdYCl7QdcwDHpDc2HstonrFq51qxBecHDVw+ZKQds63Ixtxuab3SK0o/SWabZ1v8bGaWnyWnRWXL/1qkyFWly+fjEGGlv1kHl3n0UmwlUY8FQJCYDZgR0FqQGXAF3vMJOEp82ysk6jWN/7NRzcnoUC7HpNo1jPMiPRjskgVf3bhErfUQnhlF1YsVu/jPTixyfftbiaZmwILMkaPF8Kg3Cyf63p2cdcnTHdbP1U6ncR+BucthlbFei4WL0J2iERb8TBeCxOyCHlEUq8kampjbmPXN7VxnK4oX3xeBTf8mMbvrD5Fv3svRD+SkCCKu/MwQvB1VT6q425TSKHbCWeNqGjVLvetpx+skVH7eaXLEQ3wlCfo/0OQTRimx2O73EnOF5r8Q2POm
cf3e07d7648a4371ce584d15dd692e7a6845792f 0 iQJJBAABCgAzFiEE64UTlbQiPuL3ugso2lR0C/CHMroFAl6sS5sVHDc4OTVwdWxraXRAZ21haWwuY29tAAoJENpUdAvwhzK6FQcP/1usy9WxajBppBZ54ep+qesxufLoux5qkRU7j4XZ0Id4/IcKQZeik0C/0mFMjc+dYhQDGpDiuXCADKMv5h2DCIoaWUC0GueVtVkPhhMW3zMg/BmepV7dhUuipfQ4fck8gYuaBOclunLX1MFd+CS/6BQ6XIrsKasnx9WrbO2JpieBXv+8I5mslChaZf2AxeIvUVb2BkKqsCD0rqbIjTjtfHWJpaH6spFa7XX/BZWeEYz2Nc6LVJNZY0AmvJh8ebpoGOx85dokRIEAzTmBh04SbkChi+350ki6MvG3Ax+3yrUZVc1PJtBDreL7dMs7Y3ENafSMhKnBrRaPVMyUHEm2Ygn4cmJ1YiGw4OWha1n7dtRW/uI96lXKDt8iLAQ4WBRojPhYNl4L3b6/6voCgpZUOpd7PgTRc3/00siCmYIOQzAO0HkDsALoNpk8LcCxpPFYTr8dF3bSsAT9fuaLNV6tI2ofbRLXh0gFXYdaWu10eVRrSMUMiH7n3H6EpzLa4sNdyFrK0vU4aSTlBERcjj2rj86dY0XQQL181V7Yhg8m8nyj+BzraRh7et2UXNsVosOnbTa1XX0qFVu+qAVp2BeqC4k31jm0MJk+1pDzkuAPs07z3ITwkDmTHjzxm5qoZyZ1/n37BB6miD+8xJYNH7vBX/yrDW790HbloasQOcXcerNR
065704cbdbdbb05dcd6bb814eb9bbdd982211b28 0 iQJJBAABCgAzFiEE64UTlbQiPuL3ugso2lR0C/CHMroFAl7amzkVHDc4OTVwdWxraXRAZ21haWwuY29tAAoJENpUdAvwhzK6AKEP/26Hoe8VqkuGwU0ZDsK6YgErXEPs8xtgZ9A2iouDkIqw2dm1TDmWnB5X8XaWmhAWFMUdjcqd1ZZJrAyD0p13xUOm3D+hlDXYTd2INkLwS8cVu22czZ5eoxtPkjuGYlPvek9b3vrrejkZ4vpamdS3iSvIx+TzvEW+w5eZFh9s1a9gR77hcZZoir24vtM9MsNnnBuI/5/fdWkhBoe17HSU4II56ckNXDrGO0nuqrWDxPr64WAcz6EmlTGc+cUqOM45Uc0sCr3GNQGEm6VCAw5oXq2Vt9O6sjgExLxr8zdud6w5hl9b8h2MrxyisgcnVR7efbumaRuNb8QZZPzk5QqlRxbaEcStyIXzAdar4fArQUY2vrmv1WyLJR3S/G3p8QkyWYL3CZNKjCAVxSa5ytS5Dr/bM2sWaEnIHqq+W6DOagpWV4uRRnwaId9tB9b0KBoFElXZRlaq0FlNYG8RLg65ZlkF+lj6RACO23epxapadcJwibDQiNYX20mcSEFDkSEgECnLQBecA2WZvw134RRbL3vuvB49SKS0ZEJ95myXMZa9kyIJY/g+oAFBuyZeK9O8DwGii0zFDOi6VWDTZzc3/15RRS6ehqQyYrLQntYtVGwHpxnUrp2kBjk3hDIvaYOcFbTnhTGcQCzckFnIZN2oxr5YZOI+Fpfak6RQTVhnHh0/
+0ea9c86fac8974cd74dc12ea681c8986eb6da6c4 0 iQJJBAABCgAzFiEE64UTlbQiPuL3ugso2lR0C/CHMroFAl78z0gVHDc4OTVwdWxraXRAZ21haWwuY29tAAoJENpUdAvwhzK6IrkP/2m/DJ93BR/SljCFe7KnExrDTzDI/i69x+ljomRZJmMRa86zRkclgd5L49woExDd1ZGebUY650V16adKNmVpz2rS6bQOgEr2NBD5fL+GiTX6UJ1VMgmQ8x1m8DYuI8pfBWbqQuZIl1vCEc0RmT3tHLZ7T8XgG9RXa4XielI2uhyimJPyZsE1K7c8Fa6UakH++DhYFBj+3QYbwS2fFDdA29L/4N5JLUzHkIbF7tPg7P1RBk+vhopKz9MMIu4S95LU+Gk7eQ3FfE8Jnv959hX2o/B2sdT2tEPIuDRSxZhSKLdlGbMy5IZvc/bZ+a5jlb2w23tlpfgzQxNarFqpX/weiJCtsxzeMXQHEVFG/+VuIOIYbfILWzySFcnSvcAtmNXExxH2F9j+XmQkLysnsgIfplNVEEIgZDBPGAkAQ+lH7UrEdw31ciSrCDsjXDaPQWcmk4zkfrXlwN7R9zJguJ+OuZ/Ga7NXWdZAC+YkPSKAfCesdUefcesyiresO8GEk9DyRNQsX/gl5BjEeuqYyUsve5541IMqscvdosg6HrU/RrmeR7sM7tZrDwCWdOWu/GdFatQ+k6zArSrMTKUBztzV93MIwUHDrnd+7OOYDfAuqGy7oM2KoW0Jp8sS2hotIJZ9a+VGwQcxCJ93I5sVT6ePBdmBoIAFW+rbncnD+E/RvVpl
--- a/.hgtags Thu Jun 18 15:13:38 2020 +0200
+++ b/.hgtags Wed Jul 01 14:28:12 2020 -0400
@@ -210,3 +210,4 @@
26ce8e7515036d3431a03aaeb7bc72dd96cb1112 5.4rc0
cf3e07d7648a4371ce584d15dd692e7a6845792f 5.4
065704cbdbdbb05dcd6bb814eb9bbdd982211b28 5.4.1
+0ea9c86fac8974cd74dc12ea681c8986eb6da6c4 5.4.2
--- a/hgext/convert/subversion.py Thu Jun 18 15:13:38 2020 +0200
+++ b/hgext/convert/subversion.py Wed Jul 01 14:28:12 2020 -0400
@@ -3,6 +3,8 @@
# Copyright(C) 2007 Daniel Holth et al
from __future__ import absolute_import
+import codecs
+import locale
import os
import re
import xml.dom.minidom
@@ -63,6 +65,38 @@
svn = None
+# In Subversion, paths and URLs are Unicode (encoded as UTF-8), which
+# Subversion converts from / to native strings when interfacing with the OS.
+# When passing paths and URLs to Subversion, we have to recode them such that
+# they roundtrip with what Subversion is doing.
+
+fsencoding = None
+
+
+def init_fsencoding():
+    """Determine, once, the encoding used to recode paths for Subversion.
+
+    Sets the module globals `fsencoding` (the codec name as reported by
+    `codecs.lookup()`) and `fsencoding_is_utf8` (whether that codec is
+    UTF-8). Calls made after `fsencoding` has been set return immediately.
+    """
+    global fsencoding, fsencoding_is_utf8
+    if fsencoding is not None:
+        return
+    if not pycompat.iswindows:
+        # This is the encoding used to convert UTF-8 back to natively-encoded
+        # strings in Subversion 1.14.0 or earlier with APR 1.7.0 or earlier.
+        # Fall back to ISO-8859-1 when the locale reports no codeset.
+        with util.with_lc_ctype():
+            fsencoding = locale.nl_langinfo(locale.CODESET) or 'ISO-8859-1'
+    else:
+        # On Windows, filenames are Unicode, but we store them using the MBCS
+        # encoding.
+        fsencoding = 'mbcs'
+    # Go through the codec registry so that both names being compared are in
+    # the registry's own spelling.
+    fsencoding = codecs.lookup(fsencoding).name
+    utf8name = codecs.lookup('utf-8').name
+    fsencoding_is_utf8 = fsencoding == utf8name
+
+
+def fs2svn(s):
+    """Recode the byte string `s` from `fsencoding` to UTF-8.
+
+    Relies on the module globals `fsencoding` and `fsencoding_is_utf8`, which
+    are set by init_fsencoding(). When the filesystem encoding already is
+    UTF-8, `s` is returned unchanged.
+
+    Raises UnicodeDecodeError if `s` can't be decoded using `fsencoding`.
+    """
+    if not fsencoding_is_utf8:
+        return s.decode(fsencoding).encode('utf-8')
+    return s
+
+
class SvnPathNotFound(Exception):
pass
@@ -106,8 +140,15 @@
def geturl(path):
+ """Convert path or URL to a SVN URL, encoded in UTF-8.
+
+ This can raise UnicodeDecodeError if the path or URL can't be converted to
+ unicode using `fsencoding`.
+ """
try:
- return svn.client.url_from_path(svn.core.svn_path_canonicalize(path))
+ return svn.client.url_from_path(
+ svn.core.svn_path_canonicalize(fs2svn(path))
+ )
except svn.core.SubversionException:
# svn.client.url_from_path() fails with local repositories
pass
@@ -117,7 +158,7 @@
path = b'/' + util.normpath(path)
# Module URL is later compared with the repository URL returned
# by svn API, which is UTF-8.
- path = encoding.tolocal(path)
+ path = fs2svn(path)
path = b'file://%s' % quote(path)
return svn.core.svn_path_canonicalize(path)
@@ -284,7 +325,9 @@
def httpcheck(ui, path, proto):
try:
opener = urlreq.buildopener()
- rsp = opener.open(b'%s://%s/!svn/ver/0/.svn' % (proto, path), b'rb')
+ rsp = opener.open(
+ pycompat.strurl(b'%s://%s/!svn/ver/0/.svn' % (proto, path)), b'rb'
+ )
data = rsp.read()
except urlerr.httperror as inst:
if inst.code != 404:
@@ -311,6 +354,32 @@
}
+class NonUtf8PercentEncodedBytes(Exception):
+    """Percent-encoded bytes in a path do not form valid UTF-8."""
+
+
+def url2pathname_like_subversion(unicodepath):
+    """Percent-decode the unicode path `unicodepath` like Subversion does.
+
+    Subversion paths are Unicode. Since the percent-decoding is done on
+    UTF-8-encoded strings, percent-encoded bytes are interpreted as UTF-8.
+
+    Returns the decoded path as unicode. Raises NonUtf8PercentEncodedBytes
+    when the decoded result is not valid UTF-8.
+    """
+    if not pycompat.ispy3:
+        # If we passed unicode on Python 2, it would be converted using the
+        # latin-1 encoding. Therefore, we pass UTF-8-encoded bytes.
+        decoded = urlreq.url2pathname(unicodepath.encode('utf-8'))
+        try:
+            return decoded.decode('utf-8')
+        except UnicodeDecodeError:
+            raise NonUtf8PercentEncodedBytes
+    # On Python 3, we have to pass unicode to urlreq.url2pathname().
+    # Percent-decoded bytes get decoded using UTF-8 and the 'replace' error
+    # handler, so a replacement character in the result is treated as the
+    # sign of bytes that were not valid UTF-8.
+    decoded = urlreq.url2pathname(unicodepath)
+    if u'\N{REPLACEMENT CHARACTER}' in decoded:
+        raise NonUtf8PercentEncodedBytes
+    return decoded
+
+
def issvnurl(ui, url):
try:
proto, path = url.split(b'://', 1)
@@ -322,31 +391,58 @@
and path[2:6].lower() == b'%3a/'
):
path = path[:2] + b':/' + path[6:]
- # pycompat.fsdecode() / pycompat.fsencode() are used so that bytes
- # in the URL roundtrip correctly on Unix. urlreq.url2pathname() on
- # py3 will decode percent-encoded bytes using the utf-8 encoding
- # and the "replace" error handler. This means that it will not
- # preserve non-UTF-8 bytes (https://bugs.python.org/issue40983).
- # url.open() uses the reverse function (urlreq.pathname2url()) and
- # has a similar problem
- # (https://bz.mercurial-scm.org/show_bug.cgi?id=6357). It makes
- # sense to solve both problems together and handle all file URLs
- # consistently. For now, we warn.
- unicodepath = urlreq.url2pathname(pycompat.fsdecode(path))
- if pycompat.ispy3 and u'\N{REPLACEMENT CHARACTER}' in unicodepath:
+ try:
+ unicodepath = path.decode(fsencoding)
+ except UnicodeDecodeError:
ui.warn(
_(
- b'on Python 3, we currently do not support non-UTF-8 '
- b'percent-encoded bytes in file URLs for Subversion '
- b'repositories\n'
+ b'Subversion requires that file URLs can be converted '
+ b'to Unicode using the current locale encoding (%s)\n'
+ )
+ % pycompat.sysbytes(fsencoding)
+ )
+ return False
+ try:
+ unicodepath = url2pathname_like_subversion(unicodepath)
+ except NonUtf8PercentEncodedBytes:
+ ui.warn(
+ _(
+ b'Subversion does not support non-UTF-8 '
+ b'percent-encoded bytes in file URLs\n'
)
)
- path = pycompat.fsencode(unicodepath)
+ return False
+ # Below, we approximate how Subversion checks the path. On Unix, we
+ # should therefore convert the path to bytes using `fsencoding`
+ # (like Subversion does). On Windows, the right thing would
+ # actually be to leave the path as unicode. For now, we restrict
+ # the path to MBCS.
+ path = unicodepath.encode(fsencoding)
except ValueError:
proto = b'file'
path = os.path.abspath(url)
+ try:
+ path.decode(fsencoding)
+ except UnicodeDecodeError:
+ ui.warn(
+ _(
+ b'Subversion requires that paths can be converted to '
+ b'Unicode using the current locale encoding (%s)\n'
+ )
+ % pycompat.sysbytes(fsencoding)
+ )
+ return False
if proto == b'file':
path = util.pconvert(path)
+ elif proto in (b'http', b'https'):
+ # Subversion sources need non-ASCII characters in HTTP(S) URLs to be
+ # percent-encoded, so warn and reject the URL otherwise. Both scheme
+ # literals must be bytes: proto is bytes, and on Python 3 a str
+ # literal would never compare equal to it.
+ if not encoding.isasciistr(path):
+ ui.warn(
+ _(
+ b"Subversion sources don't support non-ASCII characters in "
+ b"HTTP(S) URLs. Please percent-encode them.\n"
+ )
+ )
+ return False
check = protomap.get(proto, lambda *args: False)
while b'/' in path:
if check(ui, path, proto):
@@ -373,6 +469,7 @@
def __init__(self, ui, repotype, url, revs=None):
super(svn_source, self).__init__(ui, repotype, url, revs=revs)
+ init_fsencoding()
if not (
url.startswith(b'svn://')
or url.startswith(b'svn+ssh://')
--- a/tests/test-convert-svn-encoding.t Thu Jun 18 15:13:38 2020 +0200
+++ b/tests/test-convert-svn-encoding.t Wed Jul 01 14:28:12 2020 -0400
@@ -153,22 +153,65 @@
$ cd ..
-#if py3
-For now, on Python 3, we abort when encountering non-UTF-8 percent-encoded
-bytes in a filename.
+Subversion sources don't support non-ASCII characters in HTTP(S) URLs.
+
+ $ XFF=$($PYTHON -c 'from mercurial.utils.procutil import stdout; stdout.write(b"\xff")')
+ $ hg convert --source-type=svn http://localhost:$HGPORT/$XFF test
+ initializing destination test repository
+ Subversion sources don't support non-ASCII characters in HTTP(S) URLs. Please percent-encode them.
+ http://localhost:$HGPORT/\xff does not look like a Subversion repository (esc)
+ abort: http://localhost:$HGPORT/\xff: missing or unsupported repository (esc)
+ [255]
+
+In Subversion, paths are Unicode (encoded as UTF-8). Therefore paths that can't
+be converted between UTF-8 and the locale encoding (which is always ASCII in
+tests) don't work.
- $ hg convert file:///%ff test
+ $ cp -R svn-repo $XFF
+ $ hg convert $XFF test
+ initializing destination test repository
+ Subversion requires that paths can be converted to Unicode using the current locale encoding (ascii)
+ \xff does not look like a CVS checkout (glob) (esc)
+ $TESTTMP/\xff does not look like a Git repository (esc)
+ \xff does not look like a Subversion repository (glob) (esc)
+ \xff is not a local Mercurial repository (glob) (esc)
+ \xff does not look like a darcs repository (glob) (esc)
+ \xff does not look like a monotone repository (glob) (esc)
+ \xff does not look like a GNU Arch repository (glob) (esc)
+ \xff does not look like a Bazaar repository (glob) (esc)
+ cannot find required "p4" tool
+ abort: \xff: missing or unsupported repository (glob) (esc)
+ [255]
+ $ hg convert file://$TESTTMP/$XFF test
initializing destination test repository
- on Python 3, we currently do not support non-UTF-8 percent-encoded bytes in file URLs for Subversion repositories
- file:///%ff does not look like a CVS checkout
- $TESTTMP/file:/%ff does not look like a Git repository
- file:///%ff does not look like a Subversion repository
- file:///%ff is not a local Mercurial repository
- file:///%ff does not look like a darcs repository
- file:///%ff does not look like a monotone repository
- file:///%ff does not look like a GNU Arch repository
- file:///%ff does not look like a Bazaar repository
- file:///%ff does not look like a P4 repository
- abort: file:///%ff: missing or unsupported repository
+ Subversion requires that file URLs can be converted to Unicode using the current locale encoding (ascii)
+ file:/*/$TESTTMP/\xff does not look like a CVS checkout (glob) (esc)
+ $TESTTMP/file:$TESTTMP/\xff does not look like a Git repository (esc)
+ file:/*/$TESTTMP/\xff does not look like a Subversion repository (glob) (esc)
+ file:/*/$TESTTMP/\xff is not a local Mercurial repository (glob) (esc)
+ file:/*/$TESTTMP/\xff does not look like a darcs repository (glob) (esc)
+ file:/*/$TESTTMP/\xff does not look like a monotone repository (glob) (esc)
+ file:/*/$TESTTMP/\xff does not look like a GNU Arch repository (glob) (esc)
+ file:/*/$TESTTMP/\xff does not look like a Bazaar repository (glob) (esc)
+ file:/*/$TESTTMP/\xff does not look like a P4 repository (glob) (esc)
+ abort: file:/*/$TESTTMP/\xff: missing or unsupported repository (glob) (esc)
[255]
-#endif
+
+Subversion decodes percent-encoded bytes on the converted, UTF-8-encoded
+string. Therefore, if the percent-encoded bytes aren't valid UTF-8, Subversion
+would choke on them when converting them to the locale encoding.
+
+ $ hg convert file://$TESTTMP/%FF test
+ initializing destination test repository
+ Subversion does not support non-UTF-8 percent-encoded bytes in file URLs
+ file:/*/$TESTTMP/%FF does not look like a CVS checkout (glob)
+ $TESTTMP/file:$TESTTMP/%FF does not look like a Git repository
+ file:/*/$TESTTMP/%FF does not look like a Subversion repository (glob)
+ file:/*/$TESTTMP/%FF is not a local Mercurial repository (glob)
+ file:/*/$TESTTMP/%FF does not look like a darcs repository (glob)
+ file:/*/$TESTTMP/%FF does not look like a monotone repository (glob)
+ file:/*/$TESTTMP/%FF does not look like a GNU Arch repository (glob)
+ file:/*/$TESTTMP/%FF does not look like a Bazaar repository (glob)
+ file:/*/$TESTTMP/%FF does not look like a P4 repository (glob)
+ abort: file:/*/$TESTTMP/%FF: missing or unsupported repository (glob)
+ [255]