# HG changeset patch # User Manuel Jacob # Date 1592308820 -7200 # Node ID 75b59d221aa322cfd4b8ce55c41e9841654e0fb2 # Parent de7bdb0e2a95edda3bf609e203f77e5385f765fd py3: pass native string to urlreq.url2pathname() Of course, I’m not happy with the warning, but it’s better than crashing. Solving the problem properly is hard, and non-UTF-8 percent-encoded bytes in file URLs seem rare enough that they should not block fixing the crash that currently occurs for all file URLs (even those that are not SVN-specific). diff -r de7bdb0e2a95 -r 75b59d221aa3 hgext/convert/subversion.py --- a/hgext/convert/subversion.py Tue Jun 16 12:59:45 2020 +0200 +++ b/hgext/convert/subversion.py Tue Jun 16 14:00:20 2020 +0200 @@ -321,7 +321,26 @@ and path[2:6].lower() == b'%3a/' ): path = path[:2] + b':/' + path[6:] - path = urlreq.url2pathname(path) + # pycompat.fsdecode() / pycompat.fsencode() are used so that bytes + # in the URL roundtrip correctly on Unix. urlreq.url2pathname() on + # py3 will decode percent-encoded bytes using the utf-8 encoding + # and the "replace" error handler. This means that it will not + # preserve non-UTF-8 bytes (https://bugs.python.org/issue40983). + # url.open() uses the reverse function (urlreq.pathname2url()) and + # has a similar problem + # (https://bz.mercurial-scm.org/show_bug.cgi?id=6357). It makes + # sense to solve both problems together and handle all file URLs + # consistently. For now, we warn. 
+ unicodepath = urlreq.url2pathname(pycompat.fsdecode(path)) + if pycompat.ispy3 and u'\N{REPLACEMENT CHARACTER}' in unicodepath: + ui.warn( + _( + b'on Python 3, we currently do not support non-UTF-8 ' + b'percent-encoded bytes in file URLs for Subversion ' + b'repositories\n' + ) + ) + path = pycompat.fsencode(unicodepath) except ValueError: proto = b'file' path = os.path.abspath(url) diff -r de7bdb0e2a95 -r 75b59d221aa3 tests/test-convert-svn-encoding.t --- a/tests/test-convert-svn-encoding.t Tue Jun 16 12:59:45 2020 +0200 +++ b/tests/test-convert-svn-encoding.t Tue Jun 16 14:00:20 2020 +0200 @@ -152,3 +152,23 @@ f7e66f98380ed1e53a797c5c7a7a2616a7ab377d branch\xc3\xa9 (esc) $ cd .. + +#if py3 +For now, on Python 3, we abort when encountering non-UTF-8 percent-encoded +bytes in a filename. + + $ hg convert file:///%ff test + initializing destination test repository + on Python 3, we currently do not support non-UTF-8 percent-encoded bytes in file URLs for Subversion repositories + file:///%ff does not look like a CVS checkout + $TESTTMP/file:/%ff does not look like a Git repository + file:///%ff does not look like a Subversion repository + file:///%ff is not a local Mercurial repository + file:///%ff does not look like a darcs repository + file:///%ff does not look like a monotone repository + file:///%ff does not look like a GNU Arch repository + file:///%ff does not look like a Bazaar repository + file:///%ff does not look like a P4 repository + abort: file:///%ff: missing or unsupported repository + [255] +#endif