changeset 45036:df3660cc60f5

merge with stable
author Augie Fackler <augie@google.com>
date Wed, 01 Jul 2020 14:28:12 -0400
parents 24b1a8eb73aa (current diff) 1fdc8cbd5fb8 (diff)
children b4b6ff83ed9c
files hgext/convert/subversion.py
diffstat 4 files changed, 177 insertions(+), 35 deletions(-) [+]
line wrap: on
line diff
--- a/.hgsigs	Thu Jun 18 15:13:38 2020 +0200
+++ b/.hgsigs	Wed Jul 01 14:28:12 2020 -0400
@@ -197,3 +197,4 @@
 26ce8e7515036d3431a03aaeb7bc72dd96cb1112 0 iQJJBAABCgAzFiEE64UTlbQiPuL3ugso2lR0C/CHMroFAl6YlRUVHDc4OTVwdWxraXRAZ21haWwuY29tAAoJENpUdAvwhzK6Z3YP/iOqphn99v0z2OupCl0q8CepbcdZMJWW3j00OAHYSO43M0FULpMpzC2o+kZDeqeLyzN7DsjoGts2cUnAOe9WX73sPkX1n1dbiDcUSsRqNND+tCkEZMtTn4DaGNIq1zSkkm8Q7O/1uwZPnX6FaIRMBs9qGbdfmMPNEvzny2tgrKc3ra1+AA8RCdtsbpqhjy+xf+EKVB/SMsQVVSJEgPkUkW6PwpaspdrxQKgZrb7C7Jx/gRVzMTUmCQe1sVCSnZNO3I/woAqDY2UNg7/hBubeRh/EjoH1o4ONTXgBQdYCl7QdcwDHpDc2HstonrFq51qxBecHDVw+ZKQds63Ixtxuab3SK0o/SWabZ1v8bGaWnyWnRWXL/1qkyFWly+fjEGGlv1kHl3n0UmwlUY8FQJCYDZgR0FqQGXAF3vMJOEp82ysk6jWN/7NRzcnoUC7HpNo1jPMiPRjskgVf3bhErfUQnhlF1YsVu/jPTixyfftbiaZmwILMkaPF8Kg3Cyf63p2cdcnTHdbP1U6ncR+BucthlbFei4WL0J2iERb8TBeCxOyCHlEUq8kampjbmPXN7VxnK4oX3xeBTf8mMbvrD5Fv3svRD+SkCCKu/MwQvB1VT6q425TSKHbCWeNqGjVLvetpx+skVH7eaXLEQ3wlCfo/0OQTRimx2O73EnOF5r8Q2POm
 cf3e07d7648a4371ce584d15dd692e7a6845792f 0 iQJJBAABCgAzFiEE64UTlbQiPuL3ugso2lR0C/CHMroFAl6sS5sVHDc4OTVwdWxraXRAZ21haWwuY29tAAoJENpUdAvwhzK6FQcP/1usy9WxajBppBZ54ep+qesxufLoux5qkRU7j4XZ0Id4/IcKQZeik0C/0mFMjc+dYhQDGpDiuXCADKMv5h2DCIoaWUC0GueVtVkPhhMW3zMg/BmepV7dhUuipfQ4fck8gYuaBOclunLX1MFd+CS/6BQ6XIrsKasnx9WrbO2JpieBXv+8I5mslChaZf2AxeIvUVb2BkKqsCD0rqbIjTjtfHWJpaH6spFa7XX/BZWeEYz2Nc6LVJNZY0AmvJh8ebpoGOx85dokRIEAzTmBh04SbkChi+350ki6MvG3Ax+3yrUZVc1PJtBDreL7dMs7Y3ENafSMhKnBrRaPVMyUHEm2Ygn4cmJ1YiGw4OWha1n7dtRW/uI96lXKDt8iLAQ4WBRojPhYNl4L3b6/6voCgpZUOpd7PgTRc3/00siCmYIOQzAO0HkDsALoNpk8LcCxpPFYTr8dF3bSsAT9fuaLNV6tI2ofbRLXh0gFXYdaWu10eVRrSMUMiH7n3H6EpzLa4sNdyFrK0vU4aSTlBERcjj2rj86dY0XQQL181V7Yhg8m8nyj+BzraRh7et2UXNsVosOnbTa1XX0qFVu+qAVp2BeqC4k31jm0MJk+1pDzkuAPs07z3ITwkDmTHjzxm5qoZyZ1/n37BB6miD+8xJYNH7vBX/yrDW790HbloasQOcXcerNR
 065704cbdbdbb05dcd6bb814eb9bbdd982211b28 0 iQJJBAABCgAzFiEE64UTlbQiPuL3ugso2lR0C/CHMroFAl7amzkVHDc4OTVwdWxraXRAZ21haWwuY29tAAoJENpUdAvwhzK6AKEP/26Hoe8VqkuGwU0ZDsK6YgErXEPs8xtgZ9A2iouDkIqw2dm1TDmWnB5X8XaWmhAWFMUdjcqd1ZZJrAyD0p13xUOm3D+hlDXYTd2INkLwS8cVu22czZ5eoxtPkjuGYlPvek9b3vrrejkZ4vpamdS3iSvIx+TzvEW+w5eZFh9s1a9gR77hcZZoir24vtM9MsNnnBuI/5/fdWkhBoe17HSU4II56ckNXDrGO0nuqrWDxPr64WAcz6EmlTGc+cUqOM45Uc0sCr3GNQGEm6VCAw5oXq2Vt9O6sjgExLxr8zdud6w5hl9b8h2MrxyisgcnVR7efbumaRuNb8QZZPzk5QqlRxbaEcStyIXzAdar4fArQUY2vrmv1WyLJR3S/G3p8QkyWYL3CZNKjCAVxSa5ytS5Dr/bM2sWaEnIHqq+W6DOagpWV4uRRnwaId9tB9b0KBoFElXZRlaq0FlNYG8RLg65ZlkF+lj6RACO23epxapadcJwibDQiNYX20mcSEFDkSEgECnLQBecA2WZvw134RRbL3vuvB49SKS0ZEJ95myXMZa9kyIJY/g+oAFBuyZeK9O8DwGii0zFDOi6VWDTZzc3/15RRS6ehqQyYrLQntYtVGwHpxnUrp2kBjk3hDIvaYOcFbTnhTGcQCzckFnIZN2oxr5YZOI+Fpfak6RQTVhnHh0/
+0ea9c86fac8974cd74dc12ea681c8986eb6da6c4 0 iQJJBAABCgAzFiEE64UTlbQiPuL3ugso2lR0C/CHMroFAl78z0gVHDc4OTVwdWxraXRAZ21haWwuY29tAAoJENpUdAvwhzK6IrkP/2m/DJ93BR/SljCFe7KnExrDTzDI/i69x+ljomRZJmMRa86zRkclgd5L49woExDd1ZGebUY650V16adKNmVpz2rS6bQOgEr2NBD5fL+GiTX6UJ1VMgmQ8x1m8DYuI8pfBWbqQuZIl1vCEc0RmT3tHLZ7T8XgG9RXa4XielI2uhyimJPyZsE1K7c8Fa6UakH++DhYFBj+3QYbwS2fFDdA29L/4N5JLUzHkIbF7tPg7P1RBk+vhopKz9MMIu4S95LU+Gk7eQ3FfE8Jnv959hX2o/B2sdT2tEPIuDRSxZhSKLdlGbMy5IZvc/bZ+a5jlb2w23tlpfgzQxNarFqpX/weiJCtsxzeMXQHEVFG/+VuIOIYbfILWzySFcnSvcAtmNXExxH2F9j+XmQkLysnsgIfplNVEEIgZDBPGAkAQ+lH7UrEdw31ciSrCDsjXDaPQWcmk4zkfrXlwN7R9zJguJ+OuZ/Ga7NXWdZAC+YkPSKAfCesdUefcesyiresO8GEk9DyRNQsX/gl5BjEeuqYyUsve5541IMqscvdosg6HrU/RrmeR7sM7tZrDwCWdOWu/GdFatQ+k6zArSrMTKUBztzV93MIwUHDrnd+7OOYDfAuqGy7oM2KoW0Jp8sS2hotIJZ9a+VGwQcxCJ93I5sVT6ePBdmBoIAFW+rbncnD+E/RvVpl
--- a/.hgtags	Thu Jun 18 15:13:38 2020 +0200
+++ b/.hgtags	Wed Jul 01 14:28:12 2020 -0400
@@ -210,3 +210,4 @@
 26ce8e7515036d3431a03aaeb7bc72dd96cb1112 5.4rc0
 cf3e07d7648a4371ce584d15dd692e7a6845792f 5.4
 065704cbdbdbb05dcd6bb814eb9bbdd982211b28 5.4.1
+0ea9c86fac8974cd74dc12ea681c8986eb6da6c4 5.4.2
--- a/hgext/convert/subversion.py	Thu Jun 18 15:13:38 2020 +0200
+++ b/hgext/convert/subversion.py	Wed Jul 01 14:28:12 2020 -0400
@@ -3,6 +3,8 @@
 # Copyright(C) 2007 Daniel Holth et al
 from __future__ import absolute_import
 
+import codecs
+import locale
 import os
 import re
 import xml.dom.minidom
@@ -63,6 +65,38 @@
     svn = None
 
 
+# In Subversion, paths and URLs are Unicode (encoded as UTF-8), which
+# Subversion converts from / to native strings when interfacing with the OS.
+# When passing paths and URLs to Subversion, we have to recode them such that
+# it roundtrips with what Subversion is doing.
+
+fsencoding = None
+
+
+def init_fsencoding():
+    global fsencoding, fsencoding_is_utf8
+    if fsencoding is not None:
+        return
+    if pycompat.iswindows:
+        # On Windows, filenames are Unicode, but we store them using the MBCS
+        # encoding.
+        fsencoding = 'mbcs'
+    else:
+        # This is the encoding used to convert UTF-8 back to natively-encoded
+        # strings in Subversion 1.14.0 or earlier with APR 1.7.0 or earlier.
+        with util.with_lc_ctype():
+            fsencoding = locale.nl_langinfo(locale.CODESET) or 'ISO-8859-1'
+    fsencoding = codecs.lookup(fsencoding).name
+    fsencoding_is_utf8 = fsencoding == codecs.lookup('utf-8').name
+
+
+def fs2svn(s):
+    if fsencoding_is_utf8:
+        return s
+    else:
+        return s.decode(fsencoding).encode('utf-8')
+
+
 class SvnPathNotFound(Exception):
     pass
 
@@ -106,8 +140,15 @@
 
 
 def geturl(path):
+    """Convert path or URL to a SVN URL, encoded in UTF-8.
+
+    This can raise UnicodeDecodeError if the path or URL can't be converted to
+    unicode using `fsencoding`.
+    """
     try:
-        return svn.client.url_from_path(svn.core.svn_path_canonicalize(path))
+        return svn.client.url_from_path(
+            svn.core.svn_path_canonicalize(fs2svn(path))
+        )
     except svn.core.SubversionException:
         # svn.client.url_from_path() fails with local repositories
         pass
@@ -117,7 +158,7 @@
             path = b'/' + util.normpath(path)
         # Module URL is later compared with the repository URL returned
         # by svn API, which is UTF-8.
-        path = encoding.tolocal(path)
+        path = fs2svn(path)
         path = b'file://%s' % quote(path)
     return svn.core.svn_path_canonicalize(path)
 
@@ -284,7 +325,9 @@
 def httpcheck(ui, path, proto):
     try:
         opener = urlreq.buildopener()
-        rsp = opener.open(b'%s://%s/!svn/ver/0/.svn' % (proto, path), b'rb')
+        rsp = opener.open(
+            pycompat.strurl(b'%s://%s/!svn/ver/0/.svn' % (proto, path)), b'rb'
+        )
         data = rsp.read()
     except urlerr.httperror as inst:
         if inst.code != 404:
@@ -311,6 +354,32 @@
 }
 
 
+class NonUtf8PercentEncodedBytes(Exception):
+    pass
+
+
+# Subversion paths are Unicode. Since the percent-decoding is done on
+# UTF-8-encoded strings, percent-encoded bytes are interpreted as UTF-8.
+def url2pathname_like_subversion(unicodepath):
+    if pycompat.ispy3:
+        # On Python 3, we have to pass unicode to urlreq.url2pathname().
+        # Percent-decoded bytes get decoded using UTF-8 and the 'replace' error
+        # handler.
+        unicodepath = urlreq.url2pathname(unicodepath)
+        if u'\N{REPLACEMENT CHARACTER}' in unicodepath:
+            raise NonUtf8PercentEncodedBytes
+        else:
+            return unicodepath
+    else:
+        # If we passed unicode on Python 2, it would be converted using the
+        # latin-1 encoding. Therefore, we pass UTF-8-encoded bytes.
+        unicodepath = urlreq.url2pathname(unicodepath.encode('utf-8'))
+        try:
+            return unicodepath.decode('utf-8')
+        except UnicodeDecodeError:
+            raise NonUtf8PercentEncodedBytes
+
+
 def issvnurl(ui, url):
     try:
         proto, path = url.split(b'://', 1)
@@ -322,31 +391,58 @@
                 and path[2:6].lower() == b'%3a/'
             ):
                 path = path[:2] + b':/' + path[6:]
-            # pycompat.fsdecode() / pycompat.fsencode() are used so that bytes
-            # in the URL roundtrip correctly on Unix. urlreq.url2pathname() on
-            # py3 will decode percent-encoded bytes using the utf-8 encoding
-            # and the "replace" error handler. This means that it will not
-            # preserve non-UTF-8 bytes (https://bugs.python.org/issue40983).
-            # url.open() uses the reverse function (urlreq.pathname2url()) and
-            # has a similar problem
-            # (https://bz.mercurial-scm.org/show_bug.cgi?id=6357). It makes
-            # sense to solve both problems together and handle all file URLs
-            # consistently. For now, we warn.
-            unicodepath = urlreq.url2pathname(pycompat.fsdecode(path))
-            if pycompat.ispy3 and u'\N{REPLACEMENT CHARACTER}' in unicodepath:
+            try:
+                unicodepath = path.decode(fsencoding)
+            except UnicodeDecodeError:
                 ui.warn(
                     _(
-                        b'on Python 3, we currently do not support non-UTF-8 '
-                        b'percent-encoded bytes in file URLs for Subversion '
-                        b'repositories\n'
+                        b'Subversion requires that file URLs can be converted '
+                        b'to Unicode using the current locale encoding (%s)\n'
+                    )
+                    % pycompat.sysbytes(fsencoding)
+                )
+                return False
+            try:
+                unicodepath = url2pathname_like_subversion(unicodepath)
+            except NonUtf8PercentEncodedBytes:
+                ui.warn(
+                    _(
+                        b'Subversion does not support non-UTF-8 '
+                        b'percent-encoded bytes in file URLs\n'
                     )
                 )
-            path = pycompat.fsencode(unicodepath)
+                return False
+            # Below, we approximate how Subversion checks the path. On Unix, we
+            # should therefore convert the path to bytes using `fsencoding`
+            # (like Subversion does). On Windows, the right thing would
+            # actually be to leave the path as unicode. For now, we restrict
+            # the path to MBCS.
+            path = unicodepath.encode(fsencoding)
     except ValueError:
         proto = b'file'
         path = os.path.abspath(url)
+        try:
+            path.decode(fsencoding)
+        except UnicodeDecodeError:
+            ui.warn(
+                _(
+                    b'Subversion requires that paths can be converted to '
+                    b'Unicode using the current locale encoding (%s)\n'
+                )
+                % pycompat.sysbytes(fsencoding)
+            )
+            return False
     if proto == b'file':
         path = util.pconvert(path)
+    elif proto in (b'http', 'https'):
+        if not encoding.isasciistr(path):
+            ui.warn(
+                _(
+                    b"Subversion sources don't support non-ASCII characters in "
+                    b"HTTP(S) URLs. Please percent-encode them.\n"
+                )
+            )
+            return False
     check = protomap.get(proto, lambda *args: False)
     while b'/' in path:
         if check(ui, path, proto):
@@ -373,6 +469,7 @@
     def __init__(self, ui, repotype, url, revs=None):
         super(svn_source, self).__init__(ui, repotype, url, revs=revs)
 
+        init_fsencoding()
         if not (
             url.startswith(b'svn://')
             or url.startswith(b'svn+ssh://')
--- a/tests/test-convert-svn-encoding.t	Thu Jun 18 15:13:38 2020 +0200
+++ b/tests/test-convert-svn-encoding.t	Wed Jul 01 14:28:12 2020 -0400
@@ -153,22 +153,65 @@
 
   $ cd ..
 
-#if py3
-For now, on Python 3, we abort when encountering non-UTF-8 percent-encoded
-bytes in a filename.
+Subversion sources don't support non-ASCII characters in HTTP(S) URLs.
+
+  $ XFF=$($PYTHON -c 'from mercurial.utils.procutil import stdout; stdout.write(b"\xff")')
+  $ hg convert --source-type=svn http://localhost:$HGPORT/$XFF test
+  initializing destination test repository
+  Subversion sources don't support non-ASCII characters in HTTP(S) URLs. Please percent-encode them.
+  http://localhost:$HGPORT/\xff does not look like a Subversion repository (esc)
+  abort: http://localhost:$HGPORT/\xff: missing or unsupported repository (esc)
+  [255]
+
+In Subversion, paths are Unicode (encoded as UTF-8). Therefore paths that can't
+be converted between UTF-8 and the locale encoding (which is always ASCII in
+tests) don't work.
 
-  $ hg convert file:///%ff test
+  $ cp -R svn-repo $XFF
+  $ hg convert $XFF test
+  initializing destination test repository
+  Subversion requires that paths can be converted to Unicode using the current locale encoding (ascii)
+  \xff does not look like a CVS checkout (glob) (esc)
+  $TESTTMP/\xff does not look like a Git repository (esc)
+  \xff does not look like a Subversion repository (glob) (esc)
+  \xff is not a local Mercurial repository (glob) (esc)
+  \xff does not look like a darcs repository (glob) (esc)
+  \xff does not look like a monotone repository (glob) (esc)
+  \xff does not look like a GNU Arch repository (glob) (esc)
+  \xff does not look like a Bazaar repository (glob) (esc)
+  cannot find required "p4" tool
+  abort: \xff: missing or unsupported repository (glob) (esc)
+  [255]
+  $ hg convert file://$TESTTMP/$XFF test
   initializing destination test repository
-  on Python 3, we currently do not support non-UTF-8 percent-encoded bytes in file URLs for Subversion repositories
-  file:///%ff does not look like a CVS checkout
-  $TESTTMP/file:/%ff does not look like a Git repository
-  file:///%ff does not look like a Subversion repository
-  file:///%ff is not a local Mercurial repository
-  file:///%ff does not look like a darcs repository
-  file:///%ff does not look like a monotone repository
-  file:///%ff does not look like a GNU Arch repository
-  file:///%ff does not look like a Bazaar repository
-  file:///%ff does not look like a P4 repository
-  abort: file:///%ff: missing or unsupported repository
+  Subversion requires that file URLs can be converted to Unicode using the current locale encoding (ascii)
+  file:/*/$TESTTMP/\xff does not look like a CVS checkout (glob) (esc)
+  $TESTTMP/file:$TESTTMP/\xff does not look like a Git repository (esc)
+  file:/*/$TESTTMP/\xff does not look like a Subversion repository (glob) (esc)
+  file:/*/$TESTTMP/\xff is not a local Mercurial repository (glob) (esc)
+  file:/*/$TESTTMP/\xff does not look like a darcs repository (glob) (esc)
+  file:/*/$TESTTMP/\xff does not look like a monotone repository (glob) (esc)
+  file:/*/$TESTTMP/\xff does not look like a GNU Arch repository (glob) (esc)
+  file:/*/$TESTTMP/\xff does not look like a Bazaar repository (glob) (esc)
+  file:/*/$TESTTMP/\xff does not look like a P4 repository (glob) (esc)
+  abort: file:/*/$TESTTMP/\xff: missing or unsupported repository (glob) (esc)
   [255]
-#endif
+
+Subversion decodes percent-encoded bytes on the converted, UTF-8-encoded
+string. Therefore, if the percent-encoded bytes aren't valid UTF-8, Subversion
+would choke on them when converting them to the locale encoding.
+
+  $ hg convert file://$TESTTMP/%FF test
+  initializing destination test repository
+  Subversion does not support non-UTF-8 percent-encoded bytes in file URLs
+  file:/*/$TESTTMP/%FF does not look like a CVS checkout (glob)
+  $TESTTMP/file:$TESTTMP/%FF does not look like a Git repository
+  file:/*/$TESTTMP/%FF does not look like a Subversion repository (glob)
+  file:/*/$TESTTMP/%FF is not a local Mercurial repository (glob)
+  file:/*/$TESTTMP/%FF does not look like a darcs repository (glob)
+  file:/*/$TESTTMP/%FF does not look like a monotone repository (glob)
+  file:/*/$TESTTMP/%FF does not look like a GNU Arch repository (glob)
+  file:/*/$TESTTMP/%FF does not look like a Bazaar repository (glob)
+  file:/*/$TESTTMP/%FF does not look like a P4 repository (glob)
+  abort: file:/*/$TESTTMP/%FF: missing or unsupported repository (glob)
+  [255]