changeset 30032:2219f4f82ede

pycompat: extract function that converts attribute or encoding name to str. This will be used to convert encoding.encoding to a str acceptable by Python 3 functions. The source encoding is changed to "latin-1" because encoding.encoding can have arbitrary bytes. Since valid names should consist of ASCII characters, we don't care about the mapping of non-ASCII characters so long as invalid names are distinct from valid names.
author Yuya Nishihara <yuya@tcha.org>
date Wed, 28 Sep 2016 22:32:09 +0900
parents 0f6d6fdd3c2a
children 02dbfaa6df0b
files mercurial/pycompat.py
diffstat 1 files changed, 17 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/pycompat.py	Wed Sep 28 20:01:23 2016 +0900
+++ b/mercurial/pycompat.py	Wed Sep 28 22:32:09 2016 +0900
@@ -35,12 +35,22 @@
     import builtins
     import functools
 
+    def sysstr(s):
+        """Return a keyword str to be passed to Python functions such as
+        getattr() and str.encode()
+
+        This never raises UnicodeDecodeError. Non-ascii characters are
+        considered invalid and mapped to arbitrary but unique code points
+        such that 'sysstr(a) != sysstr(b)' for all 'a != b'.
+        """
+        if isinstance(s, builtins.str):
+            return s
+        return s.decode(u'latin-1')
+
     def _wrapattrfunc(f):
         @functools.wraps(f)
         def w(object, name, *args):
-            if isinstance(name, bytes):
-                name = name.decode(u'utf-8')
-            return f(object, name, *args)
+            return f(object, sysstr(name), *args)
         return w
 
     # these wrappers are automagically imported by hgloader
@@ -50,6 +60,10 @@
     setattr = _wrapattrfunc(builtins.setattr)
     xrange = builtins.range
 
+else:
+    def sysstr(s):
+        return s
+
 stringio = io.StringIO
 empty = _queue.Empty
 queue = _queue.Queue