diff mercurial/encoding.py @ 37990:57b0c7221dba

encoding: fix toutf8b() to resurrect lossy characters even if it contains "\xed". If 's' is a localstr, 's._utf8' must be returned to get the original UTF-8 sequence back. Because of this, it was totally wrong to test if '"\xed" not in s', which should be either '"\xed" not in s._utf8' or just omitted. This patch moves the localstr handling to the top, as the validity of 's._utf8' should be pre-checked by encoding.tolocal().
author Yuya Nishihara <yuya@tcha.org>
date Sun, 22 Apr 2018 11:38:53 +0900
parents d4c760c997cd
children 3ea3c96ada54
line wrap: on
line diff
--- a/mercurial/encoding.py	Sun Mar 25 16:47:33 2018 +0900
+++ b/mercurial/encoding.py	Sun Apr 22 11:38:53 2018 +0900
@@ -504,11 +504,13 @@
     internal surrogate encoding as a UTF-8 string.)
     '''
 
-    if not isinstance(s, localstr) and isasciistr(s):
+    if isinstance(s, localstr):
+        # assume that the original UTF-8 sequence would never contain
+        # invalid characters in U+DCxx range
+        return s._utf8
+    elif isasciistr(s):
         return s
     if "\xed" not in s:
-        if isinstance(s, localstr):
-            return s._utf8
         try:
             s.decode('utf-8', _utf8strict)
             return s