mercurial/encoding.py
changeset 43076 2372284d9457
parent 41836 25694a78e4a4
child 43077 687b865b95ad
--- a/mercurial/encoding.py	Sat Oct 05 10:29:34 2019 -0400
+++ b/mercurial/encoding.py	Sun Oct 06 09:45:02 2019 -0400
@@ -17,9 +17,7 @@
     pycompat,
 )
 
-from .pure import (
-    charencode as charencodepure,
-)
+from .pure import charencode as charencodepure
 
 charencode = policy.importmod(r'charencode')
 
@@ -36,12 +34,15 @@
 # These unicode characters are ignored by HFS+ (Apple Technote 1150,
 # "Unicode Subtleties"), so we need to ignore them in some places for
 # sanity.
-_ignore = [unichr(int(x, 16)).encode("utf-8") for x in
-           "200c 200d 200e 200f 202a 202b 202c 202d 202e "
-           "206a 206b 206c 206d 206e 206f feff".split()]
+_ignore = [
+    unichr(int(x, 16)).encode("utf-8")
+    for x in "200c 200d 200e 200f 202a 202b 202c 202d 202e "
+    "206a 206b 206c 206d 206e 206f feff".split()
+]
 # verify the next function will work
 assert all(i.startswith(("\xe2", "\xef")) for i in _ignore)
 
+
 def hfsignoreclean(s):
     """Remove codepoints ignored by HFS+ from s.
 
@@ -55,9 +56,10 @@
             s = s.replace(c, '')
     return s
 
+
 # encoding.environ is provided read-only, which may not be used to modify
 # the process environment
-_nativeenviron = (not pycompat.ispy3 or os.supports_bytes_environ)
+_nativeenviron = not pycompat.ispy3 or os.supports_bytes_environ
 if not pycompat.ispy3:
     environ = os.environ  # re-exports
 elif _nativeenviron:
@@ -65,8 +67,10 @@
 else:
     # preferred encoding isn't known yet; use utf-8 to avoid unicode error
     # and recreate it once encoding is settled
-    environ = dict((k.encode(r'utf-8'), v.encode(r'utf-8'))
-                   for k, v in os.environ.items())  # re-exports
+    environ = dict(
+        (k.encode(r'utf-8'), v.encode(r'utf-8'))
+        for k, v in os.environ.items()  # re-exports
+    )
 
 _encodingrewrites = {
     '646': 'ascii',
@@ -88,15 +92,19 @@
 encodingmode = environ.get("HGENCODINGMODE", "strict")
 fallbackencoding = 'ISO-8859-1'
 
+
 class localstr(bytes):
     '''This class allows strings that are unmodified to be
     round-tripped to the local encoding and back'''
+
     def __new__(cls, u, l):
         s = bytes.__new__(cls, l)
         s._utf8 = u
         return s
+
     def __hash__(self):
-        return hash(self._utf8) # avoid collisions in local string space
+        return hash(self._utf8)  # avoid collisions in local string space
+
 
 class safelocalstr(bytes):
     """Tagged string denoting it was previously an internal UTF-8 string,
@@ -108,6 +116,7 @@
     >>> assert safelocalstr(b'\\xc3') in {b'\\xc3': 0}
     """
 
+
 def tolocal(s):
     """
     Convert a string from internal UTF-8 to local encoding
@@ -167,12 +176,13 @@
                     return safelocalstr(r)
                 return localstr(u.encode('UTF-8'), r)
             except UnicodeDecodeError:
-                u = s.decode("utf-8", "replace") # last ditch
+                u = s.decode("utf-8", "replace")  # last ditch
                 # can't round-trip
                 return u.encode(_sysstr(encoding), r"replace")
     except LookupError as k:
         raise error.Abort(k, hint="please check your locale settings")
 
+
 def fromlocal(s):
     """
     Convert a string from the local character encoding to UTF-8
@@ -194,27 +204,34 @@
         u = s.decode(_sysstr(encoding), _sysstr(encodingmode))
         return u.encode("utf-8")
     except UnicodeDecodeError as inst:
-        sub = s[max(0, inst.start - 10):inst.start + 10]
-        raise error.Abort("decoding near '%s': %s!"
-                          % (sub, pycompat.bytestr(inst)))
+        sub = s[max(0, inst.start - 10) : inst.start + 10]
+        raise error.Abort(
+            "decoding near '%s': %s!" % (sub, pycompat.bytestr(inst))
+        )
     except LookupError as k:
         raise error.Abort(k, hint="please check your locale settings")
 
+
 def unitolocal(u):
     """Convert a unicode string to a byte string of local encoding"""
     return tolocal(u.encode('utf-8'))
 
+
 def unifromlocal(s):
     """Convert a byte string of local encoding to a unicode string"""
     return fromlocal(s).decode('utf-8')
 
+
 def unimethod(bytesfunc):
     """Create a proxy method that forwards __unicode__() and __str__() of
     Python 3 to __bytes__()"""
+
     def unifunc(obj):
         return unifromlocal(bytesfunc(obj))
+
     return unifunc
 
+
 # converter functions between native str and byte string. use these if the
 # character encoding is not aware (e.g. exception message) or is known to
 # be locale dependent (e.g. date formatting.)
@@ -230,8 +247,10 @@
 if not _nativeenviron:
     # now encoding and helper functions are available, recreate the environ
     # dict to be exported to other modules
-    environ = dict((tolocal(k.encode(r'utf-8')), tolocal(v.encode(r'utf-8')))
-                   for k, v in os.environ.items())  # re-exports
+    environ = dict(
+        (tolocal(k.encode(r'utf-8')), tolocal(v.encode(r'utf-8')))
+        for k, v in os.environ.items()  # re-exports
+    )
 
 if pycompat.ispy3:
     # os.getcwd() on Python 3 returns string, but it has os.getcwdb() which
@@ -246,13 +265,16 @@
     getcwd = os.getcwd  # re-exports
 
 # How to treat ambiguous-width characters. Set to 'wide' to treat as wide.
-_wide = _sysstr(environ.get("HGENCODINGAMBIGUOUS", "narrow") == "wide"
-                and "WFA" or "WF")
+_wide = _sysstr(
+    environ.get("HGENCODINGAMBIGUOUS", "narrow") == "wide" and "WFA" or "WF"
+)
+
 
 def colwidth(s):
     "Find the column width of a string for display in the local encoding"
     return ucolwidth(s.decode(_sysstr(encoding), r'replace'))
 
+
 def ucolwidth(d):
     "Find the column width of a Unicode string for display"
     eaw = getattr(unicodedata, 'east_asian_width', None)
@@ -260,6 +282,7 @@
         return sum([eaw(c) in _wide and 2 or 1 for c in d])
     return len(d)
 
+
 def getcols(s, start, c):
     '''Use colwidth to find a c-column substring of s starting at byte
     index start'''
@@ -268,6 +291,7 @@
         if colwidth(t) == c:
             return t
 
+
 def trim(s, width, ellipsis='', leftside=False):
     """Trim string 's' to at most 'width' columns (including 'ellipsis').
 
@@ -336,21 +360,21 @@
     try:
         u = s.decode(_sysstr(encoding))
     except UnicodeDecodeError:
-        if len(s) <= width: # trimming is not needed
+        if len(s) <= width:  # trimming is not needed
             return s
         width -= len(ellipsis)
-        if width <= 0: # no enough room even for ellipsis
-            return ellipsis[:width + len(ellipsis)]
+        if width <= 0:  # no enough room even for ellipsis
+            return ellipsis[: width + len(ellipsis)]
         if leftside:
             return ellipsis + s[-width:]
         return s[:width] + ellipsis
 
-    if ucolwidth(u) <= width: # trimming is not needed
+    if ucolwidth(u) <= width:  # trimming is not needed
         return s
 
     width -= len(ellipsis)
-    if width <= 0: # no enough room even for ellipsis
-        return ellipsis[:width + len(ellipsis)]
+    if width <= 0:  # no enough room even for ellipsis
+        return ellipsis[: width + len(ellipsis)]
 
     if leftside:
         uslice = lambda i: u[i:]
@@ -362,7 +386,8 @@
         usub = uslice(i)
         if ucolwidth(usub) <= width:
             return concat(usub.encode(_sysstr(encoding)))
-    return ellipsis # no enough room for multi-column characters
+    return ellipsis  # no enough room for multi-column characters
+
 
 def lower(s):
     "best-effort encoding-aware case-folding of local string s"
@@ -378,13 +403,14 @@
 
         lu = u.lower()
         if u == lu:
-            return s # preserve localstring
+            return s  # preserve localstring
         return lu.encode(_sysstr(encoding))
     except UnicodeError:
-        return s.lower() # we don't know how to fold this except in ASCII
+        return s.lower()  # we don't know how to fold this except in ASCII
     except LookupError as k:
         raise error.Abort(k, hint="please check your locale settings")
 
+
 def upper(s):
     "best-effort encoding-aware case-folding of local string s"
     try:
@@ -392,6 +418,7 @@
     except UnicodeDecodeError:
         return upperfallback(s)
 
+
 def upperfallback(s):
     try:
         if isinstance(s, localstr):
@@ -401,13 +428,14 @@
 
         uu = u.upper()
         if u == uu:
-            return s # preserve localstring
+            return s  # preserve localstring
         return uu.encode(_sysstr(encoding))
     except UnicodeError:
-        return s.upper() # we don't know how to fold this except in ASCII
+        return s.upper()  # we don't know how to fold this except in ASCII
     except LookupError as k:
         raise error.Abort(k, hint="please check your locale settings")
 
+
 class normcasespecs(object):
     '''what a platform's normcase does to ASCII strings
 
@@ -419,10 +447,12 @@
     other: the fallback function should always be called
 
     This should be kept in sync with normcase_spec in util.h.'''
+
     lower = -1
     upper = 1
     other = 0
 
+
 def jsonescape(s, paranoid=False):
     '''returns a string suitable for JSON
 
@@ -475,6 +505,7 @@
         pass
     return charencodepure.jsonescapeu8fallback(u8chars, paranoid)
 
+
 # We need to decode/encode U+DCxx codes transparently since invalid UTF-8
 # bytes are mapped to that range.
 if pycompat.ispy3:
@@ -484,6 +515,7 @@
 
 _utf8len = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4]
 
+
 def getutf8char(s, pos):
     '''get the next full utf-8 character in the given string, starting at pos
 
@@ -492,15 +524,16 @@
     '''
 
     # find how many bytes to attempt decoding from first nibble
-    l = _utf8len[ord(s[pos:pos + 1]) >> 4]
-    if not l: # ascii
-        return s[pos:pos + 1]
+    l = _utf8len[ord(s[pos : pos + 1]) >> 4]
+    if not l:  # ascii
+        return s[pos : pos + 1]
 
-    c = s[pos:pos + l]
+    c = s[pos : pos + l]
     # validate with attempted decode
     c.decode("utf-8", _utf8strict)
     return c
 
+
 def toutf8b(s):
     '''convert a local, possibly-binary string into UTF-8b
 
@@ -558,16 +591,17 @@
             c = getutf8char(s, pos)
             if "\xed\xb0\x80" <= c <= "\xed\xb3\xbf":
                 # have to re-escape existing U+DCxx characters
-                c = unichr(0xdc00 + ord(s[pos])).encode('utf-8', _utf8strict)
+                c = unichr(0xDC00 + ord(s[pos])).encode('utf-8', _utf8strict)
                 pos += 1
             else:
                 pos += len(c)
         except UnicodeDecodeError:
-            c = unichr(0xdc00 + ord(s[pos])).encode('utf-8', _utf8strict)
+            c = unichr(0xDC00 + ord(s[pos])).encode('utf-8', _utf8strict)
             pos += 1
         r += c
     return r
 
+
 def fromutf8b(s):
     '''Given a UTF-8b string, return a local, possibly-binary string.
 
@@ -611,6 +645,6 @@
         pos += len(c)
         # unescape U+DCxx characters
         if "\xed\xb0\x80" <= c <= "\xed\xb3\xbf":
-            c = pycompat.bytechr(ord(c.decode("utf-8", _utf8strict)) & 0xff)
+            c = pycompat.bytechr(ord(c.decode("utf-8", _utf8strict)) & 0xFF)
         r += c
     return r