mail: let all charset values be native strings
authorDenis Laxalde <denis@laxalde.org>
Tue, 12 Nov 2019 22:52:30 +0100
changeset 43626 bdb0ddab7bb3
parent 43625 8d9e2c2b6058
child 43627 af3e341dbf03
mail: let all charset values be native strings Charset values will typically be used to build email.header.Header instances, which takes str (though it tolerates bytes) or passed to decode()/encode() methods of string values (which want str). It seems that using native str involves less conversions than before and this also helps type hinting (as illustrates removal of pytype disabling instructions).
mercurial/mail.py
--- a/mercurial/mail.py	Wed Nov 13 22:20:44 2019 +0100
+++ b/mercurial/mail.py	Tue Nov 12 22:52:30 2019 +0100
@@ -253,17 +253,13 @@
 
 
 def codec2iana(cs):
-    # type: (bytes) -> bytes
+    # type: (str) -> str
     ''''''
-    cs = pycompat.sysbytes(
-        email.charset.Charset(
-            cs  # pytype: disable=wrong-arg-types
-        ).input_charset.lower()
-    )
+    cs = email.charset.Charset(cs).input_charset.lower()
 
     # "latin1" normalizes to "iso8859-1", standard calls for "iso-8859-1"
-    if cs.startswith(b"iso") and not cs.startswith(b"iso-"):
-        return b"iso-" + cs[3:]
+    if cs.startswith("iso") and not cs.startswith("iso-"):
+        return "iso-" + cs[3:]
     return cs
 
 
@@ -275,27 +271,30 @@
     ISO-8859-1, an encoding with that allows all byte sequences.
     Transfer encodings will be used if necessary.'''
 
-    cs = [b'us-ascii', b'utf-8', encoding.encoding, encoding.fallbackencoding]
+    cs = [
+        'us-ascii',
+        'utf-8',
+        pycompat.sysstr(encoding.encoding),
+        pycompat.sysstr(encoding.fallbackencoding),
+    ]
     if display:
-        cs = [b'us-ascii']
+        cs = ['us-ascii']
     for charset in cs:
         try:
-            s.decode(pycompat.sysstr(charset))
+            s.decode(charset)
             return mimetextqp(s, subtype, codec2iana(charset))
         except UnicodeDecodeError:
             pass
 
-    return mimetextqp(s, subtype, b"iso-8859-1")
+    return mimetextqp(s, subtype, "iso-8859-1")
 
 
 def mimetextqp(body, subtype, charset):
-    # type: (bytes, bytes, bytes) -> email.message.Message
+    # type: (bytes, bytes, str) -> email.message.Message
     '''Return MIME message.
     Quoted-printable transfer encoding will be used if necessary.
     '''
-    # Experimentally charset is okay as a bytes even if the type
-    # stubs disagree.
-    cs = email.charset.Charset(charset)  # pytype: disable=wrong-arg-types
+    cs = email.charset.Charset(charset)
     msg = email.message.Message()
     msg.set_type(pycompat.sysstr(b'text/' + subtype))
 
@@ -317,24 +316,25 @@
 
 
 def _charsets(ui):
-    # type: (Any) -> List[bytes]
+    # type: (Any) -> List[str]
     '''Obtains charsets to send mail parts not containing patches.'''
     charsets = [
-        cs.lower() for cs in ui.configlist(b'email', b'charsets')
-    ]  # type: List[bytes]
+        pycompat.sysstr(cs.lower())
+        for cs in ui.configlist(b'email', b'charsets')
+    ]
     fallbacks = [
-        encoding.fallbackencoding.lower(),
-        encoding.encoding.lower(),
-        b'utf-8',
-    ]  # type: List[bytes]
+        pycompat.sysstr(encoding.fallbackencoding.lower()),
+        pycompat.sysstr(encoding.encoding.lower()),
+        'utf-8',
+    ]
     for cs in fallbacks:  # find unique charsets while keeping order
         if cs not in charsets:
             charsets.append(cs)
-    return [cs for cs in charsets if not cs.endswith(b'ascii')]
+    return [cs for cs in charsets if not cs.endswith('ascii')]
 
 
 def _encode(ui, s, charsets):
-    # type: (Any, bytes, List[bytes]) -> Tuple[bytes, bytes]
+    # type: (Any, bytes, List[str]) -> Tuple[bytes, str]
     '''Returns (converted) string, charset tuple.
     Finds out best charset by cycling through sendcharsets in descending
     order. Tries both encoding and fallbackencoding for input. Only as
@@ -347,14 +347,17 @@
         # wants, and fall back to garbage-in-ascii.
         for ocs in sendcharsets:
             try:
-                return s.encode(pycompat.sysstr(ocs)), ocs
+                return s.encode(ocs), ocs
             except UnicodeEncodeError:
                 pass
             except LookupError:
-                ui.warn(_(b'ignoring invalid sendcharset: %s\n') % ocs)
+                ui.warn(
+                    _(b'ignoring invalid sendcharset: %s\n')
+                    % pycompat.sysbytes(ocs)
+                )
         else:
             # Everything failed, ascii-armor what we've got and send it.
-            return s.encode('ascii', 'backslashreplace'), b'us-ascii'
+            return s.encode('ascii', 'backslashreplace'), 'us-ascii'
     # We have a bytes of unknown encoding. We'll try and guess a valid
     # encoding, falling back to pretending we had ascii even though we
     # know that's wrong.
@@ -369,29 +372,30 @@
                 continue
             for ocs in sendcharsets:
                 try:
-                    return u.encode(pycompat.sysstr(ocs)), ocs
+                    return u.encode(ocs), ocs
                 except UnicodeEncodeError:
                     pass
                 except LookupError:
-                    ui.warn(_(b'ignoring invalid sendcharset: %s\n') % ocs)
+                    ui.warn(
+                        _(b'ignoring invalid sendcharset: %s\n')
+                        % pycompat.sysbytes(ocs)
+                    )
     # if ascii, or all conversion attempts fail, send (broken) ascii
-    return s, b'us-ascii'
+    return s, 'us-ascii'
 
 
 def headencode(ui, s, charsets=None, display=False):
-    # type: (Any, Union[bytes, str], List[bytes], bool) -> str
+    # type: (Any, Union[bytes, str], List[str], bool) -> str
     '''Returns RFC-2047 compliant header from given string.'''
     if not display:
         # split into words?
         s, cs = _encode(ui, s, charsets)
-        return email.header.Header(
-            s, cs  # pytype: disable=wrong-arg-types
-        ).encode()
+        return email.header.Header(s, cs).encode()
     return encoding.strfromlocal(s)
 
 
 def _addressencode(ui, name, addr, charsets=None):
-    # type: (Any, str, bytes, List[bytes]) -> str
+    # type: (Any, str, bytes, List[str]) -> str
     assert isinstance(addr, bytes)
     name = headencode(ui, name, charsets)
     try:
@@ -411,7 +415,7 @@
 
 
 def addressencode(ui, address, charsets=None, display=False):
-    # type: (Any, bytes, List[bytes], bool) -> str
+    # type: (Any, bytes, List[str], bool) -> str
     '''Turns address into RFC-2047 compliant header.'''
     if display or not address:
         return encoding.strfromlocal(address or b'')
@@ -420,7 +424,7 @@
 
 
 def addrlistencode(ui, addrs, charsets=None, display=False):
-    # type: (Any, List[bytes], List[bytes], bool) -> List[str]
+    # type: (Any, List[bytes], List[str], bool) -> List[str]
     '''Turns a list of addresses into a list of RFC-2047 compliant headers.
     A single element of input list may contain multiple addresses, but output
     always has one address per item'''
@@ -440,10 +444,10 @@
 
 
 def mimeencode(ui, s, charsets=None, display=False):
-    # type: (Any, bytes, List[bytes], bool) -> email.message.Message
+    # type: (Any, bytes, List[str], bool) -> email.message.Message
     '''creates mime text object, encodes it if needed, and sets
     charset and transfer-encoding accordingly.'''
-    cs = b'us-ascii'
+    cs = 'us-ascii'
     if not display:
         s, cs = _encode(ui, s, charsets)
     return mimetextqp(s, b'plain', cs)