changeset 38341:7b12a2d2eedc

py3: ditch email.parser.BytesParser which appears to be plain crap

As I said before, BytesParser is a thin wrapper over the unicode Parser, and it's too thin to return bytes back. Today, I found that it normalizes newline characters to '\n' thanks to its careless use of TextIOWrapper.

So, this patch replaces BytesParser with Parser + TextIOWrapper, and fixes newline handling. Since I don't know what the least bad encoding strategy is here, I just copied it from BytesParser.

I've moved the new parse() function out of pycompat and into mail, as it is no longer a trivial wrapper.
author Yuya Nishihara <yuya@tcha.org>
date Sat, 16 Jun 2018 19:31:07 +0900
parents cf59de802883
children bb7e3c6ef592
files contrib/python3-whitelist mercurial/mail.py mercurial/patch.py mercurial/pycompat.py
diffstat 4 files changed, 22 insertions(+), 10 deletions(-) [+]
line wrap: on
line diff
--- a/contrib/python3-whitelist	Sat Jun 16 17:56:37 2018 +0900
+++ b/contrib/python3-whitelist	Sat Jun 16 19:31:07 2018 +0900
@@ -203,6 +203,7 @@
 test-hybridencode.py
 test-identify.t
 test-import-bypass.t
+test-import-eol.t
 test-import-merge.t
 test-import-unknown.t
 test-import.t
--- a/mercurial/mail.py	Sat Jun 16 17:56:37 2018 +0900
+++ b/mercurial/mail.py	Sat Jun 16 19:31:07 2018 +0900
@@ -11,6 +11,8 @@
 import email.charset
 import email.header
 import email.message
+import email.parser
+import io
 import os
 import smtplib
 import socket
@@ -322,6 +324,23 @@
         s, cs = _encode(ui, s, charsets)
     return mimetextqp(s, 'plain', cs)
 
+if pycompat.ispy3:
+    def parse(fp):
+        ep = email.parser.Parser()
+        # disable the "universal newlines" mode, which isn't binary safe.
+        # I have no idea if ascii/surrogateescape is correct, but that's
+        # what the standard Python email parser does.
+        fp = io.TextIOWrapper(fp, encoding=r'ascii',
+                              errors=r'surrogateescape', newline=chr(10))
+        try:
+            return ep.parse(fp)
+        finally:
+            fp.detach()
+else:
+    def parse(fp):
+        ep = email.parser.Parser()
+        return ep.parse(fp)
+
 def headdecode(s):
     '''Decodes RFC-2047 header'''
     uparts = []
--- a/mercurial/patch.py	Sat Jun 16 17:56:37 2018 +0900
+++ b/mercurial/patch.py	Sat Jun 16 19:31:07 2018 +0900
@@ -112,7 +112,7 @@
             cur.append(line)
         c = chunk(cur)
 
-        m = pycompat.emailparser().parse(c)
+        m = mail.parse(c)
         if not m.is_multipart():
             yield msgfp(m)
         else:
@@ -230,7 +230,7 @@
 
     data = {}
 
-    msg = pycompat.emailparser().parse(fileobj)
+    msg = mail.parse(fileobj)
 
     subject = msg[r'Subject'] and mail.headdecode(msg[r'Subject'])
     data['user'] = msg[r'From'] and mail.headdecode(msg[r'From'])
--- a/mercurial/pycompat.py	Sat Jun 16 17:56:37 2018 +0900
+++ b/mercurial/pycompat.py	Sat Jun 16 19:31:07 2018 +0900
@@ -295,10 +295,6 @@
         ret = shlex.split(s.decode('latin-1'), comments, posix)
         return [a.encode('latin-1') for a in ret]
 
-    def emailparser(*args, **kwargs):
-        import email.parser
-        return email.parser.BytesParser(*args, **kwargs)
-
 else:
     import cStringIO
 
@@ -371,10 +367,6 @@
     rawinput = raw_input
     getargspec = inspect.getargspec
 
-    def emailparser(*args, **kwargs):
-        import email.parser
-        return email.parser.Parser(*args, **kwargs)
-
 isjython = sysplatform.startswith('java')
 
 isdarwin = sysplatform == 'darwin'