py3: ditch email.parser.BytesParser which appears to be plain crap
As I said before, BytesParser is a thin wrapper over the unicode Parser,
and it's too thin to return bytes back. Today, I found that it also normalizes
newline characters to '\n' thanks to its careless use of TextIOWrapper.
So, this patch replaces BytesParser with Parser + TextIOWrapper, and fixes
newline handling. Since I don't know what's the least bad encoding strategy
here, I just copied it from BytesParser.
I've moved the new parse() function from pycompat to mail, as it is no longer a
trivial wrapper.
--- a/contrib/python3-whitelist Sat Jun 16 17:56:37 2018 +0900
+++ b/contrib/python3-whitelist Sat Jun 16 19:31:07 2018 +0900
@@ -203,6 +203,7 @@
test-hybridencode.py
test-identify.t
test-import-bypass.t
+test-import-eol.t
test-import-merge.t
test-import-unknown.t
test-import.t
--- a/mercurial/mail.py Sat Jun 16 17:56:37 2018 +0900
+++ b/mercurial/mail.py Sat Jun 16 19:31:07 2018 +0900
@@ -11,6 +11,8 @@
import email.charset
import email.header
import email.message
+import email.parser
+import io
import os
import smtplib
import socket
@@ -322,6 +324,23 @@
s, cs = _encode(ui, s, charsets)
return mimetextqp(s, 'plain', cs)
+if pycompat.ispy3:
+ def parse(fp):
+ ep = email.parser.Parser()
+ # disable the "universal newlines" mode, which isn't binary safe.
+ # I have no idea if ascii/surrogateescape is correct, but that's
+ # what the standard Python email parser does.
+ fp = io.TextIOWrapper(fp, encoding=r'ascii',
+ errors=r'surrogateescape', newline=chr(10))
+ try:
+ return ep.parse(fp)
+ finally:
+ fp.detach()
+else:
+ def parse(fp):
+ ep = email.parser.Parser()
+ return ep.parse(fp)
+
def headdecode(s):
'''Decodes RFC-2047 header'''
uparts = []
--- a/mercurial/patch.py Sat Jun 16 17:56:37 2018 +0900
+++ b/mercurial/patch.py Sat Jun 16 19:31:07 2018 +0900
@@ -112,7 +112,7 @@
cur.append(line)
c = chunk(cur)
- m = pycompat.emailparser().parse(c)
+ m = mail.parse(c)
if not m.is_multipart():
yield msgfp(m)
else:
@@ -230,7 +230,7 @@
data = {}
- msg = pycompat.emailparser().parse(fileobj)
+ msg = mail.parse(fileobj)
subject = msg[r'Subject'] and mail.headdecode(msg[r'Subject'])
data['user'] = msg[r'From'] and mail.headdecode(msg[r'From'])
--- a/mercurial/pycompat.py Sat Jun 16 17:56:37 2018 +0900
+++ b/mercurial/pycompat.py Sat Jun 16 19:31:07 2018 +0900
@@ -295,10 +295,6 @@
ret = shlex.split(s.decode('latin-1'), comments, posix)
return [a.encode('latin-1') for a in ret]
- def emailparser(*args, **kwargs):
- import email.parser
- return email.parser.BytesParser(*args, **kwargs)
-
else:
import cStringIO
@@ -371,10 +367,6 @@
rawinput = raw_input
getargspec = inspect.getargspec
- def emailparser(*args, **kwargs):
- import email.parser
- return email.parser.Parser(*args, **kwargs)
-
isjython = sysplatform.startswith('java')
isdarwin = sysplatform == 'darwin'