Mercurial > hg
changeset 38332:7b12a2d2eedc
py3: ditch email.parser.BytesParser which appears to be plain crap
As I said before, BytesParser is a thin wrapper over the unicode Parser,
and it's too thin to give bytes back. Today, I found that it also normalizes
newline characters to '\n' because of its careless use of TextIOWrapper.
So, this patch replaces BytesParser with Parser + TextIOWrapper, and fixes
newline handling. Since I don't know what the least bad encoding strategy is
here, I just copied it from BytesParser.
I've moved the new parse() function from pycompat to mail, as it is no longer
a trivial wrapper.
author | Yuya Nishihara <yuya@tcha.org> |
---|---|
date | Sat, 16 Jun 2018 19:31:07 +0900 |
parents | cf59de802883 |
children | bb7e3c6ef592 |
files | contrib/python3-whitelist mercurial/mail.py mercurial/patch.py mercurial/pycompat.py |
diffstat | 4 files changed, 22 insertions(+), 10 deletions(-) [+] |
line wrap: on
line diff
--- a/contrib/python3-whitelist Sat Jun 16 17:56:37 2018 +0900 +++ b/contrib/python3-whitelist Sat Jun 16 19:31:07 2018 +0900 @@ -203,6 +203,7 @@ test-hybridencode.py test-identify.t test-import-bypass.t +test-import-eol.t test-import-merge.t test-import-unknown.t test-import.t
--- a/mercurial/mail.py Sat Jun 16 17:56:37 2018 +0900 +++ b/mercurial/mail.py Sat Jun 16 19:31:07 2018 +0900 @@ -11,6 +11,8 @@ import email.charset import email.header import email.message +import email.parser +import io import os import smtplib import socket @@ -322,6 +324,23 @@ s, cs = _encode(ui, s, charsets) return mimetextqp(s, 'plain', cs) +if pycompat.ispy3: + def parse(fp): + ep = email.parser.Parser() + # disable the "universal newlines" mode, which isn't binary safe. + # I have no idea if ascii/surrogateescape is correct, but that's + # what the standard Python email parser does. + fp = io.TextIOWrapper(fp, encoding=r'ascii', + errors=r'surrogateescape', newline=chr(10)) + try: + return ep.parse(fp) + finally: + fp.detach() +else: + def parse(fp): + ep = email.parser.Parser() + return ep.parse(fp) + def headdecode(s): '''Decodes RFC-2047 header''' uparts = []
--- a/mercurial/patch.py Sat Jun 16 17:56:37 2018 +0900 +++ b/mercurial/patch.py Sat Jun 16 19:31:07 2018 +0900 @@ -112,7 +112,7 @@ cur.append(line) c = chunk(cur) - m = pycompat.emailparser().parse(c) + m = mail.parse(c) if not m.is_multipart(): yield msgfp(m) else: @@ -230,7 +230,7 @@ data = {} - msg = pycompat.emailparser().parse(fileobj) + msg = mail.parse(fileobj) subject = msg[r'Subject'] and mail.headdecode(msg[r'Subject']) data['user'] = msg[r'From'] and mail.headdecode(msg[r'From'])
--- a/mercurial/pycompat.py Sat Jun 16 17:56:37 2018 +0900 +++ b/mercurial/pycompat.py Sat Jun 16 19:31:07 2018 +0900 @@ -295,10 +295,6 @@ ret = shlex.split(s.decode('latin-1'), comments, posix) return [a.encode('latin-1') for a in ret] - def emailparser(*args, **kwargs): - import email.parser - return email.parser.BytesParser(*args, **kwargs) - else: import cStringIO @@ -371,10 +367,6 @@ rawinput = raw_input getargspec = inspect.getargspec - def emailparser(*args, **kwargs): - import email.parser - return email.parser.Parser(*args, **kwargs) - isjython = sysplatform.startswith('java') isdarwin = sysplatform == 'darwin'