# HG changeset patch # User Yuya Nishihara # Date 1529145067 -32400 # Node ID 7b12a2d2eedc995405187cdf9a35736a14d60706 # Parent cf59de8028832768ad62d66d1339c7bafe7d9eb8 py3: ditch email.parser.BytesParser which appears to be plain crap As I said before, BytesParser is a thin wrapper over the unicode Parser, and it's too thin to return bytes back. Today, I found it normalizes newline characters to '\n's thanks to the careless use of TextIOWrapper. So, this patch replaces BytesParser with Parser + TextIOWrapper, and fixes newline handling. Since I don't know what the least bad encoding strategy is here, I just copied it from BytesParser. I've moved the new parse() function from pycompat to mail, as it is no longer a trivial wrapper. diff -r cf59de802883 -r 7b12a2d2eedc contrib/python3-whitelist --- a/contrib/python3-whitelist Sat Jun 16 17:56:37 2018 +0900 +++ b/contrib/python3-whitelist Sat Jun 16 19:31:07 2018 +0900 @@ -203,6 +203,7 @@ test-hybridencode.py test-identify.t test-import-bypass.t +test-import-eol.t test-import-merge.t test-import-unknown.t test-import.t diff -r cf59de802883 -r 7b12a2d2eedc mercurial/mail.py --- a/mercurial/mail.py Sat Jun 16 17:56:37 2018 +0900 +++ b/mercurial/mail.py Sat Jun 16 19:31:07 2018 +0900 @@ -11,6 +11,8 @@ import email.charset import email.header import email.message +import email.parser +import io import os import smtplib import socket @@ -322,6 +324,23 @@ s, cs = _encode(ui, s, charsets) return mimetextqp(s, 'plain', cs) +if pycompat.ispy3: + def parse(fp): + ep = email.parser.Parser() + # disable the "universal newlines" mode, which isn't binary safe. + # I have no idea if ascii/surrogateescape is correct, but that's + # what the standard Python email parser does. 
+ fp = io.TextIOWrapper(fp, encoding=r'ascii', + errors=r'surrogateescape', newline=chr(10)) + try: + return ep.parse(fp) + finally: + fp.detach() +else: + def parse(fp): + ep = email.parser.Parser() + return ep.parse(fp) + def headdecode(s): '''Decodes RFC-2047 header''' uparts = [] diff -r cf59de802883 -r 7b12a2d2eedc mercurial/patch.py --- a/mercurial/patch.py Sat Jun 16 17:56:37 2018 +0900 +++ b/mercurial/patch.py Sat Jun 16 19:31:07 2018 +0900 @@ -112,7 +112,7 @@ cur.append(line) c = chunk(cur) - m = pycompat.emailparser().parse(c) + m = mail.parse(c) if not m.is_multipart(): yield msgfp(m) else: @@ -230,7 +230,7 @@ data = {} - msg = pycompat.emailparser().parse(fileobj) + msg = mail.parse(fileobj) subject = msg[r'Subject'] and mail.headdecode(msg[r'Subject']) data['user'] = msg[r'From'] and mail.headdecode(msg[r'From']) diff -r cf59de802883 -r 7b12a2d2eedc mercurial/pycompat.py --- a/mercurial/pycompat.py Sat Jun 16 17:56:37 2018 +0900 +++ b/mercurial/pycompat.py Sat Jun 16 19:31:07 2018 +0900 @@ -295,10 +295,6 @@ ret = shlex.split(s.decode('latin-1'), comments, posix) return [a.encode('latin-1') for a in ret] - def emailparser(*args, **kwargs): - import email.parser - return email.parser.BytesParser(*args, **kwargs) - else: import cStringIO @@ -371,10 +367,6 @@ rawinput = raw_input getargspec = inspect.getargspec - def emailparser(*args, **kwargs): - import email.parser - return email.parser.Parser(*args, **kwargs) - isjython = sysplatform.startswith('java') isdarwin = sysplatform == 'darwin'