patch: when importing from email, RFC2047-decode From/Subject headers
Reported at https://bugs.debian.org/737498
--- a/mercurial/mail.py Wed Mar 02 22:39:03 2016 +0000
+++ b/mercurial/mail.py Thu Mar 03 18:34:19 2016 +0100
@@ -332,3 +332,21 @@
if not display:
s, cs = _encode(ui, s, charsets)
return mimetextqp(s, 'plain', cs)
+
+def headdecode(s):
+    '''Decodes RFC-2047 header s, trying fallback charsets on failure'''
+    uparts = []
+    for part, charset in email.Header.decode_header(s):
+        if charset is not None:
+            try:
+                uparts.append(part.decode(charset))
+                continue
+            except (UnicodeDecodeError, LookupError):
+                pass  # undecodable bytes or unknown charset: fall back
+        try:
+            uparts.append(part.decode('UTF-8'))
+            continue
+        except UnicodeDecodeError:
+            pass  # not valid UTF-8 either
+        uparts.append(part.decode('ISO-8859-1'))  # accepts any byte string
+    return encoding.tolocal(u' '.join(uparts).encode('UTF-8'))
--- a/mercurial/patch.py Wed Mar 02 22:39:03 2016 +0000
+++ b/mercurial/patch.py Thu Mar 03 18:34:19 2016 +0100
@@ -31,6 +31,7 @@
diffhelpers,
encoding,
error,
+ mail,
mdiff,
pathutil,
scmutil,
@@ -210,8 +211,8 @@
try:
msg = email.Parser.Parser().parse(fileobj)
- subject = msg['Subject']
- data['user'] = msg['From']
+ subject = msg['Subject'] and mail.headdecode(msg['Subject'])
+ data['user'] = msg['From'] and mail.headdecode(msg['From'])
if not subject and not data['user']:
# Not an email, restore parsed headers if any
subject = '\n'.join(': '.join(h) for h in msg.items()) + '\n'
--- a/tests/test-import-git.t Wed Mar 02 22:39:03 2016 +0000
+++ b/tests/test-import-git.t Thu Mar 03 18:34:19 2016 +0100
@@ -822,4 +822,27 @@
> EOF
applying patch from stdin
+Test email metadata
+
+ $ hg revert -qa
+ $ hg --encoding utf-8 import - <<EOF
+ > From: =?UTF-8?q?Rapha=C3=ABl=20Hertzog?= <hertzog@debian.org>
+ > Subject: [PATCH] =?UTF-8?q?=C5=A7=E2=82=AC=C3=9F=E1=B9=AA?=
+ >
+ > diff --git a/a b/a
+ > --- a/a
+ > +++ b/a
+ > @@ -1,1 +1,2 @@
+ > a
+ > +a
+ > EOF
+ applying patch from stdin
+ $ hg --encoding utf-8 log -r .
+ changeset: 2:* (glob)
+ tag: tip
+ user: Rapha\xc3\xabl Hertzog <hertzog@debian.org> (esc)
+ date: * (glob)
+ summary: \xc5\xa7\xe2\x82\xac\xc3\x9f\xe1\xb9\xaa (esc)
+
+
$ cd ..