# HG changeset patch # User Julien Cristau <julien.cris...@logilab.fr> # Date 1457026459 -3600 # Thu Mar 03 18:34:19 2016 +0100 # Node ID 6c153cbad4a032861417dbba9d1d90332964ab5f # Parent 549ff28a345f595cad7e06fb08c2ac6973e2f030 patch: when importing from email, RFC2047-decode From/Subject headers
I'm not too sure about the Subject part: it should be possible to use the charset information from the email (RFC2047 encoding and the Content-Type header), but mercurial seems to use its own encoding instead (in the test, that means the commit message ends up as "????" if the import is done without --encoding utf-8). Advice welcome. Reported at https://bugs.debian.org/737498 diff --git a/mercurial/patch.py b/mercurial/patch.py --- a/mercurial/patch.py +++ b/mercurial/patch.py @@ -201,19 +201,28 @@ def extract(ui, fileobj): # (this heuristic is borrowed from quilt) diffre = re.compile(r'^(?:Index:[ \t]|diff[ \t]|RCS file: |' r'retrieving revision [0-9]+(\.[0-9]+)*$|' r'---[ \t].*?^\+\+\+[ \t]|' r'\*\*\*[ \t].*?^---[ \t])', re.MULTILINE|re.DOTALL) + def decode_header(header): + if header is None: + return None + parts = [] + for part, charset in email.Header.decode_header(header): + if charset is None: + charset = 'ascii' + parts.append(part.decode(charset)) + return encoding.tolocal(u' '.join(parts).encode('utf-8')) data = {} fd, tmpname = tempfile.mkstemp(prefix='hg-patch-') tmpfp = os.fdopen(fd, 'w') try: msg = email.Parser.Parser().parse(fileobj) - subject = msg['Subject'] - data['user'] = msg['From'] + subject = decode_header(msg['Subject']) + data['user'] = decode_header(msg['From']) if not subject and not data['user']: # Not an email, restore parsed headers if any subject = '\n'.join(': '.join(h) for h in msg.items()) + '\n' # should try to parse msg['Date'] diff --git a/tests/test-import-git.t b/tests/test-import-git.t --- a/tests/test-import-git.t +++ b/tests/test-import-git.t @@ -820,6 +820,29 @@ Test corner case involving copies and mu > a > +b > EOF applying patch from stdin +Test email metadata + + $ hg revert -qa + $ hg --encoding utf-8 import - <<EOF + > From: =?UTF-8?q?Rapha=C3=ABl=20Hertzog?= <hert...@debian.org> + > Subject: [PATCH] =?UTF-8?q?=C5=A7=E2=82=AC=C3=9F=E1=B9=AA?= + > + > diff --git a/a b/a + > --- a/a + > +++ b/a + > @@ -1,1 +1,2 @@ + > a + > +a + > EOF + applying patch from stdin + $ hg --encoding utf-8 log -r . + changeset: 2:* (glob) + tag: tip + user: Rapha\xc3\xabl Hertzog <hert...@debian.org> (esc) + date: * (glob) + summary: \xc5\xa7\xe2\x82\xac\xc3\x9f\xe1\xb9\xaa (esc) + + $ cd ..