Mark Sapiro pushed to branch master at GNU Mailman / Mailman Core
Commits: 9209030a by Mark Sapiro at 2020-06-17T21:10:47-07:00 Fix mailman.email.message.Message.as_string() to not return surrogates. - - - - - 8cb56116 by Mark Sapiro at 2020-06-18T04:24:52+00:00 Merge branch 'fix_732' into 'master' Fix mailman.email.message.Message.as_string() to not return surrogates. Closes #732 See merge request mailman/mailman!665 - - - - - 4 changed files: - src/mailman/docs/NEWS.rst - src/mailman/email/message.py - + src/mailman/email/tests/data/bad_email_4.eml - src/mailman/email/tests/test_message.py Changes: ===================================== src/mailman/docs/NEWS.rst ===================================== @@ -23,6 +23,8 @@ Bugs (Closes #725) * The ``dmarc`` rule no longer misses if DNS returns a name containing upper case. (Closes #726) +* Fixed ``mailman.email.message.Message.as_string`` to not return unicode + surrogates. (Closes #732) Command line ------------ ===================================== src/mailman/email/message.py ===================================== @@ -55,7 +55,8 @@ class Message(email.message.Message): except (KeyError, LookupError, UnicodeEncodeError): value = email.message.Message.as_bytes(self).decode( 'ascii', 'replace') - return value + # Also ensure no unicode surrogates in the returned string. + return email.utils._sanitize(value) @property def sender(self): ===================================== src/mailman/email/tests/data/bad_email_4.eml ===================================== @@ -0,0 +1,23 @@ +From: u...@example.com +To: l...@example.com +Subject: Test Message +Message-ID: <some...@example.com> +MIME-Version: 1.0 +Content-Type: multipart/alternative; boundary="zzz123" + +Ce message est au format MIME. Comme votre logiciel de courrier ne comprend +pas ce format, tout ou partie de ce message pourrait être illisible.. 
+ +--zzz123 +Content-Type: text/plain; charset="us-ascii" +Content-Transfer-Encoding: 7bit + +Plain Text + +--zzz123 +Content-Type: text/html; charset="us-ascii" +Content-Transfer-Encoding: 7bit + +Not really HTML + +--zzz123-- ===================================== src/mailman/email/tests/test_message.py ===================================== @@ -22,6 +22,7 @@ import unittest from email import message_from_binary_file from email.header import Header from email.parser import FeedParser +from email.utils import _has_surrogates from importlib_resources import path from mailman.app.lifecycle import create_list from mailman.email.message import Message, UserNotification @@ -121,6 +122,12 @@ Test content text = fp.read().decode('ascii', 'replace') self.assertEqual(msg.as_string(), text) + def test_as_string_unicode_surrogates(self): + with path('mailman.email.tests.data', 'bad_email_4.eml') as email_path: + with open(str(email_path), 'rb') as fp: + msg = message_from_binary_file(fp, Message) + self.assertFalse(_has_surrogates(msg.as_string())) + def test_bogus_content_charset(self): with path('mailman.email.tests.data', 'bad_email_3.eml') as email_path: with open(str(email_path), 'rb') as fp: View it on GitLab: https://gitlab.com/mailman/mailman/-/compare/6b276b9cfc3d5580f2cfb48a6aacd78541ff443b...8cb5611662490dd154d2f4e6dbbe2b4aa3adb596 -- View it on GitLab: https://gitlab.com/mailman/mailman/-/compare/6b276b9cfc3d5580f2cfb48a6aacd78541ff443b...8cb5611662490dd154d2f4e6dbbe2b4aa3adb596 You're receiving this email because of your account on gitlab.com.
_______________________________________________ Mailman-checkins mailing list -- mailman-checkins@python.org To unsubscribe send an email to mailman-checkins-le...@python.org https://mail.python.org/mailman3/lists/mailman-checkins.python.org/ Member address: arch...@jab.org