Mark Sapiro pushed to branch master at GNU Mailman / Mailman Core
Commits: e5f3f118 by Mark Sapiro at 2020-12-04T09:52:50+00:00 Fix issue converting non-ascii html to plain text. - - - - - 45edbcb8 by Mark Sapiro at 2020-12-04T09:52:50+00:00 Merge branch 'mimedel' into 'master' Fix issue converting non-ascii html to plain text. Closes #798 See merge request mailman/mailman!742 - - - - - 4 changed files: - src/mailman/docs/NEWS.rst - src/mailman/handlers/mime_delete.py - + src/mailman/handlers/tests/data/html_to_plain.eml - src/mailman/handlers/tests/test_mimedel.py Changes: ===================================== src/mailman/docs/NEWS.rst ===================================== @@ -23,8 +23,10 @@ Bugs * Implemented a ``scrubber`` for plain text digests. (Closes #473) * The ``mailman gatenews`` command now adds ``original_size`` as a message attribute. (Extends fix for #762) -* Handle FileNotFoundError when creating digest.mmdf file without - parent directory present (Closes #699) +* Handle FileNotFoundError when creating digest.mmdf file without a + parent directory present. (Closes #699) +* Fixed an issue where content filtering can throw UnicodeEncodeError when + converting HTML to plain text. 
(Closes #798) New Features ------------ ===================================== src/mailman/handlers/mime_delete.py ===================================== @@ -277,9 +277,9 @@ def to_plaintext(msg): resources.callback(shutil.rmtree, tempdir) for subpart in typed_subpart_iterator(msg, 'text', 'html'): filename = os.path.join(tempdir, '{}.html'.format(next(counter))) - ctype = msg.get_content_charset('us-ascii') + cset = subpart.get_content_charset('us-ascii') with open(filename, 'w', encoding='utf-8') as fp: - fp.write(subpart.get_payload(decode=True).decode(ctype, + fp.write(subpart.get_payload(decode=True).decode(cset, errors='replace')) template = Template(config.mailman.html_to_plain_text_command) command = template.safe_substitute(filename=filename).split() @@ -291,7 +291,7 @@ def to_plaintext(msg): # Replace the payload of the subpart with the converted text # and tweak the content type. del subpart['content-transfer-encoding'] - subpart.set_payload(stdout, charset=ctype) + subpart.set_payload(stdout, charset=cset) subpart.set_type('text/plain') changedp += 1 return changedp ===================================== src/mailman/handlers/tests/data/html_to_plain.eml ===================================== @@ -0,0 +1,25 @@ +To: l...@example.com +From: u...@example.com +Subject: Test Message +Message-ID: <m...@example.com> +Date: Thu, 3 Dec 2020 15:18:27 +0100 +MIME-Version: 1.0 +Content-Type: multipart/mixed; + boundary="------------04218E0A720FDBFA6DB11AF1" + +--------------04218E0A720FDBFA6DB11AF1 +Content-Type: text/plain; charset=utf-8; format=flowed +Content-Transfer-Encoding: quoted-printable + +This is a plain text body + +--------------04218E0A720FDBFA6DB11AF1 +Content-Type: text/html; charset=UTF-8; + name="junk.html" +Content-Disposition: attachment; + filename="junk.html" +Content-Transfer-Encoding: base64 + +VW0gZnLDvGhlcmUgTmFjaHJpY2h0ZW4K + +--------------04218E0A720FDBFA6DB11AF1-- ===================================== 
src/mailman/handlers/tests/test_mimedel.py ===================================== @@ -351,6 +351,22 @@ MIME-Version: 1.0 payload_lines = msg.get_payload().splitlines() self.assertEqual(payload_lines[0], '<html><head></head>') + def test_html_part_with_non_ascii(self): + # Ensure we can convert HTML to plain text in an HTML sub-part which + # contains non-ascii. + with resource_open( + 'mailman.handlers.tests.data', + 'html_to_plain.eml') as fp: + msg = email.message_from_binary_file(fp) + process = config.handlers['mime-delete'].process + with dummy_script(): + process(self._mlist, msg, {}) + part = msg.get_payload(1) + cset = part.get_content_charset('us-ascii') + text = part.get_payload(decode=True).decode(cset).splitlines() + self.assertEqual(text[0], 'Converted text/html to text/plain') + self.assertEqual(text[2], 'Um frühere Nachrichten') + class TestMiscellaneous(unittest.TestCase): """Test various miscellaneous filtering actions.""" View it on GitLab: https://gitlab.com/mailman/mailman/-/compare/9176bf6ee3fa8c707e8a60aeca495e799c07e216...45edbcb884ac9198d42f06833ea381585a72b68d -- You're receiving this email because of your account on gitlab.com.
_______________________________________________ Mailman-checkins mailing list -- mailman-checkins@python.org To unsubscribe send an email to mailman-checkins-le...@python.org https://mail.python.org/mailman3/lists/mailman-checkins.python.org/ Member address: arch...@jab.org