Mark Sapiro pushed to branch master at GNU Mailman / Mailman Core


Commits:
e5f3f118 by Mark Sapiro at 2020-12-04T09:52:50+00:00
Fix issue converting non-ascii html to plain text.

- - - - -
45edbcb8 by Mark Sapiro at 2020-12-04T09:52:50+00:00
Merge branch 'mimedel' into 'master'

Fix issue converting non-ascii html to plain text.

Closes #798

See merge request mailman/mailman!742
- - - - -


4 changed files:

- src/mailman/docs/NEWS.rst
- src/mailman/handlers/mime_delete.py
- + src/mailman/handlers/tests/data/html_to_plain.eml
- src/mailman/handlers/tests/test_mimedel.py


Changes:

=====================================
src/mailman/docs/NEWS.rst
=====================================
@@ -23,8 +23,10 @@ Bugs
 * Implemented a ``scrubber`` for plain text digests.  (Closes #473)
 * The ``mailman gatenews`` command now adds ``original_size`` as a message
   attribute.  (Extends fix for #762)
-* Handle FileNotFoundError when creating digest.mmdf file without
-  parent directory present (Closes #699)
+* Handle FileNotFoundError when creating digest.mmdf file without a
+  parent directory present.  (Closes #699)
+* Fixed an issue where content filtering can throw UnicodeEncodeError when
+  converting HTML to plain text.  (Closes #798)
 
 New Features
 ------------


=====================================
src/mailman/handlers/mime_delete.py
=====================================
@@ -277,9 +277,9 @@ def to_plaintext(msg):
         resources.callback(shutil.rmtree, tempdir)
         for subpart in typed_subpart_iterator(msg, 'text', 'html'):
             filename = os.path.join(tempdir, '{}.html'.format(next(counter)))
-            ctype = msg.get_content_charset('us-ascii')
+            cset = subpart.get_content_charset('us-ascii')
             with open(filename, 'w', encoding='utf-8') as fp:
-                fp.write(subpart.get_payload(decode=True).decode(ctype,
+                fp.write(subpart.get_payload(decode=True).decode(cset,
                          errors='replace'))
             template = Template(config.mailman.html_to_plain_text_command)
             command = template.safe_substitute(filename=filename).split()
@@ -291,7 +291,7 @@ def to_plaintext(msg):
                 # Replace the payload of the subpart with the converted text
                 # and tweak the content type.
                 del subpart['content-transfer-encoding']
-                subpart.set_payload(stdout, charset=ctype)
+                subpart.set_payload(stdout, charset=cset)
                 subpart.set_type('text/plain')
                 changedp += 1
     return changedp


=====================================
src/mailman/handlers/tests/data/html_to_plain.eml
=====================================
@@ -0,0 +1,25 @@
+To: l...@example.com
+From: u...@example.com
+Subject: Test Message
+Message-ID: <m...@example.com>
+Date: Thu, 3 Dec 2020 15:18:27 +0100
+MIME-Version: 1.0
+Content-Type: multipart/mixed;
+ boundary="------------04218E0A720FDBFA6DB11AF1"
+
+--------------04218E0A720FDBFA6DB11AF1
+Content-Type: text/plain; charset=utf-8; format=flowed
+Content-Transfer-Encoding: quoted-printable
+
+This is a plain text body
+
+--------------04218E0A720FDBFA6DB11AF1
+Content-Type: text/html; charset=UTF-8;
+ name="junk.html"
+Content-Disposition: attachment;
+ filename="junk.html"
+Content-Transfer-Encoding: base64
+
+VW0gZnLDvGhlcmUgTmFjaHJpY2h0ZW4K
+
+--------------04218E0A720FDBFA6DB11AF1--


=====================================
src/mailman/handlers/tests/test_mimedel.py
=====================================
@@ -351,6 +351,22 @@ MIME-Version: 1.0
         payload_lines = msg.get_payload().splitlines()
         self.assertEqual(payload_lines[0], '<html><head></head>')
 
+    def test_html_part_with_non_ascii(self):
+        # Ensure we can convert HTML to plain text in an HTML sub-part which
+        # contains non-ascii.
+        with resource_open(
+                'mailman.handlers.tests.data',
+                'html_to_plain.eml') as fp:
+            msg = email.message_from_binary_file(fp)
+        process = config.handlers['mime-delete'].process
+        with dummy_script():
+            process(self._mlist, msg, {})
+        part = msg.get_payload(1)
+        cset = part.get_content_charset('us-ascii')
+        text = part.get_payload(decode=True).decode(cset).splitlines()
+        self.assertEqual(text[0], 'Converted text/html to text/plain')
+        self.assertEqual(text[2], 'Um frühere Nachrichten')
+
 
 class TestMiscellaneous(unittest.TestCase):
     """Test various miscellaneous filtering actions."""



View it on GitLab: 
https://gitlab.com/mailman/mailman/-/compare/9176bf6ee3fa8c707e8a60aeca495e799c07e216...45edbcb884ac9198d42f06833ea381585a72b68d

-- 
View it on GitLab: 
https://gitlab.com/mailman/mailman/-/compare/9176bf6ee3fa8c707e8a60aeca495e799c07e216...45edbcb884ac9198d42f06833ea381585a72b68d
You're receiving this email because of your account on gitlab.com.


_______________________________________________
Mailman-checkins mailing list -- mailman-checkins@python.org
To unsubscribe send an email to mailman-checkins-le...@python.org
https://mail.python.org/mailman3/lists/mailman-checkins.python.org/
Member address: arch...@jab.org

Reply via email to