https://github.com/python/cpython/commit/f97f25ef5dfcdfec0d9a359fd970abd139cf3428
commit: f97f25ef5dfcdfec0d9a359fd970abd139cf3428
branch: main
author: Serhiy Storchaka <[email protected]>
committer: serhiy-storchaka <[email protected]>
date: 2024-03-05T17:49:01+02:00
summary:
gh-76511: Fix email.Message.as_string() for non-ASCII message with ASCII charset (GH-116125)
files:
A Misc/NEWS.d/next/Library/2024-02-29-17-06-54.gh-issue-76511.WqjRLP.rst
M Lib/email/generator.py
M Lib/email/message.py
M Lib/test/test_email/test_email.py
diff --git a/Lib/email/generator.py b/Lib/email/generator.py
index 7ccbe10eb76856..c8056ad47baa0f 100644
--- a/Lib/email/generator.py
+++ b/Lib/email/generator.py
@@ -243,7 +243,7 @@ def _handle_text(self, msg):
# existing message.
msg = deepcopy(msg)
del msg['content-transfer-encoding']
- msg.set_payload(payload, charset)
+ msg.set_payload(msg._payload, charset)
payload = msg.get_payload()
self._munge_cte = (msg['content-transfer-encoding'],
msg['content-type'])
diff --git a/Lib/email/message.py b/Lib/email/message.py
index fe769580fed5d0..a14cca56b3745a 100644
--- a/Lib/email/message.py
+++ b/Lib/email/message.py
@@ -340,7 +340,7 @@ def set_payload(self, payload, charset=None):
return
if not isinstance(charset, Charset):
charset = Charset(charset)
- payload = payload.encode(charset.output_charset)
+ payload = payload.encode(charset.output_charset, 'surrogateescape')
if hasattr(payload, 'decode'):
self._payload = payload.decode('ascii', 'surrogateescape')
else:
diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py
index 39d4ace8d4a1d8..d9af05c306eb30 100644
--- a/Lib/test/test_email/test_email.py
+++ b/Lib/test/test_email/test_email.py
@@ -337,6 +337,21 @@ def test_nonascii_as_string_without_cte(self):
msg = email.message_from_bytes(source)
self.assertEqual(msg.as_string(), expected)
+ def test_nonascii_as_string_with_ascii_charset(self):
+ m = textwrap.dedent("""\
+ MIME-Version: 1.0
+ Content-type: text/plain; charset="us-ascii"
+ Content-Transfer-Encoding: 8bit
+
+ Test if non-ascii messages with no Content-Transfer-Encoding set
+ can be as_string'd:
+ Föö bär
+ """)
+ source = m.encode('iso-8859-1')
+ expected = source.decode('ascii', 'replace')
+ msg = email.message_from_bytes(source)
+ self.assertEqual(msg.as_string(), expected)
+
def test_nonascii_as_string_without_content_type_and_cte(self):
m = textwrap.dedent("""\
MIME-Version: 1.0
diff --git a/Misc/NEWS.d/next/Library/2024-02-29-17-06-54.gh-issue-76511.WqjRLP.rst b/Misc/NEWS.d/next/Library/2024-02-29-17-06-54.gh-issue-76511.WqjRLP.rst
new file mode 100644
index 00000000000000..da62f8a2450711
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-02-29-17-06-54.gh-issue-76511.WqjRLP.rst
@@ -0,0 +1,4 @@
+Fix UnicodeEncodeError in :meth:`email.Message.as_string` that results when
+a message that claims to be in the ascii character set actually has non-ascii
+characters. Non-ascii characters are now replaced with the U+FFFD replacement
+character, like in the ``replace`` error handler.
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]