https://github.com/python/cpython/commit/5ba0a1aa1fe386fbc863d3fe8f32dfbfe2b1bded
commit: 5ba0a1aa1fe386fbc863d3fe8f32dfbfe2b1bded
branch: main
author: Stan Ulbrych <[email protected]>
committer: malemburg <[email protected]>
date: 2025-11-09T13:37:34+01:00
summary:
gh-136702: Deprecate passing non-ascii *encoding* (str) to
`encodings.normalize_encoding` (#140030)
Closes #136702
files:
A Misc/NEWS.d/next/Library/2025-10-13-11-25-41.gh-issue-136702.uvLGK1.rst
M Doc/deprecations/pending-removal-in-3.17.rst
M Lib/email/_header_value_parser.py
M Lib/email/utils.py
M Lib/encodings/__init__.py
M Lib/test/test_codecs.py
M Lib/test/test_email/test_email.py
M Lib/test/test_email/test_headerregistry.py
diff --git a/Doc/deprecations/pending-removal-in-3.17.rst b/Doc/deprecations/pending-removal-in-3.17.rst
index 0a1c2f08cab3bd..e769c9d371e133 100644
--- a/Doc/deprecations/pending-removal-in-3.17.rst
+++ b/Doc/deprecations/pending-removal-in-3.17.rst
@@ -23,6 +23,12 @@ Pending removal in Python 3.17
(Contributed by Shantanu Jain in :gh:`91896`.)
+* :mod:`encodings`:
+
+ - Passing non-ascii *encoding* names to :func:`encodings.normalize_encoding`
+ is deprecated and scheduled for removal in Python 3.17.
+ (Contributed by Stan Ulbrych in :gh:`136702`)
+
* :mod:`typing`:
- Before Python 3.14, old-style unions were implemented using the private class
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index 91243378dc0441..c7f665b3990512 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -796,6 +796,10 @@ def params(self):
value = urllib.parse.unquote(value, encoding='latin-1')
else:
try:
+ # Explicitly look up the codec for warning generation, see gh-140030
+ # Can be removed in 3.17
+ import codecs
+ codecs.lookup(charset)
value = value.decode(charset, 'surrogateescape')
except (LookupError, UnicodeEncodeError):
# XXX: there should really be a custom defect for
diff --git a/Lib/email/utils.py b/Lib/email/utils.py
index 3de1f0d24a15b0..d4824dc3601b2d 100644
--- a/Lib/email/utils.py
+++ b/Lib/email/utils.py
@@ -460,6 +460,10 @@ def collapse_rfc2231_value(value, errors='replace',
charset = fallback_charset
rawbytes = bytes(text, 'raw-unicode-escape')
try:
+ # Explicitly look up the codec for warning generation, see gh-140030
+ # Can be removed in 3.17
+ import codecs
+ codecs.lookup(charset)
return str(rawbytes, charset, errors)
except LookupError:
# charset is not a known codec.
diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py
index e7e4ca3358e0f9..e205ec326376d8 100644
--- a/Lib/encodings/__init__.py
+++ b/Lib/encodings/__init__.py
@@ -26,7 +26,7 @@
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
-"""#"
+"""
import codecs
import sys
@@ -56,6 +56,12 @@ def normalize_encoding(encoding):
if isinstance(encoding, bytes):
encoding = str(encoding, "ascii")
+ if not encoding.isascii():
+ import warnings
+ warnings.warn(
+ "Support for non-ascii encoding names will be removed in 3.17",
+ DeprecationWarning, stacklevel=2)
+
return _normalize_encoding(encoding)
def search_function(encoding):
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index c35a4508943506..f1f0ac5ad36fd2 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -3886,15 +3886,14 @@ def search_function(encoding):
self.assertEqual(codecs.lookup('TEST.AAA 8'), ('test.aaa-8', 2, 3, 4))
self.assertEqual(codecs.lookup('TEST.AAA---8'), ('test.aaa---8', 2, 3, 4))
self.assertEqual(codecs.lookup('TEST.AAA   8'), ('test.aaa---8', 2, 3, 4))
- self.assertEqual(codecs.lookup('TEST.AAA\xe9\u20ac-8'), ('test.aaa\xe9\u20ac-8', 2, 3, 4))
self.assertEqual(codecs.lookup('TEST.AAA.8'), ('test.aaa.8', 2, 3, 4))
self.assertEqual(codecs.lookup('TEST.AAA...8'), ('test.aaa...8', 2, 3, 4))
+ with self.assertWarns(DeprecationWarning):
+ self.assertEqual(codecs.lookup('TEST.AAA\xe9\u20ac-8'), ('test.aaa\xe9\u20ac-8', 2, 3, 4))
def test_encodings_normalize_encoding(self):
- # encodings.normalize_encoding() ignores non-ASCII characters.
normalize = encodings.normalize_encoding
self.assertEqual(normalize('utf_8'), 'utf_8')
- self.assertEqual(normalize('utf\xE9\u20AC\U0010ffff-8'), 'utf_8')
self.assertEqual(normalize('utf 8'), 'utf_8')
# encodings.normalize_encoding() doesn't convert
# characters to lower case.
@@ -3902,6 +3901,11 @@ def test_encodings_normalize_encoding(self):
self.assertEqual(normalize('utf.8'), 'utf.8')
self.assertEqual(normalize('utf...8'), 'utf...8')
+ # Non-ASCII *encoding* is deprecated.
+ with self.assertWarnsRegex(DeprecationWarning,
+ "Support for non-ascii encoding names will be removed in 3.17"):
+ self.assertEqual(normalize('utf\xE9\u20AC\U0010ffff-8'), 'utf_8')
+
if __name__ == "__main__":
unittest.main()
diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py
index 4cd587bcd76040..1900adf463befc 100644
--- a/Lib/test/test_email/test_email.py
+++ b/Lib/test/test_email/test_email.py
@@ -5738,7 +5738,8 @@ def test_rfc2231_bad_character_in_encoding(self):
"""
msg = email.message_from_string(m)
- self.assertEqual(msg.get_filename(), 'myfile.txt')
+ with self.assertWarns(DeprecationWarning):
+ self.assertEqual(msg.get_filename(), 'myfile.txt')
def test_rfc2231_single_tick_in_filename_extended(self):
eq = self.assertEqual
diff --git a/Lib/test/test_email/test_headerregistry.py b/Lib/test/test_email/test_headerregistry.py
index ff7a6da644d572..1d0d0a49a82917 100644
--- a/Lib/test/test_email/test_headerregistry.py
+++ b/Lib/test/test_email/test_headerregistry.py
@@ -247,7 +247,15 @@ def content_type_as_value(self,
decoded = args[2] if l>2 and args[2] is not DITTO else source
header = 'Content-Type:' + ' ' if source else ''
folded = args[3] if l>3 else header + decoded + '\n'
- h = self.make_header('Content-Type', source)
+ # Both rfc2231 test cases with utf-8%E2%80%9D raise warnings,
+ # clear encoding cache to ensure test isolation.
+ if 'utf-8%E2%80%9D' in source and 'ascii' not in source:
+ import encodings
+ encodings._cache.clear()
+ with self.assertWarns(DeprecationWarning):
+ h = self.make_header('Content-Type', source)
+ else:
+ h = self.make_header('Content-Type', source)
self.assertEqual(h.content_type, content_type)
self.assertEqual(h.maintype, maintype)
self.assertEqual(h.subtype, subtype)
diff --git a/Misc/NEWS.d/next/Library/2025-10-13-11-25-41.gh-issue-136702.uvLGK1.rst b/Misc/NEWS.d/next/Library/2025-10-13-11-25-41.gh-issue-136702.uvLGK1.rst
new file mode 100644
index 00000000000000..88303f017f58c4
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-10-13-11-25-41.gh-issue-136702.uvLGK1.rst
@@ -0,0 +1,3 @@
+:mod:`encodings`: Deprecate passing a non-ascii *encoding* name to
+:func:`encodings.normalize_encoding` and schedule removal of support for
+Python 3.17.
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]