https://github.com/python/cpython/commit/ce1b747ff68754635b7b12870dfc527184ee3b39
commit: ce1b747ff68754635b7b12870dfc527184ee3b39
branch: main
author: Victor Stinner <[email protected]>
committer: vstinner <[email protected]>
date: 2025-08-06T14:35:27+02:00
summary:

gh-58124: Avoid CP_UTF8 in UnicodeDecodeError (#137415)

Fix name of the Python encoding in Unicode errors of the code page
codec: use "cp65000" and "cp65001" instead of "CP_UTF7" and "CP_UTF8"
which are not valid Python codec names.

files:
A Misc/NEWS.d/next/Core_and_Builtins/2025-08-05-17-22-24.gh-issue-58124.q1__53.rst
M Lib/test/test_codecs.py
M Objects/unicodeobject.c
M Python/codecs.c

diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index d8666f7290e72e..fd7769e8c275d3 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -3293,7 +3293,7 @@ def test_code_page_name(self):
             codecs.code_page_encode, 932, '\xff')
         self.assertRaisesRegex(UnicodeDecodeError, 'cp932',
             codecs.code_page_decode, 932, b'\x81\x00', 'strict', True)
-        self.assertRaisesRegex(UnicodeDecodeError, 'CP_UTF8',
+        self.assertRaisesRegex(UnicodeDecodeError, 'cp65001',
             codecs.code_page_decode, self.CP_UTF8, b'\xff', 'strict', True)
 
     def check_decode(self, cp, tests):
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-08-05-17-22-24.gh-issue-58124.q1__53.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-08-05-17-22-24.gh-issue-58124.q1__53.rst
new file mode 100644
index 00000000000000..f875d4c5e785c6
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-08-05-17-22-24.gh-issue-58124.q1__53.rst
@@ -0,0 +1,3 @@
+Fix name of the Python encoding in Unicode errors of the code page codec:
+use "cp65000" and "cp65001" instead of "CP_UTF7" and "CP_UTF8" which are not
+valid Python code names. Patch by Victor Stinner.
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 8df7a48284dccd..425e4681f0a4dc 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -7684,10 +7684,6 @@ code_page_name(UINT code_page, PyObject **obj)
     *obj = NULL;
     if (code_page == CP_ACP)
         return "mbcs";
-    if (code_page == CP_UTF7)
-        return "CP_UTF7";
-    if (code_page == CP_UTF8)
-        return "CP_UTF8";
 
     *obj = PyBytes_FromFormat("cp%u", code_page);
     if (*obj == NULL)
diff --git a/Python/codecs.c b/Python/codecs.c
index caf8d9d5f3c188..4e9aecfe75c2c9 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -1204,7 +1204,7 @@ get_standard_encoding_impl(const char *encoding, int *bytelength)
             }
         }
     }
-    else if (strcmp(encoding, "CP_UTF8") == 0) {
+    else if (strcmp(encoding, "cp65001") == 0) {
         *bytelength = 3;
         return ENC_UTF8;
     }

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]

Reply via email to