https://github.com/python/cpython/commit/57c8e3eb8ef399415a55d218a892efb36c275b43 commit: 57c8e3eb8ef399415a55d218a892efb36c275b43 branch: 3.14 author: Miss Islington (bot) <[email protected]> committer: vstinner <[email protected]> date: 2025-10-07T19:39:31+02:00 summary:
[3.14] gh-58124: Avoid CP_UTF8 in UnicodeDecodeError (GH-137415) (#137460) gh-58124: Avoid CP_UTF8 in UnicodeDecodeError (GH-137415) Fix name of the Python encoding in Unicode errors of the code page codec: use "cp65000" and "cp65001" instead of "CP_UTF7" and "CP_UTF8" which are not valid Python code names. (cherry picked from commit ce1b747ff68754635b7b12870dfc527184ee3b39) Co-authored-by: Victor Stinner <[email protected]> files: A Misc/NEWS.d/next/Core_and_Builtins/2025-08-05-17-22-24.gh-issue-58124.q1__53.rst M Lib/test/test_codecs.py M Objects/unicodeobject.c M Python/codecs.c diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index d8666f7290e72e..fd7769e8c275d3 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -3293,7 +3293,7 @@ def test_code_page_name(self): codecs.code_page_encode, 932, '\xff') self.assertRaisesRegex(UnicodeDecodeError, 'cp932', codecs.code_page_decode, 932, b'\x81\x00', 'strict', True) - self.assertRaisesRegex(UnicodeDecodeError, 'CP_UTF8', + self.assertRaisesRegex(UnicodeDecodeError, 'cp65001', codecs.code_page_decode, self.CP_UTF8, b'\xff', 'strict', True) def check_decode(self, cp, tests): diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-08-05-17-22-24.gh-issue-58124.q1__53.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-08-05-17-22-24.gh-issue-58124.q1__53.rst new file mode 100644 index 00000000000000..f875d4c5e785c6 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-08-05-17-22-24.gh-issue-58124.q1__53.rst @@ -0,0 +1,3 @@ +Fix name of the Python encoding in Unicode errors of the code page codec: +use "cp65000" and "cp65001" instead of "CP_UTF7" and "CP_UTF8" which are not +valid Python code names. Patch by Victor Stinner. diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index c7f560adb99356..0ff2d71924ff9c 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -7713,10 +7713,6 @@ code_page_name(UINT code_page, PyObject **obj) *obj = NULL; if (code_page == CP_ACP) return "mbcs"; - if (code_page == CP_UTF7) - return "CP_UTF7"; - if (code_page == CP_UTF8) - return "CP_UTF8"; *obj = PyBytes_FromFormat("cp%u", code_page); if (*obj == NULL) diff --git a/Python/codecs.c b/Python/codecs.c index caf8d9d5f3c188..4e9aecfe75c2c9 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -1204,7 +1204,7 @@ get_standard_encoding_impl(const char *encoding, int *bytelength) } } } - else if (strcmp(encoding, "CP_UTF8") == 0) { + else if (strcmp(encoding, "cp65001") == 0) { *bytelength = 3; return ENC_UTF8; } _______________________________________________ Python-checkins mailing list -- [email protected] To unsubscribe send an email to [email protected] https://mail.python.org/mailman3//lists/python-checkins.python.org Member address: [email protected]
