https://github.com/python/cpython/commit/2daece9903d1b3843462ced14f6fc659f70eb9f5
commit: 2daece9903d1b3843462ced14f6fc659f70eb9f5
branch: 3.13
author: Pablo Galindo Salgado <[email protected]>
committer: pablogsal <[email protected]>
date: 2026-02-28T01:38:36Z
summary:

[3.13] gh-145234: Normalize decoded CR in string tokenizer (GH-145281) (#145312)

files:
A Misc/NEWS.d/next/Core and Builtins/2026-02-26-21-36-00.gh-issue-145234.w0mQ9n.rst
M Lib/test/test_py_compile.py
M Parser/tokenizer/string_tokenizer.c

diff --git a/Lib/test/test_py_compile.py b/Lib/test/test_py_compile.py
index 64387296e84621..749a877d013ce4 100644
--- a/Lib/test/test_py_compile.py
+++ b/Lib/test/test_py_compile.py
@@ -207,6 +207,14 @@ def test_quiet(self):
             with self.assertRaises(py_compile.PyCompileError):
                 py_compile.compile(bad_coding, doraise=True, quiet=1)
 
+    def test_utf7_decoded_cr_compiles(self):
+        with open(self.source_path, 'wb') as file:
+            file.write(b"#coding=U7+AA0''\n")
+
+        pyc_path = py_compile.compile(self.source_path, self.pyc_path, doraise=True)
+        self.assertEqual(pyc_path, self.pyc_path)
+        self.assertTrue(os.path.exists(self.pyc_path))
+
 
 class PyCompileTestsWithSourceEpoch(PyCompileTestsBase,
                                     unittest.TestCase,
diff --git a/Misc/NEWS.d/next/Core and Builtins/2026-02-26-21-36-00.gh-issue-145234.w0mQ9n.rst b/Misc/NEWS.d/next/Core and Builtins/2026-02-26-21-36-00.gh-issue-145234.w0mQ9n.rst
new file mode 100644
index 00000000000000..caeffff0be8a85
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2026-02-26-21-36-00.gh-issue-145234.w0mQ9n.rst
@@ -0,0 +1,5 @@
+Fixed a ``SystemError`` in the parser when an encoding cookie (for example,
+UTF-7) decodes to carriage returns (``\r``). Newlines are now normalized after
+decoding in the string tokenizer.
+
+Patch by Pablo Galindo.
diff --git a/Parser/tokenizer/string_tokenizer.c b/Parser/tokenizer/string_tokenizer.c
index 0c26d5df8d4a40..560cb37e518be1 100644
--- a/Parser/tokenizer/string_tokenizer.c
+++ b/Parser/tokenizer/string_tokenizer.c
@@ -102,6 +102,19 @@ decode_str(const char *input, int single, struct tok_state *tok, int preserve_crlf
             return _PyTokenizer_error_ret(tok);
         str = PyBytes_AS_STRING(utf8);
     }
+    if (utf8 != NULL) {
+        char *translated = _PyTokenizer_translate_newlines(
+            str, single, preserve_crlf, tok);
+        if (translated == NULL) {
+            Py_DECREF(utf8);
+            return _PyTokenizer_error_ret(tok);
+        }
+        PyMem_Free(tok->input);
+        tok->input = translated;
+        str = translated;
+        Py_CLEAR(utf8);
+    }
+    tok->str = str;
     assert(tok->decoding_buffer == NULL);
     tok->decoding_buffer = utf8; /* CAUTION */
     return str;

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]

Reply via email to