https://github.com/python/cpython/commit/98b1e519273dd28ce73cc21a636e2f3a937e1f8c
commit: 98b1e519273dd28ce73cc21a636e2f3a937e1f8c
branch: main
author: Pablo Galindo Salgado <[email protected]>
committer: pablogsal <[email protected]>
date: 2026-02-27T12:44:54Z
summary:
gh-145234: Normalize decoded CR in string tokenizer (#145281)
files:
A Misc/NEWS.d/next/Core_and_Builtins/2026-02-26-21-36-00.gh-issue-145234.w0mQ9n.rst
M Lib/test/test_py_compile.py
M Parser/tokenizer/string_tokenizer.c
diff --git a/Lib/test/test_py_compile.py b/Lib/test/test_py_compile.py
index 66de61930968e4..da2d630d7ace7b 100644
--- a/Lib/test/test_py_compile.py
+++ b/Lib/test/test_py_compile.py
@@ -239,6 +239,14 @@ def test_quiet(self):
with self.assertRaises(py_compile.PyCompileError):
py_compile.compile(bad_coding, self.pyc_path, doraise=True,
quiet=1)
+ def test_utf7_decoded_cr_compiles(self):
+ with open(self.source_path, 'wb') as file:
+ file.write(b"#coding=U7+AA0''\n")
+
+ pyc_path = py_compile.compile(self.source_path, self.pyc_path, doraise=True)
+ self.assertEqual(pyc_path, self.pyc_path)
+ self.assertTrue(os.path.exists(self.pyc_path))
+
class PyCompileTestsWithSourceEpoch(PyCompileTestsBase,
unittest.TestCase,
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-02-26-21-36-00.gh-issue-145234.w0mQ9n.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-02-26-21-36-00.gh-issue-145234.w0mQ9n.rst
new file mode 100644
index 00000000000000..caeffff0be8a85
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-02-26-21-36-00.gh-issue-145234.w0mQ9n.rst
@@ -0,0 +1,5 @@
+Fixed a ``SystemError`` in the parser when an encoding cookie (for example,
+UTF-7) decodes to carriage returns (``\r``). Newlines are now normalized after
+decoding in the string tokenizer.
+
+Patch by Pablo Galindo.
diff --git a/Parser/tokenizer/string_tokenizer.c b/Parser/tokenizer/string_tokenizer.c
index 7299ecf483ccd9..7f07cca37ee019 100644
--- a/Parser/tokenizer/string_tokenizer.c
+++ b/Parser/tokenizer/string_tokenizer.c
@@ -108,6 +108,19 @@ decode_str(const char *input, int single, struct tok_state *tok, int preserve_cr
else if (!_PyTokenizer_ensure_utf8(str, tok, 1)) {
return _PyTokenizer_error_ret(tok);
}
+ if (utf8 != NULL) {
+ char *translated = _PyTokenizer_translate_newlines(
+ str, single, preserve_crlf, tok);
+ if (translated == NULL) {
+ Py_DECREF(utf8);
+ return _PyTokenizer_error_ret(tok);
+ }
+ PyMem_Free(tok->input);
+ tok->input = translated;
+ str = translated;
+ Py_CLEAR(utf8);
+ }
+ tok->str = str;
assert(tok->decoding_buffer == NULL);
tok->decoding_buffer = utf8; /* CAUTION */
return str;
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]