https://github.com/python/cpython/commit/3fc945df22a169e039c3f21b44c0d08390a00c0c
commit: 3fc945df22a169e039c3f21b44c0d08390a00c0c
branch: main
author: AdamKorcz <[email protected]>
committer: pablogsal <[email protected]>
date: 2026-02-26T22:35:08Z
summary:
gh-144872: fix heap buffer overflow `_PyTokenizer_ensure_utf8` (#144807)
files:
A Misc/NEWS.d/next/Core_and_Builtins/2026-02-16-12-28-43.gh-issue-144872.k9_Q30.rst
M Lib/test/test_source_encoding.py
M Parser/tokenizer/helpers.c
diff --git a/Lib/test/test_source_encoding.py b/Lib/test/test_source_encoding.py
index 46b291192df429..8ac64b3105708f 100644
--- a/Lib/test/test_source_encoding.py
+++ b/Lib/test/test_source_encoding.py
@@ -65,6 +65,23 @@ def test_issue7820(self):
# two bytes in common with the UTF-8 BOM
self.assertRaises(SyntaxError, eval, b'\xef\xbb\x20')
+ def test_truncated_utf8_at_eof(self):
+ # Regression test for https://issues.oss-fuzz.com/issues/451112368
+ # Truncated multi-byte UTF-8 sequences at end of input caused an
+ # out-of-bounds read in Parser/tokenizer/helpers.c:valid_utf8().
+ truncated = [
+ b'\xc2', # 2-byte lead, missing 1 continuation
+ b'\xdf', # 2-byte lead, missing 1 continuation
+ b'\xe0', # 3-byte lead, missing 2 continuations
+ b'\xe0\xa0', # 3-byte lead, missing 1 continuation
+ b'\xf0\x90', # 4-byte lead, missing 2 continuations
+ b'\xf0\x90\x80', # 4-byte lead, missing 1 continuation
+ b'\xf3', # 4-byte lead, missing 3 (the oss-fuzz reproducer)
+ ]
+ for seq in truncated:
+ with self.subTest(seq=seq):
+ self.assertRaises(SyntaxError, compile, seq, '<test>', 'exec')
+
@support.requires_subprocess()
def test_20731(self):
sub = subprocess.Popen([sys.executable,
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-02-16-12-28-43.gh-issue-144872.k9_Q30.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-02-16-12-28-43.gh-issue-144872.k9_Q30.rst
new file mode 100644
index 00000000000000..c06bf01baee6fd
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-02-16-12-28-43.gh-issue-144872.k9_Q30.rst
@@ -0,0 +1 @@
+Fix heap buffer overflow in the parser found by OSS-Fuzz.
diff --git a/Parser/tokenizer/helpers.c b/Parser/tokenizer/helpers.c
index fda8216a3005b9..9542969ad3127b 100644
--- a/Parser/tokenizer/helpers.c
+++ b/Parser/tokenizer/helpers.c
@@ -494,9 +494,11 @@ valid_utf8(const unsigned char* s)
return 0;
}
length = expected + 1;
- for (; expected; expected--)
- if (s[expected] < 0x80 || s[expected] >= 0xC0)
+ for (int i = 1; i <= expected; i++) {
+ if (s[i] < 0x80 || s[i] >= 0xC0) {
return 0;
+ }
+ }
return length;
}
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]