Pablo Galindo Salgado <[email protected]> added the comment:
OK, I was able to reproduce the failure with the following build:
❯ gcc --version
gcc (GCC) 10.1.0
Copyright (C) 2020 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
make distclean
./configure --with-address-sanitizer --with-undefined-behavior-sanitizer
LSAN_OPTIONS="suppressions=asan-suppression.txt,print_suppressions=0" make -j
❯ LSAN_OPTIONS="suppressions=asan-suppression.txt,print_suppressions=0" ./python -m test test_eof
0:00:00 load avg: 1.82 Run tests sequentially
0:00:00 load avg: 1.82 [1/1] test_eof
test test_eof failed -- Traceback (most recent call last):
File "/home/pablogsal/github/python/master/Lib/test/test_eof.py", line 54, in
test_line_continuation_EOF_from_file_bpo2180
self.assertIn(b'unexpected EOF while parsing', err)
AssertionError: b'unexpected EOF while parsing' not found in
b'Parser/tokenizer.c:978:50: runtime error: pointer index expression with base
0x625000016900 overflowed to 0xbebebebebebee6be\n=====================
============================================\n==27549==ERROR: AddressSanitizer:
heap-buffer-overflow on address 0x606000027c51 at pc 0x5612210ca7d4 bp
0x7fffe6e9ff70 sp 0x7fffe6e9ff60\nREAD of size 1 at 0x60600
0027c51 thread T0\n #0 0x5612210ca7d3 in ascii_decode
Objects/unicodeobject.c:4941\n #1 0x5612211c9f4a in unicode_decode_utf8
Objects/unicodeobject.c:4999\n #2 0x5612219201bd in byte_offset_to_characte
r_offset Parser/pegen.c:148\n #3 0x5612219201bd in
_PyPegen_raise_error_known_location Parser/pegen.c:412\n #4 0x561221920e4d
in _PyPegen_raise_error Parser/pegen.c:373\n #5 0x561221924981 in tokenizer
_error Parser/pegen.c:321\n #6 0x561221924981 in _PyPegen_fill_token
Parser/pegen.c:638\n #7 0x56122192777f in _PyPegen_expect_token
Parser/pegen.c:753\n #8 0x56122193817a in _tmp_15_rule Parser/parser
.c:16184\n #9 0x5612219274f9 in _PyPegen_lookahead
(/home/pablogsal/github/python/master/python+0x1c344f9)\n #10 0x56122199de2c
in compound_stmt_rule Parser/parser.c:1860\n #11 0x5612219a65c2 in statem
ent_rule Parser/parser.c:1224\n #12 0x5612219a65c2 in _loop1_11_rule
Parser/parser.c:15954\n #13 0x5612219a65c2 in statements_rule
Parser/parser.c:1183\n #14 0x5612219aa4b7 in file_rule Parser/parser.c
:716\n #15 0x5612219aa4b7 in _PyPegen_parse Parser/parser.c:24401\n #16
0x56122192a768 in _PyPegen_run_parser Parser/pegen.c:1077\n #17
0x56122192b3ef in _PyPegen_run_parser_from_file_pointer Parser/pe
gen.c:1137\n #18 0x5612213da3c6 in PyRun_FileExFlags
Python/pythonrun.c:1057\n #19 0x5612213da72c in PyRun_SimpleFileExFlags
Python/pythonrun.c:400\n #20 0x561220df0dbb in pymain_run_file Modules/main.
c:369\n #21 0x561220df0dbb in pymain_run_python Modules/main.c:553\n #22
0x561220df3154 in Py_RunMain Modules/main.c:632\n #23 0x561220df3154 in
pymain_main Modules/main.c:662\n #24 0x561220df3154 i
n Py_BytesMain Modules/main.c:686\n #25 0x7f981bf9a001 in __libc_start_main
(/usr/lib/libc.so.6+0x27001)\n #26 0x561220ded48d in _start
(/home/pablogsal/github/python/master/python+0x10fa48d)\n\n0x6060000
27c51 is located 0 bytes to the right of 49-byte region
[0x606000027c20,0x606000027c51)\nallocated by thread T0 here:\n #0
0x7f981ccce459 in __interceptor_malloc
/build/gcc/src/gcc/libsanitizer/asan/asan_mal
loc_linux.cpp:145\n #1 0x5612210dfa1d in PyUnicode_New
Objects/unicodeobject.c:1437\n #2 0x56122121324b in _PyUnicode_Init
Objects/unicodeobject.c:15535\n #3 0x5612213ae5c3 in pycore_init_types
Python/
pylifecycle.c:599\n #4 0x5612213ae5c3 in pycore_interp_init
Python/pylifecycle.c:724\n #5 0x5612213b8b4b in pyinit_config
Python/pylifecycle.c:765\n #6 0x5612213b8b4b in pyinit_core
Python/pylifecycle.
c:926\n #7 0x5612213bab6c in Py_InitializeFromConfig
Python/pylifecycle.c:1136\n #8 0x561220ded752 in pymain_init
Modules/main.c:66\n #9 0x561220df310a in pymain_main Modules/main.c:653\n
#10 0x5612
20df310a in Py_BytesMain Modules/main.c:686\n #11 0x7f981bf9a001 in
__libc_start_main (/usr/lib/libc.so.6+0x27001)\n\nSUMMARY: AddressSanitizer:
heap-buffer-overflow Objects/unicodeobject.c:4941 in ascii_dec
ode\nShadow bytes around the buggy address:\n 0x0c0c7fffcf30: 00 00 00 00 00
00 00 00 fa fa fa fa 00 00 00 00\n 0x0c0c7fffcf40: 00 00 00 07 fa fa fa fa 00
00 00 00 00 00 00 00\n 0x0c0c7fffcf50: fa fa fa fa 0
0 00 00 00 00 00 00 05 fa fa fa fa\n 0x0c0c7fffcf60: 00 00 00 00 00 00 00 00
fa fa fa fa 00 00 00 00\n 0x0c0c7fffcf70: 00 00 00 00 fa fa fa fa 00 00 00 00
00 00 00 01\n=>0x0c0c7fffcf80: fa fa fa fa 00 00 00 0
0 00 00[01]fa fa fa fa fa\n 0x0c0c7fffcf90: 00 00 00 00 00 00 00 00 fa fa fa
fa 00 00 00 00\n 0x0c0c7fffcfa0: 00 00 05 fa fa fa fa fa 00 00 00 00 00 00 00
fa\n 0x0c0c7fffcfb0: fa fa fa fa 00 00 00 00 00 00 0
0 00 fa fa fa fa\n 0x0c0c7fffcfc0: fd fd fd fd fd fd fd fd fa fa fa fa fd fd
fd fd\n 0x0c0c7fffcfd0: fd fd fd fd fa fa fa fa 00 00 00 00 00 00 00
fa\nShadow byte legend (one shadow byte represents 8 applicati
on bytes):\n Addressable: 00\n Partially addressable: 01 02 03 04
05 06 07 \n Heap left redzone: fa\n Freed heap region: fd\n
Stack left redzone: f1\n Stack mid redzone: f
2\n Stack right redzone: f3\n Stack after return: f5\n Stack use
after scope: f8\n Global redzone: f9\n Global init order:
f6\n Poisoned by user: f7\n Container overflow:
fc\n Array cookie: ac\n Intra object redzone: bb\n ASan
internal: fe\n Left alloca redzone: ca\n Right alloca redzone:
cb\n Shadow gap: cc\n==27549==ABORT
ING\n'
test_eof failed
== Tests result: FAILURE ==
1 test failed:
test_eof
Total duration: 359 ms
Tests result: FAILURE
----------
With this patch applied, test_eof passes under the sanitizers:
diff --git a/Parser/pegen.c b/Parser/pegen.c
index e29910bf86..a9f24ca5fa 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -145,15 +145,15 @@ byte_offset_to_character_offset(PyObject *line, int col_offset)
if (!str) {
return 0;
}
+ Py_ssize_t linesize = PyUnicode_GET_LENGTH(line);
+ if (col_offset > linesize) {
+ col_offset = (int)linesize;
+ }
PyObject *text = PyUnicode_DecodeUTF8(str, col_offset, "replace");
if (!text) {
return 0;
}
Py_ssize_t size = PyUnicode_GET_LENGTH(text);
- str = PyUnicode_AsUTF8(text);
- if (str != NULL && (int)strlen(str) == col_offset) {
- size = strlen(str);
- }
Py_DECREF(text);
return size;
}
@@ -400,9 +400,6 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
if (!error_line) {
Py_ssize_t size = p->tok->inp - p->tok->buf;
- if (size && p->tok->buf[size-1] == '\n') {
- size--;
- }
error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace");
if (!error_line) {
goto error;
❯ LSAN_OPTIONS="suppressions=asan-suppression.txt,print_suppressions=0" ./python -m test test_eof
0:00:00 load avg: 1.39 Run tests sequentially
0:00:00 load avg: 1.39 [1/1] test_eof
== Tests result: SUCCESS ==
1 test OK.
Total duration: 500 ms
Tests result: SUCCESS
----------
_______________________________________
Python tracker <[email protected]>
<https://bugs.python.org/issue40958>
_______________________________________
_______________________________________________
Python-bugs-list mailing list
Unsubscribe:
https://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com