https://github.com/python/cpython/commit/35a43d43943ad1e038ec2c55cc7006932aeb5b07
commit: 35a43d43943ad1e038ec2c55cc7006932aeb5b07
branch: 3.11
author: Miss Islington (bot) <[email protected]>
committer: pablogsal <[email protected]>
date: 2024-02-26T16:08:37Z
summary:

[3.11] gh-115823: Calculate correctly error locations when dealing with implicit encodings (GH-115824) (#115950)

gh-115823: Calculate correctly error locations when dealing with implicit encodings (GH-115824)
(cherry picked from commit 015b97d19a24a169cc3c0939119e1228791e4253)

Co-authored-by: Pablo Galindo Salgado <[email protected]>

files:
A Misc/NEWS.d/next/Core and Builtins/2024-02-22-16-17-53.gh-issue-115823.c1TreJ.rst
M Lib/test/test_exceptions.py
M Parser/pegen_errors.c

diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py
index cd064219c671e0..ffeb2f04b3f607 100644
--- a/Lib/test/test_exceptions.py
+++ b/Lib/test/test_exceptions.py
@@ -234,7 +234,7 @@ def testSyntaxErrorOffset(self):
         check('Python = "\u1e54\xfd\u0163\u0125\xf2\xf1" +', 1, 20)
         check(b'# -*- coding: cp1251 -*-\nPython = "\xcf\xb3\xf2\xee\xed" +',
               2, 19, encoding='cp1251')
-        check(b'Python = "\xcf\xb3\xf2\xee\xed" +', 1, 18)
+        check(b'Python = "\xcf\xb3\xf2\xee\xed" +', 1, 13)
         check('x = "a', 1, 5)
         check('lambda x: x = 2', 1, 1)
         check('f{a + b + c}', 1, 2)
@@ -301,6 +301,7 @@ def baz():
             {
             6
             0="""''', 5, 13)
+        check('b"fooжжж"'.encode(), 1, 1, 1, 10)
 
         # Errors thrown by symtable.c
         check('x = [(yield i) for i in range(3)]', 1, 7)
diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-02-22-16-17-53.gh-issue-115823.c1TreJ.rst b/Misc/NEWS.d/next/Core and Builtins/2024-02-22-16-17-53.gh-issue-115823.c1TreJ.rst
new file mode 100644
index 00000000000000..8cda4c9343d4d7
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2024-02-22-16-17-53.gh-issue-115823.c1TreJ.rst
@@ -0,0 +1,3 @@
+Properly calculate error ranges in the parser when raising
+:exc:`SyntaxError` exceptions caused by invalid byte sequences. Patch by
+Pablo Galindo
diff --git a/Parser/pegen_errors.c b/Parser/pegen_errors.c
index ccb0d37406181a..4f2063346e9202 100644
--- a/Parser/pegen_errors.c
+++ b/Parser/pegen_errors.c
@@ -377,20 +377,18 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
     Py_ssize_t col_number = col_offset;
     Py_ssize_t end_col_number = end_col_offset;
 
-    if (p->tok->encoding != NULL) {
-        col_number = _PyPegen_byte_offset_to_character_offset(error_line, col_offset);
-        if (col_number < 0) {
+    col_number = _PyPegen_byte_offset_to_character_offset(error_line, col_offset);
+    if (col_number < 0) {
+        goto error;
+    }
+
+    if (end_col_offset > 0) {
+        end_col_number = _PyPegen_byte_offset_to_character_offset(error_line, end_col_offset);
+        if (end_col_number < 0) {
             goto error;
         }
-        if (end_col_number > 0) {
-            Py_ssize_t end_col_offset = _PyPegen_byte_offset_to_character_offset(error_line, end_col_number);
-            if (end_col_offset < 0) {
-                goto error;
-            } else {
-                end_col_number = end_col_offset;
-            }
-        }
     }
+
     tmp = Py_BuildValue("(OnnNnn)", p->tok->filename, lineno, col_number, error_line, end_lineno, end_col_number);
     if (!tmp) {
         goto error;

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]

Reply via email to