https://github.com/python/cpython/commit/18542240c26cea2b5e0372b668a5e67bd9015379
commit: 18542240c26cea2b5e0372b668a5e67bd9015379
branch: 3.12
author: Serhiy Storchaka <[email protected]>
committer: serhiy-storchaka <[email protected]>
date: 2024-09-24T08:53:54Z
summary:
[3.12] gh-124188: Fix PyErr_ProgramTextObject() (GH-124189) (GH-124426)
* Detect source file encoding.
* Use the "replace" error handler even for UTF-8 (default) encoding.
* Remove the BOM.
* Fix detection of too long lines if they contain NUL.
* Return the head rather than the tail for truncated long lines.
(cherry picked from commit e2f710792b0418b8ca1ca3b8cdf39588c7268495)
files:
A Misc/NEWS.d/next/Core_and_Builtins/2024-09-17-22-06-01.gh-issue-124188.aFqNAB.rst
M Lib/test/support/script_helper.py
M Lib/test/test_eof.py
M Lib/test/test_exceptions.py
M Python/errors.c
diff --git a/Lib/test/support/script_helper.py b/Lib/test/support/script_helper.py
index 565f3b54a04be5..761878315143b4 100644
--- a/Lib/test/support/script_helper.py
+++ b/Lib/test/support/script_helper.py
@@ -218,9 +218,13 @@ def make_script(script_dir, script_basename, source,
omit_suffix=False):
if not omit_suffix:
script_filename += os.extsep + 'py'
script_name = os.path.join(script_dir, script_filename)
- # The script should be encoded to UTF-8, the default string encoding
- with open(script_name, 'w', encoding='utf-8') as script_file:
- script_file.write(source)
+ if isinstance(source, str):
+ # The script should be encoded to UTF-8, the default string encoding
+ with open(script_name, 'w', encoding='utf-8') as script_file:
+ script_file.write(source)
+ else:
+ with open(script_name, 'wb') as script_file:
+ script_file.write(source)
importlib.invalidate_caches()
return script_name
diff --git a/Lib/test/test_eof.py b/Lib/test/test_eof.py
index be4fd73bfdc36b..e377383450e19d 100644
--- a/Lib/test/test_eof.py
+++ b/Lib/test/test_eof.py
@@ -1,6 +1,7 @@
"""test script for a few new invalid token catches"""
import sys
+from codecs import BOM_UTF8
from test import support
from test.support import os_helper
from test.support import script_helper
@@ -11,67 +12,158 @@ class EOFTestCase(unittest.TestCase):
def test_EOF_single_quote(self):
expect = "unterminated string literal (detected at line 1) (<string>,
line 1)"
for quote in ("'", "\""):
- try:
+ with self.assertRaises(SyntaxError) as cm:
eval(f"""{quote}this is a test\
""")
- except SyntaxError as msg:
- self.assertEqual(str(msg), expect)
- self.assertEqual(msg.offset, 1)
- else:
- raise support.TestFailed
+ self.assertEqual(str(cm.exception), expect)
+ self.assertEqual(cm.exception.offset, 1)
def test_EOFS(self):
- expect = ("unterminated triple-quoted string literal (detected at line
1) (<string>, line 1)")
- try:
- eval("""'''this is a test""")
- except SyntaxError as msg:
- self.assertEqual(str(msg), expect)
- self.assertEqual(msg.offset, 1)
- else:
- raise support.TestFailed
+ expect = ("unterminated triple-quoted string literal (detected at line
3) (<string>, line 1)")
+ with self.assertRaises(SyntaxError) as cm:
+ eval("""ä = '''thîs is \na \ntest""")
+ self.assertEqual(str(cm.exception), expect)
+ self.assertEqual(cm.exception.text, "ä = '''thîs is ")
+ self.assertEqual(cm.exception.offset, 5)
+
+ with self.assertRaises(SyntaxError) as cm:
+ eval("""ä = '''thîs is \na \ntest""".encode())
+ self.assertEqual(str(cm.exception), expect)
+ self.assertEqual(cm.exception.text, "ä = '''thîs is ")
+ self.assertEqual(cm.exception.offset, 5)
+
+ with self.assertRaises(SyntaxError) as cm:
+ eval(BOM_UTF8 + """ä = '''thîs is \na \ntest""".encode())
+ self.assertEqual(str(cm.exception), expect)
+ self.assertEqual(cm.exception.text, "ä = '''thîs is ")
+ self.assertEqual(cm.exception.offset, 5)
+
+ with self.assertRaises(SyntaxError) as cm:
+ eval("""# coding: latin1\nä = '''thîs is \na
\ntest""".encode('latin1'))
+ self.assertEqual(str(cm.exception), "unterminated triple-quoted string
literal (detected at line 4) (<string>, line 2)")
+ self.assertEqual(cm.exception.text, "ä = '''thîs is ")
+ self.assertEqual(cm.exception.offset, 5)
def test_EOFS_with_file(self):
expect = ("(<string>, line 1)")
with os_helper.temp_dir() as temp_dir:
- file_name = script_helper.make_script(temp_dir, 'foo', """'''this
is \na \ntest""")
- rc, out, err = script_helper.assert_python_failure(file_name)
- self.assertIn(b'unterminated triple-quoted string literal (detected at
line 3)', err)
+ file_name = script_helper.make_script(temp_dir, 'foo',
+ """ä = '''thîs is \na
\ntest""")
+ rc, out, err = script_helper.assert_python_failure('-X', 'utf8',
file_name)
+ err = err.decode().splitlines()
+ self.assertEqual(err[-3:], [
+ " ä = '''thîs is ",
+ ' ^',
+ 'SyntaxError: unterminated triple-quoted string literal
(detected at line 3)'])
+
+ file_name = script_helper.make_script(temp_dir, 'foo',
+ """ä = '''thîs is \na
\ntest""".encode())
+ rc, out, err = script_helper.assert_python_failure('-X', 'utf8',
file_name)
+ err = err.decode().splitlines()
+ self.assertEqual(err[-3:], [
+ " ä = '''thîs is ",
+ ' ^',
+ 'SyntaxError: unterminated triple-quoted string literal
(detected at line 3)'])
+
+ file_name = script_helper.make_script(temp_dir, 'foo',
+ BOM_UTF8 + """ä = '''thîs is
\na \ntest""".encode())
+ rc, out, err = script_helper.assert_python_failure('-X', 'utf8',
file_name)
+ err = err.decode().splitlines()
+ self.assertEqual(err[-3:], [
+ " ä = '''thîs is ",
+ ' ^',
+ 'SyntaxError: unterminated triple-quoted string literal
(detected at line 3)'])
+
+ file_name = script_helper.make_script(temp_dir, 'foo',
+ """# coding: latin1\nä =
'''thîs is \na \ntest""".encode('latin1'))
+ rc, out, err = script_helper.assert_python_failure('-X', 'utf8',
file_name)
+ err = err.decode().splitlines()
+ self.assertEqual(err[-3:], [
+ " ä = '''thîs is ",
+ ' ^',
+ 'SyntaxError: unterminated triple-quoted string literal
(detected at line 4)'])
@warnings_helper.ignore_warnings(category=SyntaxWarning)
def test_eof_with_line_continuation(self):
expect = "unexpected EOF while parsing (<string>, line 1)"
- try:
+ with self.assertRaises(SyntaxError) as cm:
compile('"\\Xhh" \\', '<string>', 'exec')
- except SyntaxError as msg:
- self.assertEqual(str(msg), expect)
- else:
- raise support.TestFailed
+ self.assertEqual(str(cm.exception), expect)
def test_line_continuation_EOF(self):
"""A continuation at the end of input must be an error; bpo2180."""
expect = 'unexpected EOF while parsing (<string>, line 1)'
- with self.assertRaises(SyntaxError) as excinfo:
- exec('x = 5\\')
- self.assertEqual(str(excinfo.exception), expect)
- with self.assertRaises(SyntaxError) as excinfo:
+ with self.assertRaises(SyntaxError) as cm:
+ exec('ä = 5\\')
+ self.assertEqual(str(cm.exception), expect)
+ self.assertEqual(cm.exception.text, 'ä = 5\\\n')
+ self.assertEqual(cm.exception.offset, 7)
+
+ with self.assertRaises(SyntaxError) as cm:
+ exec('ä = 5\\'.encode())
+ self.assertEqual(str(cm.exception), expect)
+ self.assertEqual(cm.exception.text, 'ä = 5\\\n')
+ self.assertEqual(cm.exception.offset, 7)
+
+ with self.assertRaises(SyntaxError) as cm:
+ exec('# coding:latin1\nä = 5\\'.encode('latin1'))
+ self.assertEqual(str(cm.exception),
+ 'unexpected EOF while parsing (<string>, line 2)')
+ self.assertEqual(cm.exception.text, 'ä = 5\\\n')
+ self.assertEqual(cm.exception.offset, 7)
+
+ with self.assertRaises(SyntaxError) as cm:
+ exec(BOM_UTF8 + 'ä = 5\\'.encode())
+ self.assertEqual(str(cm.exception), expect)
+ self.assertEqual(cm.exception.text, 'ä = 5\\\n')
+ self.assertEqual(cm.exception.offset, 7)
+
+ with self.assertRaises(SyntaxError) as cm:
exec('\\')
- self.assertEqual(str(excinfo.exception), expect)
+ self.assertEqual(str(cm.exception), expect)
@unittest.skipIf(not sys.executable, "sys.executable required")
def test_line_continuation_EOF_from_file_bpo2180(self):
"""Ensure tok_nextc() does not add too many ending newlines."""
with os_helper.temp_dir() as temp_dir:
file_name = script_helper.make_script(temp_dir, 'foo', '\\')
- rc, out, err = script_helper.assert_python_failure(file_name)
- self.assertIn(b'unexpected EOF while parsing', err)
- self.assertIn(b'line 1', err)
- self.assertIn(b'\\', err)
-
- file_name = script_helper.make_script(temp_dir, 'foo', 'y = 6\\')
- rc, out, err = script_helper.assert_python_failure(file_name)
- self.assertIn(b'unexpected EOF while parsing', err)
- self.assertIn(b'line 1', err)
- self.assertIn(b'y = 6\\', err)
+ rc, out, err = script_helper.assert_python_failure('-X', 'utf8',
file_name)
+ err = err.decode().splitlines()
+ self.assertEqual(err[-2:], [
+ ' \\',
+ 'SyntaxError: unexpected EOF while parsing'])
+ self.assertEqual(err[-3][-8:], ', line 1', err)
+
+ file_name = script_helper.make_script(temp_dir, 'foo', 'ä = 6\\')
+ rc, out, err = script_helper.assert_python_failure('-X', 'utf8',
file_name)
+ err = err.decode().splitlines()
+ self.assertEqual(err[-3:], [
+ ' ä = 6\\',
+ ' ^',
+ 'SyntaxError: unexpected EOF while parsing'])
+ self.assertEqual(err[-4][-8:], ', line 1', err)
+
+ file_name = script_helper.make_script(temp_dir, 'foo',
+ '# coding:latin1\n'
+ 'ä = 7\\'.encode('latin1'))
+ rc, out, err = script_helper.assert_python_failure('-X', 'utf8',
file_name)
+ err = err.decode().splitlines()
+ self.assertEqual(err[-3:], [
+ ' ä = 7\\',
+ ' ^',
+ 'SyntaxError: unexpected EOF while parsing'])
+ self.assertEqual(err[-4][-8:], ', line 2', err)
+
+ file_name = script_helper.make_script(temp_dir, 'foo',
+ BOM_UTF8 + 'ä =
8\\'.encode())
+ rc, out, err = script_helper.assert_python_failure('-X', 'utf8',
file_name)
+ err = err.decode().splitlines()
+ self.assertEqual(err[-3:], [
+ ' ä = 8\\',
+ ' ^',
+ 'SyntaxError: unexpected EOF while parsing'])
+ self.assertEqual(err[-4][-8:], ', line 1', err)
+
if __name__ == "__main__":
unittest.main()
diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py
index b2a24ca21860ee..5b0334f34652d2 100644
--- a/Lib/test/test_exceptions.py
+++ b/Lib/test/test_exceptions.py
@@ -7,6 +7,7 @@
import pickle
import weakref
import errno
+from codecs import BOM_UTF8
from textwrap import dedent
from test.support import (captured_stderr, check_impl_detail,
@@ -1971,7 +1972,115 @@ def test_copy_pickle(self):
self.assertEqual(exc.name, orig.name)
self.assertEqual(exc.path, orig.path)
+
+def run_script(source):
+ if isinstance(source, str):
+ with open(TESTFN, 'w', encoding='utf-8') as testfile:
+ testfile.write(dedent(source))
+ else:
+ with open(TESTFN, 'wb') as testfile:
+ testfile.write(source)
+ _rc, _out, err = script_helper.assert_python_failure('-Wd', '-X', 'utf8',
TESTFN)
+ return err.decode('utf-8').splitlines()
+
+class AssertionErrorTests(unittest.TestCase):
+ def tearDown(self):
+ unlink(TESTFN)
+
+ def test_assertion_error_location(self):
+ cases = [
+ ('assert None',
+ [
+ ' assert None',
+ ' ^^^^',
+ 'AssertionError',
+ ],
+ ),
+ ('assert 0',
+ [
+ ' assert 0',
+ ' ^',
+ 'AssertionError',
+ ],
+ ),
+ ('assert 1 > 2',
+ [
+ ' assert 1 > 2',
+ ' ^^^^^',
+ 'AssertionError',
+ ],
+ ),
+ ('assert 1 > 2 and 3 > 2',
+ [
+ ' assert 1 > 2 and 3 > 2',
+ ' ^^^^^^^^^^^^^^^',
+ 'AssertionError',
+ ],
+ ),
+ ('assert 1 > 2, "messäge"',
+ [
+ ' assert 1 > 2, "messäge"',
+ ' ^^^^^',
+ 'AssertionError: messäge',
+ ],
+ ),
+ ('assert 1 > 2, "messäge"'.encode(),
+ [
+ ' assert 1 > 2, "messäge"',
+ ' ^^^^^',
+ 'AssertionError: messäge',
+ ],
+ ),
+ ('# coding: latin1\nassert 1 > 2, "messäge"'.encode('latin1'),
+ [
+ ' assert 1 > 2, "messäge"',
+ ' ^^^^^',
+ 'AssertionError: messäge',
+ ],
+ ),
+
+ # Multiline:
+ ("""
+ assert (
+ 1 > 2)
+ """,
+ [
+ ' 1 > 2)',
+ ' ^^^^^',
+ 'AssertionError',
+ ],
+ ),
+ ("""
+ assert (
+ 1 > 2), "Message"
+ """,
+ [
+ ' 1 > 2), "Message"',
+ ' ^^^^^',
+ 'AssertionError: Message',
+ ],
+ ),
+ ("""
+ assert (
+ 1 > 2), \\
+ "Message"
+ """,
+ [
+ ' 1 > 2), \\',
+ ' ^^^^^',
+ 'AssertionError: Message',
+ ],
+ ),
+ ]
+ for source, expected in cases:
+ with self.subTest(source=source):
+ result = run_script(source)
+ self.assertEqual(result[-3:], expected)
+
+
class SyntaxErrorTests(unittest.TestCase):
+ maxDiff = None
+
def test_range_of_offsets(self):
cases = [
# Basic range from 2->7
@@ -2063,45 +2172,103 @@ def test_range_of_offsets(self):
the_exception = exc
def test_encodings(self):
+ self.addCleanup(unlink, TESTFN)
source = (
'# -*- coding: cp437 -*-\n'
'"¢¢¢¢¢¢" + f(4, x for x in range(1))\n'
)
- try:
- with open(TESTFN, 'w', encoding='cp437') as testfile:
- testfile.write(source)
- rc, out, err = script_helper.assert_python_failure('-Wd', '-X',
'utf8', TESTFN)
- err = err.decode('utf-8').splitlines()
-
- self.assertEqual(err[-3], ' "¢¢¢¢¢¢" + f(4, x for x in
range(1))')
- self.assertEqual(err[-2], '
^^^^^^^^^^^^^^^^^^^')
- finally:
- unlink(TESTFN)
+ err = run_script(source.encode('cp437'))
+ self.assertEqual(err[-3], ' "¢¢¢¢¢¢" + f(4, x for x in
range(1))')
+ self.assertEqual(err[-2], '
^^^^^^^^^^^^^^^^^^^')
# Check backwards tokenizer errors
source = '# -*- coding: ascii -*-\n\n(\n'
- try:
- with open(TESTFN, 'w', encoding='ascii') as testfile:
- testfile.write(source)
- rc, out, err = script_helper.assert_python_failure('-Wd', '-X',
'utf8', TESTFN)
- err = err.decode('utf-8').splitlines()
-
- self.assertEqual(err[-3], ' (')
- self.assertEqual(err[-2], ' ^')
- finally:
- unlink(TESTFN)
+ err = run_script(source)
+ self.assertEqual(err[-3], ' (')
+ self.assertEqual(err[-2], ' ^')
def test_non_utf8(self):
# Check non utf-8 characters
- try:
- with open(TESTFN, 'bw') as testfile:
- testfile.write(b"\x89")
- rc, out, err = script_helper.assert_python_failure('-Wd', '-X',
'utf8', TESTFN)
- err = err.decode('utf-8').splitlines()
+ self.addCleanup(unlink, TESTFN)
+ err = run_script(b"\x89")
+ self.assertIn("SyntaxError: Non-UTF-8 code starting with '\\x89' in
file", err[-1])
- self.assertIn("SyntaxError: Non-UTF-8 code starting with '\\x89'
in file", err[-1])
- finally:
- unlink(TESTFN)
+ def test_string_source(self):
+ def try_compile(source):
+ with self.assertRaises(SyntaxError) as cm:
+ compile(source, '<string>', 'exec')
+ return cm.exception
+
+ exc = try_compile('return "ä"')
+ self.assertEqual(str(exc), "'return' outside function (<string>, line
1)")
+ self.assertIsNone(exc.text)
+ self.assertEqual(exc.offset, 1)
+ self.assertEqual(exc.end_offset, 12)
+
+ exc = try_compile('return "ä"'.encode())
+ self.assertEqual(str(exc), "'return' outside function (<string>, line
1)")
+ self.assertIsNone(exc.text)
+ self.assertEqual(exc.offset, 1)
+ self.assertEqual(exc.end_offset, 12)
+
+ exc = try_compile(BOM_UTF8 + 'return "ä"'.encode())
+ self.assertEqual(str(exc), "'return' outside function (<string>, line
1)")
+ self.assertIsNone(exc.text)
+ self.assertEqual(exc.offset, 1)
+ self.assertEqual(exc.end_offset, 12)
+
+ exc = try_compile('# coding: latin1\nreturn "ä"'.encode('latin1'))
+ self.assertEqual(str(exc), "'return' outside function (<string>, line
2)")
+ self.assertIsNone(exc.text)
+ self.assertEqual(exc.offset, 1)
+ self.assertEqual(exc.end_offset, 12)
+
+ exc = try_compile('return "ä" #' + 'ä'*1000)
+ self.assertEqual(str(exc), "'return' outside function (<string>, line
1)")
+ self.assertIsNone(exc.text)
+ self.assertEqual(exc.offset, 1)
+ self.assertEqual(exc.end_offset, 12)
+
+ exc = try_compile('return "ä" # ' + 'ä'*1000)
+ self.assertEqual(str(exc), "'return' outside function (<string>, line
1)")
+ self.assertIsNone(exc.text)
+ self.assertEqual(exc.offset, 1)
+ self.assertEqual(exc.end_offset, 12)
+
+ def test_file_source(self):
+ self.addCleanup(unlink, TESTFN)
+ err = run_script('return "ä"')
+ # NOTE: Offset is calculated incorrectly for non-ASCII strings.
+ self.assertEqual(err[-3::2], [
+ ' return "ä"',
+ "SyntaxError: 'return' outside function"])
+
+ err = run_script('return "ä"'.encode())
+ self.assertEqual(err[-3::2], [
+ ' return "ä"',
+ "SyntaxError: 'return' outside function"])
+
+ err = run_script(BOM_UTF8 + 'return "ä"'.encode())
+ self.assertEqual(err[-3::2], [
+ ' return "ä"',
+ "SyntaxError: 'return' outside function"])
+
+ err = run_script('# coding: latin1\nreturn "ä"'.encode('latin1'))
+ self.assertEqual(err[-3::2], [
+ ' return "ä"',
+ "SyntaxError: 'return' outside function"])
+
+ err = run_script('return "ä" #' + 'ä'*1000)
+ self.assertEqual(err[-2:], [
+ ' ^^^^^^^^^^^',
+ "SyntaxError: 'return' outside function"])
+ self.assertEqual(err[-3][:100], ' return "ä" #' + 'ä'*84)
+
+ err = run_script('return "ä" # ' + 'ä'*1000)
+ self.assertEqual(err[-2:], [
+ ' ^^^^^^^^^^^',
+ "SyntaxError: 'return' outside function"])
+ self.assertEqual(err[-3][:100], ' return "ä" # ' + 'ä'*83)
def test_attributes_new_constructor(self):
args = ("bad.py", 1, 2, "abcdefg", 1, 100)
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-09-17-22-06-01.gh-issue-124188.aFqNAB.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-17-22-06-01.gh-issue-124188.aFqNAB.rst
new file mode 100644
index 00000000000000..0c2935fbe000bc
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-17-22-06-01.gh-issue-124188.aFqNAB.rst
@@ -0,0 +1,2 @@
+Fix reading and decoding a line from the source file with non-UTF-8 encoding
+for syntax errors raised in the compiler.
diff --git a/Python/errors.c b/Python/errors.c
index 6c46d1f2136654..cbfc2faf8bb3bc 100644
--- a/Python/errors.c
+++ b/Python/errors.c
@@ -1871,44 +1871,44 @@ PyErr_SyntaxLocationEx(const char *filename, int
lineno, int col_offset)
functionality in tb_displayline() in traceback.c. */
static PyObject *
-err_programtext(PyThreadState *tstate, FILE *fp, int lineno, const char*
encoding)
+err_programtext(FILE *fp, int lineno, const char* encoding)
{
- int i;
char linebuf[1000];
- if (fp == NULL) {
- return NULL;
+ size_t line_size = 0;
+
+ for (int i = 0; i < lineno; ) {
+ line_size = 0;
+ if (_Py_UniversalNewlineFgetsWithSize(linebuf, sizeof(linebuf),
+ fp, NULL, &line_size) == NULL)
+ {
+ /* Error or EOF. */
+ return NULL;
+ }
+ /* fgets read *something*; if it didn't fill the
+ whole buffer, it must have found a newline
+ or hit the end of the file; if the last character is \n,
+ it obviously found a newline; else we haven't
+ yet seen a newline, so must continue */
+ if (i + 1 < lineno
+ && line_size == sizeof(linebuf) - 1
+ && linebuf[sizeof(linebuf) - 2] != '\n')
+ {
+ continue;
+ }
+ i++;
}
- for (i = 0; i < lineno; i++) {
- char *pLastChar = &linebuf[sizeof(linebuf) - 2];
- do {
- *pLastChar = '\0';
- if (Py_UniversalNewlineFgets(linebuf, sizeof linebuf,
- fp, NULL) == NULL) {
- goto after_loop;
- }
- /* fgets read *something*; if it didn't get as
- far as pLastChar, it must have found a newline
- or hit the end of the file; if pLastChar is \n,
- it obviously found a newline; else we haven't
- yet seen a newline, so must continue */
- } while (*pLastChar != '\0' && *pLastChar != '\n');
+ const char *line = linebuf;
+ /* Skip BOM. */
+ if (lineno == 1 && line_size >= 3 && memcmp(line, "\xef\xbb\xbf", 3) == 0)
{
+ line += 3;
+ line_size -= 3;
}
-
-after_loop:
- fclose(fp);
- if (i == lineno) {
- PyObject *res;
- if (encoding != NULL) {
- res = PyUnicode_Decode(linebuf, strlen(linebuf), encoding,
"replace");
- } else {
- res = PyUnicode_FromString(linebuf);
- }
- if (res == NULL)
- _PyErr_Clear(tstate);
- return res;
+ PyObject *res = PyUnicode_Decode(line, line_size, encoding, "replace");
+ if (res == NULL) {
+ PyErr_Clear();
}
- return NULL;
+ return res;
}
PyObject *
@@ -1928,20 +1928,41 @@ PyErr_ProgramText(const char *filename, int lineno)
return res;
}
+/* Function from Parser/tokenizer/file_tokenizer.c */
+extern char* _PyTokenizer_FindEncodingFilename(int, PyObject *);
+
PyObject *
_PyErr_ProgramDecodedTextObject(PyObject *filename, int lineno, const char*
encoding)
{
+ char *found_encoding = NULL;
if (filename == NULL || lineno <= 0) {
return NULL;
}
- PyThreadState *tstate = _PyThreadState_GET();
FILE *fp = _Py_fopen_obj(filename, "r" PY_STDIOTEXTMODE);
if (fp == NULL) {
- _PyErr_Clear(tstate);
+ PyErr_Clear();
return NULL;
}
- return err_programtext(tstate, fp, lineno, encoding);
+ if (encoding == NULL) {
+ int fd = fileno(fp);
+ found_encoding = _PyTokenizer_FindEncodingFilename(fd, filename);
+ encoding = found_encoding;
+ if (encoding == NULL) {
+ PyErr_Clear();
+ encoding = "utf-8";
+ }
+ /* Reset position */
+ if (lseek(fd, 0, SEEK_SET) == (off_t)-1) {
+ fclose(fp);
+ PyMem_Free(found_encoding);
+ return NULL;
+ }
+ }
+ PyObject *res = err_programtext(fp, lineno, encoding);
+ fclose(fp);
+ PyMem_Free(found_encoding);
+ return res;
}
PyObject *
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]