https://github.com/python/cpython/commit/e6b25e9a09dbe09839b36f97b9174a30b1db2dbf
commit: e6b25e9a09dbe09839b36f97b9174a30b1db2dbf
branch: main
author: Serhiy Storchaka <[email protected]>
committer: serhiy-storchaka <[email protected]>
date: 2024-07-23T20:02:54+03:00
summary:

gh-122163: Add notes for JSON serialization errors (GH-122165)

This allows to identify the source of the error.

files:
A Misc/NEWS.d/next/Library/2024-07-23-15-11-13.gh-issue-122163.4wRUuM.rst
M Doc/whatsnew/3.14.rst
M Include/internal/pycore_pyerrors.h
M Lib/json/encoder.py
M Lib/test/test_json/test_default.py
M Lib/test/test_json/test_fail.py
M Lib/test/test_json/test_recursion.py
M Modules/_json.c

diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst
index f45a44be0bf4a5..bd8bdcb6732fde 100644
--- a/Doc/whatsnew/3.14.rst
+++ b/Doc/whatsnew/3.14.rst
@@ -112,6 +112,13 @@ Added support for converting any objects that have the
 :meth:`!as_integer_ratio` method to a :class:`~fractions.Fraction`.
 (Contributed by Serhiy Storchaka in :gh:`82017`.)
 
+json
+----
+
+Add notes for JSON serialization errors that allow to identify the source
+of the error.
+(Contributed by Serhiy Storchaka in :gh:`122163`.)
+
 os
 --
 
diff --git a/Include/internal/pycore_pyerrors.h b/Include/internal/pycore_pyerrors.h
index 15071638203457..9835e495d176e7 100644
--- a/Include/internal/pycore_pyerrors.h
+++ b/Include/internal/pycore_pyerrors.h
@@ -161,7 +161,8 @@ extern PyObject* _Py_Offer_Suggestions(PyObject* exception);
 PyAPI_FUNC(Py_ssize_t) _Py_UTF8_Edit_Cost(PyObject *str_a, PyObject *str_b,
                                           Py_ssize_t max_cost);
 
-void _PyErr_FormatNote(const char *format, ...);
+// Export for '_json' shared extension
+PyAPI_FUNC(void) _PyErr_FormatNote(const char *format, ...);
 
 /* Context manipulation (PEP 3134) */
 
diff --git a/Lib/json/encoder.py b/Lib/json/encoder.py
index 323332f064edf8..b804224098e14f 100644
--- a/Lib/json/encoder.py
+++ b/Lib/json/encoder.py
@@ -293,37 +293,40 @@ def _iterencode_list(lst, _current_indent_level):
         else:
             newline_indent = None
             separator = _item_separator
-        first = True
-        for value in lst:
-            if first:
-                first = False
-            else:
+        for i, value in enumerate(lst):
+            if i:
                 buf = separator
-            if isinstance(value, str):
-                yield buf + _encoder(value)
-            elif value is None:
-                yield buf + 'null'
-            elif value is True:
-                yield buf + 'true'
-            elif value is False:
-                yield buf + 'false'
-            elif isinstance(value, int):
-                # Subclasses of int/float may override __repr__, but we still
-                # want to encode them as integers/floats in JSON. One example
-                # within the standard library is IntEnum.
-                yield buf + _intstr(value)
-            elif isinstance(value, float):
-                # see comment above for int
-                yield buf + _floatstr(value)
-            else:
-                yield buf
-                if isinstance(value, (list, tuple)):
-                    chunks = _iterencode_list(value, _current_indent_level)
-                elif isinstance(value, dict):
-                    chunks = _iterencode_dict(value, _current_indent_level)
+            try:
+                if isinstance(value, str):
+                    yield buf + _encoder(value)
+                elif value is None:
+                    yield buf + 'null'
+                elif value is True:
+                    yield buf + 'true'
+                elif value is False:
+                    yield buf + 'false'
+                elif isinstance(value, int):
+                    # Subclasses of int/float may override __repr__, but we still
+                    # want to encode them as integers/floats in JSON. One example
+                    # within the standard library is IntEnum.
+                    yield buf + _intstr(value)
+                elif isinstance(value, float):
+                    # see comment above for int
+                    yield buf + _floatstr(value)
                 else:
-                    chunks = _iterencode(value, _current_indent_level)
-                yield from chunks
+                    yield buf
+                    if isinstance(value, (list, tuple)):
+                        chunks = _iterencode_list(value, _current_indent_level)
+                    elif isinstance(value, dict):
+                        chunks = _iterencode_dict(value, _current_indent_level)
+                    else:
+                        chunks = _iterencode(value, _current_indent_level)
+                    yield from chunks
+            except GeneratorExit:
+                raise
+            except BaseException as exc:
+                exc.add_note(f'when serializing {type(lst).__name__} item {i}')
+                raise
         if newline_indent is not None:
             _current_indent_level -= 1
             yield '\n' + _indent * _current_indent_level
@@ -382,28 +385,34 @@ def _iterencode_dict(dct, _current_indent_level):
                 yield item_separator
             yield _encoder(key)
             yield _key_separator
-            if isinstance(value, str):
-                yield _encoder(value)
-            elif value is None:
-                yield 'null'
-            elif value is True:
-                yield 'true'
-            elif value is False:
-                yield 'false'
-            elif isinstance(value, int):
-                # see comment for int/float in _make_iterencode
-                yield _intstr(value)
-            elif isinstance(value, float):
-                # see comment for int/float in _make_iterencode
-                yield _floatstr(value)
-            else:
-                if isinstance(value, (list, tuple)):
-                    chunks = _iterencode_list(value, _current_indent_level)
-                elif isinstance(value, dict):
-                    chunks = _iterencode_dict(value, _current_indent_level)
+            try:
+                if isinstance(value, str):
+                    yield _encoder(value)
+                elif value is None:
+                    yield 'null'
+                elif value is True:
+                    yield 'true'
+                elif value is False:
+                    yield 'false'
+                elif isinstance(value, int):
+                    # see comment for int/float in _make_iterencode
+                    yield _intstr(value)
+                elif isinstance(value, float):
+                    # see comment for int/float in _make_iterencode
+                    yield _floatstr(value)
                 else:
-                    chunks = _iterencode(value, _current_indent_level)
-                yield from chunks
+                    if isinstance(value, (list, tuple)):
+                        chunks = _iterencode_list(value, _current_indent_level)
+                    elif isinstance(value, dict):
+                        chunks = _iterencode_dict(value, _current_indent_level)
+                    else:
+                        chunks = _iterencode(value, _current_indent_level)
+                    yield from chunks
+            except GeneratorExit:
+                raise
+            except BaseException as exc:
+                exc.add_note(f'when serializing {type(dct).__name__} item {key!r}')
+                raise
         if newline_indent is not None:
             _current_indent_level -= 1
             yield '\n' + _indent * _current_indent_level
@@ -436,8 +445,14 @@ def _iterencode(o, _current_indent_level):
                 if markerid in markers:
                     raise ValueError("Circular reference detected")
                 markers[markerid] = o
-            o = _default(o)
-            yield from _iterencode(o, _current_indent_level)
+            newobj = _default(o)
+            try:
+                yield from _iterencode(newobj, _current_indent_level)
+            except GeneratorExit:
+                raise
+            except BaseException as exc:
+                exc.add_note(f'when serializing {type(o).__name__} object')
+                raise
             if markers is not None:
                 del markers[markerid]
     return _iterencode
diff --git a/Lib/test/test_json/test_default.py b/Lib/test/test_json/test_default.py
index 3ce16684a08272..811880a15c8020 100644
--- a/Lib/test/test_json/test_default.py
+++ b/Lib/test/test_json/test_default.py
@@ -8,6 +8,24 @@ def test_default(self):
             self.dumps(type, default=repr),
             self.dumps(repr(type)))
 
+    def test_bad_default(self):
+        def default(obj):
+            if obj is NotImplemented:
+                raise ValueError
+            if obj is ...:
+                return NotImplemented
+            if obj is type:
+                return collections
+            return [...]
+
+        with self.assertRaises(ValueError) as cm:
+            self.dumps(type, default=default)
+        self.assertEqual(cm.exception.__notes__,
+                         ['when serializing ellipsis object',
+                          'when serializing list item 0',
+                          'when serializing module object',
+                          'when serializing type object'])
+
     def test_ordereddict(self):
         od = collections.OrderedDict(a=1, b=2, c=3, d=4)
         od.move_to_end('b')
diff --git a/Lib/test/test_json/test_fail.py b/Lib/test/test_json/test_fail.py
index a74240f1107de3..7c1696cc66d12b 100644
--- a/Lib/test/test_json/test_fail.py
+++ b/Lib/test/test_json/test_fail.py
@@ -100,8 +100,27 @@ def test_non_string_keys_dict(self):
     def test_not_serializable(self):
         import sys
         with self.assertRaisesRegex(TypeError,
-                'Object of type module is not JSON serializable'):
+                'Object of type module is not JSON serializable') as cm:
             self.dumps(sys)
+        self.assertFalse(hasattr(cm.exception, '__notes__'))
+
+        with self.assertRaises(TypeError) as cm:
+            self.dumps([1, [2, 3, sys]])
+        self.assertEqual(cm.exception.__notes__,
+                         ['when serializing list item 2',
+                          'when serializing list item 1'])
+
+        with self.assertRaises(TypeError) as cm:
+            self.dumps((1, (2, 3, sys)))
+        self.assertEqual(cm.exception.__notes__,
+                         ['when serializing tuple item 2',
+                          'when serializing tuple item 1'])
+
+        with self.assertRaises(TypeError) as cm:
+            self.dumps({'a': {'b': sys}})
+        self.assertEqual(cm.exception.__notes__,
+                         ["when serializing dict item 'b'",
+                          "when serializing dict item 'a'"])
 
     def test_truncated_input(self):
         test_cases = [
diff --git a/Lib/test/test_json/test_recursion.py b/Lib/test/test_json/test_recursion.py
index 164ff2013eb552..290207e9c15b88 100644
--- a/Lib/test/test_json/test_recursion.py
+++ b/Lib/test/test_json/test_recursion.py
@@ -12,8 +12,8 @@ def test_listrecursion(self):
         x.append(x)
         try:
             self.dumps(x)
-        except ValueError:
-            pass
+        except ValueError as exc:
+            self.assertEqual(exc.__notes__, ["when serializing list item 0"])
         else:
             self.fail("didn't raise ValueError on list recursion")
         x = []
@@ -21,8 +21,8 @@ def test_listrecursion(self):
         x.append(y)
         try:
             self.dumps(x)
-        except ValueError:
-            pass
+        except ValueError as exc:
+            self.assertEqual(exc.__notes__, ["when serializing list item 0"]*2)
         else:
             self.fail("didn't raise ValueError on alternating list recursion")
         y = []
@@ -35,8 +35,8 @@ def test_dictrecursion(self):
         x["test"] = x
         try:
             self.dumps(x)
-        except ValueError:
-            pass
+        except ValueError as exc:
+            self.assertEqual(exc.__notes__, ["when serializing dict item 'test'"])
         else:
             self.fail("didn't raise ValueError on dict recursion")
         x = {}
@@ -60,8 +60,10 @@ def default(self, o):
         enc.recurse = True
         try:
             enc.encode(JSONTestObject)
-        except ValueError:
-            pass
+        except ValueError as exc:
+            self.assertEqual(exc.__notes__,
+                             ["when serializing list item 0",
+                              "when serializing type object"])
         else:
             self.fail("didn't raise ValueError on default recursion")
 
diff --git a/Misc/NEWS.d/next/Library/2024-07-23-15-11-13.gh-issue-122163.4wRUuM.rst b/Misc/NEWS.d/next/Library/2024-07-23-15-11-13.gh-issue-122163.4wRUuM.rst
new file mode 100644
index 00000000000000..a4625c2a0e50e3
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-07-23-15-11-13.gh-issue-122163.4wRUuM.rst
@@ -0,0 +1,2 @@
+Add notes for JSON serialization errors that allow to identify the source of
+the error.
diff --git a/Modules/_json.c b/Modules/_json.c
index c7fe1561bb1018..9e29de0f22465f 100644
--- a/Modules/_json.c
+++ b/Modules/_json.c
@@ -11,6 +11,7 @@
 #include "Python.h"
 #include "pycore_ceval.h"           // _Py_EnterRecursiveCall()
 #include "pycore_runtime.h"         // _PyRuntime
+#include "pycore_pyerrors.h"        // _PyErr_FormatNote
 
 #include "pycore_global_strings.h"  // _Py_ID()
 #include <stdbool.h>                // bool
@@ -1461,6 +1462,7 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
 
         Py_DECREF(newobj);
         if (rv) {
+            _PyErr_FormatNote("when serializing %T object", obj);
             Py_XDECREF(ident);
             return -1;
         }
@@ -1477,7 +1479,7 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
 
 static int
 encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *first,
-                         PyObject *key, PyObject *value,
+                         PyObject *dct, PyObject *key, PyObject *value,
                          PyObject *newline_indent,
                          PyObject *item_separator)
 {
@@ -1535,6 +1537,7 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir
         return -1;
     }
     if (encoder_listencode_obj(s, writer, value, newline_indent) < 0) {
+        _PyErr_FormatNote("when serializing %T item %R", dct, key);
         return -1;
     }
     return 0;
@@ -1606,7 +1609,7 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
 
             key = PyTuple_GET_ITEM(item, 0);
             value = PyTuple_GET_ITEM(item, 1);
-            if (encoder_encode_key_value(s, writer, &first, key, value,
+            if (encoder_encode_key_value(s, writer, &first, dct, key, value,
                                          new_newline_indent,
                                          current_item_separator) < 0)
                 goto bail;
@@ -1616,7 +1619,7 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
     } else {
         Py_ssize_t pos = 0;
         while (PyDict_Next(dct, &pos, &key, &value)) {
-            if (encoder_encode_key_value(s, writer, &first, key, value,
+            if (encoder_encode_key_value(s, writer, &first, dct, key, value,
                                          new_newline_indent,
                                          current_item_separator) < 0)
                 goto bail;
@@ -1710,8 +1713,10 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
             if (_PyUnicodeWriter_WriteStr(writer, separator) < 0)
                 goto bail;
         }
-        if (encoder_listencode_obj(s, writer, obj, new_newline_indent))
+        if (encoder_listencode_obj(s, writer, obj, new_newline_indent)) {
+            _PyErr_FormatNote("when serializing %T item %zd", seq, i);
             goto bail;
+        }
     }
     if (ident != NULL) {
         if (PyDict_DelItem(s->markers, ident))

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]

Reply via email to