https://github.com/python/cpython/commit/05adfbba2abafcdd271bf144a7b3f80bcd927288
commit: 05adfbba2abafcdd271bf144a7b3f80bcd927288
branch: main
author: Pieter Eendebak <[email protected]>
committer: serhiy-storchaka <[email protected]>
date: 2024-05-06T11:04:39+03:00
summary:
gh-95382: Improve performance of json encoder with indent (GH-118105)
files:
A Misc/NEWS.d/next/Core and Builtins/2024-05-03-18-01-26.gh-issue-95382.73FSEv.rst
M Lib/json/encoder.py
M Modules/_json.c
diff --git a/Lib/json/encoder.py b/Lib/json/encoder.py
index 597849eca0524a..323332f064edf8 100644
--- a/Lib/json/encoder.py
+++ b/Lib/json/encoder.py
@@ -244,15 +244,18 @@ def floatstr(o, allow_nan=self.allow_nan,
return text
- if (_one_shot and c_make_encoder is not None
- and self.indent is None):
+ if self.indent is None or isinstance(self.indent, str):
+ indent = self.indent
+ else:
+ indent = ' ' * self.indent
+ if _one_shot and c_make_encoder is not None:
_iterencode = c_make_encoder(
- markers, self.default, _encoder, self.indent,
+ markers, self.default, _encoder, indent,
self.key_separator, self.item_separator, self.sort_keys,
self.skipkeys, self.allow_nan)
else:
_iterencode = _make_iterencode(
- markers, self.default, _encoder, self.indent, floatstr,
+ markers, self.default, _encoder, indent, floatstr,
self.key_separator, self.item_separator, self.sort_keys,
self.skipkeys, _one_shot)
return _iterencode(o, 0)
@@ -272,9 +275,6 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
_intstr=int.__repr__,
):
- if _indent is not None and not isinstance(_indent, str):
- _indent = ' ' * _indent
-
def _iterencode_list(lst, _current_indent_level):
if not lst:
yield '[]'
diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-05-03-18-01-26.gh-issue-95382.73FSEv.rst b/Misc/NEWS.d/next/Core and Builtins/2024-05-03-18-01-26.gh-issue-95382.73FSEv.rst
new file mode 100644
index 00000000000000..097a663e3f5e24
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2024-05-03-18-01-26.gh-issue-95382.73FSEv.rst
@@ -0,0 +1,2 @@
+Improve performance of :func:`json.dumps` and :func:`json.dump` when using the argument *indent*. Depending on the data the encoding using
+:func:`json.dumps` with *indent* can be up to 2 to 3 times faster.
diff --git a/Modules/_json.c b/Modules/_json.c
index fc39f624b723f5..e33ef1f5eea92f 100644
--- a/Modules/_json.c
+++ b/Modules/_json.c
@@ -85,11 +85,11 @@ encoder_dealloc(PyObject *self);
static int
encoder_clear(PyEncoderObject *self);
static int
-encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *seq, Py_ssize_t indent_level);
+encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *seq, PyObject *newline_indent);
static int
-encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *obj, Py_ssize_t indent_level);
+encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *obj, PyObject *newline_indent);
static int
-encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *dct, Py_ssize_t indent_level);
+encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *dct, PyObject *newline_indent);
static PyObject *
_encoded_const(PyObject *obj);
static void
@@ -1251,6 +1251,17 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject
*kwds)
return (PyObject *)s;
}
+static PyObject *
+_create_newline_indent(PyObject *indent, Py_ssize_t indent_level)
+{
+ PyObject *newline_indent = PyUnicode_FromOrdinal('\n');
+ if (newline_indent != NULL && indent_level) {
+ PyUnicode_AppendAndDel(&newline_indent,
+ PySequence_Repeat(indent, indent_level));
+ }
+ return newline_indent;
+}
+
static PyObject *
encoder_call(PyEncoderObject *self, PyObject *args, PyObject *kwds)
{
@@ -1267,10 +1278,20 @@ encoder_call(PyEncoderObject *self, PyObject *args,
PyObject *kwds)
_PyUnicodeWriter_Init(&writer);
writer.overallocate = 1;
- if (encoder_listencode_obj(self, &writer, obj, indent_level)) {
+ PyObject *newline_indent = NULL;
+ if (self->indent != Py_None) {
+ newline_indent = _create_newline_indent(self->indent, indent_level);
+ if (newline_indent == NULL) {
+ _PyUnicodeWriter_Dealloc(&writer);
+ return NULL;
+ }
+ }
+ if (encoder_listencode_obj(self, &writer, obj, newline_indent)) {
_PyUnicodeWriter_Dealloc(&writer);
+ Py_XDECREF(newline_indent);
return NULL;
}
+ Py_XDECREF(newline_indent);
result = PyTuple_New(1);
if (result == NULL ||
@@ -1358,7 +1379,7 @@ _steal_accumulate(_PyUnicodeWriter *writer, PyObject
*stolen)
static int
encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
- PyObject *obj, Py_ssize_t indent_level)
+ PyObject *obj, PyObject *newline_indent)
{
/* Encode Python object obj to a JSON term */
PyObject *newobj;
@@ -1394,14 +1415,14 @@ encoder_listencode_obj(PyEncoderObject *s,
_PyUnicodeWriter *writer,
else if (PyList_Check(obj) || PyTuple_Check(obj)) {
if (_Py_EnterRecursiveCall(" while encoding a JSON object"))
return -1;
- rv = encoder_listencode_list(s, writer, obj, indent_level);
+ rv = encoder_listencode_list(s, writer, obj, newline_indent);
_Py_LeaveRecursiveCall();
return rv;
}
else if (PyDict_Check(obj)) {
if (_Py_EnterRecursiveCall(" while encoding a JSON object"))
return -1;
- rv = encoder_listencode_dict(s, writer, obj, indent_level);
+ rv = encoder_listencode_dict(s, writer, obj, newline_indent);
_Py_LeaveRecursiveCall();
return rv;
}
@@ -1435,7 +1456,7 @@ encoder_listencode_obj(PyEncoderObject *s,
_PyUnicodeWriter *writer,
Py_XDECREF(ident);
return -1;
}
- rv = encoder_listencode_obj(s, writer, newobj, indent_level);
+ rv = encoder_listencode_obj(s, writer, newobj, newline_indent);
_Py_LeaveRecursiveCall();
Py_DECREF(newobj);
@@ -1456,7 +1477,9 @@ encoder_listencode_obj(PyEncoderObject *s,
_PyUnicodeWriter *writer,
static int
encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *first,
- PyObject *key, PyObject *value, Py_ssize_t indent_level)
+ PyObject *key, PyObject *value,
+ PyObject *newline_indent,
+ PyObject *item_separator)
{
PyObject *keystr = NULL;
PyObject *encoded;
@@ -1493,7 +1516,7 @@ encoder_encode_key_value(PyEncoderObject *s,
_PyUnicodeWriter *writer, bool *fir
*first = false;
}
else {
- if (_PyUnicodeWriter_WriteStr(writer, s->item_separator) < 0) {
+ if (_PyUnicodeWriter_WriteStr(writer, item_separator) < 0) {
Py_DECREF(keystr);
return -1;
}
@@ -1511,7 +1534,7 @@ encoder_encode_key_value(PyEncoderObject *s,
_PyUnicodeWriter *writer, bool *fir
if (_PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) {
return -1;
}
- if (encoder_listencode_obj(s, writer, value, indent_level) < 0) {
+ if (encoder_listencode_obj(s, writer, value, newline_indent) < 0) {
return -1;
}
return 0;
@@ -1519,13 +1542,15 @@ encoder_encode_key_value(PyEncoderObject *s,
_PyUnicodeWriter *writer, bool *fir
static int
encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
- PyObject *dct, Py_ssize_t indent_level)
+ PyObject *dct, PyObject *newline_indent)
{
/* Encode Python dict dct a JSON term */
PyObject *ident = NULL;
PyObject *items = NULL;
PyObject *key, *value;
bool first = true;
+ PyObject *new_newline_indent = NULL;
+ PyObject *separator_indent = NULL;
if (PyDict_GET_SIZE(dct) == 0) /* Fast path */
return _PyUnicodeWriter_WriteASCIIString(writer, "{}", 2);
@@ -1549,14 +1574,21 @@ encoder_listencode_dict(PyEncoderObject *s,
_PyUnicodeWriter *writer,
if (_PyUnicodeWriter_WriteChar(writer, '{'))
goto bail;
+ PyObject *current_item_separator = s->item_separator; // borrowed reference
if (s->indent != Py_None) {
- /* TODO: DOES NOT RUN */
- indent_level += 1;
- /*
- newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
- separator = _item_separator + newline_indent
- buf += newline_indent
- */
+ new_newline_indent = PyUnicode_Concat(newline_indent, s->indent);
+ if (new_newline_indent == NULL) {
+ goto bail;
+ }
+ separator_indent = PyUnicode_Concat(current_item_separator, new_newline_indent);
+ if (separator_indent == NULL) {
+ goto bail;
+ }
+ // update item separator with a borrowed reference
+ current_item_separator = separator_indent;
+ if (_PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) {
+ goto bail;
+ }
}
if (s->sort_keys || !PyDict_CheckExact(dct)) {
@@ -1574,7 +1606,9 @@ encoder_listencode_dict(PyEncoderObject *s,
_PyUnicodeWriter *writer,
key = PyTuple_GET_ITEM(item, 0);
value = PyTuple_GET_ITEM(item, 1);
- if (encoder_encode_key_value(s, writer, &first, key, value, indent_level) < 0)
+ if (encoder_encode_key_value(s, writer, &first, key, value,
+ new_newline_indent,
+ current_item_separator) < 0)
goto bail;
}
Py_CLEAR(items);
@@ -1582,7 +1616,9 @@ encoder_listencode_dict(PyEncoderObject *s,
_PyUnicodeWriter *writer,
} else {
Py_ssize_t pos = 0;
while (PyDict_Next(dct, &pos, &key, &value)) {
- if (encoder_encode_key_value(s, writer, &first, key, value, indent_level) < 0)
+ if (encoder_encode_key_value(s, writer, &first, key, value,
+ new_newline_indent,
+ current_item_separator) < 0)
goto bail;
}
}
@@ -1592,12 +1628,15 @@ encoder_listencode_dict(PyEncoderObject *s,
_PyUnicodeWriter *writer,
goto bail;
Py_CLEAR(ident);
}
- /* TODO DOES NOT RUN; dead code
if (s->indent != Py_None) {
- indent_level -= 1;
+ Py_CLEAR(new_newline_indent);
+ Py_CLEAR(separator_indent);
+
+ if (_PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) {
+ goto bail;
+ }
+ }
- yield '\n' + (' ' * (_indent * _current_indent_level))
- }*/
if (_PyUnicodeWriter_WriteChar(writer, '}'))
goto bail;
return 0;
@@ -1605,16 +1644,20 @@ encoder_listencode_dict(PyEncoderObject *s,
_PyUnicodeWriter *writer,
bail:
Py_XDECREF(items);
Py_XDECREF(ident);
+ Py_XDECREF(separator_indent);
+ Py_XDECREF(new_newline_indent);
return -1;
}
static int
encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
- PyObject *seq, Py_ssize_t indent_level)
+ PyObject *seq, PyObject *newline_indent)
{
PyObject *ident = NULL;
PyObject *s_fast = NULL;
Py_ssize_t i;
+ PyObject *new_newline_indent = NULL;
+ PyObject *separator_indent = NULL;
ident = NULL;
s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
@@ -1643,22 +1686,31 @@ encoder_listencode_list(PyEncoderObject *s,
_PyUnicodeWriter *writer,
if (_PyUnicodeWriter_WriteChar(writer, '['))
goto bail;
+
+ PyObject *separator = s->item_separator; // borrowed reference
if (s->indent != Py_None) {
- /* TODO: DOES NOT RUN */
- indent_level += 1;
- /*
- newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
- separator = _item_separator + newline_indent
- buf += newline_indent
- */
+ new_newline_indent = PyUnicode_Concat(newline_indent, s->indent);
+ if (new_newline_indent == NULL) {
+ goto bail;
+ }
+
+ if (_PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) {
+ goto bail;
+ }
+
+ separator_indent = PyUnicode_Concat(separator, new_newline_indent);
+ if (separator_indent == NULL) {
+ goto bail;
+ }
+ separator = separator_indent; // assign separator with borrowed reference
}
for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
if (i) {
- if (_PyUnicodeWriter_WriteStr(writer, s->item_separator))
+ if (_PyUnicodeWriter_WriteStr(writer, separator) < 0)
goto bail;
}
- if (encoder_listencode_obj(s, writer, obj, indent_level))
+ if (encoder_listencode_obj(s, writer, obj, new_newline_indent))
goto bail;
}
if (ident != NULL) {
@@ -1667,12 +1719,14 @@ encoder_listencode_list(PyEncoderObject *s,
_PyUnicodeWriter *writer,
Py_CLEAR(ident);
}
- /* TODO: DOES NOT RUN
if (s->indent != Py_None) {
- indent_level -= 1;
+ Py_CLEAR(new_newline_indent);
+ Py_CLEAR(separator_indent);
+ if (_PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) {
+ goto bail;
+ }
+ }
- yield '\n' + (' ' * (_indent * _current_indent_level))
- }*/
if (_PyUnicodeWriter_WriteChar(writer, ']'))
goto bail;
Py_DECREF(s_fast);
@@ -1681,6 +1735,8 @@ encoder_listencode_list(PyEncoderObject *s,
_PyUnicodeWriter *writer,
bail:
Py_XDECREF(ident);
Py_DECREF(s_fast);
+ Py_XDECREF(separator_indent);
+ Py_XDECREF(new_newline_indent);
return -1;
}
@@ -1721,7 +1777,7 @@ encoder_clear(PyEncoderObject *self)
return 0;
}
-PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
+PyDoc_STRVAR(encoder_doc, "Encoder(markers, default, encoder, indent, key_separator, item_separator, sort_keys, skipkeys, allow_nan)");
static PyType_Slot PyEncoderType_slots[] = {
{Py_tp_doc, (void *)encoder_doc},
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]