Author: Armin Rigo <ar...@tunes.org> Branch: py3.5 Changeset: r93669:18c3825bad60 Date: 2018-01-15 13:02 +0100 http://bitbucket.org/pypy/pypy/changeset/18c3825bad60/
Log: This is encoder.py from pypy 2.7, with the changes in CPython 2.7-- 3.5 manually applied. diff --git a/lib-python/3/json/encoder.py b/lib-python/3/json/encoder.py --- a/lib-python/3/json/encoder.py +++ b/lib-python/3/json/encoder.py @@ -2,6 +2,8 @@ """ import re +from __pypy__.builders import StringBuilder + try: from _json import encode_basestring_ascii as c_encode_basestring_ascii except ImportError: @@ -157,6 +159,11 @@ if default is not None: self.default = default + if indent is not None and not isinstance(indent, str): + self.indent_str = ' ' * indent + else: + self.indent_str = indent + def default(self, o): """Implement this method in a subclass such that it returns a serializable object for ``o``, or calls the base implementation @@ -186,19 +193,126 @@ '{"foo": ["bar", "baz"]}' """ - # This is for extremely simple cases and benchmarks. + if self.check_circular: + markers = {} + else: + markers = None + builder = StringBuilder() + self.__encode(o, markers, builder, 0) + return builder.build() + + def __emit_indent(self, builder, _current_indent_level): + if self.indent is not None: + _current_indent_level += 1 + newline_indent = '\n' + self.indent_str * _current_indent_level + separator = self.item_separator + newline_indent + builder.append(newline_indent) + else: + separator = self.item_separator + return separator, _current_indent_level + + def __emit_unindent(self, builder, _current_indent_level): + if self.indent is not None: + builder.append('\n') + builder.append(self.indent_str * (_current_indent_level - 1)) + + def __encode(self, o, markers, builder, _current_indent_level): if isinstance(o, str): - if self.ensure_ascii: - return encode_basestring_ascii(o) + builder.append('"') + builder.append(self.__encoder(o)) + builder.append('"') + elif o is None: + builder.append('null') + elif o is True: + builder.append('true') + elif o is False: + builder.append('false') + elif isinstance(o, int): + # Subclasses of int/float may override __str__, but we 
still + # want to encode them as integers/floats in JSON. One example + # within the standard library is IntEnum. + builder.append(int.__str__(o)) + elif isinstance(o, float): + builder.append(self.__floatstr(o)) + elif isinstance(o, (list, tuple)): + if not o: + builder.append('[]') + return + self.__encode_list(o, markers, builder, _current_indent_level) + elif isinstance(o, dict): + if not o: + builder.append('{}') + return + self.__encode_dict(o, markers, builder, _current_indent_level) + else: + self.__mark_markers(markers, o) + res = self.default(o) + self.__encode(res, markers, builder, _current_indent_level) + self.__remove_markers(markers, o) + return res + + def __encode_list(self, l, markers, builder, _current_indent_level): + self.__mark_markers(markers, l) + builder.append('[') + first = True + separator, _current_indent_level = self.__emit_indent(builder, + _current_indent_level) + for elem in l: + if first: + first = False else: - return encode_basestring(o) - # This doesn't pass the iterator directly to ''.join() because the - # exceptions aren't as detailed. The list call should be roughly - # equivalent to the PySequence_Fast that ''.join() would do. 
- chunks = self.iterencode(o, _one_shot=True) - if not isinstance(chunks, (list, tuple)): - chunks = list(chunks) - return ''.join(chunks) + builder.append(separator) + self.__encode(elem, markers, builder, _current_indent_level) + del elem # XXX grumble + self.__emit_unindent(builder, _current_indent_level) + builder.append(']') + self.__remove_markers(markers, l) + + def __encode_dict(self, d, markers, builder, _current_indent_level): + self.__mark_markers(markers, d) + first = True + builder.append('{') + separator, _current_indent_level = self.__emit_indent(builder, + _current_indent_level) + if self.sort_keys: + items = sorted(d.items(), key=lambda kv: kv[0]) + else: + items = d.items() + + for key, v in items: + if first: + first = False + else: + builder.append(separator) + if isinstance(key, str): + pass + # JavaScript is weakly typed for these, so it makes sense to + # also allow them. Many encoders seem to do something like this. + elif isinstance(key, float): + key = self.__floatstr(key) + elif key is True: + key = 'true' + elif key is False: + key = 'false' + elif key is None: + key = 'null' + elif isinstance(key, int): + # see comment for int in __encode + key = int.__str__(key) + elif self.skipkeys: + continue + else: + raise TypeError("key " + repr(key) + " is not a string") + builder.append('"') + builder.append(self.__encoder(key)) + builder.append('"') + builder.append(self.key_separator) + self.__encode(v, markers, builder, _current_indent_level) + del key + del v # XXX grumble + self.__emit_unindent(builder, _current_indent_level) + builder.append('}') + self.__remove_markers(markers, d) def iterencode(self, o, _one_shot=False): """Encode the given object and yield each string @@ -214,83 +328,53 @@ markers = {} else: markers = None - if self.ensure_ascii: - _encoder = encode_basestring_ascii + return self.__iterencode(o, markers, 0) + + def __floatstr(self, o): + # Check for specials. 
Note that this type of test is processor + # and/or platform-specific, so do tests which don't depend on the + # internals. + + if o != o: + text = 'NaN' + elif o == INFINITY: + text = 'Infinity' + elif o == -INFINITY: + text = '-Infinity' else: - _encoder = encode_basestring + return float.__repr__(o) - def floatstr(o, allow_nan=self.allow_nan, - _repr=float.__repr__, _inf=INFINITY, _neginf=-INFINITY): - # Check for specials. Note that this type of test is processor - # and/or platform-specific, so do tests which don't depend on the - # internals. + if not self.allow_nan: + raise ValueError( + "Out of range float values are not JSON compliant: " + + repr(o)) - if o != o: - text = 'NaN' - elif o == _inf: - text = 'Infinity' - elif o == _neginf: - text = '-Infinity' - else: - return _repr(o) + return text - if not allow_nan: - raise ValueError( - "Out of range float values are not JSON compliant: " + - repr(o)) + def __mark_markers(self, markers, o): + if markers is not None: + if id(o) in markers: + raise ValueError("Circular reference detected") + markers[id(o)] = None - return text + def __remove_markers(self, markers, o): + if markers is not None: + del markers[id(o)] - - if (_one_shot and c_make_encoder is not None - and self.indent is None): - _iterencode = c_make_encoder( - markers, self.default, _encoder, self.indent, - self.key_separator, self.item_separator, self.sort_keys, - self.skipkeys, self.allow_nan) - else: - _iterencode = _make_iterencode( - markers, self.default, _encoder, self.indent, floatstr, - self.key_separator, self.item_separator, self.sort_keys, - self.skipkeys, _one_shot) - return _iterencode(o, 0) - -def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, - _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, - ## HACK: hand-optimized bytecode; turn globals into locals - ValueError=ValueError, - dict=dict, - float=float, - id=id, - int=int, - isinstance=isinstance, - list=list, - str=str, - tuple=tuple, - 
_intstr=int.__str__, - ): - - if _indent is not None and not isinstance(_indent, str): - _indent = ' ' * _indent - - def _iterencode_list(lst, _current_indent_level): + def __iterencode_list(self, lst, markers, _current_indent_level): if not lst: yield '[]' return - if markers is not None: - markerid = id(lst) - if markerid in markers: - raise ValueError("Circular reference detected") - markers[markerid] = lst + self.__mark_markers(markers, lst) buf = '[' - if _indent is not None: + if self.indent is not None: _current_indent_level += 1 - newline_indent = '\n' + _indent * _current_indent_level - separator = _item_separator + newline_indent + newline_indent = '\n' + self.indent_str * _current_indent_level + separator = self.item_separator + newline_indent buf += newline_indent else: newline_indent = None - separator = _item_separator + separator = self.item_separator first = True for value in lst: if first: @@ -298,7 +382,7 @@ else: buf = separator if isinstance(value, str): - yield buf + _encoder(value) + yield buf + '"' + self.__encoder(value) + '"' elif value is None: yield buf + 'null' elif value is True: @@ -306,49 +390,44 @@ elif value is False: yield buf + 'false' elif isinstance(value, int): - # Subclasses of int/float may override __str__, but we still - # want to encode them as integers/floats in JSON. One example - # within the standard library is IntEnum. 
- yield buf + _intstr(value) + # see comment for int in __encode + yield buf + int.__str__(value) elif isinstance(value, float): - # see comment above for int - yield buf + _floatstr(value) + yield buf + self.__floatstr(value) else: yield buf if isinstance(value, (list, tuple)): - chunks = _iterencode_list(value, _current_indent_level) + chunks = self.__iterencode_list(value, markers, + _current_indent_level) elif isinstance(value, dict): - chunks = _iterencode_dict(value, _current_indent_level) + chunks = self.__iterencode_dict(value, markers, + _current_indent_level) else: - chunks = _iterencode(value, _current_indent_level) + chunks = self.__iterencode(value, markers, + _current_indent_level) yield from chunks if newline_indent is not None: _current_indent_level -= 1 - yield '\n' + _indent * _current_indent_level + yield '\n' + self.indent_str * _current_indent_level yield ']' - if markers is not None: - del markers[markerid] + self.__remove_markers(markers, lst) - def _iterencode_dict(dct, _current_indent_level): + def __iterencode_dict(self, dct, markers, _current_indent_level): if not dct: yield '{}' return - if markers is not None: - markerid = id(dct) - if markerid in markers: - raise ValueError("Circular reference detected") - markers[markerid] = dct + self.__mark_markers(markers, dct) yield '{' - if _indent is not None: + if self.indent is not None: _current_indent_level += 1 - newline_indent = '\n' + _indent * _current_indent_level - item_separator = _item_separator + newline_indent + newline_indent = '\n' + self.indent_str * _current_indent_level + item_separator = self.item_separator + newline_indent yield newline_indent else: newline_indent = None - item_separator = _item_separator + item_separator = self.item_separator first = True - if _sort_keys: + if self.sort_keys: items = sorted(dct.items(), key=lambda kv: kv[0]) else: items = dct.items() @@ -358,8 +437,7 @@ # JavaScript is weakly typed for these, so it makes sense to # also allow them. 
Many encoders seem to do something like this. elif isinstance(key, float): - # see comment for int/float in _make_iterencode - key = _floatstr(key) + key = self.__floatstr(key) elif key is True: key = 'true' elif key is False: @@ -367,9 +445,9 @@ elif key is None: key = 'null' elif isinstance(key, int): - # see comment for int/float in _make_iterencode - key = _intstr(key) - elif _skipkeys: + # see comment for int in __encode + key = int.__str__(key) + elif self.skipkeys: continue else: raise TypeError("key " + repr(key) + " is not a string") @@ -377,64 +455,68 @@ first = False else: yield item_separator - yield _encoder(key) - yield _key_separator + yield '"' + self.__encoder(key) + '"' + yield self.key_separator if isinstance(value, str): - yield _encoder(value) + yield '"' + self.__encoder(value) + '"' elif value is None: yield 'null' elif value is True: yield 'true' elif value is False: yield 'false' - elif isinstance(value, int): - # see comment for int/float in _make_iterencode - yield _intstr(value) + elif isinstance(value, int): + yield int.__str__(value) elif isinstance(value, float): - # see comment for int/float in _make_iterencode - yield _floatstr(value) + yield self.__floatstr(value) else: if isinstance(value, (list, tuple)): - chunks = _iterencode_list(value, _current_indent_level) + chunks = self.__iterencode_list(value, markers, + _current_indent_level) elif isinstance(value, dict): - chunks = _iterencode_dict(value, _current_indent_level) + chunks = self.__iterencode_dict(value, markers, + _current_indent_level) else: - chunks = _iterencode(value, _current_indent_level) + chunks = self.__iterencode(value, markers, + _current_indent_level) yield from chunks if newline_indent is not None: _current_indent_level -= 1 - yield '\n' + _indent * _current_indent_level + yield '\n' + self.indent_str * _current_indent_level yield '}' - if markers is not None: - del markers[markerid] + self.__remove_markers(markers, dct) - def _iterencode(o, 
_current_indent_level): + def __iterencode(self, o, markers, _current_indent_level): if isinstance(o, str): - yield _encoder(o) + yield '"' + self.__encoder(o) + '"' elif o is None: yield 'null' elif o is True: yield 'true' elif o is False: yield 'false' - elif isinstance(o, int): - # see comment for int/float in _make_iterencode - yield _intstr(o) + elif isinstance(o, int): + yield int.__str__(o) elif isinstance(o, float): - # see comment for int/float in _make_iterencode - yield _floatstr(o) + yield self.__floatstr(o) elif isinstance(o, (list, tuple)): - yield from _iterencode_list(o, _current_indent_level) + yield from self.__iterencode_list(o, markers, _current_indent_level) elif isinstance(o, dict): - yield from _iterencode_dict(o, _current_indent_level) + yield from self.__iterencode_dict(o, markers, _current_indent_level) else: - if markers is not None: - markerid = id(o) - if markerid in markers: - raise ValueError("Circular reference detected") - markers[markerid] = o - o = _default(o) - yield from _iterencode(o, _current_indent_level) - if markers is not None: - del markers[markerid] - return _iterencode + self.__mark_markers(markers, o) + obj = self.default(o) + yield from self.__iterencode(obj, markers, _current_indent_level) + self.__remove_markers(markers, o) + + +# overwrite some helpers here with more efficient versions +try: + from _pypyjson import raw_encode_basestring_ascii + def encode_basestring_ascii(s): + encoded = raw_encode_basestring_ascii(s) + if encoded is None: + return '"' + s + '"' + return encoded # on pypy3, includes the quotes already +except ImportError: + pass _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit