Hello community, here is the log from the commit of package python-json_tricks for openSUSE:Leap:15.2 checked in at 2020-04-14 14:20:50 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Leap:15.2/python-json_tricks (Old) and /work/SRC/openSUSE:Leap:15.2/.python-json_tricks.new.3248 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-json_tricks" Tue Apr 14 14:20:50 2020 rev:5 rq:793500 version:3.15.0 Changes: -------- --- /work/SRC/openSUSE:Leap:15.2/python-json_tricks/python-json_tricks.changes 2020-03-09 18:07:01.644867015 +0100 +++ /work/SRC/openSUSE:Leap:15.2/.python-json_tricks.new.3248/python-json_tricks.changes 2020-04-14 14:21:09.469282402 +0200 @@ -1,0 +2,6 @@ +Mon Apr 6 06:40:33 UTC 2020 - Tomáš Chvátal <tchva...@suse.com> + +- Update to 3.15.0: + * no upstream changelog + +------------------------------------------------------------------- Old: ---- pyjson_tricks-3.13.2.tar.gz New: ---- pyjson_tricks-3.15.0.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-json_tricks.spec ++++++ --- /var/tmp/diff_new_pack.4eD2mi/_old 2020-04-14 14:21:09.773282629 +0200 +++ /var/tmp/diff_new_pack.4eD2mi/_new 2020-04-14 14:21:09.777282632 +0200 @@ -1,7 +1,7 @@ # # spec file for package python-json_tricks # -# Copyright (c) 2019 SUSE LINUX GmbH, Nuernberg, Germany. +# Copyright (c) 2020 SUSE LLC # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -18,7 +18,7 @@ %{?!python_module:%define python_module() python-%{**} python3-%{**}} Name: python-json_tricks -Version: 3.13.2 +Version: 3.15.0 Release: 0 Summary: Extra features for Python's JSON License: BSD-3-Clause @@ -61,6 +61,8 @@ %prep %setup -q -n pyjson_tricks-%{version} +# py3 only syntax in this file +rm tests/test_utils.py %build %python_build ++++++ pyjson_tricks-3.13.2.tar.gz -> pyjson_tricks-3.15.0.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pyjson_tricks-3.13.2/.travis.yml new/pyjson_tricks-3.15.0/.travis.yml --- old/pyjson_tricks-3.13.2/.travis.yml 2019-08-10 18:48:04.000000000 +0200 +++ new/pyjson_tricks-3.15.0/.travis.yml 2020-04-05 22:48:35.000000000 +0200 @@ -4,9 +4,10 @@ - "3.4" - "3.5" - "3.6" + - "3.7" matrix: include: - - python: "3.7" + - python: "3.8" dist: xenial sudo: true install: pip install tox-travis diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pyjson_tricks-3.13.2/LICENSE.txt new/pyjson_tricks-3.15.0/LICENSE.txt --- old/pyjson_tricks-3.13.2/LICENSE.txt 2019-08-10 18:48:04.000000000 +0200 +++ new/pyjson_tricks-3.15.0/LICENSE.txt 2020-04-05 22:48:35.000000000 +0200 @@ -1,29 +1,29 @@ +LICENSE: BSD-3-Clause ----------- Revised BSD License ---------- -Copyright (c) 2018, Mark V. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the organization nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY -DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +Copyright (c) 2020 Mark V. All rights reserved. +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: +1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors +may be used to endorse or promote products derived from this software without +specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pyjson_tricks-3.13.2/README.rst new/pyjson_tricks-3.15.0/README.rst --- old/pyjson_tricks-3.13.2/README.rst 2019-08-10 18:48:04.000000000 +0200 +++ new/pyjson_tricks-3.15.0/README.rst 2020-04-05 22:48:35.000000000 +0200 @@ -46,117 +46,13 @@ ``json-tricks`` supports Python 2.7, and Python 3.4 and later, and is automatically tested on 2.7, 3.4, 3.5 and 3.6. Pypy is supported without numpy and pandas. Pandas doesn't support 3.4. -Preserve type vs use primitive -------------------------------- - -By default, types are encoded such that they can be restored to their original type when loaded with ``json-tricks``. Example encodings in this documentation refer to that format. - -You can also choose to store things as their closest primitive type (e.g. arrays and sets as lists, decimals as floats). This may be desirable if you don't care about the exact type, or you are loading the json in another language (which doesn't restore python types). It's also smaller. - -To forego meta data and store primitives instead, pass ``primitives`` to ``dump(s)``. This is available in version ``3.8`` and later. Example: - -.. code-block:: python - - data = [ - arange(0, 10, 1, dtype=int).reshape((2, 5)), - datetime(year=2017, month=1, day=19, hour=23, minute=00, second=00), - 1 + 2j, - Decimal(42), - Fraction(1, 3), - MyTestCls(s='ub', dct={'7': 7}), # see later - set(range(7)), - ] - # Encode with metadata to preserve types when decoding - print(dumps(data)) - -.. code-block:: javascript - - // (comments added and indenting changed) - [ - // numpy array - { - "__ndarray__": [ - [0, 1, 2, 3, 4], - [5, 6, 7, 8, 9]], - "dtype": "int64", - "shape": [2, 5], - "Corder": true - }, - // datetime (naive) - { - "__datetime__": null, - "year": 2017, - "month": 1, - "day": 19, - "hour": 23 - }, - // complex number - { - "__complex__": [1.0, 2.0] - }, - // decimal & fraction - { - "__decimal__": "42" - }, - { - "__fraction__": true - "numerator": 1, - "denominator": 3, - }, - // class instance - { - "__instance_type__": [ - "tests.test_class", - "MyTestCls" - ], - "attributes": { - "s": "ub", - "dct": {"7": 7} - } - }, - // set - { - "__set__": [0, 1, 2, 3, 4, 5, 6] - } - ] - -.. code-block:: python - - # Encode as primitive types; more simple but loses type information - print(dumps(data, primitives=True)) - -.. code-block:: javascript - - // (comments added and indentation changed) - [ - // numpy array - [[0, 1, 2, 3, 4], - [5, 6, 7, 8, 9]], - // datetime (naive) - "2017-01-19T23:00:00", - // complex number - [1.0, 2.0], - // decimal & fraction - 42.0, - 0.3333333333333333, - // class instance - { - "s": "ub", - "dct": {"7": 7} - }, - // set - [0, 1, 2, 3, 4, 5, 6] - ] - -Note that valid json is produced either way: ``json-tricks`` stores meta data as normal json, but other packages probably won't interpret it. - Features --------------------------------------- Numpy arrays +++++++++++++++++++++++++++++++++++++++ -The array is encoded in sort-of-readable and very flexible and portable format, like so: +When not compressed, the array is encoded in sort-of-readable and very flexible and portable format, like so: .. code-block:: python @@ -178,13 +74,34 @@ which will be converted back to a numpy array when using ``json_tricks.loads``. Note that the memory order (``Corder``) is only stored in v3.1 and later and for arrays with at least 2 dimensions. -As you've seen, this uses the magic key ``__ndarray__``. Don't use ``__ndarray__`` as a dictionary key unless you're trying to make a numpy array (and know what you're doing). +As you see, this uses the magic key ``__ndarray__``. Don't use ``__ndarray__`` as a dictionary key unless you're trying to make a numpy array (and know what you're doing). + +Numpy scalars are also serialized (v3.5+). They are represented by the closest python primitive type. A special representation was not feasible, because Python's json implementation serializes some numpy types as primitives, without consulting custom encoders. If you want to preserve the exact numpy type, use encode_scalars_inplace_. + +There is also a compressed format. From the next major release, this will be default when using compression. For now you can use it as: -Numpy scalars are also serialized (v3.5+). They are represented by the closest python primitive type. A special representation was not feasible, because Python's json implementation serializes some numpy types as primitives, without consulting custom encoders. If you want to preverse the exact numpy type, use encode_scalars_inplace_. +.. code-block:: python + + dumps(data, compression=True, properties={'ndarray_compact': True}) -**Performance**: this method has slow write times similar to other human-readable formats, although read time is worse than csv. File size (with compression) is high on a relative scale, but it's only around 30% above binary. See this benchmark_ (it's called JSONGzip). A binary alternative `might be added`_, but is not yet available. +This compressed format encodes the array data in base64, with gzip compression for the array, unless 1) compression has little effect for that array, or 2) the whole file is already compressed. If you only want compact format for large arrays, pass the number of elements to `ndarray_compact`. -This implementation is inspired by an answer by tlausch on stackoverflow_ that you could read for details. +Example: + +.. code-block:: python + + data = [linspace(0, 10, 9), array([pi, exp(1)])] + dumps(data, compression=False, properties={'ndarray_compact': 8}) + + [{ + "__ndarray__": "b64.gz:H4sIAAAAAAAC/2NgQAZf7CE0iwOE5oPSIlBaEkrLQegGRShfxQEAz7QFikgAAAA=", + "dtype": "float64", + "shape": [9] + }, { + "__ndarray__": [3.141592653589793, 2.718281828459045], + "dtype": "float64", + "shape": [2] + }] Class instances +++++++++++++++++++++++++++++++++++++++ @@ -338,6 +255,7 @@ Other features +++++++++++++++++++++++++++++++++++++++ +* Special floats like `NaN`, `Infinity` and `-0` using the `allow_nan=True` argument (non-standard_ json, may not decode in other implementations). * Sets are serializable and can be loaded. By default the set json representation is sorted, to have a consistent representation. * Save and load complex numbers (version 3.2) with ``1+2j`` serializing as ``{'__complex__': [1, 2]}``. * Save and load ``Decimal`` and ``Fraction`` (including NaN, infinity, -0 for Decimal). @@ -346,6 +264,110 @@ * ``json_tricks`` can check for duplicate keys in maps by setting ``allow_duplicates`` to False. These are `kind of allowed`_, but are handled inconsistently between json implementations. In Python, for ``dict`` and ``OrderedDict``, duplicate keys are silently overwritten. * Save and load ``pathlib.Path`` objects (e.g., the current path, `Path('.')`, serializes as ``{"__pathlib__": "."}``) (thanks to ``bburan``). +Preserve type vs use primitive +------------------------------- + +By default, types are encoded such that they can be restored to their original type when loaded with ``json-tricks``. Example encodings in this documentation refer to that format. + +You can also choose to store things as their closest primitive type (e.g. arrays and sets as lists, decimals as floats). This may be desirable if you don't care about the exact type, or you are loading the json in another language (which doesn't restore python types). It's also smaller. + +To forego meta data and store primitives instead, pass ``primitives`` to ``dump(s)``. This is available in version ``3.8`` and later. Example: + +.. code-block:: python + + data = [ + arange(0, 10, 1, dtype=int).reshape((2, 5)), + datetime(year=2017, month=1, day=19, hour=23, minute=00, second=00), + 1 + 2j, + Decimal(42), + Fraction(1, 3), + MyTestCls(s='ub', dct={'7': 7}), # see later + set(range(7)), + ] + # Encode with metadata to preserve types when decoding + print(dumps(data)) + +.. code-block:: javascript + + // (comments added and indenting changed) + [ + // numpy array + { + "__ndarray__": [ + [0, 1, 2, 3, 4], + [5, 6, 7, 8, 9]], + "dtype": "int64", + "shape": [2, 5], + "Corder": true + }, + // datetime (naive) + { + "__datetime__": null, + "year": 2017, + "month": 1, + "day": 19, + "hour": 23 + }, + // complex number + { + "__complex__": [1.0, 2.0] + }, + // decimal & fraction + { + "__decimal__": "42" + }, + { + "__fraction__": true + "numerator": 1, + "denominator": 3, + }, + // class instance + { + "__instance_type__": [ + "tests.test_class", + "MyTestCls" + ], + "attributes": { + "s": "ub", + "dct": {"7": 7} + } + }, + // set + { + "__set__": [0, 1, 2, 3, 4, 5, 6] + } + ] + +.. code-block:: python + + # Encode as primitive types; more simple but loses type information + print(dumps(data, primitives=True)) + +.. code-block:: javascript + + // (comments added and indentation changed) + [ + // numpy array + [[0, 1, 2, 3, 4], + [5, 6, 7, 8, 9]], + // datetime (naive) + "2017-01-19T23:00:00", + // complex number + [1.0, 2.0], + // decimal & fraction + 42.0, + 0.3333333333333333, + // class instance + { + "s": "ub", + "dct": {"7": 7} + }, + // set + [0, 1, 2, 3, 4, 5, 6] + ] + +Note that valid json is produced either way: ``json-tricks`` stores meta data as normal json, but other packages probably won't interpret it. + Usage & contributions --------------------------------------- @@ -378,5 +400,5 @@ .. _`this guide`: https://github.com/mverleg/pyjson_tricks/blob/master/tests/run_locally.rst .. _`Revised BSD License`: https://github.com/mverleg/pyjson_tricks/blob/master/LICENSE.txt .. _`contribution guide`: https://github.com/mverleg/pyjson_tricks/blob/master/CONTRIBUTING.txt - +.. _non-standard: https://stackoverflow.com/questions/1423081/json-left-out-infinity-and-nan-json-status-in-ecmascript diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pyjson_tricks-3.13.2/docs/index.rst new/pyjson_tricks-3.15.0/docs/index.rst --- old/pyjson_tricks-3.13.2/docs/index.rst 2019-08-10 18:48:04.000000000 +0200 +++ new/pyjson_tricks-3.15.0/docs/index.rst 2020-04-05 22:48:35.000000000 +0200 @@ -4,11 +4,7 @@ Main components --------------------------------------- -Note that these functions exist as two versions, the full version with numpy (np) and the version without requirements (nonp) that doesn't do nunpy encoding/decoding. - -If you import these functions directly from json_tricks, e.g. ``from json_tricks import dumps``, then it will select np if numpy is available, and nonp otherwise. You can use ``json_tricks.NUMPY_MODE`` to see if numpy mode is being used. - -This dual behaviour can lead to confusion, so it is recommended that you import directly from np or nonp. +Support for numpy, pandas and other libraries should work automatically if those libraries are installed. They are not installed automatically as dependencies because `json-tricks` can be used without them. dumps +++++++++++++++++++++++++++++++++++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pyjson_tricks-3.13.2/json_tricks/_version.py new/pyjson_tricks-3.15.0/json_tricks/_version.py --- old/pyjson_tricks-3.13.2/json_tricks/_version.py 2019-08-10 18:48:04.000000000 +0200 +++ new/pyjson_tricks-3.15.0/json_tricks/_version.py 2020-04-05 22:48:35.000000000 +0200 @@ -1,2 +1,3 @@ -VERSION = '3.13.2' +VERSION = '3.15.0' + diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pyjson_tricks-3.13.2/json_tricks/decoders.py new/pyjson_tricks-3.15.0/json_tricks/decoders.py --- old/pyjson_tricks-3.13.2/json_tricks/decoders.py 2019-08-10 18:48:04.000000000 +0200 +++ new/pyjson_tricks-3.15.0/json_tricks/decoders.py 2020-04-05 22:48:35.000000000 +0200 @@ -1,11 +1,11 @@ - -from datetime import datetime, date, time, timedelta -from fractions import Fraction +import warnings from collections import OrderedDict +from datetime import datetime, date, time, timedelta from decimal import Decimal -from logging import warning +from fractions import Fraction + from json_tricks import NoEnumException, NoPandasException, NoNumpyException -from .utils import ClassInstanceHookBase, nested_index +from .utils import ClassInstanceHookBase, nested_index, str_type, gzip_decompress class DuplicateJsonKeyException(Exception): @@ -198,10 +198,6 @@ return dct if '__pandas_dataframe__' not in dct and '__pandas_series__' not in dct: return dct - # todo: this is experimental - if not getattr(pandas_hook, '_warned', False): - pandas_hook._warned = True - warning('Pandas loading support in json-tricks is experimental and may change in future versions.') if '__pandas_dataframe__' in dct: try: from pandas import DataFrame @@ -254,25 +250,77 @@ if not '__ndarray__' in dct: return dct try: - from numpy import asarray, empty, ndindex - import numpy as nptypes + import numpy except ImportError: raise NoNumpyException('Trying to decode a map which appears to represent a numpy ' 'array, but numpy appears not to be installed.') - order = 'A' + order = None if 'Corder' in dct: order = 'C' if dct['Corder'] else 'F' - if dct['shape']: - if dct['dtype'] == 'object': - dec_data = dct['__ndarray__'] - arr = empty(dct['shape'], dtype=dct['dtype'], order=order) - for indx in ndindex(arr.shape): - arr[indx] = nested_index(dec_data, indx) - return arr - return asarray(dct['__ndarray__'], dtype=dct['dtype'], order=order) + data_json = dct['__ndarray__'] + shape = tuple(dct['shape']) + nptype = dct['dtype'] + if shape: + if nptype == 'object': + return _lists_of_obj_to_ndarray(data_json, order, shape, nptype) + if isinstance(data_json, str_type): + return _bin_str_to_ndarray(data_json, order, shape, nptype) + else: + return _lists_of_numbers_to_ndarray(data_json, order, shape, nptype) + else: + return _scalar_to_numpy(data_json, nptype) + + +def _bin_str_to_ndarray(data, order, shape, dtype): + """ + From base64 encoded, gzipped binary data to ndarray. + """ + from base64 import standard_b64decode + from numpy import frombuffer + + assert order in [None, 'C'], 'specifying different memory order is not (yet) supported ' \ + 'for binary numpy format (got order = {})'.format(order) + if data.startswith('b64.gz:'): + data = standard_b64decode(data[7:]) + data = gzip_decompress(data) + elif data.startswith('b64:'): + data = standard_b64decode(data[4:]) else: - dtype = getattr(nptypes, dct['dtype']) - return dtype(dct['__ndarray__']) + raise ValueError('found numpy array buffer, but did not understand header; supported: b64 or b64.gz') + data = frombuffer(data, dtype=dtype) + return data.reshape(shape) + + +def _lists_of_numbers_to_ndarray(data, order, shape, dtype): + """ + From nested list of numbers to ndarray. + """ + from numpy import asarray + arr = asarray(data, dtype=dtype, order=order) + if shape != arr.shape: + warnings.warn('size mismatch decoding numpy array: expected {}, got {}'.format(shape, arr.shape)) + return arr + + +def _lists_of_obj_to_ndarray(data, order, shape, dtype): + """ + From nested list of objects (that aren't native numpy numbers) to ndarray. + """ + from numpy import empty, ndindex + arr = empty(shape, dtype=dtype, order=order) + dec_data = data + for indx in ndindex(arr.shape): + arr[indx] = nested_index(dec_data, indx) + return arr + + +def _scalar_to_numpy(data, dtype): + """ + From scalar value to numpy type. + """ + import numpy as nptypes + dtype = getattr(nptypes, dtype) + return dtype(data) def json_nonumpy_obj_hook(dct): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pyjson_tricks-3.13.2/json_tricks/encoders.py new/pyjson_tricks-3.15.0/json_tricks/encoders.py --- old/pyjson_tricks-3.13.2/json_tricks/encoders.py 2019-08-10 18:48:04.000000000 +0200 +++ new/pyjson_tricks-3.15.0/json_tricks/encoders.py 2020-04-05 22:48:35.000000000 +0200 @@ -1,14 +1,14 @@ - +import warnings from datetime import datetime, date, time, timedelta from fractions import Fraction from functools import wraps -from logging import warning from json import JSONEncoder from sys import version, stderr from decimal import Decimal + from .utils import hashodict, get_arg_names, \ get_module_name_from_object, NoEnumException, NoPandasException, \ - NoNumpyException, str_type + NoNumpyException, str_type, JsonTricksDeprecation, gzip_compress def _fallback_wrapper(encoder): @@ -44,7 +44,7 @@ elif not hasattr(encoder, '__call__'): raise TypeError('`obj_encoder` {0:} does not have `default` method and is not callable'.format(enc)) names = get_arg_names(encoder) - + def wrapper(*args, **kwargs): return encoder(*args, **{k: v for k, v in kwargs.items() if k in names}) return wrapper @@ -58,7 +58,7 @@ Each encoder should make any appropriate changes and return an object, changed or not. This will be passes to the other encoders. """ - def __init__(self, obj_encoders=None, silence_typeerror=False, primitives=False, fallback_encoders=(), **json_kwargs): + def __init__(self, obj_encoders=None, silence_typeerror=False, primitives=False, fallback_encoders=(), properties=None, **json_kwargs): """ :param obj_encoders: An iterable of functions or encoder instances to try. :param silence_typeerror: DEPRECATED - If set to True, ignore the TypeErrors that Encoder instances throw (default False). @@ -72,6 +72,7 @@ self.obj_encoders.extend(_fallback_wrapper(encoder) for encoder in list(fallback_encoders)) self.obj_encoders = [filtered_wrapper(enc) for enc in self.obj_encoders] self.silence_typeerror = silence_typeerror + self.properties = properties self.primitives = primitives super(TricksEncoder, self).__init__(**json_kwargs) @@ -88,7 +89,7 @@ """ prev_id = id(obj) for encoder in self.obj_encoders: - obj = encoder(obj, primitives=self.primitives, is_changed=id(obj) != prev_id) + obj = encoder(obj, primitives=self.primitives, is_changed=id(obj) != prev_id, properties=self.properties) if id(obj) == prev_id: raise TypeError(('Object of type {0:} could not be encoded by {1:} using encoders [{2:s}]. ' 'You can add an encoders for this type using `extra_obj_encoders`. If you want to \'skip\' this ' @@ -302,11 +303,6 @@ def pandas_encode(obj, primitives=False): from pandas import DataFrame, Series - if isinstance(obj, (DataFrame, Series)): - #todo: this is experimental - if not getattr(pandas_encode, '_warned', False): - pandas_encode._warned = True - warning('Pandas dumping support in json-tricks is experimental and may change in future versions.') if isinstance(obj, DataFrame): repr = hashodict() if not primitives: @@ -316,7 +312,7 @@ )) repr['index'] = tuple(obj.index.values) for k, name in enumerate(obj.columns.values): - repr[name] = tuple(obj.ix[:, k].values) + repr[name] = tuple(obj.iloc[:, k].values) return repr if isinstance(obj, Series): repr = hashodict() @@ -339,7 +335,7 @@ return obj -def numpy_encode(obj, primitives=False): +def numpy_encode(obj, primitives=False, properties=None): """ Encodes numpy `ndarray`s as lists with meta data. @@ -354,8 +350,27 @@ if primitives: return obj.tolist() else: + properties = properties or {} + use_compact = properties.get('ndarray_compact', None) + json_compression = bool(properties.get('compression', False)) + if use_compact is None and json_compression and not getattr(numpy_encode, '_warned_compact', False): + numpy_encode._warned_compact = True + warnings.warn('storing ndarray in text format while compression in enabled; in the next major version ' + 'of json_tricks, the default when using compression will change to compact mode; to already use ' + 'that smaller format, pass `properties={"ndarray_compact": True}` to json_tricks.dump; ' + 'to silence this warning, pass `properties={"ndarray_compact": False}`; ' + 'see issue https://github.com/mverleg/pyjson_tricks/issues/73', JsonTricksDeprecation) + # Property 'use_compact' may also be an integer, in which case it's the number of + # elements from which compact storage is used. + if isinstance(use_compact, int) and not isinstance(use_compact, bool): + use_compact = obj.size >= use_compact + if use_compact: + # If the overall json file is compressed, then don't compress the array. + data_json = _ndarray_to_bin_str(obj, do_compress=not json_compression) + else: + data_json = obj.tolist() dct = hashodict(( - ('__ndarray__', obj.tolist()), + ('__ndarray__', data_json), ('dtype', str(obj.dtype)), ('shape', obj.shape), )) @@ -365,11 +380,31 @@ elif isinstance(obj, generic): if NumpyEncoder.SHOW_SCALAR_WARNING: NumpyEncoder.SHOW_SCALAR_WARNING = False - warning('json-tricks: numpy scalar serialization is experimental and may work differently in future versions') + warnings.warn('json-tricks: numpy scalar serialization is experimental and may work differently in future versions') return obj.item() return obj +def _ndarray_to_bin_str(array, do_compress): + """ + From ndarray to base64 encoded, gzipped binary data. + """ + import gzip + from base64 import standard_b64encode + assert array.flags['C_CONTIGUOUS'], 'only C memory order is (currently) supported for compact ndarray format' + + original_size = array.size * array.itemsize + header = 'b64:' + data = array.data + if do_compress: + small = gzip_compress(data, compresslevel=9) + if len(small) < 0.9 * original_size and len(small) < original_size - 8: + header = 'b64.gz:' + data = small + data = standard_b64encode(data) + return header + data.decode('ascii') + + class NumpyEncoder(ClassInstanceEncoder): """ JSON encoder for numpy arrays. @@ -381,7 +416,7 @@ If input object is a ndarray it will be converted into a dict holding data type, shape and the data. The object can be restored using json_numpy_obj_hook. """ - warning('`NumpyEncoder` is deprecated, use `numpy_encode`') #todo + warnings.warn('`NumpyEncoder` is deprecated, use `numpy_encode`', JsonTricksDeprecation) obj = numpy_encode(obj) return super(NumpyEncoder, self).default(obj, *args, **kwargs) @@ -401,6 +436,6 @@ See `nonumpy_encode`. """ def default(self, obj, *args, **kwargs): - warning('`NoNumpyEncoder` is deprecated, use `nonumpy_encode`') #todo + warnings.warn('`NoNumpyEncoder` is deprecated, use `nonumpy_encode`', JsonTricksDeprecation) obj = nonumpy_encode(obj) return super(NoNumpyEncoder, self).default(obj, *args, **kwargs) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pyjson_tricks-3.13.2/json_tricks/nonp.py new/pyjson_tricks-3.15.0/json_tricks/nonp.py --- old/pyjson_tricks-3.13.2/json_tricks/nonp.py 2019-08-10 18:48:04.000000000 +0200 +++ new/pyjson_tricks-3.15.0/json_tricks/nonp.py 2020-04-05 22:48:35.000000000 +0200 @@ -1,13 +1,12 @@ -from gzip import GzipFile -from io import BytesIO from json import loads as json_loads from os import fsync from sys import exc_info -from json_tricks.utils import is_py3 +from json_tricks.utils import is_py3, dict_default, gzip_compress, gzip_decompress from .utils import str_type, NoNumpyException # keep 'unused' imports from .comment import strip_comments # keep 'unused' imports +#TODO @mark: imports removed? from .encoders import TricksEncoder, json_date_time_encode, \ class_instance_encode, json_complex_encode, json_set_encode, numeric_types_encode, numpy_encode, \ nonumpy_encode, nopandas_encode, pandas_encode, noenum_instance_encode, \ @@ -31,6 +30,7 @@ numeric_types_hook, _cih_instance, ] +#TODO @mark: add properties to all built-in encoders (for speed - but it should keep working without) try: import enum except ImportError: @@ -60,11 +60,11 @@ DEFAULT_HOOKS = [pandas_hook,] + DEFAULT_HOOKS try: - import pathlib + import pathlib except: - # No need to include a "nopathlib_encode" hook since we would not encounter - # the Path object if pathlib isn't available. However, we *could* encounter - # a serialized Path object (produced by a version of Python with pathlib). + # No need to include a "nopathlib_encode" hook since we would not encounter + # the Path object if pathlib isn't available. However, we *could* encounter + # a serialized Path object (produced by a version of Python with pathlib). DEFAULT_HOOKS = [nopathlib_hook,] + DEFAULT_HOOKS else: DEFAULT_ENCODERS = [pathlib_encode,] + DEFAULT_ENCODERS @@ -76,7 +76,8 @@ def dumps(obj, sort_keys=None, cls=TricksEncoder, obj_encoders=DEFAULT_ENCODERS, extra_obj_encoders=(), - primitives=False, compression=None, allow_nan=False, conv_str_byte=False, fallback_encoders=(), **jsonkwargs): + primitives=False, compression=None, allow_nan=False, conv_str_byte=False, fallback_encoders=(), + properties=None, **jsonkwargs): """ Convert a nested data structure to a json string. @@ -88,6 +89,7 @@ :param fallback_encoders: These are extra `obj_encoders` that 1) are ran after all others and 2) only run if the object hasn't yet been changed. :param allow_nan: Allow NaN and Infinity values, which is a (useful) violation of the JSON standard (default False). :param conv_str_byte: Try to automatically convert between strings and bytes (assuming utf-8) (default False). + :param properties: A dictionary of properties that is passed to each encoder that will accept it. :return: The string containing the json-encoded version of obj. Other arguments are passed on to `cls`. Note that `sort_keys` should be false if you want to preserve order. @@ -95,8 +97,13 @@ if not hasattr(extra_obj_encoders, '__iter__'): raise TypeError('`extra_obj_encoders` should be a tuple in `json_tricks.dump(s)`') encoders = tuple(extra_obj_encoders) + tuple(obj_encoders) + properties = properties or {} + dict_default(properties, 'primitives', primitives) + dict_default(properties, 'compression', compression) + dict_default(properties, 'allow_nan', allow_nan) txt = cls(sort_keys=sort_keys, obj_encoders=encoders, allow_nan=allow_nan, - primitives=primitives, fallback_encoders=fallback_encoders, **jsonkwargs).encode(obj) + primitives=primitives, fallback_encoders=fallback_encoders, + properties=properties, **jsonkwargs).encode(obj) if not is_py3 and isinstance(txt, str): txt = unicode(txt, ENCODING) if not compression: @@ -104,16 +111,13 @@ if compression is True: compression = 5 txt = txt.encode(ENCODING) - sh = BytesIO() - with GzipFile(mode='wb', fileobj=sh, compresslevel=compression) as zh: - zh.write(txt) - gzstring = sh.getvalue() + gzstring = gzip_compress(txt, compresslevel=compression) return gzstring def dump(obj, fp, sort_keys=None, cls=TricksEncoder, obj_encoders=DEFAULT_ENCODERS, extra_obj_encoders=(), primitives=False, compression=None, force_flush=False, allow_nan=False, conv_str_byte=False, - fallback_encoders=(), **jsonkwargs): + fallback_encoders=(), properties=None, **jsonkwargs): """ Convert a nested data structure to a json string. @@ -123,11 +127,16 @@ The other arguments are identical to `dumps`. """ + if (isinstance(obj, str_type) or hasattr(obj, 'write')) and isinstance(fp, (list, dict)): + raise ValueError('json-tricks dump arguments are in the wrong order: provide the data to be serialized before file handle') txt = dumps(obj, sort_keys=sort_keys, cls=cls, obj_encoders=obj_encoders, extra_obj_encoders=extra_obj_encoders, primitives=primitives, compression=compression, allow_nan=allow_nan, conv_str_byte=conv_str_byte, - fallback_encoders=fallback_encoders, **jsonkwargs) + fallback_encoders=fallback_encoders, properties=properties, **jsonkwargs) if isinstance(fp, str_type): - fh = open(fp, 'wb+') + if compression: + fh = open(fp, 'wb+') + else: + fh = open(fp, 'w+') else: fh = fp if conv_str_byte: @@ -146,8 +155,7 @@ if isinstance(txt, str_type): txt = txt.encode(ENCODING) try: - if 'b' not in getattr(fh, 'mode', 'b?') and not isinstance(txt, - str_type) and compression: + if compression and 'b' not in getattr(fh, 'mode', 'b?') and not isinstance(txt, str_type): raise IOError('If compression is enabled, the file must be opened in binary mode.') try: fh.write(txt) @@ -193,16 +201,17 @@ if decompression is None: decompression = isinstance(string, bytes) and string[:2] == b'\x1f\x8b' if decompression: - with GzipFile(fileobj=BytesIO(string), mode='rb') as zh: - string = zh.read() - string = string.decode(ENCODING) + string = gzip_decompress(string).decode(ENCODING) if not isinstance(string, str_type): if conv_str_byte: string = string.decode(ENCODING) else: - raise TypeError(('Cannot automatically encode object of type "{0:}" in `json_tricks.load(s)` since ' - 'the encoding is not known. You should instead encode the bytes to a string and pass that ' - 'string to `load(s)`, for example bytevar.encode("utf-8") if utf-8 is the encoding.').format(type(string))) + raise TypeError(('The input was of non-string type "{0:}" in `json_tricks.load(s)`. ' + 'Bytes cannot be automatically decoding since the encoding is not known. Recommended ' + 'way is to instead encode the bytes to a string and pass that string to `load(s)`, ' + 'for example bytevar.encode("utf-8") if utf-8 is the encoding. Alternatively you can ' + 'force an attempt by passing conv_str_byte=True, but this may cause decoding issues.') + .format(type(string))) if ignore_comments: string = strip_comments(string) obj_pairs_hooks = tuple(obj_pairs_hooks) @@ -224,7 +233,14 @@ """ try: if isinstance(fp, str_type): - with open(fp, 'rb') as fh: + if decompression is not None: + open_binary = bool(decompression) + else: + with open(fp, 'rb') as fh: + # This attempts to detect gzip mode; gzip should always + # have this header, and text json can't have it. + open_binary = (fh.read(2) == b'\x1f\x8b') + with open(fp, 'rb' if open_binary else 'r') as fh: string = fh.read() else: string = fp.read() diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pyjson_tricks-3.13.2/json_tricks/np.py new/pyjson_tricks-3.15.0/json_tricks/np.py --- old/pyjson_tricks-3.13.2/json_tricks/np.py 2019-08-10 18:48:04.000000000 +0200 +++ new/pyjson_tricks-3.15.0/json_tricks/np.py 2020-04-05 22:48:35.000000000 +0200 @@ -3,9 +3,9 @@ This file exists for backward compatibility reasons. """ -from logging import warning +import warnings from .nonp import NoNumpyException, DEFAULT_ENCODERS, DEFAULT_HOOKS, dumps, dump, loads, load # keep 'unused' imports -from .utils import hashodict, NoPandasException +from .utils import hashodict, NoPandasException, JsonTricksDeprecation from .comment import strip_comment_line_with_symbol, strip_comments # keep 'unused' imports from .encoders import TricksEncoder, json_date_time_encode, class_instance_encode, ClassInstanceEncoder, \ numpy_encode, NumpyEncoder # keep 'unused' imports @@ -19,7 +19,7 @@ 'or decoding, you can import the functions from json_tricks.nonp instead, which do not need numpy.') -warning('`json_tricks.np` is deprecated, you can import directly from `json_tricks`') +warnings.warn('`json_tricks.np` is deprecated, you can import directly from `json_tricks`', JsonTricksDeprecation) DEFAULT_NP_ENCODERS = [numpy_encode,] + DEFAULT_ENCODERS # DEPRECATED diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pyjson_tricks-3.13.2/json_tricks/utils.py new/pyjson_tricks-3.15.0/json_tricks/utils.py --- old/pyjson_tricks-3.13.2/json_tricks/utils.py 2019-08-10 18:48:04.000000000 +0200 +++ new/pyjson_tricks-3.15.0/json_tricks/utils.py 2020-04-05 22:48:35.000000000 +0200 @@ -1,11 +1,18 @@ - +import gzip +import io +import warnings from collections import OrderedDict from functools import partial from importlib import import_module -from logging import warning, warn from sys import version_info, version +class JsonTricksDeprecation(UserWarning): + """ Special deprecation warning because the built-in one is ignored by default """ + def __init__(self, msg): + super(JsonTricksDeprecation, self).__init__(msg) + + class hashodict(OrderedDict): """ This dictionary is hashable. It should NOT be mutated, or all kinds of weird @@ -26,7 +33,7 @@ if type(callable) == partial and version_info[0] == 2: if not hasattr(get_arg_names, '__warned_partial_argspec'): get_arg_names.__warned_partial_argspec = True - warn("'functools.partial' and 'inspect.getargspec' are not compatible in this Python version; " + warnings.warn("'functools.partial' and 'inspect.getargspec' are not compatible in this Python version; " "ignoring the 'partial' wrapper when inspecting arguments of {}, which can lead to problems".format(callable)) return set(getargspec(callable.func).args) argspec = getargspec(callable) @@ -150,7 +157,7 @@ mod = obj.__class__.__module__ if mod == '__main__': mod = None - warning(('class {0:} seems to have been defined in the main file; unfortunately this means' + warnings.warn(('class {0:} seems to have been defined in the main file; unfortunately this means' ' that it\'s module/import path is unknown, so you might have to provide cls_lookup_map when ' 'decoding').format(obj.__class__)) return mod @@ -162,6 +169,29 @@ return collection +def dict_default(dictionary, key, default_value): + if key not in dictionary: + dictionary[key] = default_value + + +def gzip_compress(data, compresslevel): + """ + Do gzip compression, without the timestamp. Similar to gzip.compress, but without timestamp, and also before py3.2. + """ + buf = io.BytesIO() + with gzip.GzipFile(fileobj=buf, mode='wb', compresslevel=compresslevel, mtime=0) as fh: + fh.write(data) + return buf.getvalue() + + +def gzip_decompress(data): + """ + Do gzip decompression, without the timestamp. Just like gzip.decompress, but that's py3.2+. + """ + with gzip.GzipFile(fileobj=io.BytesIO(data)) as f: + return f.read() + + is_py3 = (version[:2] == '3.') str_type = str if is_py3 else (basestring, unicode,) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pyjson_tricks-3.13.2/setup.cfg new/pyjson_tricks-3.15.0/setup.cfg --- old/pyjson_tricks-3.13.2/setup.cfg 2019-08-10 18:48:04.000000000 +0200 +++ new/pyjson_tricks-3.15.0/setup.cfg 2020-04-05 22:48:35.000000000 +0200 @@ -2,3 +2,4 @@ universal = 1 [metadata] description-file = README.rst +license_file = LICENSE.txt diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pyjson_tricks-3.13.2/setup.py new/pyjson_tricks-3.15.0/setup.py --- old/pyjson_tricks-3.13.2/setup.py 2019-08-10 18:48:04.000000000 +0200 +++ new/pyjson_tricks-3.15.0/setup.py 2020-04-05 22:48:35.000000000 +0200 @@ -1,11 +1,10 @@ # -*- coding: utf-8 -*- from sys import version_info -from logging import warning +import warnings from setuptools import setup - with open('README.rst', 'r') as fh: readme = fh.read() @@ -20,7 +19,7 @@ if (version_info[0] == 2 and version_info[1] < 7) or \ (version_info[0] == 3 and version_info[1] < 4) or \ version_info[0] not in (2, 3): - raise warning('`json_tricks` does not support Python version {}.{}' + raise warnings.warn('`json_tricks` does not support Python version {}.{}' .format(version_info[0], version_info[1])) setup( @@ -58,6 +57,7 @@ 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: Implementation :: CPython', 'Programming Language :: Python :: Implementation :: PyPy', 'Topic :: Software Development :: Libraries :: Python Modules', diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pyjson_tricks-3.13.2/tests/test_bare.py new/pyjson_tricks-3.15.0/tests/test_bare.py --- old/pyjson_tricks-3.13.2/tests/test_bare.py 2019-08-10 18:48:04.000000000 +0200 +++ new/pyjson_tricks-3.15.0/tests/test_bare.py 2020-04-05 22:48:35.000000000 +0200 @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- from collections import OrderedDict -from datetime import datetime, time, date, timedelta +from datetime import datetime, time, date, timedelta from decimal import Decimal from fractions import Fraction from functools import partial @@ -11,15 +11,15 @@ from math import pi, exp from os.path import join from tempfile import mkdtemp + from pytest import raises, fail from json_tricks import fallback_ignore_unknown, DuplicateJsonKeyException from json_tricks.nonp import strip_comments, dump, dumps, load, loads, \ ENCODING -from json_tricks.utils import is_py3 -from .test_class import MyTestCls, CustomEncodeCls, SubClass, SuperClass, SlotsBase, SlotsDictABC, SlotsStr, SlotsABCDict, SlotsABC - - +from json_tricks.utils import is_py3, gzip_compress +from .test_class import MyTestCls, CustomEncodeCls, SubClass, SuperClass, SlotsBase, SlotsDictABC, SlotsStr, \ + SlotsABCDict, SlotsABC nonpdata = { 'my_array': list(range(20)), @@ -48,6 +48,54 @@ assert data3 == nonpdata +def test_mix_handle_str_path(): + # Based on issue 68 + data = {"fun": 1.1234567891234567e-13} + path = join(mkdtemp(), 'test_mix_handle_str_path.json') + dump(data, open(path, "w")) + back = load(path) + assert data == back + + + +def test_wrong_arg_order(): + # Based on a problem from https://github.com/mverleg/array_storage_benchmark + li = [[1.0, 2.0], [3.0, 4.0]] + map = {"a": 1} + path = join(mkdtemp(), 'pytest-np.json.gz') + msg = 'json-tricks dump arguments are in the wrong order: provide the data to be serialized before file handle' + with raises(ValueError) as ex: + with open(path, 'wb+') as fh: + dump(fh, li) + assert msg in ex.value.args[0] + with raises(ValueError) as ex: + dump(path, li) + assert msg in ex.value.args[0] + with raises(ValueError) as ex: + with open(path, 'wb+') as fh: + dump(fh, map) + assert msg in ex.value.args[0] + with raises(ValueError) as ex: + dump(path, map) + assert msg in ex.value.args[0] + + +def test_mix_handle_bin_path(): + # Based on issue 68 + data = {"fun": 1.1234567891234567e-13} + path = join(mkdtemp(), 'test_mix_handle_bin_path.json') + if is_py3: + with raises(TypeError): + dump(data, open(path, "wb")) + + +def test_mix_path_handle(): + # Based on issue 68 + data = {"fun": 1.1234567891234567e-13} + path = join(mkdtemp(), 'test_mix_path_handle.json') + dump(data, path) + + def test_file_handle_types(): path = join(mkdtemp(), 'pytest-text.json') for conv_str_byte in [True, False]: @@ -145,14 +193,11 @@ def test_compression_with_comments(): - sh = BytesIO() if is_py3: test_json = bytes(test_json_with_comments, encoding=ENCODING) else: test_json = test_json_with_comments - with GzipFile(mode='wb', fileobj=sh, compresslevel=9) as zh: - zh.write(test_json) - json = sh.getvalue() + json = gzip_compress(test_json, compresslevel=9) ref = loads(test_json_without_comments) data2 = loads(json, decompression=True) assert ref == data2 @@ -313,6 +358,7 @@ """ special_floats = [float('NaN'), float('Infinity'), -float('Infinity'), float('+0'), float('-0')] txt = dumps(special_floats, allow_nan=True) + assert txt == "[NaN, Infinity, -Infinity, 0.0, -0.0]" res = loads(txt) for x, y in zip(special_floats, res): """ Use strings since `+0 == -1` and `NaN != NaN` """ @@ -395,8 +441,8 @@ def test_str_unicode_bytes(): - text, pyrepr = u'{"mykey": "你好"}', {"mykey": u"你好"} - assert loads(text) == pyrepr + text, obj = u'{"mykey": "你好"}', {"mykey": u"你好"} + assert loads(text) == obj if is_py3: with raises(TypeError) as err: loads(text.encode('utf-8')) @@ -404,8 +450,8 @@ # This check is needed because the type of err varies between versions # For some reason, isinstance(..., py.code.ExceptionInfo) does not work err = err.value - assert 'Cannot automatically encode' in str(err) - assert loads(text.encode('utf-8'), conv_str_byte=True) == pyrepr + assert 'The input was of non-string type' in str(err) + assert loads(text.encode('utf-8'), conv_str_byte=True) == obj else: assert loads('{"mykey": "nihao"}') == {'mykey': 'nihao'} diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pyjson_tricks-3.13.2/tests/test_np.py new/pyjson_tricks-3.15.0/tests/test_np.py --- old/pyjson_tricks-3.13.2/tests/test_np.py 2019-08-10 18:48:04.000000000 +0200 +++ new/pyjson_tricks-3.15.0/tests/test_np.py 2020-04-05 22:48:35.000000000 +0200 @@ -2,17 +2,23 @@ # -*- coding: utf-8 -*- from copy import deepcopy -from tempfile import mkdtemp -from numpy import arange, ones, array, array_equal, finfo, iinfo from os.path import join -from numpy.core.umath import exp -from json_tricks.np_utils import encode_scalars_inplace -from json_tricks.np import dump, dumps, load, loads -from .test_class import MyTestCls -from .test_bare import cls_instance +from tempfile import mkdtemp + +from _pytest.recwarn import warns +from numpy import arange, ones, array, array_equal, finfo, iinfo, pi from numpy import int8, int16, int32, int64, uint8, uint16, uint32, uint64, \ float16, float32, float64, complex64, complex128, zeros, ndindex +from numpy.core.umath import exp +from numpy.testing import assert_equal +from pytest import raises +from json_tricks import numpy_encode +from json_tricks.np import dump, dumps, load, loads +from json_tricks.np_utils import encode_scalars_inplace +from json_tricks.utils import JsonTricksDeprecation, gzip_decompress +from .test_bare import cls_instance +from .test_class import MyTestCls DTYPES = (int8, int16, int32, int64, uint8, uint16, uint32, uint64, float16, float32, float64, complex64, complex128) @@ -34,8 +40,8 @@ def _numpy_equality(d2): assert npdata.keys() == d2.keys() - assert (npdata['vector'] == d2['vector']).all() - assert (npdata['matrix'] == d2['matrix']).all() + assert_equal(npdata['vector'], d2['vector']) + assert_equal(npdata['matrix'], d2['matrix']) assert npdata['vector'].dtype == d2['vector'].dtype assert npdata['matrix'].dtype == d2['matrix'].dtype @@ -67,6 +73,13 @@ _numpy_equality(data2) +def test_compressed_to_disk(): + arr = [array([[1.0, 2.0], [3.0, 4.0]])] + path = join(mkdtemp(), 'pytest-np.json.gz') + with open(path, 'wb+') as fh: + dump(arr, fh, compression=True, properties={'ndarray_compact': True}) + + mixed_data = { 'vec': array(range(10)), 'inst': MyTestCls( @@ -186,3 +199,96 @@ 'shape of array changed for nested ndarrays:\n{}'.format(dumps(before, indent=2)) assert before.dtype == before.dtype assert array_equal(before[0, 0], after[0, 0]) + + +def test_dtype_object(): + # Based on issue 64 + arr = array(['a', 'b', 'c'], dtype=object) + json = dumps(arr) + back = loads(json) + assert array_equal(back, arr) + + +def test_compact_mode_unspecified(): + # Other tests may have raised deprecation warning, so reset the cache here + numpy_encode._warned_compact = False + data = [array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]]), array([pi, exp(1)])] + with warns(JsonTricksDeprecation): + gz_json_1 = dumps(data, compression=True) + # noinspection PyTypeChecker + with warns(None) as captured: + gz_json_2 = dumps(data, compression=True) + assert len(captured) == 0 + assert gz_json_1 == gz_json_2 + json = gzip_decompress(gz_json_1).decode('ascii') + assert json == '[{"__ndarray__": [[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]], "dtype": "float64", "shape": [2, 4], "Corder": true}, ' \ + '{"__ndarray__": [3.141592653589793, 2.718281828459045], "dtype": "float64", "shape": [2]}]' + + +def test_compact(): + data = [array(list(2**(x + 0.5) for x in range(-30, +31)))] + json = dumps(data, compression=True, properties={'ndarray_compact': True}) + back = loads(json) + assert_equal(data, back) + + +def test_encode_disable_compact(): + data = [array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]]), array([pi, exp(1)])] + gz_json = dumps(data, compression=True, properties={'ndarray_compact': False}) + json = gzip_decompress(gz_json).decode('ascii') + assert json == '[{"__ndarray__": [[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]], "dtype": "float64", "shape": [2, 4], "Corder": true}, ' \ + '{"__ndarray__": [3.141592653589793, 2.718281828459045], "dtype": "float64", "shape": [2]}]' + + +def test_encode_enable_compact(): + data = [array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]]), array([pi, exp(1)])] + gz_json = dumps(data, compression=True, properties={'ndarray_compact': True}) + json = gzip_decompress(gz_json).decode('ascii') + assert json == '[{"__ndarray__": "b64:AAAAAAAA8D8AAAAAAAAAQAAAAAAAAAhAAAAAAAAAEEAAAAAAAAA' \ + 'UQAAAAAAAABhAAAAAAAAAHEAAAAAAAAAgQA==", "dtype": "float64", "shape": [2, 4], "Corder": ' \ + 'true}, {"__ndarray__": "b64:GC1EVPshCUBpVxSLCr8FQA==", "dtype": "float64", "shape": [2]}]' + + +def test_encode_compact_cutoff(): + data = [array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]]), array([pi, exp(1)])] + gz_json = dumps(data, compression=True, properties={'ndarray_compact': 5}) + json = gzip_decompress(gz_json).decode('ascii') + assert json == '[{"__ndarray__": "b64:AAAAAAAA8D8AAAAAAAAAQAAAAAAAAAhAAAAAAAAAEEAAAAAAAAA' \ + 'UQAAAAAAAABhAAAAAAAAAHEAAAAAAAAAgQA==", "dtype": "float64", "shape": [2, 4], "Corder": ' \ + 'true}, {"__ndarray__": [3.141592653589793, 2.718281828459045], "dtype": "float64", "shape": [2]}]' + + +def test_encode_compact_inline_compression(): + data = [array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0], [9.0, 10.0, 11.0, 12.0], [13.0, 14.0, 15.0, 16.0]])] + json = dumps(data, compression=False, properties={'ndarray_compact': True}) + assert 'b64.gz:' in json, 'If the overall file is not compressed and there are significant savings, then do inline gzip compression.' + assert json == '[{"__ndarray__": "b64.gz:H4sIAAAAAAAC/2NgAIEP9gwQ4AChOKC0AJQWgdISUFoGSitAaSUorQKl1aC0BpTWgtI6UFoPShs4AABmfqWAgAAAAA==", "dtype": "float64", "shape": [4, 4], "Corder": true}]' + + +def test_encode_compact_no_inline_compression(): + data = [array([[1.0, 2.0], [3.0, 4.0]])] + json = dumps(data, compression=False, properties={'ndarray_compact': True}) + assert 'b64.gz:' not in json, 'If the overall file is not compressed, but there are no significant savings, then do not do inline compression.' + assert json == '[{"__ndarray__": "b64:AAAAAAAA8D8AAAAAAAAAQAAAAAAAAAhAAAAAAAAAEEA=", ' \ + '"dtype": "float64", "shape": [2, 2], "Corder": true}]' + + +def test_decode_compact_mixed_compactness(): + json = '[{"__ndarray__": "b64:AAAAAAAA8D8AAAAAAAAAQAAAAAAAAAhAAAAAAAAAEEAAAAAAAAA' \ + 'UQAAAAAAAABhAAAAAAAAAHEAAAAAAAAAgQA==", "dtype": "float64", "shape": [2, 4], "Corder": ' \ + 'true}, {"__ndarray__": [3.141592653589793, 2.718281828459045], "dtype": "float64", "shape": [2]}]' + data = loads(json) + assert_equal(data[0], array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]]), array([pi, exp(1)])) + + +def test_decode_compact_inline_compression(): + json = '[{"__ndarray__": "b64.gz:H4sIAAAAAAAC/2NgAIEP9gwQ4AChOKC0AJQWgdISUFoGSitAaSUorQKl1aC0BpTWgtI6UFoPShs4AABmfqWAgAAAAA==", "dtype": "float64", "shape": [4, 4], "Corder": true}]' + data = loads(json) + assert_equal(data[0], array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0], [9.0, 10.0, 11.0, 12.0], [13.0, 14.0, 15.0, 16.0]])) + + +def test_decode_compact_no_inline_compression(): + json = '[{"__ndarray__": "b64:AAAAAAAA8D8AAAAAAAAAQAAAAAAAAAhAAAAAAAAAEEA=", ' \ + '"dtype": "float64", "shape": [2, 2], "Corder": true}]' + data = loads(json) + assert_equal(data[0], array([[1.0, 2.0], [3.0, 4.0]])) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pyjson_tricks-3.13.2/tests/test_pandas.py new/pyjson_tricks-3.15.0/tests/test_pandas.py --- old/pyjson_tricks-3.13.2/tests/test_pandas.py 2019-08-10 18:48:04.000000000 +0200 +++ new/pyjson_tricks-3.15.0/tests/test_pandas.py 2020-04-05 22:48:35.000000000 +0200 @@ -24,7 +24,7 @@ df = DataFrame(COLUMNS, columns=tuple(COLUMNS.keys())) txt = dumps(df, allow_nan=True) back = loads(txt) - assert isnan(back.ix[0, -1]) + assert isnan(back.iloc[0, -1]) assert (df.equals(back)) assert (df.dtypes == back.dtypes).all() df = DataFrame(COLUMNS, columns=tuple(COLUMNS.keys())) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pyjson_tricks-3.13.2/tests/test_utils.py new/pyjson_tricks-3.15.0/tests/test_utils.py --- old/pyjson_tricks-3.13.2/tests/test_utils.py 2019-08-10 18:48:04.000000000 +0200 +++ new/pyjson_tricks-3.15.0/tests/test_utils.py 2020-04-05 22:48:35.000000000 +0200 @@ -31,3 +31,32 @@ pass else: raise AssertionError('indexing more than nesting level should yield IndexError') + + +def base85_vsbase64_performance(): + from base64 import b85encode, standard_b64encode, urlsafe_b64encode + from random import getrandbits + test_data = bytearray(getrandbits(8) for _ in range(10_000_000)) + from timeit import default_timer + print('') + + start = default_timer() + for _ in range(20): + standard_b64encode(test_data) + end = default_timer() + print('standard_b64encode took {} s'.format(end - start)) + + start = default_timer() + for _ in range(20): + urlsafe_b64encode(test_data) + end = default_timer() + print('urlsafe_b64encode took {} s'.format(end - start)) + + start = default_timer() + for _ in range(20): + b85encode(test_data) + end = default_timer() + print('b85encode took {} s'.format(end - start)) + + # Result on local PC in 2020: base84 is 53x slower to encode + # (urlsafe also costs a bit of performance, about 2x) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pyjson_tricks-3.13.2/tox.ini new/pyjson_tricks-3.15.0/tox.ini --- old/pyjson_tricks-3.13.2/tox.ini 2019-08-10 18:48:04.000000000 +0200 +++ new/pyjson_tricks-3.15.0/tox.ini 2020-04-05 22:48:35.000000000 +0200 @@ -4,8 +4,8 @@ [tox] envlist = ; py26 not tested; also update setup.py - {py27,py34,py35,py36,py37}-{bare,pnp,tz,enum,path} - {py27,py35,py36,py37}-panda + {py27,py34,py35,py36,py37,py38}-{bare,pnp,tz,enum,path} + {py27,py35,py36,py37,py38}-panda pypy-{bare,tz,enum} [testenv]