Something like this in the docstring? "To support earlier JSON specifications and JSON exchanged within a closed ecosystem, it is possible to specify an encoding other than UTF-8."
8.1. Character Encoding

> JSON text exchanged between systems that are not part of a closed
> ecosystem MUST be encoded using UTF-8 [RFC3629].
>
> Previous specifications of JSON have not required the use of UTF-8
> when transmitting JSON text. However, the vast majority of JSON-
> based software implementations have chosen to use the UTF-8 encoding,
> to the extent that it is the only encoding that achieves
> interoperability.
>
> Implementations MUST NOT add a byte order mark (U+FEFF) to the
> beginning of a networked-transmitted JSON text. In the interests of
> interoperability, implementations that parse JSON texts MAY ignore
> the presence of a byte order mark rather than treating it as an
> error.

```python
import json
import os


def dumpf(obj, path, *, encoding="UTF-8", **kwargs):
    with open(os.fspath(path), "w", encoding=encoding) as f:
        return json.dump(obj, f, **kwargs)


def loadf(path, *, encoding="UTF-8", **kwargs):
    with open(os.fspath(path), "r", encoding=encoding) as f:
        return json.load(f, **kwargs)


import pathlib
import unittest


class TestJsonLoadfAndDumpf(unittest.TestCase):
    def setUp(self):
        self.encodings = [None, "UTF-8", "UTF-16", "UTF-32"]
        data = dict(
            obj=dict(a=dict(b=[1, 2, 3])),
            path=pathlib.Path(".") / "test_loadf_and_dumpf.json",
        )
        if os.path.isfile(data["path"]):
            os.unlink(data["path"])
        self.data = data

    def test_dumpf_and_loadf(self):
        data = self.data
        for encoding in self.encodings:
            path = f'{data["path"]}.{encoding}.json'
            dumpf_output = dumpf(data["obj"], path, encoding=encoding)
            loadf_output = loadf(path, encoding=encoding)
            assert loadf_output == data["obj"]


# $ pip install pytest-cov
# $ pytest -v example.py
# https://docs.pytest.org/en/stable/parametrize.html
# https://docs.pytest.org/en/stable/tmpdir.html
import pytest


@pytest.mark.parametrize("encoding", [None, "UTF-8", "UTF-16", "UTF-32"])
@pytest.mark.parametrize("obj", [dict(a=dict(b=[1, 2, 3]))])
def test_dumpf_and_loadf(obj, encoding, tmpdir):
    pth = pathlib.Path(tmpdir) / f"test_loadf_and_dumpf.{encoding}.json"
    dumpf_output = dumpf(obj, pth, encoding=encoding)
    loadf_output = loadf(pth, encoding=encoding)
    assert loadf_output == obj
```

For whoever creates a PR for this:

- [ ] add parameter and return type annotations
- [ ] copy docstrings from json.load/json.dump and open#encoding
- [ ] correctly support the c module implementation (this just does `import json`)?
- [ ] keep or drop the encoding tests?

On Thu, Sep 17, 2020 at 1:25 AM Christopher Barker <python...@gmail.com> wrote:

> Is that suggested code? I don't follow.
>
> But if it is, no. personally, I think ANY use of system settings is a bad
> idea [*]. But certainly no need to even think about it for JSON.
>
> -CHB
>
> * have we not learned that in the age of the internet the machine the code
> happens to be running on has nothing to do with the user of the
> applications' needs? Timezones, encodings, number formats, NOTHING.
>
>
> On Wed, Sep 16, 2020 at 8:45 PM Wes Turner <wes.tur...@gmail.com> wrote:
>
>> Is all of this locale/encoding testing necessary (or even sufficient)?
>>
>>
>> ```python
>> import json
>> import locale
>> import os
>>
>>
>> def get_default_encoding():
>>     """
>>     TODO XXX: ???
>>     """
>>     default_encoding = locale.getdefaultlocale()[1]
>>     if default_encoding.startswith("UTF-"):
>>         return default_encoding
>>     else:
>>         return "UTF-8"
>>
>>
>> def dumpf(obj, path, *args, **kwargs):
>>     with open(
>>         os.fspath(path),
>>         "w",
>>         encoding=kwargs.pop("encoding", get_default_encoding()),
>>     ) as file_:
>>         return json.dump(obj, file_, *args, **kwargs)
>>
>>
>> def loadf(path, *args, **kwargs):
>>     with open(
>>         os.fspath(path),
>>         "r",
>>         encoding=kwargs.pop("encoding", get_default_encoding()),
>>     ) as file_:
>>         return json.load(file_, *args, **kwargs)
>>
>>
>> import pathlib
>> import unittest
>>
>>
>> class TestJsonLoadfAndDumpf(unittest.TestCase):
>>     def setUp(self):
>>         self.locales = ["", "C", "en_US.UTF-8", "japanese"]
>>         self.encodings = [None, "UTF-8", "UTF-16", "UTF-32"]
>>
>>         data = dict(
>>             obj=dict(a=dict(b=[1, 2, 3])),
>>             encoding=None,
>>             path=pathlib.Path(".") / "test_loadf_and_dumpf.json",
>>         )
>>         if os.path.isfile(data["path"]):
>>             os.unlink(data["path"])
>>         self.data = data
>>
>>         self.previous_locale = locale.getlocale()
>>
>>     def tearDown(self):
>>         locale.setlocale(locale.LC_ALL, self.previous_locale)
>>
>>     def test_get_default_encoding(self):
>>         for localestr in self.locales:
>>             locale.setlocale(locale.LC_ALL, localestr)
>>             output = get_default_encoding()
>>             assert output.startswith("UTF-")
>>
>>     def test_dumpf_and_loadf(self):
>>         data = self.data
>>         for localestr in self.locales:
>>             locale.setlocale(locale.LC_ALL, localestr)
>>             for encoding in self.encodings:
>>                 dumpf_output = dumpf(
>>                     data["obj"], data["path"], encoding=encoding
>>                 )
>>                 loadf_output = loadf(data["path"], encoding=encoding)
>>                 assert loadf_output == data["obj"]
>> ```
>>
>> On Wed, Sep 16, 2020 at 8:30 PM Christopher Barker <python...@gmail.com>
>> wrote:
>>
>>> On Wed, Sep 16, 2020 at 2:53 PM Wes Turner <wes.tur...@gmail.com> wrote:
>>>
>>>> So I was not correct: dump does not default to UTF-8 (and does not
>>>> accept an encoding= parameter)
>>>>
>>>>
>>>>> I think dumpf() should use UTF-8, and that's it. If anyone really
>>>>> wants something else, they can get it by providing an open text file
>>>>> object.
>>>>>
>>>>
>>>> Why would we impose UTF-8 when the spec says UTF-8, UTF-16, or UTF-32?
>>>>
>>>
>>> The idea was that the encoding was one of the motivators to doing this
>>> in the first place. But I suppose as long as utf-8 is the default, and only
>>> the three "official" ones are allowed, then yeah, we could add an encoding
>>> keyword argument.
>>>
>>> -CHB
>>>
>>>
>>> --
>>> Christopher Barker, PhD
>>>
>>> Python Language Consulting
>>> - Teaching
>>> - Scientific Software Development
>>> - Desktop GUI and Web Development
>>> - wxPython, numpy, scipy, Cython
>>>
>>
>
> --
> Christopher Barker, PhD
>
> Python Language Consulting
> - Teaching
> - Scientific Software Development
> - Desktop GUI and Web Development
> - wxPython, numpy, scipy, Cython
>
_______________________________________________
Python-ideas mailing list -- python-ideas@python.org
To unsubscribe send an email to python-ideas-le...@python.org
https://mail.python.org/mailman3/lists/python-ideas.python.org/
Message archived at https://mail.python.org/archives/list/python-ideas@python.org/message/I7FFASD4TSM7JXY3RXD3GOB4WCYOZE4N/
Code of Conduct: http://python.org/psf/codeofconduct/