https://github.com/python/cpython/commit/9068c8c614b869d8183b74d2a6b0c13370572271 commit: 9068c8c614b869d8183b74d2a6b0c13370572271 branch: 3.11 author: Serhiy Storchaka <[email protected]> committer: serhiy-storchaka <[email protected]> date: 2024-02-20T19:09:10Z summary:
[3.11] gh-115712: Support CSV dialects with delimiter=' ' and skipinitialspace=True (GH-115721) (GH-115729) (GH-115738) (cherry picked from commit 5ea86f496a4cfb34abbe2b7bb6fa7f25eeeb6294) Co-authored-by: Serhiy Storchaka <[email protected]> csv.writer() now quotes empty fields if delimiter is a space and skipinitialspace is true and raises exception if quoting is not possible. (cherry picked from commit 937d2821501de7adaa5ed8491eef4b7f3dc0940a) files: A Misc/NEWS.d/next/Library/2024-02-20-16-42-54.gh-issue-115712.EXVMXw.rst M Lib/test/test_csv.py M Modules/_csv.c diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py index 1bf487bcf9bd70..e56b0022e091ea 100644 --- a/Lib/test/test_csv.py +++ b/Lib/test/test_csv.py @@ -46,6 +46,20 @@ def _test_arg_valid(self, ctor, arg): quoting=csv.QUOTE_ALL, quotechar=None) self.assertRaises(TypeError, ctor, arg, quoting=csv.QUOTE_NONE, quotechar='') + ctor(arg, delimiter=' ') + ctor(arg, escapechar=' ') + ctor(arg, quotechar=' ') + ctor(arg, delimiter='\t', skipinitialspace=True) + ctor(arg, escapechar='\t', skipinitialspace=True) + ctor(arg, quotechar='\t', skipinitialspace=True) + ctor(arg, delimiter=' ', skipinitialspace=True) + ctor(arg, delimiter='^') + ctor(arg, escapechar='^') + ctor(arg, quotechar='^') + ctor(arg, delimiter='\x85') + ctor(arg, escapechar='\x85') + ctor(arg, quotechar='\x85') + ctor(arg, lineterminator='\x85') def test_reader_arg_valid(self): self._test_arg_valid(csv.reader, []) @@ -152,9 +166,6 @@ def _write_error_test(self, exc, fields, **kwargs): def test_write_arg_valid(self): self._write_error_test(csv.Error, None) - self._write_test((), '') - self._write_test([None], '""') - self._write_error_test(csv.Error, [None], quoting = csv.QUOTE_NONE) # Check that exceptions are passed up the chain self._write_error_test(OSError, BadIterable()) class BadList: @@ -271,6 +282,38 @@ def test_writerows_with_none(self): fileobj.seek(0) self.assertEqual(fileobj.read(), 'a\r\n""\r\n') + def test_write_empty_fields(self): + self._write_test((), '') + self._write_test([''], '""') + self._write_error_test(csv.Error, [''], quoting=csv.QUOTE_NONE) + self._write_test([None], '""') + self._write_error_test(csv.Error, [None], quoting=csv.QUOTE_NONE) + self._write_test(['', ''], ',') + self._write_test([None, None], ',') + + def test_write_empty_fields_space_delimiter(self): + self._write_test([''], '""', delimiter=' ', skipinitialspace=False) + self._write_test([''], '""', delimiter=' ', skipinitialspace=True) + self._write_test([None], '""', delimiter=' ', skipinitialspace=False) + self._write_test([None], '""', delimiter=' ', skipinitialspace=True) + + self._write_test(['', ''], ' ', delimiter=' ', skipinitialspace=False) + self._write_test(['', ''], '"" ""', delimiter=' ', skipinitialspace=True) + self._write_test([None, None], ' ', delimiter=' ', skipinitialspace=False) + self._write_test([None, None], '"" ""', delimiter=' ', skipinitialspace=True) + + self._write_test(['', ''], ' ', delimiter=' ', skipinitialspace=False, + quoting=csv.QUOTE_NONE) + self._write_error_test(csv.Error, ['', ''], + delimiter=' ', skipinitialspace=True, + quoting=csv.QUOTE_NONE) + + self._write_test([None, None], ' ', delimiter=' ', skipinitialspace=False, + quoting=csv.QUOTE_NONE) + self._write_error_test(csv.Error, [None, None], + delimiter=' ', skipinitialspace=True, + quoting=csv.QUOTE_NONE) + def test_writerows_errors(self): with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj: writer = csv.writer(fileobj) @@ -372,6 +415,14 @@ def test_read_skipinitialspace(self): [['no space', 'space', 'spaces', '\ttab']], skipinitialspace=True) + def test_read_space_delimiter(self): + self._read_test(['a b', ' a ', ' ', ''], + [['a', '', '', 'b'], ['', '', 'a', '', ''], ['', '', ''], []], + delimiter=' ', skipinitialspace=False) + self._read_test(['a b', ' a ', ' ', ''], + [['a', 'b'], ['a', ''], [''], []], + delimiter=' ', skipinitialspace=True) + def test_read_bigfield(self): # This exercises the buffer realloc functionality and field size # limits. @@ -498,10 +549,10 @@ class space(csv.excel): escapechar = "\\" with TemporaryFile("w+", encoding="utf-8") as fileobj: - fileobj.write("abc def\nc1ccccc1 benzene\n") + fileobj.write("abc def\nc1ccccc1 benzene\n") fileobj.seek(0) reader = csv.reader(fileobj, dialect=space()) - self.assertEqual(next(reader), ["abc", "def"]) + self.assertEqual(next(reader), ["abc", "", "", "def"]) self.assertEqual(next(reader), ["c1ccccc1", "benzene"]) def compare_dialect_123(self, expected, *writeargs, **kwwriteargs): diff --git a/Misc/NEWS.d/next/Library/2024-02-20-16-42-54.gh-issue-115712.EXVMXw.rst b/Misc/NEWS.d/next/Library/2024-02-20-16-42-54.gh-issue-115712.EXVMXw.rst new file mode 100644 index 00000000000000..70243dc6480b7e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-02-20-16-42-54.gh-issue-115712.EXVMXw.rst @@ -0,0 +1,3 @@ +:func:`csv.writer()` now quotes empty fields if delimiter is a +space and skipinitialspace is true and raises exception if quoting is not +possible. diff --git a/Modules/_csv.c b/Modules/_csv.c index 407d6f03540d9c..101278315643aa 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -1180,6 +1180,7 @@ join_check_rec_size(WriterObj *self, Py_ssize_t rec_len) static int join_append(WriterObj *self, PyObject *field, int quoted) { + DialectObj *dialect = self->dialect; unsigned int field_kind = -1; const void *field_data = NULL; Py_ssize_t field_len = 0; @@ -1192,6 +1193,15 @@ join_append(WriterObj *self, PyObject *field, int quoted) field_data = PyUnicode_DATA(field); field_len = PyUnicode_GET_LENGTH(field); } + if (!field_len && dialect->delimiter == ' ' && dialect->skipinitialspace) { + if (dialect->quoting == QUOTE_NONE) { + PyErr_Format(self->error_obj, + "empty field must be quoted if delimiter is a space " + "and skipinitialspace is true"); + return 0; + } + quoted = 1; + } rec_len = join_append_data(self, field_kind, field_data, field_len, "ed, 0); if (rec_len < 0) _______________________________________________ Python-checkins mailing list -- [email protected] To unsubscribe send an email to [email protected] https://mail.python.org/mailman3/lists/python-checkins.python.org/ Member address: [email protected]
