Repository: arrow
Updated Branches:
  refs/heads/master b7639c153 -> ff6c6e0f9
ARROW-1276: enable parquet serialization of empty DataFrames

Fixes ARROW-1276 and fixes Python dev. documentation (encountered during the preparation of this PR).

Author: Marco Neumann <[email protected]>

Closes #906 from crepererum/ARROW-1276 and squashes the following commits:

1c1c92cd [Marco Neumann] ARROW-1276: enable parquet serialization of empty DataFrames
1d9cc411 [Marco Neumann] add missing conda packages to python dev. doc

Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/ff6c6e0f
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/ff6c6e0f
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/ff6c6e0f

Branch: refs/heads/master
Commit: ff6c6e0f99a26f0509d1368e3aa8aefd201a5e28
Parents: b7639c1
Author: Marco Neumann <[email protected]>
Authored: Fri Jul 28 10:29:42 2017 -0400
Committer: Wes McKinney <[email protected]>
Committed: Fri Jul 28 10:29:42 2017 -0400

----------------------------------------------------------------------
 python/doc/source/development.rst    |  2 +-
 python/pyarrow/_parquet.pyx          |  5 ++++-
 python/pyarrow/tests/test_parquet.py | 12 ++++++++++++
 3 files changed, 17 insertions(+), 2 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/arrow/blob/ff6c6e0f/python/doc/source/development.rst
----------------------------------------------------------------------
diff --git a/python/doc/source/development.rst b/python/doc/source/development.rst
index b5aba6c..55b3efd 100644
--- a/python/doc/source/development.rst
+++ b/python/doc/source/development.rst
@@ -84,7 +84,7 @@ from conda-forge:
    conda create -y -q -n pyarrow-dev \
          python=3.6 numpy six setuptools cython pandas pytest \
          cmake flatbuffers rapidjson boost-cpp thrift-cpp snappy zlib \
-         brotli jemalloc -c conda-forge
+         brotli jemalloc lz4-c zstd -c conda-forge
    source activate pyarrow-dev
 
 
http://git-wip-us.apache.org/repos/asf/arrow/blob/ff6c6e0f/python/pyarrow/_parquet.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx
index 20f189a..aef6618 100644
--- a/python/pyarrow/_parquet.pyx
+++ b/python/pyarrow/_parquet.pyx
@@ -629,7 +629,10 @@ cdef class ParquetWriter:
         cdef CTable* ctable = table.table
 
         if row_group_size is None or row_group_size == -1:
-            row_group_size = ctable.num_rows()
+            if ctable.num_rows() > 0:
+                row_group_size = ctable.num_rows()
+            else:
+                row_group_size = 1
         elif row_group_size == 0:
             raise ValueError('Row group size cannot be 0')
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/ff6c6e0f/python/pyarrow/tests/test_parquet.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py
index 6763fb3..7443df8 100644
--- a/python/pyarrow/tests/test_parquet.py
+++ b/python/pyarrow/tests/test_parquet.py
@@ -266,6 +266,18 @@ def test_read_pandas_column_subset(tmpdir):
 
 
 @parquet
+def test_pandas_parquet_empty_roundtrip(tmpdir):
+    df = _test_dataframe(0)
+    arrow_table = pa.Table.from_pandas(df)
+    imos = pa.BufferOutputStream()
+    _write_table(arrow_table, imos, version="2.0")
+    buf = imos.get_result()
+    reader = pa.BufferReader(buf)
+    df_read = _read_table(reader).to_pandas()
+    tm.assert_frame_equal(df, df_read)
+
+
+@parquet
 def test_pandas_parquet_pyfile_roundtrip(tmpdir):
     filename = tmpdir.join('pandas_pyfile_roundtrip.parquet').strpath
     size = 5
