This is an automated email from the ASF dual-hosted git repository.
uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new d4755e4 ARROW-2677: [Python] Expose Parquet ZSTD compression
d4755e4 is described below
commit d4755e46494b7280b8c9e6192e03e4d3a5dd5342
Author: Korn, Uwe <[email protected]>
AuthorDate: Thu Jun 14 18:24:10 2018 +0200
ARROW-2677: [Python] Expose Parquet ZSTD compression
Author: Korn, Uwe <[email protected]>
Closes #2120 from xhochy/ARROW-2677 and squashes the following commits:
585a4bf4 <Korn, Uwe> flake8
c5407965 <Korn, Uwe> Mention possible compression options in docstring
68f83bb8 <Korn, Uwe> ARROW-2677: Expose Parquet ZSTD compression
---
python/pyarrow/_parquet.pxd | 1 +
python/pyarrow/_parquet.pyx | 5 ++++-
python/pyarrow/parquet.py | 1 +
python/pyarrow/tests/test_parquet.py | 2 +-
4 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/python/pyarrow/_parquet.pxd b/python/pyarrow/_parquet.pxd
index 3ecd5be..ca20ce2 100644
--- a/python/pyarrow/_parquet.pxd
+++ b/python/pyarrow/_parquet.pxd
@@ -102,6 +102,7 @@ cdef extern from "parquet/api/schema.h" namespace "parquet" nogil:
ParquetCompression_LZO" parquet::Compression::LZO"
ParquetCompression_BROTLI" parquet::Compression::BROTLI"
ParquetCompression_LZ4" parquet::Compression::LZ4"
+ ParquetCompression_ZSTD" parquet::Compression::ZSTD"
enum ParquetVersion" parquet::ParquetVersion::type":
ParquetVersion_V1" parquet::ParquetVersion::PARQUET_1_0"
diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx
index a0cb857..e40a57c 100644
--- a/python/pyarrow/_parquet.pyx
+++ b/python/pyarrow/_parquet.pyx
@@ -809,7 +809,8 @@ cdef class ParquetReader:
return array
cdef int check_compression_name(name) except -1:
- if name.upper() not in ['NONE', 'SNAPPY', 'GZIP', 'LZO', 'BROTLI', 'LZ4']:
+ if name.upper() not in ['NONE', 'SNAPPY', 'GZIP', 'LZO', 'BROTLI', 'LZ4',
+ 'ZSTD']:
raise ArrowException("Unsupported compression: " + name)
return 0
@@ -826,6 +827,8 @@ cdef ParquetCompression compression_from_name(str name):
return ParquetCompression_BROTLI
elif name == "LZ4":
return ParquetCompression_LZ4
+ elif name == "ZSTD":
+ return ParquetCompression_ZSTD
else:
return ParquetCompression_UNCOMPRESSED
diff --git a/python/pyarrow/parquet.py b/python/pyarrow/parquet.py
index d7e8b4d..ad1fa54 100644
--- a/python/pyarrow/parquet.py
+++ b/python/pyarrow/parquet.py
@@ -251,6 +251,7 @@ coerce_timestamps : string, default None
Valid values: {None, 'ms', 'us'}
compression : str or dict
Specify the compression codec, either on a general basis or per-column.
+ Valid values: {'NONE', 'SNAPPY', 'GZIP', 'LZO', 'BROTLI', 'LZ4', 'ZSTD'}
flavor : {'spark'}, default None
Sanitize schema or set other compatibility options for compatibility"""
diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py
index eb405af..51d3b01 100644
--- a/python/pyarrow/tests/test_parquet.py
+++ b/python/pyarrow/tests/test_parquet.py
@@ -501,7 +501,7 @@ def test_pandas_parquet_configuration_options(tmpdir):
df_read = table_read.to_pandas()
tm.assert_frame_equal(df, df_read)
- for compression in ['NONE', 'SNAPPY', 'GZIP', 'LZ4']:
+ for compression in ['NONE', 'SNAPPY', 'GZIP', 'LZ4', 'ZSTD']:
_write_table(arrow_table, filename.strpath,
version="2.0",
compression=compression)
--
To stop receiving notification emails like this one, please contact
[email protected].