This is an automated email from the ASF dual-hosted git repository.

uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new d4755e4  ARROW-2677: [Python] Expose Parquet ZSTD compression
d4755e4 is described below

commit d4755e46494b7280b8c9e6192e03e4d3a5dd5342
Author: Korn, Uwe <[email protected]>
AuthorDate: Thu Jun 14 18:24:10 2018 +0200

    ARROW-2677: [Python] Expose Parquet ZSTD compression
    
    Author: Korn, Uwe <[email protected]>
    
    Closes #2120 from xhochy/ARROW-2677 and squashes the following commits:
    
    585a4bf4 <Korn, Uwe> flake8
    c5407965 <Korn, Uwe> Mention possible compression options in docstring
    68f83bb8 <Korn, Uwe> ARROW-2677:  Expose Parquet ZSTD compression
---
 python/pyarrow/_parquet.pxd          | 1 +
 python/pyarrow/_parquet.pyx          | 5 ++++-
 python/pyarrow/parquet.py            | 1 +
 python/pyarrow/tests/test_parquet.py | 2 +-
 4 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/python/pyarrow/_parquet.pxd b/python/pyarrow/_parquet.pxd
index 3ecd5be..ca20ce2 100644
--- a/python/pyarrow/_parquet.pxd
+++ b/python/pyarrow/_parquet.pxd
@@ -102,6 +102,7 @@ cdef extern from "parquet/api/schema.h" namespace "parquet" nogil:
         ParquetCompression_LZO" parquet::Compression::LZO"
         ParquetCompression_BROTLI" parquet::Compression::BROTLI"
         ParquetCompression_LZ4" parquet::Compression::LZ4"
+        ParquetCompression_ZSTD" parquet::Compression::ZSTD"
 
     enum ParquetVersion" parquet::ParquetVersion::type":
         ParquetVersion_V1" parquet::ParquetVersion::PARQUET_1_0"
diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx
index a0cb857..e40a57c 100644
--- a/python/pyarrow/_parquet.pyx
+++ b/python/pyarrow/_parquet.pyx
@@ -809,7 +809,8 @@ cdef class ParquetReader:
         return array
 
 cdef int check_compression_name(name) except -1:
-    if name.upper() not in ['NONE', 'SNAPPY', 'GZIP', 'LZO', 'BROTLI', 'LZ4']:
+    if name.upper() not in ['NONE', 'SNAPPY', 'GZIP', 'LZO', 'BROTLI', 'LZ4',
+                            'ZSTD']:
         raise ArrowException("Unsupported compression: " + name)
     return 0
 
@@ -826,6 +827,8 @@ cdef ParquetCompression compression_from_name(str name):
         return ParquetCompression_BROTLI
     elif name == "LZ4":
         return ParquetCompression_LZ4
+    elif name == "ZSTD":
+        return ParquetCompression_ZSTD
     else:
         return ParquetCompression_UNCOMPRESSED
 
diff --git a/python/pyarrow/parquet.py b/python/pyarrow/parquet.py
index d7e8b4d..ad1fa54 100644
--- a/python/pyarrow/parquet.py
+++ b/python/pyarrow/parquet.py
@@ -251,6 +251,7 @@ coerce_timestamps : string, default None
     Valid values: {None, 'ms', 'us'}
 compression : str or dict
     Specify the compression codec, either on a general basis or per-column.
+    Valid values: {'NONE', 'SNAPPY', 'GZIP', 'LZO', 'BROTLI', 'LZ4', 'ZSTD'}
 flavor : {'spark'}, default None
     Sanitize schema or set other compatibility options for compatibility"""
 
diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py
index eb405af..51d3b01 100644
--- a/python/pyarrow/tests/test_parquet.py
+++ b/python/pyarrow/tests/test_parquet.py
@@ -501,7 +501,7 @@ def test_pandas_parquet_configuration_options(tmpdir):
         df_read = table_read.to_pandas()
         tm.assert_frame_equal(df, df_read)
 
-    for compression in ['NONE', 'SNAPPY', 'GZIP', 'LZ4']:
+    for compression in ['NONE', 'SNAPPY', 'GZIP', 'LZ4', 'ZSTD']:
         _write_table(arrow_table, filename.strpath,
                      version="2.0",
                      compression=compression)

-- 
To stop receiving notification emails like this one, please contact
[email protected].

Reply via email to