blackmwk commented on code in PR #2288:
URL: https://github.com/apache/iceberg-rust/pull/2288#discussion_r3007318913


##########
crates/iceberg/src/compression.rs:
##########
@@ -17,28 +17,74 @@
 
 //! Compression codec support for data compression and decompression.
 
+use std::fmt;
 use std::io::{Read, Write};
 
 use flate2::Compression;
 use flate2::read::GzDecoder;
 use flate2::write::GzEncoder;
-use serde::{Deserialize, Serialize};
+use serde::{Deserialize, Deserializer, Serialize, Serializer};
 
 use crate::{Error, ErrorKind, Result};
 
 /// Data compression formats
-#[derive(Debug, PartialEq, Eq, Clone, Copy, Default, Serialize, Deserialize)]
-#[serde(rename_all = "lowercase")]
+#[derive(Debug, PartialEq, Eq, Clone, Copy, Default)]
 pub enum CompressionCodec {
     #[default]
     /// No compression
     None,
     /// LZ4 single compression frame with content size present
     Lz4,
-    /// Zstandard single compression frame with content size present
-    Zstd,
-    /// Gzip compression
-    Gzip,
+    /// Zstandard single compression frame with content size present. Optional 
level 0–22,
+    /// where 0 means default compression level (not no compression, unlike 
Gzip).
+    Zstd(Option<u8>),

Review Comment:
   If there is no `no compression` option, why does this have to be an `Option`?



##########
crates/iceberg/src/compression.rs:
##########
@@ -66,19 +116,25 @@ impl CompressionCodec {
                 ErrorKind::FeatureUnsupported,
                 "LZ4 compression is not supported currently",
             )),
-            CompressionCodec::Zstd => {
+            CompressionCodec::Zstd(level) => {
                 let writer = Vec::<u8>::new();
-                let mut encoder = zstd::stream::Encoder::new(writer, 3)?;
+                let mut encoder = zstd::stream::Encoder::new(writer, 
level.unwrap_or(3) as i32)?;
                 encoder.include_checksum(true)?;
                 encoder.set_pledged_src_size(Some(bytes.len().try_into()?))?;
                 std::io::copy(&mut &bytes[..], &mut encoder)?;
                 Ok(encoder.finish()?)
             }
-            CompressionCodec::Gzip => {
-                let mut encoder = GzEncoder::new(Vec::new(), 
Compression::default());
+            CompressionCodec::Gzip(level) => {
+                let compression =
+                    level.map_or_else(Compression::default, |l| 
Compression::new(l.min(9) as u32));

Review Comment:
   Ditto.



##########
crates/iceberg/src/spec/table_metadata.rs:
##########
@@ -3618,7 +3618,7 @@ mod tests {
         let original_metadata: TableMetadata = 
get_test_table_metadata("TableMetadataV2Valid.json");
         let json = serde_json::to_string(&original_metadata).unwrap();
 
-        let compressed = CompressionCodec::Gzip
+        let compressed = CompressionCodec::Gzip(None)

Review Comment:
   I think this is incorrect? It should be 9?



##########
crates/iceberg/src/compression.rs:
##########
@@ -66,19 +116,25 @@ impl CompressionCodec {
                 ErrorKind::FeatureUnsupported,
                 "LZ4 compression is not supported currently",
             )),
-            CompressionCodec::Zstd => {
+            CompressionCodec::Zstd(level) => {
                 let writer = Vec::<u8>::new();
-                let mut encoder = zstd::stream::Encoder::new(writer, 3)?;
+                let mut encoder = zstd::stream::Encoder::new(writer, 
level.unwrap_or(3) as i32)?;

Review Comment:
   Not related to this PR, but I don't think we should use a magic number here. 
We should define a named constant instead.



##########
crates/iceberg/src/puffin/writer.rs:
##########
@@ -251,7 +251,8 @@ mod tests {
     async fn test_write_zstd_compressed_metric_data() {
         let temp_dir = TempDir::new().unwrap();
         let blobs = vec![blob_0(), blob_1()];
-        let blobs_with_compression = blobs_with_compression(blobs.clone(), 
CompressionCodec::Zstd);
+        let blobs_with_compression =
+            blobs_with_compression(blobs.clone(), 
CompressionCodec::Zstd(None));

Review Comment:
   This is incorrect, similar to the one with gzip.



##########
crates/iceberg/src/spec/table_metadata.rs:
##########
@@ -3618,7 +3618,7 @@ mod tests {
         let original_metadata: TableMetadata = 
get_test_table_metadata("TableMetadataV2Valid.json");
         let json = serde_json::to_string(&original_metadata).unwrap();
 
-        let compressed = CompressionCodec::Gzip
+        let compressed = CompressionCodec::Gzip(None)

Review Comment:
   If 9 is the default compression level, you could create a method 
`CompressionCodec::gzip_default` for it.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to