wgtmac commented on code in PR #3610:
URL: https://github.com/apache/avro/pull/3610#discussion_r2657570752


##########
lang/c++/impl/DataFile.cc:
##########
@@ -64,36 +64,105 @@ const size_t maxSyncInterval = 1u << 30;
 // Recommended by https://www.zlib.net/zlib_how.html
 const size_t zlibBufGrowSize = 128 * 1024;
 
+std::string codecToString(Codec codec) {
+    switch (codec) {
+        case NULL_CODEC: return "null";
+        case DEFLATE_CODEC: return "deflate";
+        case SNAPPY_CODEC: return "snappy";
+        case ZSTD_CODEC: return "zstandard";
+        default: return "unknown";
+    }
+}
+
+void validateCodec(Codec codec, std::optional<int> level) {
+    if (!isCodecAvailable(codec)) {
+        throw Exception("Codec {} is not available.", codecToString(codec));
+    }
+
+    if (!level.has_value()) {
+        return;
+    }
+
+    int levelValue = level.value();
+    switch (codec) {
+        case NULL_CODEC:
+        case SNAPPY_CODEC:
+            // These codecs don't support compression levels, ignore
+            break;
+        case DEFLATE_CODEC:
+            if (levelValue < 0 || levelValue > 9) {
+                throw Exception("Invalid compression level {} for deflate 
codec. "
+                                "Valid range is 0-9.",
+                                levelValue);
+            }
+            break;
+        case ZSTD_CODEC:
+            if (levelValue < 1 || levelValue > 22) {
+                throw Exception("Invalid compression level {} for zstandard 
codec. "
+                                "Valid range is 1-22.",
+                                levelValue);
+            }
+            break;
+        default:
+            throw Exception("Unknown codec: {}", static_cast<int>(codec));
+    }
+}
+
 } // namespace
 
+bool isCodecAvailable(Codec codec) {
+    switch (codec) {
+        case NULL_CODEC:
+        case DEFLATE_CODEC:
+            return true;
+        case SNAPPY_CODEC:
+#ifdef SNAPPY_CODEC_AVAILABLE
+            return true;
+#else
+            return false;
+#endif
+        case ZSTD_CODEC:
+#ifdef ZSTD_CODEC_AVAILABLE
+            return true;
+#else
+            return false;
+#endif
+        default:
+            return false;
+    }
+}
+
 DataFileWriterBase::DataFileWriterBase(const char *filename, const ValidSchema 
&schema, size_t syncInterval,
-                                       Codec codec, const Metadata &metadata) 
: filename_(filename),
-                                                                               
 schema_(schema),
-                                                                               
 encoderPtr_(binaryEncoder()),
-                                                                               
 syncInterval_(syncInterval),
-                                                                               
 codec_(codec),
-                                                                               
 stream_(fileOutputStream(filename)),
-                                                                               
 buffer_(memoryOutputStream()),
-                                                                               
 sync_(makeSync()),
-                                                                               
 objectCount_(0),
-                                                                               
 metadata_(metadata),
-                                                                               
 lastSync_(0) {
+                                       Codec codec, const Metadata &metadata,
+                                       std::optional<int> compressionLevel) : 
filename_(filename),
+                                                                              
schema_(schema),
+                                                                              
encoderPtr_(binaryEncoder()),
+                                                                              
syncInterval_(syncInterval),
+                                                                              
codec_(codec),
+                                                                              
compressionLevel_(compressionLevel),
+                                                                              
stream_(fileOutputStream(filename)),

Review Comment:
   I agree that this is an issue. Another constructor 
`DataFileWriter(std::unique_ptr<OutputStream> outputStream, ...)` also have the 
same issue. However, this is a separate issue and fixing it may complicate this 
PR.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to