martin-g commented on code in PR #3610: URL: https://github.com/apache/avro/pull/3610#discussion_r2657057649
########## lang/c++/impl/DataFile.cc: ########## Review Comment: ```suggestion #include <optional> #include <random> ``` ########## lang/c++/impl/DataFile.cc: ########## @@ -64,36 +64,105 @@ const size_t maxSyncInterval = 1u << 30; // Recommended by https://www.zlib.net/zlib_how.html const size_t zlibBufGrowSize = 128 * 1024; +std::string codecToString(Codec codec) { + switch (codec) { + case NULL_CODEC: return "null"; + case DEFLATE_CODEC: return "deflate"; + case SNAPPY_CODEC: return "snappy"; + case ZSTD_CODEC: return "zstandard"; + default: return "unknown"; + } +} + +void validateCodec(Codec codec, std::optional<int> level) { + if (!isCodecAvailable(codec)) { + throw Exception("Codec {} is not available.", codecToString(codec)); + } + + if (!level.has_value()) { + return; + } + + int levelValue = level.value(); + switch (codec) { + case NULL_CODEC: + case SNAPPY_CODEC: + // These codecs don't support compression levels, ignore + break; + case DEFLATE_CODEC: + if (levelValue < 0 || levelValue > 9) { + throw Exception("Invalid compression level {} for deflate codec. " + "Valid range is 0-9.", + levelValue); + } + break; + case ZSTD_CODEC: + if (levelValue < 1 || levelValue > 22) { + throw Exception("Invalid compression level {} for zstandard codec. " + "Valid range is 1-22.", + levelValue); + } + break; + default: + throw Exception("Unknown codec: {}", static_cast<int>(codec)); + } +} + } // namespace +bool isCodecAvailable(Codec codec) { + switch (codec) { + case NULL_CODEC: + case DEFLATE_CODEC: + return true; + case SNAPPY_CODEC: +#ifdef SNAPPY_CODEC_AVAILABLE + return true; +#else + return false; +#endif + case ZSTD_CODEC: +#ifdef ZSTD_CODEC_AVAILABLE + return true; +#else + return false; +#endif + default: + return false; + } +} + DataFileWriterBase::DataFileWriterBase(const char *filename, const ValidSchema &schema, size_t syncInterval, - Codec codec, const Metadata &metadata) : filename_(filename), - schema_(schema), - encoderPtr_(binaryEncoder()), - syncInterval_(syncInterval), - codec_(codec), - stream_(fileOutputStream(filename)), - buffer_(memoryOutputStream()), - sync_(makeSync()), - objectCount_(0), - metadata_(metadata), - lastSync_(0) { + Codec codec, const Metadata &metadata, + std::optional<int> compressionLevel) : filename_(filename), + schema_(schema), + encoderPtr_(binaryEncoder()), + syncInterval_(syncInterval), + codec_(codec), + compressionLevel_(compressionLevel), + stream_(fileOutputStream(filename)), Review Comment: nit: This seems to be an old issue: `fileOutputStream(filename)` creates/truncates the output file before running the validation checks in `init()`, i.e. an invalid codec/compression_level/syncInterval will lead to an exception but the output file will be already created. ########## lang/c++/impl/DataFile.cc: ########## @@ -103,20 +172,17 @@ void DataFileWriterBase::init(const ValidSchema &schema, size_t syncInterval, co "Invalid sync interval: {}. Should be between {} and {}", syncInterval, minSyncInterval, maxSyncInterval); } - setMetadata(AVRO_CODEC_KEY, AVRO_NULL_CODEC); + validateCodec(codec_, compressionLevel_); + setMetadata(AVRO_CODEC_KEY, AVRO_NULL_CODEC); if (codec_ == NULL_CODEC) { setMetadata(AVRO_CODEC_KEY, AVRO_NULL_CODEC); } else if (codec_ == DEFLATE_CODEC) { setMetadata(AVRO_CODEC_KEY, AVRO_DEFLATE_CODEC); -#ifdef SNAPPY_CODEC_AVAILABLE Review Comment: The constant `AVRO_SNAPPY_CODEC` is declared only if `SNAPPY_CODEC_AVAILABLE` is defined. ########## lang/c++/impl/DataFile.cc: ########## @@ -103,20 +172,17 @@ void DataFileWriterBase::init(const ValidSchema &schema, size_t syncInterval, co "Invalid sync interval: {}. Should be between {} and {}", syncInterval, minSyncInterval, maxSyncInterval); } - setMetadata(AVRO_CODEC_KEY, AVRO_NULL_CODEC); + validateCodec(codec_, compressionLevel_); + setMetadata(AVRO_CODEC_KEY, AVRO_NULL_CODEC); if (codec_ == NULL_CODEC) { Review Comment: This branch is not needed since line 177 sets NULL_CODEC as a default one. ########## lang/c++/impl/ZstdCompressWrapper.cc: ########## Review Comment: ```suggestion #include <optional> #include <zstd.h> ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
