wgtmac commented on code in PR #3457:
URL: https://github.com/apache/avro/pull/3457#discussion_r2267241066


##########
lang/c++/test/DataFileTests.cc:
##########
@@ -838,6 +838,81 @@ void testSkipStringZstdCodec() {
 }
 #endif
 
+struct Weather {
+    std::string station;
+    int64_t time;
+    int32_t temp;
+    Weather(const char *station, int64_t time, int32_t temp)
+        : station(station), time(time), temp(temp) {}
+
+    bool operator==(const Weather &other) const {
+        return station == other.station && time == other.time && temp == 
other.temp;
+    }
+    friend std::ostream &operator<<(std::ostream &os, const Weather &w) {
+        return os << w.station << ' ' << w.time << ' ' << w.temp;
+    }
+};
+
+namespace avro {
+template<>
+struct codec_traits<Weather> {
+    static void decode(Decoder &d, Weather &v) {
+        avro::decode(d, v.station);
+        avro::decode(d, v.time);
+        avro::decode(d, v.temp);
+    }
+};
+} // namespace avro
+
+void testCompatibility(const char *filename) {
+    const char *readerSchemaStr = "{"
+                                  "\"type\": \"record\", \"name\": 
\"test.Weather\", \"fields\":["
+                                  "{\"name\": \"station\", \"type\": 
\"string\", \"order\": \"ignore\"},"
+                                  "{\"name\": \"time\", \"type\": \"long\"},"
+                                  "{\"name\": \"temp\", \"type\": \"int\"}"
+                                  "]}";
+    avro::ValidSchema readerSchema =
+        avro::compileJsonSchemaFromString(readerSchemaStr);
+    avro::DataFileReader<Weather> df(filename, readerSchema);
+
+    Weather ro("", -1, -1);
+    BOOST_CHECK_EQUAL(df.read(ro), true);
+    BOOST_CHECK_EQUAL(ro, Weather("011990-99999", -619524000000L, 0));
+    BOOST_CHECK_EQUAL(df.read(ro), true);
+    BOOST_CHECK_EQUAL(ro, Weather("011990-99999", -619506000000L, 22));
+    BOOST_CHECK_EQUAL(df.read(ro), true);
+    BOOST_CHECK_EQUAL(ro, Weather("011990-99999", -619484400000L, -11));
+    BOOST_CHECK_EQUAL(df.read(ro), true);
+    BOOST_CHECK_EQUAL(ro, Weather("012650-99999", -655531200000L, 111));
+    BOOST_CHECK_EQUAL(df.read(ro), true);
+    BOOST_CHECK_EQUAL(ro, Weather("012650-99999", -655509600000L, 78));
+    BOOST_CHECK_EQUAL(df.read(ro), false);
+}
+
+void testCompatibilityNullCodec() {
+    BOOST_TEST_CHECKPOINT(__func__);
+    testCompatibility("../../share/test/data/weather.avro");
+}
+
+void testCompatibilityDeflateCodec() {
+    BOOST_TEST_CHECKPOINT(__func__);
+    testCompatibility("../../share/test/data/weather-deflate.avro");
+}
+
+#ifdef SNAPPY_CODEC_AVAILABLE
+void testCompatibilitySnappyCodec() {
+    BOOST_TEST_CHECKPOINT(__func__);
+    testCompatibility("../../share/test/data/weather-snappy.avro");
+}
+#endif
+
+#ifdef ZSTD_CODEC_AVAILABLE
+void testCompatibilityZstdCodec() {
+    BOOST_TEST_CHECKPOINT(__func__);
+    testCompatibility("../../share/test/data/weather-zstd.avro");

Review Comment:
   How did you create this file?



##########
lang/c++/impl/DataFile.cc:
##########
@@ -340,7 +341,8 @@ void DataFileReaderBase::init(const ValidSchema 
&readerSchema) {
 static void drain(InputStream &in) {
     const uint8_t *p = nullptr;
     size_t n = 0;
-    while (in.next(&p, &n));
+    while (in.next(&p, &n))
+        ;

Review Comment:
   This line looks weird. Could you please revert it?



##########
lang/c++/impl/DataFile.cc:
##########
@@ -54,7 +54,8 @@ const string AVRO_SNAPPY_CODEC = "snappy";
 #endif
 
 #ifdef ZSTD_CODEC_AVAILABLE
-const string AVRO_ZSTD_CODEC = "zstd";
+const string AVRO_ZSTD_CODEC = "zstandard";
+const string AVRO_ZSTD_CODEC_LEGACY = "zstd";

Review Comment:
   ```suggestion
   ```
   
   I think you are right. Let's remove this.



##########
lang/c++/impl/DataFile.cc:
##########
@@ -617,8 +637,8 @@ void DataFileReaderBase::readHeader() {
         codec_ = SNAPPY_CODEC;
 #endif
 #ifdef ZSTD_CODEC_AVAILABLE
-    } else if (it != metadata_.end()
-               && toString(it->second) == AVRO_ZSTD_CODEC) {
+    } else if ((it != metadata_.end() && toString(it->second) == 
AVRO_ZSTD_CODEC)
+               || (it != metadata_.end() && toString(it->second) == 
AVRO_ZSTD_CODEC_LEGACY)) {

Review Comment:
   ```suggestion
       } else if ((it != metadata_.end() && toString(it->second) == 
AVRO_ZSTD_CODEC)) {
   ```



##########
lang/c++/impl/DataFile.cc:
##########
@@ -491,22 +494,39 @@ void DataFileReaderBase::readDataBlock() {
         if (decompressed_size == ZSTD_CONTENTSIZE_ERROR) {
             throw Exception("ZSTD: Not a valid compressed frame");
         } else if (decompressed_size == ZSTD_CONTENTSIZE_UNKNOWN) {
-            throw Exception("ZSTD: Unable to determine decompressed size");
-        }
-
-        // Decompress the data
-        uncompressed.clear();
-        uncompressed.resize(decompressed_size);
-        size_t result = ZSTD_decompress(
-            uncompressed.data(), decompressed_size,
-            reinterpret_cast<const char *>(compressed_.data()), 
compressed_.size());
-
-        if (ZSTD_isError(result)) {
-            throw Exception("ZSTD decompression error: {}", 
ZSTD_getErrorName(result));
-        }
-        if (result != decompressed_size) {
-            throw Exception("ZSTD: Decompressed size mismatch: expected {}, 
got {}",
-                            decompressed_size, result);
+            // Stream decompress the data
+            ZSTD_DCtx *dctx = ZSTD_createDCtx();
+            if (!dctx) {
+                throw Exception("ZSTD decompression error: ZSTD_createDCtx() 
failed");
+            }
+            ZSTD_inBuffer in{compressed_.data(), compressed_.size(), 0};
+            std::vector<char> tmp(ZSTD_DStreamOutSize());
+            ZSTD_outBuffer out{tmp.data(), tmp.size(), 0};
+            size_t ret;
+            do {
+                out.pos = 0;
+                ret = ZSTD_decompressStream(dctx, &out, &in);

Review Comment:
   I think this fix makes sense. Can it be tested?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to