rok commented on code in PR #14293:
URL: https://github.com/apache/arrow/pull/14293#discussion_r1102246678


##########
cpp/src/parquet/encoding_test.cc:
##########
@@ -1493,5 +1528,105 @@ TYPED_TEST(TestDeltaBitPackEncoding, 
NonZeroPaddedMiniblockBitWidth) {
   }
 }
 
+// ----------------------------------------------------------------------
+// DELTA_LENGTH_BYTE_ARRAY encode/decode tests.
+
+template <typename Type>
+class TestDeltaLengthByteArrayEncoding : public TestEncodingBase<Type> {
+ public:
+  using c_type = typename Type::c_type;
+  static constexpr int TYPE = Type::type_num;
+
+  virtual void CheckRoundtrip() {
+    auto encoder =
+        MakeTypedEncoder<Type>(Encoding::DELTA_LENGTH_BYTE_ARRAY, false, 
descr_.get());
+    auto decoder =
+        MakeTypedDecoder<Type>(Encoding::DELTA_LENGTH_BYTE_ARRAY, 
descr_.get());
+
+    encoder->Put(draws_, num_values_);
+    encode_buffer_ = encoder->FlushValues();
+
+    decoder->SetData(num_values_, encode_buffer_->data(),
+                     static_cast<int>(encode_buffer_->size()));
+    int values_decoded = decoder->Decode(decode_buf_, num_values_);
+    ASSERT_EQ(num_values_, values_decoded);
+    ASSERT_NO_FATAL_FAILURE(VerifyResults<c_type>(decode_buf_, draws_, 
num_values_));
+  }
+
+  void CheckRoundtripSpaced(const uint8_t* valid_bits, int64_t 
valid_bits_offset) {
+    auto encoder =
+        MakeTypedEncoder<Type>(Encoding::DELTA_LENGTH_BYTE_ARRAY, false, 
descr_.get());
+    auto decoder =
+        MakeTypedDecoder<Type>(Encoding::DELTA_LENGTH_BYTE_ARRAY, 
descr_.get());
+    int null_count = 0;
+    for (auto i = 0; i < num_values_; i++) {
+      if (!bit_util::GetBit(valid_bits, valid_bits_offset + i)) {
+        null_count++;
+      }
+    }
+
+    encoder->PutSpaced(draws_, num_values_, valid_bits, valid_bits_offset);
+    encode_buffer_ = encoder->FlushValues();
+    decoder->SetData(num_values_ - null_count, encode_buffer_->data(),
+                     static_cast<int>(encode_buffer_->size()));
+    auto values_decoded = decoder->DecodeSpaced(decode_buf_, num_values_, 
null_count,
+                                                valid_bits, valid_bits_offset);
+    ASSERT_EQ(num_values_, values_decoded);
+    ASSERT_NO_FATAL_FAILURE(VerifyResultsSpaced<c_type>(decode_buf_, draws_, 
num_values_,
+                                                        valid_bits, 
valid_bits_offset));
+  }
+
+ protected:
+  USING_BASE_MEMBERS();
+};
+
+typedef ::testing::Types<ByteArrayType> TestDeltaLengthByteArrayEncodingTypes;
+TYPED_TEST_SUITE(TestDeltaLengthByteArrayEncoding, 
TestDeltaLengthByteArrayEncodingTypes);
+
+TYPED_TEST(TestDeltaLengthByteArrayEncoding, BasicRoundTrip) {
+  ASSERT_NO_FATAL_FAILURE(this->Execute(0, 0));
+  ASSERT_NO_FATAL_FAILURE(this->Execute(2000, 200));
+  ASSERT_NO_FATAL_FAILURE(this->ExecuteSpaced(
+      /*nvalues*/ 1234, /*repeats*/ 1, /*valid_bits_offset*/ 64,
+      /*null_probability*/ 0.1));
+}
+
+TEST(DeltaLengthByteArrayEncodingAdHoc, ArrowBinaryDirectPut) {
+  const int64_t size = 50;
+  const int32_t min_length = 0;
+  const int32_t max_length = 10;
+  const double null_probability = 0.25;
+  auto encoder = 
MakeTypedEncoder<ByteArrayType>(Encoding::DELTA_LENGTH_BYTE_ARRAY);
+  auto decoder = 
MakeTypedDecoder<ByteArrayType>(Encoding::DELTA_LENGTH_BYTE_ARRAY);
+
+  auto CheckSeed = [&](int seed, int64_t size) {
+    ::arrow::random::RandomArrayGenerator rag(seed);
+    auto values = rag.String(size, min_length, max_length, null_probability);

Review Comment:
   I've added a test. The issue here is that the physical type decoded with 
`EncodingTraits<ByteArrayType>` will be 32-bit (as that is what is encoded). 
I'm adjusting with a cast in the test, but we might want to do something else?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to