mapleFU commented on code in PR #45622:
URL: https://github.com/apache/arrow/pull/45622#discussion_r1984368171
##########
cpp/src/parquet/decoder.cc:
##########
@@ -1675,13 +1675,90 @@ class DeltaLengthByteArrayDecoder : public DecoderImpl,
num_valid_values_ = num_length;
}
+ Status DecodeArrowDenseFastPath(
+ int num_values, int null_count, const uint8_t* valid_bits,
+ int64_t valid_bits_offset, typename
EncodingTraits<ByteArrayType>::Accumulator* out,
+ int* out_num_values) {
+ int max_values = num_values - null_count;
+ if (num_values - null_count > num_valid_values_) {
+ throw ParquetException("Expected to decode ", num_values - null_count,
+ " values, but can decode decoded ",
num_valid_values_,
+ " values.");
+ }
+ const int32_t* length_ptr = buffered_length_->data_as<int32_t>() +
length_idx_;
+ int bytes_offset = len_ - decoder_->bytes_left();
+ const uint8_t* data_ptr = data_ + bytes_offset;
+ const int64_t initial_offset = out->builder->value_data_length();
+ auto* offsets_builder = out->builder->offsets_builder();
+ auto* value_data_builder = out->builder->value_data_builder();
+ // Phase1: get total length of binary data and append to value_data_builder
+ int64_t accum_length = 0;
+ for (int i = 0; i < max_values; ++i) {
+ if (ARROW_PREDICT_FALSE(length_ptr[i] < 0)) {
+ return Status::Invalid("negative string delta length");
+ }
+ accum_length += length_ptr[i];
+ }
+ if (ARROW_PREDICT_FALSE(accum_length >
std::numeric_limits<int32_t>::max())) {
Review Comment:
Yes, but currently the builder is just a `BinaryBuilder`, and large binary
data might use multiple chunks, so it might hit the `!CanFit` case. But once
the data fits, `accum_length > std::numeric_limits<int32_t>::max()` can never
be true.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]