wjones127 commented on code in PR #14353: URL: https://github.com/apache/arrow/pull/14353#discussion_r1066409132
##########
cpp/src/parquet/column_reader.cc:
##########
@@ -1957,6 +1970,139 @@ class ByteArrayChunkedRecordReader : public TypedRecordReader<ByteArrayType>,
   typename EncodingTraits<ByteArrayType>::Accumulator accumulator_;
 };
+class ByteArrayChunkedOptRecordReader : public TypedRecordReader<ByteArrayType>,
+                                        virtual public BinaryRecordReader {
+ public:
+  ByteArrayChunkedOptRecordReader(const ColumnDescriptor* descr, LevelInfo leaf_info,
+                                  ::arrow::MemoryPool* pool)
+      : TypedRecordReader<ByteArrayType>(descr, leaf_info, pool) {
+    DCHECK_EQ(descr_->physical_type(), Type::BYTE_ARRAY);
+    accumulator_.builder.reset(new ::arrow::BinaryBuilder(pool));
+    values_ = AllocateBuffer(pool);
+    offset_ = AllocateBuffer(pool);
+  }
+
+  ::arrow::ArrayVector GetBuilderChunks() override {
+    if (uses_opt_) {
+      std::vector<std::shared_ptr<Buffer>> buffers = {ReleaseIsValid(), ReleaseOffsets(),
+                                                      ReleaseValues()};
+      auto data = std::make_shared<::arrow::ArrayData>(
+          ::arrow::binary(), values_written(), buffers, null_count());
+
+      auto chunks = ::arrow::ArrayVector({::arrow::MakeArray(data)});
+      return chunks;
+    } else {
+      ::arrow::ArrayVector result = accumulator_.chunks;
+      if (result.size() == 0 || accumulator_.builder->length() > 0) {
+        std::shared_ptr<::arrow::Array> last_chunk;
+        PARQUET_THROW_NOT_OK(accumulator_.builder->Finish(&last_chunk));
+        result.push_back(std::move(last_chunk));
+      }
+      accumulator_.chunks = {};
+      return result;

Review Comment:
I've found locally that if I merge the implementations, the unit tests pass. Could you please merge them in the PR?

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org