rok commented on code in PR #47418:
URL: https://github.com/apache/arrow/pull/47418#discussion_r2878003806
##########
cpp/src/arrow/util/byte_size.cc:
##########
@@ -215,6 +244,42 @@ struct GetByteRangesArray {
Status Visit(const LargeListType& type) const { return VisitBaseList(type); }
+ template <typename BaseListViewType>
+ Status VisitBaseListView(const BaseListViewType& type) const {
+ using offset_type = typename BaseListViewType::offset_type;
+ RETURN_NOT_OK(VisitBitmap(input.buffers[0]));
+
+ const Buffer& offsets_buffer = *input.buffers[1];
+ RETURN_NOT_OK(
+
range_starts->Append(reinterpret_cast<uint64_t>(offsets_buffer.data())));
+ RETURN_NOT_OK(range_offsets->Append(sizeof(offset_type) * offset));
+ RETURN_NOT_OK(range_lengths->Append(sizeof(offset_type) * length));
+
+ const Buffer& lengths_buffer = *input.buffers[2];
+ RETURN_NOT_OK(
+
range_starts->Append(reinterpret_cast<uint64_t>(lengths_buffer.data())));
+ RETURN_NOT_OK(range_offsets->Append(sizeof(offset_type) * offset));
+ RETURN_NOT_OK(range_lengths->Append(sizeof(offset_type) * length));
+
+ // The following calculation is an over/under estimate of the byte size
since views
+ // buffer might
+ // 1. Not reference all the values in data buffers (the array was filtered
without gc)
+ // 2. Reference a value multiple times without repeating it in the data
buffer
+ //
+ // Producing exact byte size would require linear scan of all values in
view buffer
+ GetByteRangesArray child{*input.child_data[0],
+ 0,
+ (*input.child_data[0]).length,
Review Comment:
```suggestion
input.child_data[0]->length,
```
##########
cpp/src/arrow/util/byte_size.cc:
##########
@@ -215,6 +244,42 @@ struct GetByteRangesArray {
Status Visit(const LargeListType& type) const { return VisitBaseList(type); }
+ template <typename BaseListViewType>
+ Status VisitBaseListView(const BaseListViewType& type) const {
+ using offset_type = typename BaseListViewType::offset_type;
+ RETURN_NOT_OK(VisitBitmap(input.buffers[0]));
+
+ const Buffer& offsets_buffer = *input.buffers[1];
+ RETURN_NOT_OK(
+
range_starts->Append(reinterpret_cast<uint64_t>(offsets_buffer.data())));
+ RETURN_NOT_OK(range_offsets->Append(sizeof(offset_type) * offset));
+ RETURN_NOT_OK(range_lengths->Append(sizeof(offset_type) * length));
+
+ const Buffer& lengths_buffer = *input.buffers[2];
+ RETURN_NOT_OK(
+
range_starts->Append(reinterpret_cast<uint64_t>(lengths_buffer.data())));
+ RETURN_NOT_OK(range_offsets->Append(sizeof(offset_type) * offset));
+ RETURN_NOT_OK(range_lengths->Append(sizeof(offset_type) * length));
+
+ // The following calculation is an over/under estimate of the byte size
since views
Review Comment:
```suggestion
// The following calculation is an overestimate of the byte size since views
```
##########
cpp/src/arrow/util/byte_size.cc:
##########
@@ -192,6 +192,35 @@ struct GetByteRangesArray {
Status Visit(const LargeBinaryType& type) const { return
VisitBaseBinary(type); }
+ template <typename BaseViewType>
+ Status VisitBaseViewType(const BaseViewType& type) const {
+ using c_type = typename BaseViewType::c_type;
+ RETURN_NOT_OK(VisitBitmap(input.buffers[0]));
+ const Buffer& views = *input.buffers[1];
+
RETURN_NOT_OK(range_starts->Append(reinterpret_cast<uint64_t>(views.data())));
+ RETURN_NOT_OK(range_offsets->Append(sizeof(c_type) * offset));
+ RETURN_NOT_OK(range_lengths->Append(sizeof(c_type) * length));
+
+ // The following calculation is an over/under estimate of the size since
views buffer
Review Comment:
Can this actually underestimate?
```suggestion
// The following calculation is an overestimate of the size since views buffer
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]