rok commented on code in PR #47418:
URL: https://github.com/apache/arrow/pull/47418#discussion_r2878003806


##########
cpp/src/arrow/util/byte_size.cc:
##########
@@ -215,6 +244,42 @@ struct GetByteRangesArray {
 
   Status Visit(const LargeListType& type) const { return VisitBaseList(type); }
 
+  template <typename BaseListViewType>
+  Status VisitBaseListView(const BaseListViewType& type) const {
+    using offset_type = typename BaseListViewType::offset_type;
+    RETURN_NOT_OK(VisitBitmap(input.buffers[0]));
+
+    const Buffer& offsets_buffer = *input.buffers[1];
+    RETURN_NOT_OK(
+        range_starts->Append(reinterpret_cast<uint64_t>(offsets_buffer.data())));
+    RETURN_NOT_OK(range_offsets->Append(sizeof(offset_type) * offset));
+    RETURN_NOT_OK(range_lengths->Append(sizeof(offset_type) * length));
+
+    const Buffer& lengths_buffer = *input.buffers[2];
+    RETURN_NOT_OK(
+        range_starts->Append(reinterpret_cast<uint64_t>(lengths_buffer.data())));
+    RETURN_NOT_OK(range_offsets->Append(sizeof(offset_type) * offset));
+    RETURN_NOT_OK(range_lengths->Append(sizeof(offset_type) * length));
+
+    // The following calculation is an over/under estimate of the byte size since views
+    // buffer might
+    // 1. Not reference all the values in data buffers (the array was filtered without gc)
+    // 2. Reference a value multiple times without repeating it in the data buffer
+    //
+    // Producing exact byte size would require linear scan of all values in view buffer
+    GetByteRangesArray child{*input.child_data[0],
+                             0,
+                             (*input.child_data[0]).length,

Review Comment:
   ```suggestion
                                input.child_data[0]->length,
   ```



##########
cpp/src/arrow/util/byte_size.cc:
##########
@@ -215,6 +244,42 @@ struct GetByteRangesArray {
 
   Status Visit(const LargeListType& type) const { return VisitBaseList(type); }
 
+  template <typename BaseListViewType>
+  Status VisitBaseListView(const BaseListViewType& type) const {
+    using offset_type = typename BaseListViewType::offset_type;
+    RETURN_NOT_OK(VisitBitmap(input.buffers[0]));
+
+    const Buffer& offsets_buffer = *input.buffers[1];
+    RETURN_NOT_OK(
+        range_starts->Append(reinterpret_cast<uint64_t>(offsets_buffer.data())));
+    RETURN_NOT_OK(range_offsets->Append(sizeof(offset_type) * offset));
+    RETURN_NOT_OK(range_lengths->Append(sizeof(offset_type) * length));
+
+    const Buffer& lengths_buffer = *input.buffers[2];
+    RETURN_NOT_OK(
+        range_starts->Append(reinterpret_cast<uint64_t>(lengths_buffer.data())));
+    RETURN_NOT_OK(range_offsets->Append(sizeof(offset_type) * offset));
+    RETURN_NOT_OK(range_lengths->Append(sizeof(offset_type) * length));
+
+    // The following calculation is an over/under estimate of the byte size since views

Review Comment:
   ```suggestion
       // The following calculation is an over estimate of the byte size since views
   ```



##########
cpp/src/arrow/util/byte_size.cc:
##########
@@ -192,6 +192,35 @@ struct GetByteRangesArray {
 
   Status Visit(const LargeBinaryType& type) const { return VisitBaseBinary(type); }
 
+  template <typename BaseViewType>
+  Status VisitBaseViewType(const BaseViewType& type) const {
+    using c_type = typename BaseViewType::c_type;
+    RETURN_NOT_OK(VisitBitmap(input.buffers[0]));
+    const Buffer& views = *input.buffers[1];
+    RETURN_NOT_OK(range_starts->Append(reinterpret_cast<uint64_t>(views.data())));
+    RETURN_NOT_OK(range_offsets->Append(sizeof(c_type) * offset));
+    RETURN_NOT_OK(range_lengths->Append(sizeof(c_type) * length));
+
+    // The following calculation is an over/under estimate of the size since views buffer

Review Comment:
   Can this actually underestimate?
   ```suggestion
       // The following calculation is an over estimate of the size since views buffer
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to