This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 1b262a29e5 GH-37437: [C++] Fix MakeArrayOfNull for list array with 
large string values type (#37467)
1b262a29e5 is described below

commit 1b262a29e5cddb2534457fadfe0b77bd7767f297
Author: Joris Van den Bossche <[email protected]>
AuthorDate: Thu Oct 5 17:27:20 2023 +0200

    GH-37437: [C++] Fix MakeArrayOfNull for list array with large string values 
type (#37467)
    
    ### Rationale for this change
    
    `MakeArrayOfNull` for list types assumed that the values child field 
did not need to be considered when sizing the shared null buffer. However, the 
values child can itself require a minimum buffer size (e.g. for its own 
offsets), and that size can be larger than the list's offsets buffer when the 
list uses int32 offsets.
    
    ### Are these changes tested?
    
    Yes
    
    * Closes: #37437
    
    Authored-by: Joris Van den Bossche <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 cpp/src/arrow/array/array_test.cc | 5 +++++
 cpp/src/arrow/array/util.cc       | 6 ++++--
 cpp/src/arrow/array/validate.cc   | 4 +++-
 3 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/cpp/src/arrow/array/array_test.cc 
b/cpp/src/arrow/array/array_test.cc
index 0b82a82fbd..2bef9d725d 100644
--- a/cpp/src/arrow/array/array_test.cc
+++ b/cpp/src/arrow/array/array_test.cc
@@ -388,7 +388,12 @@ static std::vector<std::shared_ptr<DataType>> 
TestArrayUtilitiesAgainstTheseType
       large_utf8(),
       list(utf8()),
       list(int64()),  // NOTE: Regression case for ARROW-9071/MakeArrayOfNull
+      list(large_utf8()),
+      list(list(int64())),
+      list(list(large_utf8())),
+      large_list(utf8()),
       large_list(large_utf8()),
+      large_list(list(large_utf8())),
       fixed_size_list(utf8(), 3),
       fixed_size_list(int64(), 4),
       dictionary(int32(), utf8()),
diff --git a/cpp/src/arrow/array/util.cc b/cpp/src/arrow/array/util.cc
index e84ab404ad..98e9d51b5f 100644
--- a/cpp/src/arrow/array/util.cc
+++ b/cpp/src/arrow/array/util.cc
@@ -366,9 +366,11 @@ class NullArrayFactory {
     }
 
     template <typename T>
-    enable_if_var_size_list<T, Status> Visit(const T&) {
+    enable_if_var_size_list<T, Status> Visit(const T& type) {
       // values array may be empty, but there must be at least one offset of 0
-      return MaxOf(sizeof(typename T::offset_type) * (length_ + 1));
+      RETURN_NOT_OK(MaxOf(sizeof(typename T::offset_type) * (length_ + 1)));
+      RETURN_NOT_OK(MaxOf(GetBufferLength(type.value_type(), length_)));
+      return Status::OK();
     }
 
     template <typename T>
diff --git a/cpp/src/arrow/array/validate.cc b/cpp/src/arrow/array/validate.cc
index 0f2bd45835..19ff8e28b5 100644
--- a/cpp/src/arrow/array/validate.cc
+++ b/cpp/src/arrow/array/validate.cc
@@ -713,8 +713,10 @@ struct ValidateArrayImpl {
     }
 
     // An empty list array can have 0 offsets
-    const auto required_offsets = (data.length > 0) ? data.length + 
data.offset + 1 : 0;
     const auto offsets_byte_size = data.buffers[1]->size();
+    const auto required_offsets = ((data.length > 0) || (offsets_byte_size > 
0))
+                                      ? data.length + data.offset + 1
+                                      : 0;
     if (offsets_byte_size / static_cast<int32_t>(sizeof(offset_type)) <
         required_offsets) {
       return Status::Invalid("Offsets buffer size (bytes): ", 
offsets_byte_size,

Reply via email to