This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 60800272ab GH-38184: [C++] Add systematic tests for
Builder::AppendArraySlice (#49132)
60800272ab is described below
commit 60800272abec99f12ce7c1fc0a16a539e1a03b2d
Author: Abhishek Bansal <[email protected]>
AuthorDate: Wed Feb 18 15:16:58 2026 +0530
GH-38184: [C++] Add systematic tests for Builder::AppendArraySlice (#49132)
### Rationale for this change
Introduce systematic test coverage for
`arrow::ArrayBuilder::AppendArraySlice` across all major Arrow data types to
ensure correctness for diverse physical layouts.
### What changes are included in this PR?
- Added 30 new test cases covering Primitives, Binary/String, Lists
(including Views), Structs, Unions, Decimals, and REE types.
- Added specialized logical equality validation for Dictionary arrays.
### Are these changes tested?
Yes, all 30 new cases passed locally with varying null probabilities.
### Are there any user-facing changes?
No.
* GitHub Issue: #38184
Authored-by: Abhishek Bansal <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
cpp/src/arrow/array/array_test.cc | 118 ++++++++++++++++++++++++++++++++++++++
1 file changed, 118 insertions(+)
diff --git a/cpp/src/arrow/array/array_test.cc
b/cpp/src/arrow/array/array_test.cc
index dc82488f6a..64ea3fd71a 100644
--- a/cpp/src/arrow/array/array_test.cc
+++ b/cpp/src/arrow/array/array_test.cc
@@ -41,6 +41,7 @@
#include "arrow/array/builder_primitive.h"
#include "arrow/array/builder_run_end.h"
#include "arrow/array/builder_time.h"
+#include "arrow/array/concatenate.h"
#include "arrow/array/data.h"
#include "arrow/array/util.h"
#include "arrow/buffer.h"
@@ -997,6 +998,123 @@ TEST_F(TestArray, TestAppendArraySlice) {
}
}
+class TestBuilderAppendArraySlice : public TestArray {
+ public:
+ virtual void AssertResult(const Array& expected, const Array& actual) {
+ AssertArraysEqual(expected, actual, true);
+ }
+
+ void CheckAppendArraySlice(const std::shared_ptr<DataType>& type) {
+ auto rag = random::RandomArrayGenerator(0xdeadbeef);
+ const int64_t total_length = 100;
+
+ for (auto null_probability : {0.0, 0.1, 0.5, 1.0}) {
+ auto array = rag.ArrayOf(type, total_length, null_probability);
+
+ std::unique_ptr<ArrayBuilder> builder;
+ ASSERT_OK(MakeBuilder(pool_, type, &builder));
+
+ // Slice the array into multiple pieces
+ ArrayVector slices;
+ std::vector<int64_t> offsets = {0, 10, 10, 25, 60, 100};
+ for (size_t i = 0; i < offsets.size() - 1; ++i) {
+ int64_t start = offsets[i];
+ int64_t length = offsets[i + 1] - offsets[i];
+ auto slice = array->Slice(start, length);
+ slices.push_back(slice);
+
+ ArraySpan span(*slice->data());
+ ASSERT_OK(builder->AppendArraySlice(span, 0, slice->length()));
+ }
+
+ std::shared_ptr<Array> actual;
+ ASSERT_OK(builder->Finish(&actual));
+ ASSERT_OK(actual->ValidateFull());
+
+ ASSERT_OK_AND_ASSIGN(auto expected, Concatenate(slices, pool_));
+ AssertResult(*expected, *actual);
+ }
+ }
+
+ void CheckAppendArraySlice(const std::vector<std::shared_ptr<DataType>>&
types) {
+ for (const auto& type : types) {
+ ARROW_SCOPED_TRACE("type = ", type->ToString());
+ CheckAppendArraySlice(type);
+ }
+ }
+};
+
+TEST_F(TestBuilderAppendArraySlice, Primitives) {
+ CheckAppendArraySlice(PrimitiveTypes());
+}
+
+TEST_F(TestBuilderAppendArraySlice, Temporals) {
CheckAppendArraySlice(TemporalTypes()); }
+
+TEST_F(TestBuilderAppendArraySlice, Intervals) {
CheckAppendArraySlice(IntervalTypes()); }
+
+TEST_F(TestBuilderAppendArraySlice, Durations) {
CheckAppendArraySlice(DurationTypes()); }
+
+TEST_F(TestBuilderAppendArraySlice, Decimals) {
+ CheckAppendArraySlice(
+ {decimal32(7, 2), decimal64(12, 2), decimal128(10, 2), decimal256(10,
2)});
+}
+
+TEST_F(TestBuilderAppendArraySlice, Nested) {
+ CheckAppendArraySlice({list(int32()), large_list(int32()),
list_view(int32()),
+ large_list_view(int32()), fixed_size_list(int32(), 3),
+ struct_({field("a", int32()), field("b", utf8())}),
+ sparse_union({field("a", int32()), field("b",
utf8())}),
+ dense_union({field("a", int32()), field("b",
utf8())})});
+}
+
+TEST_F(TestBuilderAppendArraySlice, FixedSizeBinary) {
+ CheckAppendArraySlice(fixed_size_binary(10));
+}
+
+TEST_F(TestBuilderAppendArraySlice, Float16) {
CheckAppendArraySlice(float16()); }
+
+TEST_F(TestBuilderAppendArraySlice, RunEndEncoded) {
+ CheckAppendArraySlice(run_end_encoded(int32(), utf8()));
+ CheckAppendArraySlice(run_end_encoded(int32(), int64()));
+}
+
+// Dictionary types require a custom AssertResult because DictionaryBuilder
+// re-encodes values based on discovery order. This can change both the
+// dictionary and the indices, causing standard physical equality checks to
fail.
+//
+// Example: Slicing values ["b", "a"] from an array with dictionary ["a", "b"]
+// (indices [1, 0]) and appending them to a fresh builder results in a new
+// dictionary ["b", "a"] (indices [0, 1]). Both represent the same logical
+// data but differ physically.
+class TestBuilderAppendArraySliceDictionary : public
TestBuilderAppendArraySlice {
+ public:
+ void AssertResult(const Array& expected, const Array& actual) override {
+ const auto& expected_dict = internal::checked_cast<const
DictionaryArray&>(expected);
+ const auto& actual_dict = internal::checked_cast<const
DictionaryArray&>(actual);
+ const auto& expected_values = *expected_dict.dictionary();
+ const auto& actual_values = *actual_dict.dictionary();
+
+ ASSERT_EQ(expected.length(), actual.length());
+ for (int64_t i = 0; i < expected.length(); ++i) {
+ if (expected.IsNull(i)) {
+ ASSERT_TRUE(actual.IsNull(i));
+ } else {
+ ASSERT_FALSE(actual.IsNull(i));
+ ASSERT_OK_AND_ASSIGN(auto expected_val,
+
expected_values.GetScalar(expected_dict.GetValueIndex(i)));
+ ASSERT_OK_AND_ASSIGN(auto actual_val,
+
actual_values.GetScalar(actual_dict.GetValueIndex(i)));
+ AssertScalarsEqual(*expected_val, *actual_val);
+ }
+ }
+ }
+};
+
+TEST_F(TestBuilderAppendArraySliceDictionary, Dictionary) {
+ CheckAppendArraySlice(dictionary(int8(), utf8()));
+ CheckAppendArraySlice(dictionary(int32(), utf8()));
+}
+
// GH-39976: Test out-of-line data size calculation in
// BinaryViewBuilder::AppendArraySlice.
TEST_F(TestArray, TestBinaryViewAppendArraySlice) {