bkietz commented on code in PR #47531:
URL: https://github.com/apache/arrow/pull/47531#discussion_r2346805616
##########
cpp/src/arrow/stl_iterator.h:
##########
@@ -247,33 +248,39 @@ class ChunkedArrayIterator {
};
/// Return an iterator to the beginning of the chunked array
-template <typename Type, typename ArrayType = typename TypeTraits<Type>::ArrayType>
-ChunkedArrayIterator<ArrayType> Begin(const ChunkedArray& chunked_array) {
- return ChunkedArrayIterator<ArrayType>(chunked_array);
+template <typename Type, typename ArrayType = typename TypeTraits<Type>::ArrayType,
+ typename ValueAccessor = detail::DefaultValueAccessor<ArrayType>>
+ChunkedArrayIterator<ArrayType, ValueAccessor> Begin(const ChunkedArray& chunked_array) {
+ return ChunkedArrayIterator<ArrayType, ValueAccessor>(chunked_array);
}
/// Return an iterator to the end of the chunked array
-template <typename Type, typename ArrayType = typename TypeTraits<Type>::ArrayType>
-ChunkedArrayIterator<ArrayType> End(const ChunkedArray& chunked_array) {
- return ChunkedArrayIterator<ArrayType>(chunked_array, chunked_array.length());
+template <typename Type, typename ArrayType = typename TypeTraits<Type>::ArrayType,
+ typename ValueAccessor = detail::DefaultValueAccessor<ArrayType>>
+ChunkedArrayIterator<ArrayType, ValueAccessor> End(const ChunkedArray& chunked_array) {
+ return ChunkedArrayIterator<ArrayType, ValueAccessor>(chunked_array,
+ chunked_array.length());
}
-template <typename ArrayType>
+template <typename ArrayType,
+ typename ValueAccessor = detail::DefaultValueAccessor<ArrayType>>
struct ChunkedArrayRange {
const ChunkedArray* chunked_array;
- ChunkedArrayIterator<ArrayType> begin() {
- return stl::ChunkedArrayIterator<ArrayType>(*chunked_array);
+ ChunkedArrayIterator<ArrayType, ValueAccessor> begin() {
+ return stl::ChunkedArrayIterator<ArrayType, ValueAccessor>(*chunked_array);
}
- ChunkedArrayIterator<ArrayType> end() {
- return stl::ChunkedArrayIterator<ArrayType>(*chunked_array, chunked_array->length());
+ ChunkedArrayIterator<ArrayType, ValueAccessor> end() {
+ return stl::ChunkedArrayIterator<ArrayType, ValueAccessor>(*chunked_array,
+ chunked_array->length());
}
};
/// Return an iterable range over the chunked array
-template <typename Type, typename ArrayType = typename TypeTraits<Type>::ArrayType>
-ChunkedArrayRange<ArrayType> Iterate(const ChunkedArray& chunked_array) {
- return stl::ChunkedArrayRange<ArrayType>{&chunked_array};
+template <typename Type, typename ArrayType = typename TypeTraits<Type>::ArrayType,
+ typename ValueAccessor = detail::DefaultValueAccessor<ArrayType>>
+ChunkedArrayRange<ArrayType, ValueAccessor> Iterate(const ChunkedArray& chunked_array) {
+ return stl::ChunkedArrayRange<ArrayType, ValueAccessor>{&chunked_array};
}
Review Comment:
The trait specializations need to include the ValueAccessor template
argument; otherwise, they will not cover ArrayIterators that have a custom
ValueAccessor.
##########
cpp/src/arrow/stl_iterator_test.cc:
##########
@@ -545,5 +607,62 @@ TEST(ChunkedArrayIterator, ForEachIterator) {
ASSERT_EQ(values, expected);
}
+TEST(ChunkedArrayIterator, CustomValueAccessorDictionary) {
+ // Create multiple dictionary arrays with the same dictionary
+ auto dict = ArrayFromJSON(utf8(), R"(["red", "green", "blue", "yellow"])");
+
+ auto indices1 = ArrayFromJSON(int32(), "[0, 1, 2]");
+ auto indices2 = ArrayFromJSON(int32(), "[3, 2, null]");
+ auto indices3 = ArrayFromJSON(int32(), "[1, 0, 3, 2]");
+
+ auto dict_type = dictionary(int32(), utf8());
+ auto dict_array1 = std::make_shared<DictionaryArray>(dict_type, indices1, dict);
+ auto dict_array2 = std::make_shared<DictionaryArray>(dict_type, indices2, dict);
+ auto dict_array3 = std::make_shared<DictionaryArray>(dict_type, indices3, dict);
+
+ // Create chunked array from dictionary arrays
+ auto chunked_array = std::make_shared<ChunkedArray>(
+ std::vector<std::shared_ptr<Array>>{dict_array1, dict_array2, dict_array3},
+ dict_type);
+
+ // Use custom accessor to iterate over decoded values across chunks
+ auto it =
+ Begin<DictionaryType, DictionaryArray, TestDictionaryValueAccessor>(*chunked_array);
Review Comment:
I think that, rather than requiring users to provide all of these template
arguments, we should add
`ChunkedArray::range<ArrowType, ValueAccessor = DefaultEtc>()`.
Then we can write:
```c++
for (int i : chunked_array->range<Int32Type>()) {}
```
Or
```c++
for (int i : chunked_array->range<DictionaryType, TestDictionaryValueAccessor>()) {}
```
The same member function could be added to `Array`, in which case, the above
could also be used on a dictionary array.
... Actually, using the `argument_type` trait in util/functional.h it would
be possible to infer *all* template arguments from a lambda value accessor:
```c++
auto accessor = [](const DictionaryArray& array, int64_t index) {
int64_t dict_index = array.GetValueIndex(index);
const auto& dict = checked_cast<const StringArray&>(*array.dictionary());
return dict.GetView(dict_index);
};
for (auto value : chunked_array->range(accessor)) {}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]