edponce commented on code in PR #13009: URL: https://github.com/apache/arrow/pull/13009#discussion_r870292161
########## cpp/src/arrow/stl_iterator.h: ########## @@ -128,6 +131,162 @@ class ArrayIterator { int64_t index_; }; +template <typename ArrayType, + typename ValueAccessor = detail::DefaultValueAccessor<ArrayType>> +class ChunkedArrayIterator { + public: + using value_type = arrow::util::optional<typename ValueAccessor::ValueType>; + using difference_type = int64_t; + using pointer = value_type*; + using reference = value_type&; + using iterator_category = std::random_access_iterator_tag; + + // Some algorithms need to default-construct an iterator + ChunkedArrayIterator() : chunked_array_(NULLPTR), index_(0) {} + + explicit ChunkedArrayIterator(const ChunkedArray& chunked_array, int64_t index = 0) + : chunked_array_(&chunked_array), index_(index) { + if (index_ != chunked_array.length()) { + auto chunk_location = GetChunkLocation(this->index_); + current_array_iterator_ = ArrayIterator<ArrayType>( + arrow::internal::checked_cast<const ArrayType&>( + *chunked_array_->chunk(static_cast<int>(chunk_location.chunk_index))), + chunk_location.index_in_chunk); + } else { + current_array_iterator_ = {}; + } + } + + // Value access + value_type operator*() const { return *current_array_iterator_; } + + value_type operator[](difference_type n) const { + auto chunk_location = GetChunkLocation(index_ + n); + ArrayIterator<ArrayType> target_iterator{ + arrow::internal::checked_cast<const ArrayType&>( + *chunked_array_->chunk(static_cast<int>(chunk_location.chunk_index)))}; + return target_iterator[chunk_location.index_in_chunk]; + } + + int64_t index() const { return index_; } + + // Forward / backward + ChunkedArrayIterator& operator++() { + (*this) += 1; + return *this; + } + ChunkedArrayIterator& operator--() { + (*this) -= 1; + return *this; + } + + ChunkedArrayIterator operator++(int) { + ChunkedArrayIterator tmp(*this); + ++*this; + return tmp; + } + ChunkedArrayIterator operator--(int) { + ChunkedArrayIterator tmp(*this); + --*this; + return tmp; + } + + // Arithmetic + difference_type operator-(const ChunkedArrayIterator& other) const { + return index_ - other.index_; + } + ChunkedArrayIterator operator+(difference_type n) const { + return ChunkedArrayIterator(*chunked_array_, index_ + n); + } + ChunkedArrayIterator operator-(difference_type n) const { + return ChunkedArrayIterator(*chunked_array_, index_ - n); + } + friend inline ChunkedArrayIterator operator+(difference_type diff, + const ChunkedArrayIterator& other) { + return ChunkedArrayIterator(*other.chunked_array_, diff + other.index_); + } + friend inline ChunkedArrayIterator operator-(difference_type diff, + const ChunkedArrayIterator& other) { + return ChunkedArrayIterator(*other.chunked_array_, diff - other.index_); + } + ChunkedArrayIterator& operator+=(difference_type n) { + index_ += n; + if (index_ != chunked_array_->length()) { Review Comment: It seems that any value is valid as long as you can return to the initial value with its inverse operation. So the check `!=` follows the operation semantics of a RandomAccessIterator. Actually, the `<` check would be insufficient because it allows negative indices (need absolute value of index). The question now becomes, do Arrow iterators guard against out-of-bounds accesses? From a performance perspective, they have basically the same cost. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org