pitrou commented on code in PR #13009:
URL: https://github.com/apache/arrow/pull/13009#discussion_r860697542


##########
cpp/src/arrow/stl_iterator.h:
##########
@@ -128,6 +131,215 @@ class ArrayIterator {
   int64_t index_;
 };
 
+template <typename ArrayType,
+          typename ValueAccessor = detail::DefaultValueAccessor<ArrayType>>
+class ChunkedArrayIterator {
+ public:
+  using value_type = arrow::util::optional<typename ValueAccessor::ValueType>;
+  using difference_type = int64_t;
+  using pointer = value_type*;
+  using reference = value_type&;
+  using iterator_category = std::random_access_iterator_tag;
+
+  // Some algorithms need to default-construct an iterator
+  ChunkedArrayIterator() : chunked_array_(NULLPTR), index_(0), 
current_chunk_index_(0) {}
+
+  explicit ChunkedArrayIterator(const ChunkedArray& chunked_array, int64_t 
index = 0)
+      : chunked_array_(&chunked_array), index_(index) {
+    InitializeComponents(*this);
+  }
+
+  // Value access
+  value_type operator*() const { return 
*iterators_list_[current_chunk_index_]; }
+
+  value_type operator[](difference_type n) const {
+    int64_t chunk_index = GetChunkIndex(index_ + n);
+    int64_t index_in_chunk =
+        chunk_index ? index_ + n - chunks_lengths_[chunk_index - 1] : index_ + 
n;
+    return iterators_list_[chunk_index]
+                          [index_in_chunk - 
iterators_list_[chunk_index].index()];
+  }
+
+  int64_t index() const { return index_; }
+
+  // Forward / backward
+  ChunkedArrayIterator& operator++() {
+    ++index_;
+    if (iterators_list_[current_chunk_index_].index() ==
+        static_cast<int64_t>(
+            
chunked_array_->chunk(static_cast<int>(current_chunk_index_))->length()) -
+            1) {
+      iterators_list_[current_chunk_index_] -=
+          iterators_list_[current_chunk_index_].index();
+      current_chunk_index_++;
+      if (!static_cast<int64_t>(
+              
chunked_array_->chunk(static_cast<int>(current_chunk_index_))->length())) {
+        current_chunk_index_ = GetChunkIndex(index_);
+      }
+      iterators_list_[current_chunk_index_] -=
+          iterators_list_[current_chunk_index_].index();
+    } else {
+      iterators_list_[current_chunk_index_]++;
+    }
+    return *this;
+  }
+  ChunkedArrayIterator& operator--() {
+    --index_;
+    if (iterators_list_[current_chunk_index_].index()) {
+      iterators_list_[current_chunk_index_]--;
+    } else {
+      iterators_list_[current_chunk_index_] -=
+          iterators_list_[current_chunk_index_].index();
+      current_chunk_index_--;
+      if (!static_cast<int64_t>(
+              
chunked_array_->chunk(static_cast<int>(current_chunk_index_))->length())) {
+        current_chunk_index_ = GetChunkIndex(index_);
+      }
+      iterators_list_[current_chunk_index_] -=
+          iterators_list_[current_chunk_index_].index();
+      iterators_list_[current_chunk_index_] +=
+          static_cast<int64_t>(
+              
chunked_array_->chunk(static_cast<int>(current_chunk_index_))->length()) -
+          1;
+    }
+    return *this;
+  }
+
+  ChunkedArrayIterator operator++(int) {
+    ChunkedArrayIterator tmp(*this);
+    ++*this;
+    return tmp;
+  }
+  ChunkedArrayIterator operator--(int) {
+    ChunkedArrayIterator tmp(*this);
+    --*this;
+    return tmp;
+  }
+
+  // Arithmetic
+  difference_type operator-(const ChunkedArrayIterator& other) const {
+    return index_ - other.index_;
+  }
+  ChunkedArrayIterator operator+(difference_type n) const {
+    return ChunkedArrayIterator(*chunked_array_, index_ + n);
+  }
+  ChunkedArrayIterator operator-(difference_type n) const {
+    return ChunkedArrayIterator(*chunked_array_, index_ - n);
+  }
+  friend inline ChunkedArrayIterator operator+(difference_type diff,
+                                               const ChunkedArrayIterator& 
other) {
+    return ChunkedArrayIterator(*other.chunked_array_, diff + other.index_);
+  }
+  friend inline ChunkedArrayIterator operator-(difference_type diff,
+                                               const ChunkedArrayIterator& 
other) {
+    return ChunkedArrayIterator(*other.chunked_array_, diff - other.index_);
+  }
+  ChunkedArrayIterator& operator+=(difference_type n) {
+    if (n < static_cast<int64_t>(
+                
chunked_array_->chunk(static_cast<int>(current_chunk_index_))->length()) -
+                iterators_list_[current_chunk_index_].index()) {
+      index_ += n;
+      iterators_list_[current_chunk_index_] += n;
+      return *this;
+    } else {
+      iterators_list_[current_chunk_index_] -=
+          iterators_list_[current_chunk_index_].index();
+      index_ += n;
+      InitializeComponents(*this, true);
+      return *this;
+    }
+  }
+  ChunkedArrayIterator& operator-=(difference_type n) {
+    if (n <= iterators_list_[current_chunk_index_].index()) {
+      index_ -= n;
+      iterators_list_[current_chunk_index_] -= n;
+      return *this;
+    } else {
+      iterators_list_[current_chunk_index_] -=
+          iterators_list_[current_chunk_index_].index();
+      index_ -= n;
+      InitializeComponents(*this, true);
+      return *this;
+    }
+  }
+
+  // Comparisons
+  bool operator==(const ChunkedArrayIterator& other) const {
+    return index_ == other.index_;
+  }
+  bool operator!=(const ChunkedArrayIterator& other) const {
+    return index_ != other.index_;
+  }
+  bool operator<(const ChunkedArrayIterator& other) const {
+    return index_ < other.index_;
+  }
+  bool operator>(const ChunkedArrayIterator& other) const {
+    return index_ > other.index_;
+  }
+  bool operator<=(const ChunkedArrayIterator& other) const {
+    return index_ <= other.index_;
+  }
+  bool operator>=(const ChunkedArrayIterator& other) const {
+    return index_ >= other.index_;
+  }
+
+ private:
+  int64_t GetChunkIndex(int64_t index) const {
+    return static_cast<int64_t>(
+        std::upper_bound(chunks_lengths_.begin(), chunks_lengths_.end(), 
index) -
+        chunks_lengths_.begin());
+  }
+
+  void InitializeComponents(ChunkedArrayIterator<ArrayType>& 
chunked_array_iterator,
+                            bool update = false) {
+    if (!update) {
+      int64_t chunk_index = 0;
+      for (const auto& array : 
chunked_array_iterator.chunked_array_->chunks()) {
+        chunked_array_iterator.iterators_list_.emplace_back(
+            *arrow::internal::checked_pointer_cast<ArrayType>(array));
+        auto chunk_length = static_cast<int64_t>(array->length());
+        if (chunk_index) {
+          chunked_array_iterator.chunks_lengths_.push_back(
+              chunk_length + 
chunked_array_iterator.chunks_lengths_[chunk_index - 1]);
+        } else {
+          chunked_array_iterator.chunks_lengths_.push_back(chunk_length);
+        }
+        chunk_index++;
+      }
+    }
+
+    chunked_array_iterator.current_chunk_index_ =
+        GetChunkIndex(chunked_array_iterator.index_);
+    auto& current_iterator =
+        chunked_array_iterator
+            .iterators_list_[chunked_array_iterator.current_chunk_index_];
+    current_iterator -= current_iterator.index();
+    if (chunked_array_iterator.current_chunk_index_)
+      current_iterator +=
+          chunked_array_iterator.index_ -
+          chunked_array_iterator
+              .chunks_lengths_[chunked_array_iterator.current_chunk_index_ - 
1];
+    else
+      current_iterator += chunked_array_iterator.index_;
+  }

Review Comment:
   For the record, I agree with this suggestion. We don't enforce it, but we 
try to maintain it.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to