pitrou commented on a change in pull request #7461:
URL: https://github.com/apache/arrow/pull/7461#discussion_r441525758



##########
File path: cpp/src/arrow/compute/kernels/codegen_internal.h
##########
@@ -121,18 +123,34 @@ struct ArrayIterator<Type, enable_if_boolean<Type>> {
 
 template <typename Type>
 struct ArrayIterator<Type, enable_if_base_binary<Type>> {
-  int64_t position = 0;
-  typename TypeTraits<Type>::ArrayType arr;
-  explicit ArrayIterator(const ArrayData& data) : arr(data.Copy()) {}
-  util::string_view operator()() { return arr.GetView(position++); }
+  using offset_type = typename Type::offset_type;
+  const ArrayData& arr;
+  const offset_type* offsets;
+  offset_type cur_offset;
+  const char* data;
+  int64_t position;
+  explicit ArrayIterator(const ArrayData& arr)
+      : arr(arr),
+        offsets(reinterpret_cast<const offset_type*>(arr.buffers[1]->data()) +
+                arr.offset),
+        cur_offset(offsets[0]),
+        data(reinterpret_cast<const char*>(arr.buffers[2]->data())),
+        position(0) {}
+
+  util::string_view operator()() {
+    offset_type next_offset = offsets[position++ + 1];
+    auto result = util::string_view(data + cur_offset, next_offset - 
cur_offset);
+    cur_offset = next_offset;
+    return result;
+  }

Review comment:
       I don't think micro-benchmarks are useful here, as actual performance 
will depend on overall optimization of the calling function by the compiler.
   
   In any case, I don't dispute the numbers. But it also seems the improvements 
would be mostly compiler-dependent.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to