jorisvandenbossche commented on code in PR #14395:
URL: https://github.com/apache/arrow/pull/14395#discussion_r1013856089


##########
cpp/src/arrow/compute/kernels/scalar_nested.cc:
##########
@@ -87,6 +89,199 @@ Status GetListElementIndex(const ExecValue& value, T* out) {
   return Status::OK();
 }
 
+template <typename Type, typename IndexType>
+struct ListSlice {
+  using offset_type = typename Type::offset_type;
+
+  static Status Exec(KernelContext* ctx, const ExecSpan& batch, ExecResult* 
out) {
+    const auto opts = OptionsWrapper<ListSliceOptions>::Get(ctx);
+
+    // Invariants
+    if (opts.start < 0 || (opts.start >= opts.stop && opts.stop != -1)) {
+      // TODO: support start == stop which should give empty lists
+      return Status::Invalid("`start`(", opts.start,
+                             ") should be greater than 0 and smaller than 
`stop`(",
+                             opts.stop, ")");
+    }
+    if (opts.step != 1) {
+      // TODO: support step in slicing
+      return Status::NotImplemented(
+          "Setting `step` to anything other than 1 is not supported; got 
step=",
+          opts.step);
+    }
+    if (opts.stop == -1) {
+      // TODO: Support slicing to arbitrary end
+      // For variable size list, this would be the largest difference in 
offsets
+      // For fixed size list, this would be the fixed size.
+      return Status::NotImplemented(
+          "Setting `stop==-1` to signify slicing to end, not yet 
implemented.");
+    }
+
+    const ArraySpan& list_ = batch[0].array;
+    const Type* list_type = checked_cast<const Type*>(list_.type);
+    const auto value_type = list_type->value_type();
+
+    std::unique_ptr<ArrayBuilder> builder;
+
+    // construct array values
+    if (opts.return_fixed_size_list) {
+      RETURN_NOT_OK(MakeBuilder(
+          ctx->memory_pool(),
+          fixed_size_list(value_type, static_cast<int32_t>(opts.stop - 
opts.start)),
+          &builder));
+      RETURN_NOT_OK(BuildArray<FixedSizeListBuilder>(batch, opts, *builder));
+    } else {
+      if constexpr (std::is_same_v<Type, LargeListType>) {
+        RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(), large_list(value_type), 
&builder));
+        RETURN_NOT_OK(BuildArray<LargeListBuilder>(batch, opts, *builder));
+      } else {
+        RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(), list(value_type), 
&builder));
+        RETURN_NOT_OK(BuildArray<ListBuilder>(batch, opts, *builder));
+      }
+    }
+
+    // build output arrays and set result
+    ARROW_ASSIGN_OR_RAISE(auto result, builder->Finish());
+    out->value = result->data();
+    return Status::OK();
+  }
+
+  template <typename BuilderType>
+  static Status BuildArray(const ExecSpan& batch, const ListSliceOptions& opts,
+                           ArrayBuilder& builder) {
+    if constexpr (std::is_same_v<Type, FixedSizeListType>) {
+      RETURN_NOT_OK(BuildArrayFromFixedSizeListType<BuilderType>(batch, opts, 
builder));
+    } else {
+      RETURN_NOT_OK(BuildArrayFromListType<BuilderType>(batch, opts, builder));
+    }
+    return Status::OK();
+  }
+
+  template <typename BuilderType>
+  static Status BuildArrayFromFixedSizeListType(const ExecSpan& batch,
+                                                const ListSliceOptions& opts,
+                                                ArrayBuilder& builder) {
+    const auto list_size =
+        checked_cast<const FixedSizeListType&>(*batch[0].type()).list_size();
+    const ArraySpan& list_ = batch[0].array;
+    const ArraySpan& list_values = list_.child_data[0];
+    const offset_type n_offsets = list_values.length / list_size;
+
+    auto list_builder = checked_cast<BuilderType*>(&builder);
+    for (offset_type offset = 0; offset < n_offsets * list_size;
+         offset = offset + list_size) {
+      auto next_offset = offset + list_size;
+      if (list_values.IsNull(offset)) {
+        RETURN_NOT_OK(list_builder->AppendNull());
+      } else {
+        RETURN_NOT_OK(SetValues<BuilderType>(list_builder, offset, 
next_offset, &opts,
+                                             &list_values));
+      }
+    }
+    return Status::OK();
+  }
+
+  template <typename BuilderType>
+  static Status BuildArrayFromListType(const ExecSpan& batch,
+                                       const ListSliceOptions& opts,
+                                       ArrayBuilder& builder) {
+    const ArraySpan& list_ = batch[0].array;
+    const offset_type* offsets = list_.GetValues<offset_type>(1);
+    const auto n_offsets = static_cast<offset_type>(
+        static_cast<size_t>(list_.GetBuffer(1)->size()) / sizeof(offset_type));
+
+    // validity bitmap if set.
+    std::unique_ptr<arrow::internal::Bitmap> validity_bitmap;
+    if (list_.buffers[0].data != nullptr) {
+      arrow::internal::Bitmap bitmap{list_.buffers[0].data, list_.offset, 
list_.length};
+      validity_bitmap = std::make_unique<arrow::internal::Bitmap>(bitmap);
+    }
+    const ArraySpan& list_values = list_.child_data[0];
+
+    auto list_builder = checked_cast<BuilderType*>(&builder);
+    for (auto i = 0; i < n_offsets - 1; ++i) {
+      const offset_type offset = offsets[i];
+      const offset_type next_offset = offsets[i + 1];
+      if (validity_bitmap != nullptr && !validity_bitmap->GetBit(i)) {
+        RETURN_NOT_OK(list_builder->AppendNull());
+      } else {
+        RETURN_NOT_OK(SetValues<BuilderType>(list_builder, offset, 
next_offset, &opts,
+                                             &list_values));
+      }
+    }
+    return Status::OK();
+  }
+  template <typename BuilderType>
+  static Status SetValues(BuilderType* list_builder, const offset_type offset,
+                          const offset_type next_offset, const 
ListSliceOptions* opts,
+                          const ArraySpan* list_values) {
+    auto value_builder = list_builder->value_builder();
+    auto cursor = offset;
+
+    RETURN_NOT_OK(list_builder->Append());
+    while (cursor < offset + (opts->stop - opts->start)) {
+      if (cursor + opts->start >= next_offset) {
+        if constexpr (!std::is_same_v<BuilderType, FixedSizeListBuilder>) {
+          break;  // don't pad nulls for variable sized list output
+        }
+        RETURN_NOT_OK(value_builder->AppendNull());
+      } else {
+        RETURN_NOT_OK(
+            value_builder->AppendArraySlice(*list_values, cursor + 
opts->start, 1));
+      }
+      ++cursor;
+    }
+    return Status::OK();
+  }
+};
+
+Result<TypeHolder> MakeListSliceResolve(KernelContext* ctx,
+                                        const std::vector<TypeHolder>& types) {
+  const auto start = OptionsWrapper<ListSliceOptions>::Get(ctx).start;
+  const auto stop = OptionsWrapper<ListSliceOptions>::Get(ctx).stop;
+  const auto return_fixed_size_list =
+      OptionsWrapper<ListSliceOptions>::Get(ctx).return_fixed_size_list;
+  const auto list_type = checked_cast<const BaseListType*>(types[0].type);
+  if (return_fixed_size_list) {
+    return TypeHolder(
+        fixed_size_list(list_type->value_type(), static_cast<int32_t>(stop - 
start)));
+  } else {
+    // Returning large list if that's what we got in and didn't ask for fixed 
size
+    if (list_type->id() == Type::LARGE_LIST) {
+      return TypeHolder(large_list(list_type->value_type()));

Review Comment:
   See my comment on the test you added: it is the name of the list child field
itself that we need to test (and I think getting that to work will still require
code changes).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to