jorisvandenbossche commented on code in PR #14395:
URL: https://github.com/apache/arrow/pull/14395#discussion_r1013254988
########## cpp/src/arrow/compute/kernels/scalar_nested.cc: ##########
@@ -87,6 +89,195 @@ Status GetListElementIndex(const ExecValue& value, T* out) {
   return Status::OK();
 }
 
+template <typename Type, typename IndexType>
+struct ListSlice {
+  using offset_type = typename Type::offset_type;
+
+  static Status Exec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) {
+    const auto opts = OptionsWrapper<ListSliceOptions>::Get(ctx);
+
+    // Invariants
+    if (!opts.stop.has_value()) {
+      // TODO: Support slicing to arbitrary end
+      // For variable size list, this would be the largest difference in offsets
+      // For fixed size list, this would be the fixed size.
+      return Status::NotImplemented(
+          "Slicing to end not yet implemented, please set `stop` parameter.");
+    }
+    if (opts.start < 0 || opts.start >= opts.stop.value()) {
+      // TODO: support start == stop which should give empty lists
+      return Status::Invalid("`start`(", opts.start,
+                             ") should be greater than 0 and smaller than `stop`(",
+                             opts.stop, ")");
+    }
+    if (opts.step != 1) {
+      // TODO: support step in slicing
+      return Status::NotImplemented(
+          "Setting `step` to anything other than 1 is not supported; got step=",
+          opts.step);
+    }
+
+    const ArraySpan& list_ = batch[0].array;
+    const Type* list_type = checked_cast<const Type*>(list_.type);
+    const auto value_type = list_type->value_type();
+
+    std::unique_ptr<ArrayBuilder> builder;
+
+    // construct array values
+    if (opts.return_fixed_size_list) {
+      RETURN_NOT_OK(MakeBuilder(
+          ctx->memory_pool(),
+          fixed_size_list(value_type,
+                          static_cast<int32_t>(opts.stop.value() - opts.start)),
+          &builder));
+      RETURN_NOT_OK(BuildArray<FixedSizeListBuilder>(batch, opts, *builder));
+    } else {
+      if constexpr (std::is_same_v<Type, LargeListType>) {
+        RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(), large_list(value_type), &builder));
+        RETURN_NOT_OK(BuildArray<LargeListBuilder>(batch, opts, *builder));
+      } else {
+        RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(), list(value_type), &builder));

Review Comment:
   Instead of creating a (large_)list type, can we use the original one (`list_type` defined above)? That would then preserve the field names?
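   A rough sketch of what that could look like in this `else` branch (untested; assumes the `ListType`/`LargeListType` constructors that take the value field directly, so the child field name and nullability carry over instead of the default `item`):

       } else {
         // reuse the input's value field so the child field name is preserved
         if constexpr (std::is_same_v<Type, LargeListType>) {
           auto out_type = std::make_shared<LargeListType>(list_type->value_field());
           RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(), out_type, &builder));
           RETURN_NOT_OK(BuildArray<LargeListBuilder>(batch, opts, *builder));
         } else {
           auto out_type = std::make_shared<ListType>(list_type->value_field());
           RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(), out_type, &builder));
           RETURN_NOT_OK(BuildArray<ListBuilder>(batch, opts, *builder));
         }
       }

   The fixed size branch could do the same with `std::make_shared<FixedSizeListType>(list_type->value_field(), static_cast<int32_t>(opts.stop.value() - opts.start))`, and `MakeListSliceResolve` below would need the matching change so the declared output type agrees with what the kernel produces.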
########## cpp/src/arrow/compute/kernels/scalar_nested.cc: ##########
@@ -87,6 +89,195 @@ Status GetListElementIndex(const ExecValue& value, T* out) {
+        RETURN_NOT_OK(BuildArray<ListBuilder>(batch, opts, *builder));
+      }
+    }
+
+    // build output arrays and set result
+    ARROW_ASSIGN_OR_RAISE(auto result, builder->Finish());
+    out->value = std::move(result->data());
+    return Status::OK();
+  }
+
+  template <typename BuilderType>
+  static Status BuildArray(const ExecSpan& batch, const ListSliceOptions& opts,
+                           ArrayBuilder& builder) {
+    if constexpr (std::is_same_v<Type, FixedSizeListType>) {
+      RETURN_NOT_OK(BuildArrayFromFixedSizeListType<BuilderType>(batch, opts, builder));
+    } else {
+      RETURN_NOT_OK(BuildArrayFromListType<BuilderType>(batch, opts, builder));
+    }
+    return Status::OK();
+  }
+
+  template <typename BuilderType>
+  static Status BuildArrayFromFixedSizeListType(const ExecSpan& batch,
+                                                const ListSliceOptions& opts,
+                                                ArrayBuilder& builder) {
+    const auto list_size =
+        checked_cast<const FixedSizeListType&>(*batch[0].type()).list_size();
+    const ArraySpan& list_ = batch[0].array;
+    const ArraySpan& list_values = list_.child_data[0];
+
+    auto list_builder = checked_cast<BuilderType*>(&builder);
+    for (auto i = 0; i < list_.length; ++i) {
+      auto offset = (i + list_.offset) * list_size;
+      auto next_offset = offset + list_size;
+      if (list_.IsNull(i)) {
+        RETURN_NOT_OK(list_builder->AppendNull());
+      } else {
+        RETURN_NOT_OK(SetValues<BuilderType>(list_builder, offset, next_offset, &opts,
+                                             &list_values));
+      }
+    }
+    return Status::OK();
+  }
+
+  template <typename BuilderType>
+  static Status BuildArrayFromListType(const ExecSpan& batch,
+                                       const ListSliceOptions& opts,
+                                       ArrayBuilder& builder) {
+    const ArraySpan& list_ = batch[0].array;
+    const offset_type* offsets = list_.GetValues<offset_type>(1);
+
+    const ArraySpan& list_values = list_.child_data[0];
+
+    auto list_builder = checked_cast<BuilderType*>(&builder);
+    for (auto i = 0; i < list_.length; ++i) {
+      const offset_type offset = offsets[i];
+      const offset_type next_offset = offsets[i + 1];
+      if (list_.IsNull(i)) {
+        RETURN_NOT_OK(list_builder->AppendNull());
+      } else {
+        RETURN_NOT_OK(SetValues<BuilderType>(list_builder, offset, next_offset, &opts,
+                                             &list_values));
+      }
+    }
+    return Status::OK();
+  }
+  template <typename BuilderType>
+  static Status SetValues(BuilderType* list_builder, const offset_type offset,
+                          const offset_type next_offset, const ListSliceOptions* opts,
+                          const ArraySpan* list_values) {
+    auto value_builder = list_builder->value_builder();
+    auto cursor = offset;
+
+    RETURN_NOT_OK(list_builder->Append());
+    while (cursor < offset + (opts->stop.value() - opts->start)) {
+      if (cursor + opts->start >= next_offset) {
+        if constexpr (!std::is_same_v<BuilderType, FixedSizeListBuilder>) {
+          break;  // don't pad nulls for variable sized list output
+        }
+        RETURN_NOT_OK(value_builder->AppendNull());
+      } else {
+        RETURN_NOT_OK(
+            value_builder->AppendArraySlice(*list_values, cursor + opts->start, 1));
+      }
+      ++cursor;
+    }
+    return Status::OK();
+  }
+};
+
+Result<TypeHolder> MakeListSliceResolve(KernelContext* ctx,
+                                        const std::vector<TypeHolder>& types) {
+  const auto start = OptionsWrapper<ListSliceOptions>::Get(ctx).start;
+  const auto stop = OptionsWrapper<ListSliceOptions>::Get(ctx).stop;
+  const auto return_fixed_size_list =
+      OptionsWrapper<ListSliceOptions>::Get(ctx).return_fixed_size_list;
+  const auto list_type = checked_cast<const BaseListType*>(types[0].type);
+  if (return_fixed_size_list) {
+    if (!stop.has_value()) {
+      return Status::NotImplemented(
+          "Unable to produce FixedSizeListArray without `stop` being set.");
+    }
+    return TypeHolder(fixed_size_list(list_type->value_type(),
+                                      static_cast<int32_t>(stop.value() - start)));
+  } else {
+    // Returning large list if that's what we got in and didn't ask for fixed size
+    if (list_type->id() == Type::LARGE_LIST) {
+      return list_type;
+    }
+    return TypeHolder(list(list_type->value_type()));
+  }
+}
+
+template <typename InListType, template <typename...> class Functor>
+void AddListSliceKernels(ScalarFunction* func) {
+  for (const auto& index_type : IntTypes()) {
+    auto inputs = {InputType(InListType::type_id)};
+    auto output = OutputType{MakeListSliceResolve};
+    auto scalar_exec = GenerateInteger<Functor, InListType>({index_type->id()});
+    ScalarKernel kernel(inputs, output, std::move(scalar_exec),
+                        OptionsWrapper<ListSliceOptions>::Init);
+    kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
+    kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
+    DCHECK_OK(func->AddKernel(std::move(kernel)));
+  }
+}
+
+void AddListSliceKernels(ScalarFunction* func) {
+  AddListSliceKernels<ListType, ListSlice>(func);
+  AddListSliceKernels<LargeListType, ListSlice>(func);
+  AddListSliceKernels<FixedSizeListType, ListSlice>(func);
+}
+
+const FunctionDoc list_slice_doc(
+    "Slice list into a subset",
+    ("`lists` must have a list-like type.\n"
+     "Return subset array of the list, as either a variable\n"

Review Comment:
   Maybe something like "For each list value of `lists`, compute a slice and return as a new list array (either a variable or fixed size list array depending on the options)"?
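   That is, the per-element behaviour the summary would describe, written in the style of the tests in this PR (sketch, values made up for illustration):

       // each input list value is sliced independently over [start, stop)
       auto input = ArrayFromJSON(list(int32()), "[[1, 2, 3], [4, 5], null]");
       ListSliceOptions args(/*start=*/0, /*stop=*/2, /*step=*/1,
                             /*return_fixed_size_list=*/false);
       // variable size output: [[1, 2], [4, 5], null]
       // with return_fixed_size_list=true (the default) the same values come back
       // as fixed_size_list(int32(), 2)
       CheckScalarUnary("list_slice", input,
                        ArrayFromJSON(list(int32()), "[[1, 2], [4, 5], null]"), &args);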
########## python/pyarrow/_compute.pyx: ##########
@@ -1168,6 +1168,42 @@ class SliceOptions(_SliceOptions):
         self._set_options(start, stop, step)
 
 
+cdef class _ListSliceOptions(FunctionOptions):
+    cpdef _set_options(self, start, stop=None, step=1, return_fixed_size_list=True):
+        cdef:
+            CListSliceOptions* opts
+        if stop is None:
+            opts = new CListSliceOptions(start,
+                                         <optional[int64_t]>nullopt,
+                                         step, return_fixed_size_list)
+        else:
+            opts = new CListSliceOptions(start,
+                                         <optional[int64_t]>(<int64_t>stop),
+                                         step, return_fixed_size_list)
+        self.wrapped.reset(opts)
+
+
+class ListSliceOptions(_ListSliceOptions):
+    """
+    Options for list array slicing.
+
+    Parameters
+    ----------
+    start : int
+        Index to start slicing inner list elements (inclusive).
+    stop : Optional[int], default None
+        If given, index to stop slicing at (exclusive).
+        If not given, slicing will stop at the end. (NotImplemented)
+    step : int, default 1
+        Slice step.
+    return_fixed_size_list : bool, default True
+        Whether to return a FixedSizeListArray.

Review Comment:
   We should also include here the "If stop is after a list element's length, nulls will be appended to the requested slice size." as you have in the C++ docstring
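   For example (sketch mirroring the C++ tests; exact wording of the docstring left to the author), with the default `return_fixed_size_list=True` a slice that runs past a list's length gets padded with nulls up to `stop - start`:

       auto input = ArrayFromJSON(list(int32()), "[[1, 2, 3], [4]]");
       ListSliceOptions args(/*start=*/0, /*stop=*/3, /*step=*/1,
                             /*return_fixed_size_list=*/true);
       // output type is fixed_size_list(int32(), 3); the short list is null-padded
       auto expected =
           ArrayFromJSON(fixed_size_list(int32(), 3), "[[1, 2, 3], [4, null, null]]");
       CheckScalarUnary("list_slice", input, expected, &args);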
########## cpp/src/arrow/compute/kernels/scalar_nested.cc: ##########
@@ -87,6 +89,195 @@ Status GetListElementIndex(const ExecValue& value, T* out) {
+const FunctionDoc list_slice_doc(
+    "Slice list into a subset",

Review Comment:
   I find the "into a subset" not super clear (thinking about a better summary ..)
########## cpp/src/arrow/compute/api_scalar.h: ##########
@@ -346,6 +347,23 @@ class ARROW_EXPORT SliceOptions : public FunctionOptions {
   int64_t start, stop, step;
 };
 
+class ARROW_EXPORT ListSliceOptions : public FunctionOptions {
+ public:
+  explicit ListSliceOptions(int64_t start, std::optional<int64_t> stop = std::nullopt,
+                            int64_t step = 1, bool return_fixed_size_list = true);
+  ListSliceOptions();
+  static constexpr char const kTypeName[] = "ListSliceOptions";
+  /// The start of list slicing.
+  int64_t start;
+  /// Optional stop of list slicing. If not set, then slice to end. (NotImplemented)
+  std::optional<int64_t> stop;
+  /// Slicing step
+  int64_t step;
+  /// Whether to return a FixedSizeListArray. If stop is after a list element's length,
+  /// nulls will be appended to the requested slice size.

Review Comment:
   the "appending nulls" is only in case of FixedSizeList output? We should mention somewhere that this behaviour for variable size list is different
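   Concretely (sketch; this follows from `SetValues` above, which breaks out of the loop instead of appending nulls for non-fixed-size builders), the same slice with `return_fixed_size_list=false` just truncates and never pads:

       auto input = ArrayFromJSON(list(int32()), "[[1, 2, 3], [4]]");
       ListSliceOptions args(/*start=*/0, /*stop=*/3, /*step=*/1,
                             /*return_fixed_size_list=*/false);
       // no null padding for variable size output: [4] simply stays [4]
       auto expected = ArrayFromJSON(list(int32()), "[[1, 2, 3], [4]]");
       CheckScalarUnary("list_slice", input, expected, &args);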
########## cpp/src/arrow/compute/kernels/scalar_nested.cc: ##########
@@ -87,6 +89,195 @@ Status GetListElementIndex(const ExecValue& value, T* out) {
+Result<TypeHolder> MakeListSliceResolve(KernelContext* ctx,
+                                        const std::vector<TypeHolder>& types) {
+  const auto start = OptionsWrapper<ListSliceOptions>::Get(ctx).start;

Review Comment:
   Small nit for readability, maybe do `const auto& options = OptionsWrapper<ListSliceOptions>::Get(ctx);` and then use `options` for the three times we use this?
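   i.e. roughly:

       const auto& options = OptionsWrapper<ListSliceOptions>::Get(ctx);
       const auto start = options.start;
       const auto stop = options.stop;
       const auto return_fixed_size_list = options.return_fixed_size_list;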
########## cpp/src/arrow/compute/kernels/scalar_nested_test.cc: ##########
@@ -116,6 +117,111 @@ TEST(TestScalarNested, ListElementInvalid) {
                Raises(StatusCode::Invalid));
 }
 
+TEST(TestScalarNested, ListSliceVariableOutput) {
+  const auto value_types = {float32(), int32()};
+  for (auto value_type : value_types) {
+    /* Variable list size output required variable size list input. */
+    auto inputs = {ArrayFromJSON(list(value_type), "[[1, 2, 3], [4, 5], [6], null]")};
+    for (auto input : inputs) {
+      ListSliceOptions args(/*start=*/0, /*stop=*/2, /*step=*/1,
+                            /*return_fixed_size_list=*/false);
+      auto expected = ArrayFromJSON(list(value_type), "[[1, 2], [4, 5], [6], null]");
+      CheckScalarUnary("list_slice", input, expected, &args);
+
+      args.start = 1;
+      expected = ArrayFromJSON(list(value_type), "[[2], [5], [], null]");
+      CheckScalarUnary("list_slice", input, expected, &args);
+
+      args.start = 2;
+      args.stop = 4;
+      expected = ArrayFromJSON(list(value_type), "[[3], [], [], null]");
+      CheckScalarUnary("list_slice", input, expected, &args);
+    }
+  }
+
+  // Verify passing `return_fixed_size_list=false` with fixed size input
+  // returns variable size even if stop is beyond list_size
+  ListSliceOptions args(/*start=*/0, /*stop=*/2, /*step=*/1,
+                        /*return_fixed_size_list=*/false);
+  auto input = ArrayFromJSON(fixed_size_list(int32(), 1), "[[1]]");
+  auto expected = ArrayFromJSON(list(int32()), "[[1]]");

Review Comment:
   Do we need to support this? I find it a bit strange that someone would ever want to slice an already fixed size list array (so slicing will always give list elements of all the same size) as a non-fixed size list.
-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org