pitrou commented on code in PR #43302:
URL: https://github.com/apache/arrow/pull/43302#discussion_r1705205069
##########
cpp/src/arrow/compute/kernels/scalar_cast_string.cc:
##########
@@ -305,19 +310,198 @@ BinaryToBinaryCastExec(KernelContext* ctx, const
ExecSpan& batch, ExecResult* ou
ctx, input, out->array_data().get());
}
+// View -> Span
+template <typename O, typename I>
+enable_if_t<is_binary_view_like_type<I>::value &&
is_base_binary_type<O>::value, Status>
+BinaryToBinaryCastExec(KernelContext* ctx, const ExecSpan& batch, ExecResult*
out) {
+ using OutputBuilderType = typename TypeTraits<O>::BuilderType;
+ const CastOptions& options = checked_cast<const
CastState&>(*ctx->state()).options;
+ const ArraySpan& input = batch[0].array;
+
+ if constexpr (!I::is_utf8 && O::is_utf8) {
+ if (!options.allow_invalid_utf8) {
+ InitializeUTF8();
+ ArraySpanVisitor<I> visitor;
+ Utf8Validator validator;
+ RETURN_NOT_OK(visitor.Visit(input, &validator));
+ }
+ }
+
+ // XXX: a more efficient implementation that zero-copies the validity bitmap
+ // is possible, but requires a more complex implementation for building the
+ // offsets and data buffers
+ OutputBuilderType builder(options.to_type.GetSharedPtr(),
ctx->memory_pool());
+ RETURN_NOT_OK(builder.Resize(input.length));
Review Comment:
"visiting the whole array to sum the sizes" sounds fine to me (I think we do
it in other places as well); it should be beneficial, especially if the input is
large.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]