lidavidm commented on a change in pull request #12248:
URL: https://github.com/apache/arrow/pull/12248#discussion_r798580395
##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_nested.cc
##########
@@ -150,6 +150,73 @@ void AddListCast(CastFunction* func) {
DCHECK_OK(func->AddKernel(SrcType::type_id, std::move(kernel)));
}
+struct CastStruct {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ const CastOptions& options = CastState::Get(ctx);
+ const auto in_field_count =
+ checked_cast<const StructType&>(*batch[0].type()).num_fields();
+ const auto out_field_count =
+ checked_cast<const StructType&>(*out->type()).num_fields();
+
+ if (in_field_count != out_field_count) {
+ ARROW_RETURN_NOT_OK(
+ Status(StatusCode::TypeError, "struct field sizes do not match"));
+ }
+
+ for (int64_t i = 0; i < in_field_count; ++i) {
+ const auto in_field_name =
+ checked_cast<const StructType&>(*batch[0].type()).field(i)->name();
+ const auto out_field_name =
+ checked_cast<const StructType&>(*out->type()).field(i)->name();
+ if (in_field_name != out_field_name) {
+ ARROW_RETURN_NOT_OK(
+ Status(StatusCode::TypeError, "struct field names do not match"));
+ }
+ }
+
+ if (out->kind() == Datum::SCALAR) {
+ const auto& in_scalar = checked_cast<const
StructScalar&>(*batch[0].scalar());
+ auto out_scalar = checked_cast<StructScalar*>(out->scalar().get());
+
+ for (int64_t i = 0; i < in_field_count; i++) {
+ auto values = in_scalar.value[i];
+ auto target_type = out->type()->field(i)->type();
+ ARROW_ASSIGN_OR_RAISE(Datum cast_values,
+ Cast(values, target_type, options,
ctx->exec_context()));
+ DCHECK_EQ(Datum::SCALAR, cast_values.kind());
+ out_scalar->value.push_back(cast_values.scalar());
+ }
+
+ out_scalar->is_valid = true;
+ return Status::OK();
+ }
+
+ const ArrayData& in_array = *batch[0].array();
+ ArrayData* out_array = out->mutable_array();
+
+ for (int64_t i = 0; i < in_field_count; ++i) {
+ auto values = in_array.child_data[0];
Review comment:
Now there are test cases failing, I believe due to this case.
##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_nested.cc
##########
@@ -150,6 +150,76 @@ void AddListCast(CastFunction* func) {
DCHECK_OK(func->AddKernel(SrcType::type_id, std::move(kernel)));
}
+struct CastStruct {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ const CastOptions& options = CastState::Get(ctx);
+ const auto in_field_count =
+ checked_cast<const StructType&>(*batch[0].type()).num_fields();
+ const auto out_field_count =
+ checked_cast<const StructType&>(*out->type()).num_fields();
+
+ if (in_field_count != out_field_count) {
+ return Status::TypeError("struct field sizes do not match: ",
in_field_count,
+ " and ", out_field_count);
+ }
Review comment:
Sorry, looking at the output:
```
[ RUN ] ScanNode.MaterializationOfNestedVirtualColumn
/Users/runner/work/arrow/arrow/cpp/src/arrow/dataset/scanner_test.cc:1631:
Failure
Value of: _st.IsNotImplemented()
Actual: false
Expected: true
Expected 'fut.status()' to fail with NotImplemented, but got Type error:
struct field sizes do not match: 1 and 2
/Users/runner/work/arrow/arrow/cpp/src/arrow/dataset/scanner_test.cc:1631:
Failure
Value of: _st.ToString()
Expected: has substring "Unsupported cast from struct<e: int64> to struct"
Actual: "Type error: struct field sizes do not match: 1 and 2"
[ FAILED ] ScanNode.MaterializationOfNestedVirtualColumn (3 ms)
```
Including the stringified types in the error message would probably be more
helpful than the field counts after all.
##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_test.cc
##########
@@ -2217,6 +2217,78 @@ TEST(Cast, ListToListOptionsPassthru) {
}
}
+static void CheckStructToStruct(
+ const std::vector<std::shared_ptr<DataType>>& value_types) {
+ for (const auto& src_value_type : value_types) {
+ for (const auto& dest_value_type : value_types) {
+ std::vector<std::string> field_names = {"a"};
+ std::shared_ptr<Array> a1, b1, a2, b2;
+ a1 = ArrayFromJSON(src_value_type, "[1, 2]");
+ a2 = ArrayFromJSON(dest_value_type, "[1, 2]");
Review comment:
We should make these arrays longer so that CheckCast/CheckScalar will
actually test sliced inputs.
##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_nested.cc
##########
@@ -150,6 +150,76 @@ void AddListCast(CastFunction* func) {
DCHECK_OK(func->AddKernel(SrcType::type_id, std::move(kernel)));
}
+struct CastStruct {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ const CastOptions& options = CastState::Get(ctx);
+ const auto in_field_count =
+ checked_cast<const StructType&>(*batch[0].type()).num_fields();
+ const auto out_field_count =
+ checked_cast<const StructType&>(*out->type()).num_fields();
+
+ if (in_field_count != out_field_count) {
+ return Status::TypeError("struct field sizes do not match: ",
in_field_count,
+ " and ", out_field_count);
+ }
Review comment:
Also, the test mentioned above (ScanNode.MaterializationOfNestedVirtualColumn)
needs adjusting: we can just change it to expect TypeError instead of
NotImplemented (we can implement the full path later).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]