lidavidm commented on a change in pull request #12724: URL: https://github.com/apache/arrow/pull/12724#discussion_r836869051
########## File path: cpp/src/arrow/compute/kernels/scalar_cast_nested.cc ########## @@ -153,27 +153,32 @@ void AddListCast(CastFunction* func) { struct CastStruct { static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) { const CastOptions& options = CastState::Get(ctx); - const StructType& in_type = checked_cast<const StructType&>(*batch[0].type()); - const StructType& out_type = checked_cast<const StructType&>(*out->type()); - const auto in_field_count = in_type.num_fields(); - - if (in_field_count != out_type.num_fields()) { - return Status::TypeError("struct field sizes do not match: ", in_type.ToString(), - " ", out_type.ToString()); - } - - for (int i = 0; i < in_field_count; ++i) { - const auto in_field = in_type.field(i); - const auto out_field = out_type.field(i); - if (in_field->name() != out_field->name()) { - return Status::TypeError("struct field names do not match: ", in_type.ToString(), - " ", out_type.ToString()); + const auto& in_type = checked_cast<const StructType&>(*batch[0].type()); + const auto& out_type = checked_cast<const StructType&>(*out->type()); + const int in_field_count = in_type.num_fields(); + const int out_field_count = out_type.num_fields(); + + std::vector<int> fields_to_select(out_field_count, -1); + + int out_field_index = 0; + for (int in_field_index = 0; + in_field_index < in_field_count && out_field_index < out_field_count; + ++in_field_index) { + const auto in_field = in_type.field(in_field_index); + const auto out_field = out_type.field(out_field_index); + if (in_field->name() == out_field->name()) { + if (in_field->nullable() && !out_field->nullable()) { + return Status::TypeError("cannot cast nullable struct to non-nullable struct: ", + in_type.ToString(), " ", out_type.ToString()); + } + fields_to_select[out_field_index++] = in_field_index; } + } - if (in_field->nullable() && !out_field->nullable()) { - return Status::TypeError("cannot cast nullable struct to non-nullable struct: ", - in_type.ToString(), " ", out_type.ToString()); - } + if (out_field_index < out_field_count) { + return Status::TypeError( + "struct (sub)fields don't match or are in the wrong order: Input fields: ", Review comment: nit, but we can just say "struct fields" ########## File path: cpp/src/arrow/compute/kernels/scalar_cast_nested.cc ########## @@ -153,27 +153,32 @@ void AddListCast(CastFunction* func) { struct CastStruct { static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) { const CastOptions& options = CastState::Get(ctx); - const StructType& in_type = checked_cast<const StructType&>(*batch[0].type()); - const StructType& out_type = checked_cast<const StructType&>(*out->type()); - const auto in_field_count = in_type.num_fields(); - - if (in_field_count != out_type.num_fields()) { - return Status::TypeError("struct field sizes do not match: ", in_type.ToString(), - " ", out_type.ToString()); - } - - for (int i = 0; i < in_field_count; ++i) { - const auto in_field = in_type.field(i); - const auto out_field = out_type.field(i); - if (in_field->name() != out_field->name()) { - return Status::TypeError("struct field names do not match: ", in_type.ToString(), - " ", out_type.ToString()); + const auto& in_type = checked_cast<const StructType&>(*batch[0].type()); + const auto& out_type = checked_cast<const StructType&>(*out->type()); + const int in_field_count = in_type.num_fields(); + const int out_field_count = out_type.num_fields(); + + std::vector<int> fields_to_select(out_field_count, -1); + + int out_field_index = 0; + for (int in_field_index = 0; + in_field_index < in_field_count && out_field_index < out_field_count; + ++in_field_index) { + const auto in_field = in_type.field(in_field_index); + const auto out_field = out_type.field(out_field_index); + if (in_field->name() == out_field->name()) { + if (in_field->nullable() && !out_field->nullable()) { + return Status::TypeError("cannot cast nullable struct to non-nullable struct: ", Review comment: ```suggestion return Status::TypeError("cannot cast nullable field to non-nullable field: ", ``` ########## File path: cpp/src/arrow/compute/kernels/scalar_cast_test.cc ########## @@ -2245,10 +2245,132 @@ static void CheckStructToStruct( } } -TEST(Cast, StructToSameSizedAndNamedStruct) { - CheckStructToStruct({int32(), float32(), int64()}); +static void CheckStructToStructSubset( + const std::vector<std::shared_ptr<DataType>>& value_types) { + for (const auto& src_value_type : value_types) { + for (const auto& dest_value_type : value_types) { + std::vector<std::string> field_names = {"a", "b", "c", "d", "e"}; + + std::shared_ptr<Array> a1, b1, c1, d1, e1; + a1 = ArrayFromJSON(src_value_type, "[1, 2, 5]"); + b1 = ArrayFromJSON(src_value_type, "[3, 4, 7]"); + c1 = ArrayFromJSON(src_value_type, "[9, 11, 44]"); + d1 = ArrayFromJSON(src_value_type, "[6, 51, 49]"); + e1 = ArrayFromJSON(src_value_type, "[19, 17, 74]"); + + std::shared_ptr<Array> a2, b2, c2, d2, e2; + a2 = ArrayFromJSON(dest_value_type, "[1, 2, 5]"); + b2 = ArrayFromJSON(dest_value_type, "[3, 4, 7]"); + c2 = ArrayFromJSON(dest_value_type, "[9, 11, 44]"); + d2 = ArrayFromJSON(dest_value_type, "[6, 51, 49]"); + e2 = ArrayFromJSON(dest_value_type, "[19, 17, 74]"); + + ASSERT_OK_AND_ASSIGN(auto src, + StructArray::Make({a1, b1, c1, d1, e1}, field_names)); + ASSERT_OK_AND_ASSIGN( + auto dest1, StructArray::Make({a2, c2}, std::vector<std::string>{"a", "c"})); + CheckCast(src, dest1); + + ASSERT_OK_AND_ASSIGN( + auto dest2, StructArray::Make({b2, d2}, std::vector<std::string>{"b", "d"})); + CheckCast(src, dest2); + + ASSERT_OK_AND_ASSIGN( + auto dest3, StructArray::Make({c2, e2}, std::vector<std::string>{"c", "e"})); + CheckCast(src, dest3); + + ASSERT_OK_AND_ASSIGN( + auto dest4, + StructArray::Make({a2, d2, e2}, std::vector<std::string>{"a", "d", "e"})); + CheckCast(src, dest4); + + ASSERT_OK_AND_ASSIGN( + auto dest5, + StructArray::Make({b2, c2, e2}, std::vector<std::string>{"b", "c", "e"})); + CheckCast(src, dest5); + + ASSERT_OK_AND_ASSIGN( + auto dest6, StructArray::Make({a2, b2, c2, e2}, + std::vector<std::string>{"a", "b", "c", "e"})); + CheckCast(src, dest6); + + ASSERT_OK_AND_ASSIGN( + auto dest7, StructArray::Make({a2, b2, c2, d2, e2}, {"a", "b", "c", "d", "e"})); + CheckCast(src, dest7); + + const auto dest8 = arrow::struct_({std::make_shared<Field>("a", int8()), + std::make_shared<Field>("d", int16()), + std::make_shared<Field>("f", int64())}); + const auto options = CastOptions::Safe(dest8); + EXPECT_RAISES_WITH_MESSAGE_THAT( + TypeError, + ::testing::HasSubstr( + "struct (sub)fields don't match or are in the wrong order"), + Cast(src, options)); + + // With nulls + std::shared_ptr<Buffer> null_bitmap; + BitmapFromVector<int>({0, 1, 0}, &null_bitmap); + + ASSERT_OK_AND_ASSIGN(auto src_null, StructArray::Make({a1, b1, c1, d1, e1}, + field_names, null_bitmap)); + ASSERT_OK_AND_ASSIGN( + auto dest1_null, + StructArray::Make({a2, c2}, std::vector<std::string>{"a", "c"}, null_bitmap)); + CheckCast(src_null, dest1_null); + + ASSERT_OK_AND_ASSIGN( + auto dest2_null, + StructArray::Make({b2, d2}, std::vector<std::string>{"b", "d"}, null_bitmap)); + CheckCast(src_null, dest2_null); + + ASSERT_OK_AND_ASSIGN( + auto dest3_null, + StructArray::Make({c2, e2}, std::vector<std::string>{"c", "e"}, null_bitmap)); + CheckCast(src_null, dest3_null); + + ASSERT_OK_AND_ASSIGN( + auto dest4_null, + StructArray::Make({a2, d2, e2}, std::vector<std::string>{"a", "d", "e"}, + null_bitmap)); + CheckCast(src_null, dest4_null); + + ASSERT_OK_AND_ASSIGN( + auto dest5_null, + StructArray::Make({b2, c2, e2}, std::vector<std::string>{"b", "c", "e"}, + null_bitmap)); + CheckCast(src_null, dest5_null); + + ASSERT_OK_AND_ASSIGN( + auto dest6_null, + StructArray::Make({a2, b2, c2, e2}, + std::vector<std::string>{"a", "b", "c", "e"}, null_bitmap)); + CheckCast(src_null, dest6_null); + + ASSERT_OK_AND_ASSIGN( + auto dest7_null, + StructArray::Make({a2, b2, c2, d2, e2}, + std::vector<std::string>{"a", "b", "c", "d", "e"}, + null_bitmap)); + CheckCast(src_null, dest7_null); + + const auto dest8_null = arrow::struct_({std::make_shared<Field>("a", int8()), + std::make_shared<Field>("d", int16()), + std::make_shared<Field>("f", int64())}); + const auto options_null = CastOptions::Safe(dest8_null); + EXPECT_RAISES_WITH_MESSAGE_THAT( + TypeError, + ::testing::HasSubstr( + "struct (sub)fields don't match or are in the wrong order"), + Cast(src_null, options_null)); Review comment: Can we also add a case or two dealing with duplicate field names on either side? (It looks like the implementation will handle this sensibly.) ########## File path: cpp/src/arrow/compute/kernels/scalar_cast_test.cc ########## @@ -2245,10 +2245,132 @@ static void CheckStructToStruct( } } -TEST(Cast, StructToSameSizedAndNamedStruct) { - CheckStructToStruct({int32(), float32(), int64()}); +static void CheckStructToStructSubset( + const std::vector<std::shared_ptr<DataType>>& value_types) { + for (const auto& src_value_type : value_types) { Review comment: and ditto for the destination type ########## File path: cpp/src/arrow/compute/kernels/scalar_cast_test.cc ########## @@ -2262,12 +2384,11 @@ TEST(Cast, StructToSameSizedButDifferentNamedStruct) { EXPECT_RAISES_WITH_MESSAGE_THAT( TypeError, - ::testing::HasSubstr("Type error: struct field names do not match: struct<a: int8, " - "b: int8> struct<c: int8, d: int8>"), + ::testing::HasSubstr("struct (sub)fields don't match or are in the wrong order"), Cast(src, options)); } -TEST(Cast, StructToDifferentSizeStruct) { +TEST(Cast, StructToLargerSizeStruct) { Review comment: just a nit ########## File path: cpp/src/arrow/compute/kernels/scalar_cast_test.cc ########## @@ -2245,10 +2245,132 @@ static void CheckStructToStruct( } } -TEST(Cast, StructToSameSizedAndNamedStruct) { - CheckStructToStruct({int32(), float32(), int64()}); +static void CheckStructToStructSubset( + const std::vector<std::shared_ptr<DataType>>& value_types) { + for (const auto& src_value_type : value_types) { Review comment: ```suggestion for (const auto& src_value_type : value_types) { ARROW_SCOPED_TRACE("From type: ", src_value_type->ToString()); ``` ########## File path: cpp/src/arrow/compute/kernels/scalar_cast_test.cc ########## @@ -2245,10 +2245,132 @@ static void CheckStructToStruct( } } -TEST(Cast, StructToSameSizedAndNamedStruct) { - CheckStructToStruct({int32(), float32(), int64()}); +static void CheckStructToStructSubset( + const std::vector<std::shared_ptr<DataType>>& value_types) { + for (const auto& src_value_type : value_types) { + for (const auto& dest_value_type : value_types) { + std::vector<std::string> field_names = {"a", "b", "c", "d", "e"}; + + std::shared_ptr<Array> a1, b1, c1, d1, e1; + a1 = ArrayFromJSON(src_value_type, "[1, 2, 5]"); + b1 = ArrayFromJSON(src_value_type, "[3, 4, 7]"); + c1 = ArrayFromJSON(src_value_type, "[9, 11, 44]"); + d1 = ArrayFromJSON(src_value_type, "[6, 51, 49]"); + e1 = ArrayFromJSON(src_value_type, "[19, 17, 74]"); + + std::shared_ptr<Array> a2, b2, c2, d2, e2; + a2 = ArrayFromJSON(dest_value_type, "[1, 2, 5]"); + b2 = ArrayFromJSON(dest_value_type, "[3, 4, 7]"); + c2 = ArrayFromJSON(dest_value_type, "[9, 11, 44]"); + d2 = ArrayFromJSON(dest_value_type, "[6, 51, 49]"); + e2 = ArrayFromJSON(dest_value_type, "[19, 17, 74]"); + + ASSERT_OK_AND_ASSIGN(auto src, + StructArray::Make({a1, b1, c1, d1, e1}, field_names)); + ASSERT_OK_AND_ASSIGN( Review comment: I think some of these cases are a little redundant. ########## File path: cpp/src/arrow/compute/kernels/scalar_cast_test.cc ########## @@ -2245,10 +2245,132 @@ static void CheckStructToStruct( } } -TEST(Cast, StructToSameSizedAndNamedStruct) { - CheckStructToStruct({int32(), float32(), int64()}); +static void CheckStructToStructSubset( + const std::vector<std::shared_ptr<DataType>>& value_types) { + for (const auto& src_value_type : value_types) { + for (const auto& dest_value_type : value_types) { + std::vector<std::string> field_names = {"a", "b", "c", "d", "e"}; + + std::shared_ptr<Array> a1, b1, c1, d1, e1; + a1 = ArrayFromJSON(src_value_type, "[1, 2, 5]"); + b1 = ArrayFromJSON(src_value_type, "[3, 4, 7]"); + c1 = ArrayFromJSON(src_value_type, "[9, 11, 44]"); + d1 = ArrayFromJSON(src_value_type, "[6, 51, 49]"); + e1 = ArrayFromJSON(src_value_type, "[19, 17, 74]"); + + std::shared_ptr<Array> a2, b2, c2, d2, e2; + a2 = ArrayFromJSON(dest_value_type, "[1, 2, 5]"); + b2 = ArrayFromJSON(dest_value_type, "[3, 4, 7]"); + c2 = ArrayFromJSON(dest_value_type, "[9, 11, 44]"); + d2 = ArrayFromJSON(dest_value_type, "[6, 51, 49]"); + e2 = ArrayFromJSON(dest_value_type, "[19, 17, 74]"); + + ASSERT_OK_AND_ASSIGN(auto src, + StructArray::Make({a1, b1, c1, d1, e1}, field_names)); + ASSERT_OK_AND_ASSIGN( + auto dest1, StructArray::Make({a2, c2}, std::vector<std::string>{"a", "c"})); + CheckCast(src, dest1); + + ASSERT_OK_AND_ASSIGN( + auto dest2, StructArray::Make({b2, d2}, std::vector<std::string>{"b", "d"})); + CheckCast(src, dest2); + + ASSERT_OK_AND_ASSIGN( + auto dest3, StructArray::Make({c2, e2}, std::vector<std::string>{"c", "e"})); + CheckCast(src, dest3); + + ASSERT_OK_AND_ASSIGN( + auto dest4, + StructArray::Make({a2, d2, e2}, std::vector<std::string>{"a", "d", "e"})); + CheckCast(src, dest4); + + ASSERT_OK_AND_ASSIGN( + auto dest5, + StructArray::Make({b2, c2, e2}, std::vector<std::string>{"b", "c", "e"})); + CheckCast(src, dest5); + + ASSERT_OK_AND_ASSIGN( + auto dest6, StructArray::Make({a2, b2, c2, e2}, + std::vector<std::string>{"a", "b", "c", "e"})); + CheckCast(src, dest6); + + ASSERT_OK_AND_ASSIGN( + auto dest7, StructArray::Make({a2, b2, c2, d2, e2}, {"a", "b", "c", "d", "e"})); + CheckCast(src, dest7); + + const auto dest8 = arrow::struct_({std::make_shared<Field>("a", int8()), + std::make_shared<Field>("d", int16()), + std::make_shared<Field>("f", int64())}); + const auto options = CastOptions::Safe(dest8); + EXPECT_RAISES_WITH_MESSAGE_THAT( + TypeError, + ::testing::HasSubstr( + "struct (sub)fields don't match or are in the wrong order"), + Cast(src, options)); + + // With nulls + std::shared_ptr<Buffer> null_bitmap; + BitmapFromVector<int>({0, 1, 0}, &null_bitmap); + + ASSERT_OK_AND_ASSIGN(auto src_null, StructArray::Make({a1, b1, c1, d1, e1}, + field_names, null_bitmap)); + ASSERT_OK_AND_ASSIGN( + auto dest1_null, + StructArray::Make({a2, c2}, std::vector<std::string>{"a", "c"}, null_bitmap)); + CheckCast(src_null, dest1_null); + + ASSERT_OK_AND_ASSIGN( + auto dest2_null, + StructArray::Make({b2, d2}, std::vector<std::string>{"b", "d"}, null_bitmap)); + CheckCast(src_null, dest2_null); + + ASSERT_OK_AND_ASSIGN( + auto dest3_null, + StructArray::Make({c2, e2}, std::vector<std::string>{"c", "e"}, null_bitmap)); + CheckCast(src_null, dest3_null); + + ASSERT_OK_AND_ASSIGN( + auto dest4_null, + StructArray::Make({a2, d2, e2}, std::vector<std::string>{"a", "d", "e"}, + null_bitmap)); + CheckCast(src_null, dest4_null); + + ASSERT_OK_AND_ASSIGN( + auto dest5_null, + StructArray::Make({b2, c2, e2}, std::vector<std::string>{"b", "c", "e"}, + null_bitmap)); + CheckCast(src_null, dest5_null); + + ASSERT_OK_AND_ASSIGN( + auto dest6_null, + StructArray::Make({a2, b2, c2, e2}, + std::vector<std::string>{"a", "b", "c", "e"}, null_bitmap)); + CheckCast(src_null, dest6_null); + + ASSERT_OK_AND_ASSIGN( + auto dest7_null, + StructArray::Make({a2, b2, c2, d2, e2}, + std::vector<std::string>{"a", "b", "c", "d", "e"}, + null_bitmap)); + CheckCast(src_null, dest7_null); + + const auto dest8_null = arrow::struct_({std::make_shared<Field>("a", int8()), + std::make_shared<Field>("d", int16()), + std::make_shared<Field>("f", int64())}); Review comment: Can we also add a case where the fields exist, but are reversed? ########## File path: cpp/src/arrow/compute/kernels/scalar_cast_test.cc ########## @@ -2281,8 +2402,7 @@ TEST(Cast, StructToDifferentSizeStruct) { EXPECT_RAISES_WITH_MESSAGE_THAT( TypeError, - ::testing::HasSubstr("Type error: struct field sizes do not match: struct<a: int8, " - "b: int8> struct<a: int8, b: int8, c: int8>"), + ::testing::HasSubstr("struct (sub)fields don't match or are in the wrong order"), Cast(src, options)); } Review comment: Can we extend the nullability test below with a couple cases as well? ########## File path: cpp/src/arrow/compute/kernels/scalar_cast_test.cc ########## @@ -2262,12 +2384,11 @@ TEST(Cast, StructToSameSizedButDifferentNamedStruct) { EXPECT_RAISES_WITH_MESSAGE_THAT( TypeError, - ::testing::HasSubstr("Type error: struct field names do not match: struct<a: int8, " - "b: int8> struct<c: int8, d: int8>"), + ::testing::HasSubstr("struct (sub)fields don't match or are in the wrong order"), Cast(src, options)); } -TEST(Cast, StructToDifferentSizeStruct) { +TEST(Cast, StructToLargerSizeStruct) { Review comment: ```suggestion TEST(Cast, StructToBiggerStruct) { ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org