lidavidm commented on a change in pull request #12724:
URL: https://github.com/apache/arrow/pull/12724#discussion_r836869051



##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_nested.cc
##########
@@ -153,27 +153,32 @@ void AddListCast(CastFunction* func) {
 struct CastStruct {
   static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     const CastOptions& options = CastState::Get(ctx);
-    const StructType& in_type = checked_cast<const 
StructType&>(*batch[0].type());
-    const StructType& out_type = checked_cast<const StructType&>(*out->type());
-    const auto in_field_count = in_type.num_fields();
-
-    if (in_field_count != out_type.num_fields()) {
-      return Status::TypeError("struct field sizes do not match: ", 
in_type.ToString(),
-                               " ", out_type.ToString());
-    }
-
-    for (int i = 0; i < in_field_count; ++i) {
-      const auto in_field = in_type.field(i);
-      const auto out_field = out_type.field(i);
-      if (in_field->name() != out_field->name()) {
-        return Status::TypeError("struct field names do not match: ", 
in_type.ToString(),
-                                 " ", out_type.ToString());
+    const auto& in_type = checked_cast<const StructType&>(*batch[0].type());
+    const auto& out_type = checked_cast<const StructType&>(*out->type());
+    const int in_field_count = in_type.num_fields();
+    const int out_field_count = out_type.num_fields();
+
+    std::vector<int> fields_to_select(out_field_count, -1);
+
+    int out_field_index = 0;
+    for (int in_field_index = 0;
+         in_field_index < in_field_count && out_field_index < out_field_count;
+         ++in_field_index) {
+      const auto in_field = in_type.field(in_field_index);
+      const auto out_field = out_type.field(out_field_index);
+      if (in_field->name() == out_field->name()) {
+        if (in_field->nullable() && !out_field->nullable()) {
+          return Status::TypeError("cannot cast nullable struct to 
non-nullable struct: ",
+                                   in_type.ToString(), " ", 
out_type.ToString());
+        }
+        fields_to_select[out_field_index++] = in_field_index;
       }
+    }
 
-      if (in_field->nullable() && !out_field->nullable()) {
-        return Status::TypeError("cannot cast nullable struct to non-nullable 
struct: ",
-                                 in_type.ToString(), " ", out_type.ToString());
-      }
+    if (out_field_index < out_field_count) {
+      return Status::TypeError(
+          "struct (sub)fields don't match or are in the wrong order: Input 
fields: ",

Review comment:
       nit, but we can just say "struct fields"

##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_nested.cc
##########
@@ -153,27 +153,32 @@ void AddListCast(CastFunction* func) {
 struct CastStruct {
   static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     const CastOptions& options = CastState::Get(ctx);
-    const StructType& in_type = checked_cast<const 
StructType&>(*batch[0].type());
-    const StructType& out_type = checked_cast<const StructType&>(*out->type());
-    const auto in_field_count = in_type.num_fields();
-
-    if (in_field_count != out_type.num_fields()) {
-      return Status::TypeError("struct field sizes do not match: ", 
in_type.ToString(),
-                               " ", out_type.ToString());
-    }
-
-    for (int i = 0; i < in_field_count; ++i) {
-      const auto in_field = in_type.field(i);
-      const auto out_field = out_type.field(i);
-      if (in_field->name() != out_field->name()) {
-        return Status::TypeError("struct field names do not match: ", 
in_type.ToString(),
-                                 " ", out_type.ToString());
+    const auto& in_type = checked_cast<const StructType&>(*batch[0].type());
+    const auto& out_type = checked_cast<const StructType&>(*out->type());
+    const int in_field_count = in_type.num_fields();
+    const int out_field_count = out_type.num_fields();
+
+    std::vector<int> fields_to_select(out_field_count, -1);
+
+    int out_field_index = 0;
+    for (int in_field_index = 0;
+         in_field_index < in_field_count && out_field_index < out_field_count;
+         ++in_field_index) {
+      const auto in_field = in_type.field(in_field_index);
+      const auto out_field = out_type.field(out_field_index);
+      if (in_field->name() == out_field->name()) {
+        if (in_field->nullable() && !out_field->nullable()) {
+          return Status::TypeError("cannot cast nullable struct to 
non-nullable struct: ",

Review comment:
       ```suggestion
             return Status::TypeError("cannot cast nullable field to 
non-nullable field: ",
   ```

##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_test.cc
##########
@@ -2245,10 +2245,132 @@ static void CheckStructToStruct(
   }
 }
 
-TEST(Cast, StructToSameSizedAndNamedStruct) {
-  CheckStructToStruct({int32(), float32(), int64()});
+static void CheckStructToStructSubset(
+    const std::vector<std::shared_ptr<DataType>>& value_types) {
+  for (const auto& src_value_type : value_types) {
+    for (const auto& dest_value_type : value_types) {
+      std::vector<std::string> field_names = {"a", "b", "c", "d", "e"};
+
+      std::shared_ptr<Array> a1, b1, c1, d1, e1;
+      a1 = ArrayFromJSON(src_value_type, "[1, 2, 5]");
+      b1 = ArrayFromJSON(src_value_type, "[3, 4, 7]");
+      c1 = ArrayFromJSON(src_value_type, "[9, 11, 44]");
+      d1 = ArrayFromJSON(src_value_type, "[6, 51, 49]");
+      e1 = ArrayFromJSON(src_value_type, "[19, 17, 74]");
+
+      std::shared_ptr<Array> a2, b2, c2, d2, e2;
+      a2 = ArrayFromJSON(dest_value_type, "[1, 2, 5]");
+      b2 = ArrayFromJSON(dest_value_type, "[3, 4, 7]");
+      c2 = ArrayFromJSON(dest_value_type, "[9, 11, 44]");
+      d2 = ArrayFromJSON(dest_value_type, "[6, 51, 49]");
+      e2 = ArrayFromJSON(dest_value_type, "[19, 17, 74]");
+
+      ASSERT_OK_AND_ASSIGN(auto src,
+                           StructArray::Make({a1, b1, c1, d1, e1}, 
field_names));
+      ASSERT_OK_AND_ASSIGN(
+          auto dest1, StructArray::Make({a2, c2}, 
std::vector<std::string>{"a", "c"}));
+      CheckCast(src, dest1);
+
+      ASSERT_OK_AND_ASSIGN(
+          auto dest2, StructArray::Make({b2, d2}, 
std::vector<std::string>{"b", "d"}));
+      CheckCast(src, dest2);
+
+      ASSERT_OK_AND_ASSIGN(
+          auto dest3, StructArray::Make({c2, e2}, 
std::vector<std::string>{"c", "e"}));
+      CheckCast(src, dest3);
+
+      ASSERT_OK_AND_ASSIGN(
+          auto dest4,
+          StructArray::Make({a2, d2, e2}, std::vector<std::string>{"a", "d", 
"e"}));
+      CheckCast(src, dest4);
+
+      ASSERT_OK_AND_ASSIGN(
+          auto dest5,
+          StructArray::Make({b2, c2, e2}, std::vector<std::string>{"b", "c", 
"e"}));
+      CheckCast(src, dest5);
+
+      ASSERT_OK_AND_ASSIGN(
+          auto dest6, StructArray::Make({a2, b2, c2, e2},
+                                        std::vector<std::string>{"a", "b", 
"c", "e"}));
+      CheckCast(src, dest6);
+
+      ASSERT_OK_AND_ASSIGN(
+          auto dest7, StructArray::Make({a2, b2, c2, d2, e2}, {"a", "b", "c", 
"d", "e"}));
+      CheckCast(src, dest7);
+
+      const auto dest8 = arrow::struct_({std::make_shared<Field>("a", int8()),
+                                         std::make_shared<Field>("d", int16()),
+                                         std::make_shared<Field>("f", 
int64())});
+      const auto options = CastOptions::Safe(dest8);
+      EXPECT_RAISES_WITH_MESSAGE_THAT(
+          TypeError,
+          ::testing::HasSubstr(
+              "struct (sub)fields don't match or are in the wrong order"),
+          Cast(src, options));
+
+      // With nulls
+      std::shared_ptr<Buffer> null_bitmap;
+      BitmapFromVector<int>({0, 1, 0}, &null_bitmap);
+
+      ASSERT_OK_AND_ASSIGN(auto src_null, StructArray::Make({a1, b1, c1, d1, 
e1},
+                                                            field_names, 
null_bitmap));
+      ASSERT_OK_AND_ASSIGN(
+          auto dest1_null,
+          StructArray::Make({a2, c2}, std::vector<std::string>{"a", "c"}, 
null_bitmap));
+      CheckCast(src_null, dest1_null);
+
+      ASSERT_OK_AND_ASSIGN(
+          auto dest2_null,
+          StructArray::Make({b2, d2}, std::vector<std::string>{"b", "d"}, 
null_bitmap));
+      CheckCast(src_null, dest2_null);
+
+      ASSERT_OK_AND_ASSIGN(
+          auto dest3_null,
+          StructArray::Make({c2, e2}, std::vector<std::string>{"c", "e"}, 
null_bitmap));
+      CheckCast(src_null, dest3_null);
+
+      ASSERT_OK_AND_ASSIGN(
+          auto dest4_null,
+          StructArray::Make({a2, d2, e2}, std::vector<std::string>{"a", "d", 
"e"},
+                            null_bitmap));
+      CheckCast(src_null, dest4_null);
+
+      ASSERT_OK_AND_ASSIGN(
+          auto dest5_null,
+          StructArray::Make({b2, c2, e2}, std::vector<std::string>{"b", "c", 
"e"},
+                            null_bitmap));
+      CheckCast(src_null, dest5_null);
+
+      ASSERT_OK_AND_ASSIGN(
+          auto dest6_null,
+          StructArray::Make({a2, b2, c2, e2},
+                            std::vector<std::string>{"a", "b", "c", "e"}, 
null_bitmap));
+      CheckCast(src_null, dest6_null);
+
+      ASSERT_OK_AND_ASSIGN(
+          auto dest7_null,
+          StructArray::Make({a2, b2, c2, d2, e2},
+                            std::vector<std::string>{"a", "b", "c", "d", "e"},
+                            null_bitmap));
+      CheckCast(src_null, dest7_null);
+
+      const auto dest8_null = arrow::struct_({std::make_shared<Field>("a", 
int8()),
+                                              std::make_shared<Field>("d", 
int16()),
+                                              std::make_shared<Field>("f", 
int64())});
+      const auto options_null = CastOptions::Safe(dest8_null);
+      EXPECT_RAISES_WITH_MESSAGE_THAT(
+          TypeError,
+          ::testing::HasSubstr(
+              "struct (sub)fields don't match or are in the wrong order"),
+          Cast(src_null, options_null));

Review comment:
       Can we also add a case or two dealing with duplicate field names on 
either side? (It looks like the implementation will handle this sensibly.)

##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_test.cc
##########
@@ -2245,10 +2245,132 @@ static void CheckStructToStruct(
   }
 }
 
-TEST(Cast, StructToSameSizedAndNamedStruct) {
-  CheckStructToStruct({int32(), float32(), int64()});
+static void CheckStructToStructSubset(
+    const std::vector<std::shared_ptr<DataType>>& value_types) {
+  for (const auto& src_value_type : value_types) {

Review comment:
       and ditto for the destination type

##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_test.cc
##########
@@ -2262,12 +2384,11 @@ TEST(Cast, StructToSameSizedButDifferentNamedStruct) {
 
   EXPECT_RAISES_WITH_MESSAGE_THAT(
       TypeError,
-      ::testing::HasSubstr("Type error: struct field names do not match: 
struct<a: int8, "
-                           "b: int8> struct<c: int8, d: int8>"),
+      ::testing::HasSubstr("struct (sub)fields don't match or are in the wrong 
order"),
       Cast(src, options));
 }
 
-TEST(Cast, StructToDifferentSizeStruct) {
+TEST(Cast, StructToLargerSizeStruct) {

Review comment:
       just a nit

##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_test.cc
##########
@@ -2245,10 +2245,132 @@ static void CheckStructToStruct(
   }
 }
 
-TEST(Cast, StructToSameSizedAndNamedStruct) {
-  CheckStructToStruct({int32(), float32(), int64()});
+static void CheckStructToStructSubset(
+    const std::vector<std::shared_ptr<DataType>>& value_types) {
+  for (const auto& src_value_type : value_types) {

Review comment:
       ```suggestion
     for (const auto& src_value_type : value_types) {
       ARROW_SCOPED_TRACE("From type: ", src_value_type->ToString());
   ```

##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_test.cc
##########
@@ -2245,10 +2245,132 @@ static void CheckStructToStruct(
   }
 }
 
-TEST(Cast, StructToSameSizedAndNamedStruct) {
-  CheckStructToStruct({int32(), float32(), int64()});
+static void CheckStructToStructSubset(
+    const std::vector<std::shared_ptr<DataType>>& value_types) {
+  for (const auto& src_value_type : value_types) {
+    for (const auto& dest_value_type : value_types) {
+      std::vector<std::string> field_names = {"a", "b", "c", "d", "e"};
+
+      std::shared_ptr<Array> a1, b1, c1, d1, e1;
+      a1 = ArrayFromJSON(src_value_type, "[1, 2, 5]");
+      b1 = ArrayFromJSON(src_value_type, "[3, 4, 7]");
+      c1 = ArrayFromJSON(src_value_type, "[9, 11, 44]");
+      d1 = ArrayFromJSON(src_value_type, "[6, 51, 49]");
+      e1 = ArrayFromJSON(src_value_type, "[19, 17, 74]");
+
+      std::shared_ptr<Array> a2, b2, c2, d2, e2;
+      a2 = ArrayFromJSON(dest_value_type, "[1, 2, 5]");
+      b2 = ArrayFromJSON(dest_value_type, "[3, 4, 7]");
+      c2 = ArrayFromJSON(dest_value_type, "[9, 11, 44]");
+      d2 = ArrayFromJSON(dest_value_type, "[6, 51, 49]");
+      e2 = ArrayFromJSON(dest_value_type, "[19, 17, 74]");
+
+      ASSERT_OK_AND_ASSIGN(auto src,
+                           StructArray::Make({a1, b1, c1, d1, e1}, 
field_names));
+      ASSERT_OK_AND_ASSIGN(

Review comment:
       I think some of these cases are a little redundant.

##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_test.cc
##########
@@ -2245,10 +2245,132 @@ static void CheckStructToStruct(
   }
 }
 
-TEST(Cast, StructToSameSizedAndNamedStruct) {
-  CheckStructToStruct({int32(), float32(), int64()});
+static void CheckStructToStructSubset(
+    const std::vector<std::shared_ptr<DataType>>& value_types) {
+  for (const auto& src_value_type : value_types) {
+    for (const auto& dest_value_type : value_types) {
+      std::vector<std::string> field_names = {"a", "b", "c", "d", "e"};
+
+      std::shared_ptr<Array> a1, b1, c1, d1, e1;
+      a1 = ArrayFromJSON(src_value_type, "[1, 2, 5]");
+      b1 = ArrayFromJSON(src_value_type, "[3, 4, 7]");
+      c1 = ArrayFromJSON(src_value_type, "[9, 11, 44]");
+      d1 = ArrayFromJSON(src_value_type, "[6, 51, 49]");
+      e1 = ArrayFromJSON(src_value_type, "[19, 17, 74]");
+
+      std::shared_ptr<Array> a2, b2, c2, d2, e2;
+      a2 = ArrayFromJSON(dest_value_type, "[1, 2, 5]");
+      b2 = ArrayFromJSON(dest_value_type, "[3, 4, 7]");
+      c2 = ArrayFromJSON(dest_value_type, "[9, 11, 44]");
+      d2 = ArrayFromJSON(dest_value_type, "[6, 51, 49]");
+      e2 = ArrayFromJSON(dest_value_type, "[19, 17, 74]");
+
+      ASSERT_OK_AND_ASSIGN(auto src,
+                           StructArray::Make({a1, b1, c1, d1, e1}, 
field_names));
+      ASSERT_OK_AND_ASSIGN(
+          auto dest1, StructArray::Make({a2, c2}, 
std::vector<std::string>{"a", "c"}));
+      CheckCast(src, dest1);
+
+      ASSERT_OK_AND_ASSIGN(
+          auto dest2, StructArray::Make({b2, d2}, 
std::vector<std::string>{"b", "d"}));
+      CheckCast(src, dest2);
+
+      ASSERT_OK_AND_ASSIGN(
+          auto dest3, StructArray::Make({c2, e2}, 
std::vector<std::string>{"c", "e"}));
+      CheckCast(src, dest3);
+
+      ASSERT_OK_AND_ASSIGN(
+          auto dest4,
+          StructArray::Make({a2, d2, e2}, std::vector<std::string>{"a", "d", 
"e"}));
+      CheckCast(src, dest4);
+
+      ASSERT_OK_AND_ASSIGN(
+          auto dest5,
+          StructArray::Make({b2, c2, e2}, std::vector<std::string>{"b", "c", 
"e"}));
+      CheckCast(src, dest5);
+
+      ASSERT_OK_AND_ASSIGN(
+          auto dest6, StructArray::Make({a2, b2, c2, e2},
+                                        std::vector<std::string>{"a", "b", 
"c", "e"}));
+      CheckCast(src, dest6);
+
+      ASSERT_OK_AND_ASSIGN(
+          auto dest7, StructArray::Make({a2, b2, c2, d2, e2}, {"a", "b", "c", 
"d", "e"}));
+      CheckCast(src, dest7);
+
+      const auto dest8 = arrow::struct_({std::make_shared<Field>("a", int8()),
+                                         std::make_shared<Field>("d", int16()),
+                                         std::make_shared<Field>("f", 
int64())});
+      const auto options = CastOptions::Safe(dest8);
+      EXPECT_RAISES_WITH_MESSAGE_THAT(
+          TypeError,
+          ::testing::HasSubstr(
+              "struct (sub)fields don't match or are in the wrong order"),
+          Cast(src, options));
+
+      // With nulls
+      std::shared_ptr<Buffer> null_bitmap;
+      BitmapFromVector<int>({0, 1, 0}, &null_bitmap);
+
+      ASSERT_OK_AND_ASSIGN(auto src_null, StructArray::Make({a1, b1, c1, d1, 
e1},
+                                                            field_names, 
null_bitmap));
+      ASSERT_OK_AND_ASSIGN(
+          auto dest1_null,
+          StructArray::Make({a2, c2}, std::vector<std::string>{"a", "c"}, 
null_bitmap));
+      CheckCast(src_null, dest1_null);
+
+      ASSERT_OK_AND_ASSIGN(
+          auto dest2_null,
+          StructArray::Make({b2, d2}, std::vector<std::string>{"b", "d"}, 
null_bitmap));
+      CheckCast(src_null, dest2_null);
+
+      ASSERT_OK_AND_ASSIGN(
+          auto dest3_null,
+          StructArray::Make({c2, e2}, std::vector<std::string>{"c", "e"}, 
null_bitmap));
+      CheckCast(src_null, dest3_null);
+
+      ASSERT_OK_AND_ASSIGN(
+          auto dest4_null,
+          StructArray::Make({a2, d2, e2}, std::vector<std::string>{"a", "d", 
"e"},
+                            null_bitmap));
+      CheckCast(src_null, dest4_null);
+
+      ASSERT_OK_AND_ASSIGN(
+          auto dest5_null,
+          StructArray::Make({b2, c2, e2}, std::vector<std::string>{"b", "c", 
"e"},
+                            null_bitmap));
+      CheckCast(src_null, dest5_null);
+
+      ASSERT_OK_AND_ASSIGN(
+          auto dest6_null,
+          StructArray::Make({a2, b2, c2, e2},
+                            std::vector<std::string>{"a", "b", "c", "e"}, 
null_bitmap));
+      CheckCast(src_null, dest6_null);
+
+      ASSERT_OK_AND_ASSIGN(
+          auto dest7_null,
+          StructArray::Make({a2, b2, c2, d2, e2},
+                            std::vector<std::string>{"a", "b", "c", "d", "e"},
+                            null_bitmap));
+      CheckCast(src_null, dest7_null);
+
+      const auto dest8_null = arrow::struct_({std::make_shared<Field>("a", 
int8()),
+                                              std::make_shared<Field>("d", 
int16()),
+                                              std::make_shared<Field>("f", 
int64())});

Review comment:
       Can we also add a case where the fields exist, but are reversed?

##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_test.cc
##########
@@ -2281,8 +2402,7 @@ TEST(Cast, StructToDifferentSizeStruct) {
 
   EXPECT_RAISES_WITH_MESSAGE_THAT(
       TypeError,
-      ::testing::HasSubstr("Type error: struct field sizes do not match: 
struct<a: int8, "
-                           "b: int8> struct<a: int8, b: int8, c: int8>"),
+      ::testing::HasSubstr("struct (sub)fields don't match or are in the wrong 
order"),
       Cast(src, options));
 }

Review comment:
       Can we extend the nullability test below with a couple cases as well?

##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_test.cc
##########
@@ -2262,12 +2384,11 @@ TEST(Cast, StructToSameSizedButDifferentNamedStruct) {
 
   EXPECT_RAISES_WITH_MESSAGE_THAT(
       TypeError,
-      ::testing::HasSubstr("Type error: struct field names do not match: 
struct<a: int8, "
-                           "b: int8> struct<c: int8, d: int8>"),
+      ::testing::HasSubstr("struct (sub)fields don't match or are in the wrong 
order"),
       Cast(src, options));
 }
 
-TEST(Cast, StructToDifferentSizeStruct) {
+TEST(Cast, StructToLargerSizeStruct) {

Review comment:
       ```suggestion
   TEST(Cast, StructToBiggerStruct) {
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to