lidavidm commented on a change in pull request #12248:
URL: https://github.com/apache/arrow/pull/12248#discussion_r796583966



##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_nested.cc
##########
@@ -150,6 +150,74 @@ void AddListCast(CastFunction* func) {
   DCHECK_OK(func->AddKernel(SrcType::type_id, std::move(kernel)));
 }
 
+struct CastStruct {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    const CastOptions& options = CastState::Get(ctx);
+    const auto in_field_count =
+        checked_cast<const StructType&>(*batch[0].type()).num_fields();
+    const auto out_field_count =
+        checked_cast<const StructType&>(*out->type()).num_fields();
+
+    if (in_field_count != out_field_count) {
+      return Status(StatusCode::TypeError, "struct field sizes do not match");
+    }
+
+    for (int64_t i = 0; i < in_field_count; ++i) {
+      const auto in_field_name =
+          checked_cast<const StructType&>(*batch[0].type()).field(i)->name();
+      const auto out_field_name =
+          checked_cast<const StructType&>(*out->type()).field(i)->name();
+      if (in_field_name != out_field_name) {
+        ARROW_RETURN_NOT_OK(
+            Status(StatusCode::TypeError, "struct field names do not match"));

Review comment:
       Just `return Status::TypeError(...)`

##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_nested.cc
##########
@@ -150,6 +150,74 @@ void AddListCast(CastFunction* func) {
   DCHECK_OK(func->AddKernel(SrcType::type_id, std::move(kernel)));
 }
 
+struct CastStruct {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    const CastOptions& options = CastState::Get(ctx);
+    const auto in_field_count =
+        checked_cast<const StructType&>(*batch[0].type()).num_fields();
+    const auto out_field_count =
+        checked_cast<const StructType&>(*out->type()).num_fields();
+
+    if (in_field_count != out_field_count) {
+      return Status(StatusCode::TypeError, "struct field sizes do not match");

Review comment:
       nit, but this can just be `return Status::TypeError("...");`

##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_test.cc
##########
@@ -2217,6 +2217,73 @@ TEST(Cast, ListToListOptionsPassthru) {
   }
 }
 
+static void CheckStructToStruct(
+    const std::vector<std::shared_ptr<DataType>>& value_types) {
+  for (const auto& src_value_type : value_types) {
+    for (const auto& dest_value_type : value_types) {
+      std::vector<std::string> field_names = {"a"};
+      std::shared_ptr<Array> a1, b1, a2, b2;
+      a1 = ArrayFromJSON(src_value_type, "[1, 2]");
+      a2 = ArrayFromJSON(dest_value_type, "[1, 2]");
+      auto src = StructArray::Make({a1}, field_names).ValueOrDie();

Review comment:
       nit, but usually we use ASSERT_OK_AND_ASSIGN (or EXPECT_OK_AND_ASSIGN) 
to do this in tests

##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_test.cc
##########
@@ -2217,6 +2217,73 @@ TEST(Cast, ListToListOptionsPassthru) {
   }
 }
 
+static void CheckStructToStruct(
+    const std::vector<std::shared_ptr<DataType>>& value_types) {
+  for (const auto& src_value_type : value_types) {
+    for (const auto& dest_value_type : value_types) {
+      std::vector<std::string> field_names = {"a"};
+      std::shared_ptr<Array> a1, b1, a2, b2;
+      a1 = ArrayFromJSON(src_value_type, "[1, 2]");
+      a2 = ArrayFromJSON(dest_value_type, "[1, 2]");
+      auto src = StructArray::Make({a1}, field_names).ValueOrDie();
+      auto dest = StructArray::Make({a2}, field_names).ValueOrDie();
+
+      CheckCast(src, dest);
+
+      // Test corner case using children with offsets
+      auto a3 = ArrayFromJSON(src_value_type, "[1, 2, 3]")->Slice(1, 2);
+      auto a4 = ArrayFromJSON(dest_value_type, "[2, 3]");
+      auto slicedSrc = StructArray::Make({a3}, field_names).ValueOrDie();
+      auto slicedDest = StructArray::Make({a4}, field_names).ValueOrDie();
+
+      CheckCast(slicedSrc, slicedDest);
+    }
+  }
+}
+
+TEST(Cast, StructToSameSizedAndNamedStruct) {
+  CheckStructToStruct({int32(), float32(), int64()});
+}
+
+TEST(Cast, StructToSameSizedButDifferentNamedStruct) {
+  std::vector<std::string> field_names = {"a", "b"};
+  std::shared_ptr<Array> a, b;
+  a = ArrayFromJSON(int8(), "[1, 2]");
+  b = ArrayFromJSON(int8(), "[3, 4]");
+  auto src = StructArray::Make({a, b}, field_names).ValueOrDie();
+
+  std::vector<std::string> field_names2 = {"c", "d"};
+  std::shared_ptr<Array> c, d;
+  c = ArrayFromJSON(int8(), "[1, 2]");
+  d = ArrayFromJSON(int8(), "[3, 4]");
+  auto dest = StructArray::Make({c, d}, field_names2).ValueOrDie();
+  auto options = CastOptions{};

Review comment:
       Use `CastOptions::Safe(dest->type())`?

##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_nested.cc
##########
@@ -150,6 +150,74 @@ void AddListCast(CastFunction* func) {
   DCHECK_OK(func->AddKernel(SrcType::type_id, std::move(kernel)));
 }
 
+struct CastStruct {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    const CastOptions& options = CastState::Get(ctx);
+    const auto in_field_count =
+        checked_cast<const StructType&>(*batch[0].type()).num_fields();
+    const auto out_field_count =
+        checked_cast<const StructType&>(*out->type()).num_fields();
+
+    if (in_field_count != out_field_count) {
+      return Status(StatusCode::TypeError, "struct field sizes do not match");

Review comment:
       It would be nice to include the counts in the error message to be 
user-friendly. Or at least, include the ToString() of both types.

##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_nested.cc
##########
@@ -150,6 +150,74 @@ void AddListCast(CastFunction* func) {
   DCHECK_OK(func->AddKernel(SrcType::type_id, std::move(kernel)));
 }
 
+struct CastStruct {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    const CastOptions& options = CastState::Get(ctx);
+    const auto in_field_count =
+        checked_cast<const StructType&>(*batch[0].type()).num_fields();
+    const auto out_field_count =
+        checked_cast<const StructType&>(*out->type()).num_fields();
+
+    if (in_field_count != out_field_count) {
+      return Status(StatusCode::TypeError, "struct field sizes do not match");
+    }
+
+    for (int64_t i = 0; i < in_field_count; ++i) {
+      const auto in_field_name =
+          checked_cast<const StructType&>(*batch[0].type()).field(i)->name();
+      const auto out_field_name =
+          checked_cast<const StructType&>(*out->type()).field(i)->name();
+      if (in_field_name != out_field_name) {
+        ARROW_RETURN_NOT_OK(
+            Status(StatusCode::TypeError, "struct field names do not match"));

Review comment:
       It might be nice to include the index and the mismatching names to be 
user-friendly, or the ToString() of both types.

##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_nested.cc
##########
@@ -150,6 +150,73 @@ void AddListCast(CastFunction* func) {
   DCHECK_OK(func->AddKernel(SrcType::type_id, std::move(kernel)));
 }
 
+struct CastStruct {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    const CastOptions& options = CastState::Get(ctx);
+    const auto in_field_count =
+        checked_cast<const StructType&>(*batch[0].type()).num_fields();
+    const auto out_field_count =
+        checked_cast<const StructType&>(*out->type()).num_fields();
+
+    if (in_field_count != out_field_count) {
+      ARROW_RETURN_NOT_OK(
+          Status(StatusCode::TypeError, "struct field sizes do not match"));
+    }
+
+    for (int64_t i = 0; i < in_field_count; ++i) {
+      const auto in_field_name =
+          checked_cast<const StructType&>(*batch[0].type()).field(i)->name();
+      const auto out_field_name =
+          checked_cast<const StructType&>(*out->type()).field(i)->name();
+      if (in_field_name != out_field_name) {
+        ARROW_RETURN_NOT_OK(
+            Status(StatusCode::TypeError, "struct field names do not match"));
+      }
+    }
+
+    if (out->kind() == Datum::SCALAR) {
+      const auto& in_scalar = checked_cast<const 
StructScalar&>(*batch[0].scalar());
+      auto out_scalar = checked_cast<StructScalar*>(out->scalar().get());
+
+      for (int64_t i = 0; i < in_field_count; i++) {
+        auto values = in_scalar.value[i];
+        auto target_type = out->type()->field(i)->type();
+        ARROW_ASSIGN_OR_RAISE(Datum cast_values,
+                              Cast(values, target_type, options, 
ctx->exec_context()));
+        DCHECK_EQ(Datum::SCALAR, cast_values.kind());
+        out_scalar->value.push_back(cast_values.scalar());
+      }
+
+      out_scalar->is_valid = true;
+      return Status::OK();
+    }
+
+    const ArrayData& in_array = *batch[0].array();
+    ArrayData* out_array = out->mutable_array();
+
+    for (int64_t i = 0; i < in_field_count; ++i) {
+      auto values = in_array.child_data[0];

Review comment:
       CheckCast checks slices for you, but it only does so if there are more 
than three input elements - we should make the arrays used in the test longer.

##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_test.cc
##########
@@ -2217,6 +2217,73 @@ TEST(Cast, ListToListOptionsPassthru) {
   }
 }
 
+static void CheckStructToStruct(
+    const std::vector<std::shared_ptr<DataType>>& value_types) {
+  for (const auto& src_value_type : value_types) {
+    for (const auto& dest_value_type : value_types) {
+      std::vector<std::string> field_names = {"a"};
+      std::shared_ptr<Array> a1, b1, a2, b2;
+      a1 = ArrayFromJSON(src_value_type, "[1, 2]");
+      a2 = ArrayFromJSON(dest_value_type, "[1, 2]");
+      auto src = StructArray::Make({a1}, field_names).ValueOrDie();
+      auto dest = StructArray::Make({a2}, field_names).ValueOrDie();
+
+      CheckCast(src, dest);
+
+      // Test corner case using children with offsets
+      auto a3 = ArrayFromJSON(src_value_type, "[1, 2, 3]")->Slice(1, 2);
+      auto a4 = ArrayFromJSON(dest_value_type, "[2, 3]");
+      auto slicedSrc = StructArray::Make({a3}, field_names).ValueOrDie();
+      auto slicedDest = StructArray::Make({a4}, field_names).ValueOrDie();
+
+      CheckCast(slicedSrc, slicedDest);
+    }
+  }
+}
+
+TEST(Cast, StructToSameSizedAndNamedStruct) {
+  CheckStructToStruct({int32(), float32(), int64()});
+}
+
+TEST(Cast, StructToSameSizedButDifferentNamedStruct) {
+  std::vector<std::string> field_names = {"a", "b"};
+  std::shared_ptr<Array> a, b;
+  a = ArrayFromJSON(int8(), "[1, 2]");
+  b = ArrayFromJSON(int8(), "[3, 4]");
+  auto src = StructArray::Make({a, b}, field_names).ValueOrDie();
+
+  std::vector<std::string> field_names2 = {"c", "d"};
+  std::shared_ptr<Array> c, d;
+  c = ArrayFromJSON(int8(), "[1, 2]");
+  d = ArrayFromJSON(int8(), "[3, 4]");
+  auto dest = StructArray::Make({c, d}, field_names2).ValueOrDie();
+  auto options = CastOptions{};
+  options.to_type = dest->type();
+
+  ASSERT_RAISES_WITH_MESSAGE(TypeError, "Type error: struct field names do not 
match",
+                             Cast(src, options));

Review comment:
       CI is failing, use `EXPECT_RAISES_WITH_MESSAGE_THAT(TypeError, 
::testing::HasSubstr("..."), ...)` to avoid that




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to