parthchandra commented on code in PR #2897:
URL: https://github.com/apache/datafusion-comet/pull/2897#discussion_r3060548901
##########
spark/src/test/scala/org/apache/comet/CometCastSuite.scala:
##########
@@ -1354,14 +1328,99 @@ class CometCastSuite extends CometTestBase with
AdaptiveSparkPlanHelper {
}
}
+ test("cast ArrayType to ArrayType") {
+ val types = Seq(
+ BooleanType,
+ StringType,
+ ByteType,
+ IntegerType,
+ LongType,
+ ShortType,
+ FloatType,
+ DoubleType,
+ DecimalType(10, 2),
+ DecimalType(38, 18),
+ DateType,
+ TimestampType,
+ BinaryType)
+ testArrayCastMatrix(types, ArrayType(_), generateArrays(100, _))
+ }
+
+ // https://github.com/apache/datafusion-comet/issues/3906
+ ignore("cast nested ArrayType to nested ArrayType") {
+ val types = Seq(
+ BooleanType,
+ StringType,
+ ByteType,
+ IntegerType,
+ LongType,
+ ShortType,
+ FloatType,
+ DoubleType,
+ DecimalType(10, 2),
+ DecimalType(38, 18),
+ DateType,
+ TimestampType,
+ BinaryType)
+ testArrayCastMatrix(
+ types,
+ dt => ArrayType(ArrayType(dt)),
+ dt => generateArrays(100, ArrayType(dt)))
+ }
+
+ private def testArrayCastMatrix(
+ elementTypes: Seq[DataType],
+ wrapType: DataType => DataType,
+ generateInput: DataType => DataFrame): Unit = {
+ for (fromType <- elementTypes) {
+ val input = generateInput(fromType)
+ for (toType <- elementTypes) {
+ val name = s"cast $fromType to $toType"
+ val fromWrappedType = wrapType(fromType)
+ val toWrappedType = wrapType(toType)
+ if (fromType != toType &&
+ testNames.contains(name) &&
+ !tags
+ .get(name)
+ .exists(s => s.contains("org.scalatest.Ignore")) &&
+ Cast.canCast(fromWrappedType, toWrappedType) &&
+ CometCast.isSupported(fromWrappedType, toWrappedType, None,
CometEvalMode.LEGACY) ==
+ Compatible()) {
+ val legacyOnly =
+ fromType == DateType || (fromType == BooleanType && toType ==
TimestampType)
Review Comment:
nit: this is not strictly correct. For instance, `Array[Date] ->
Array[Timestamp]` is compatible and not legacy-only.
##########
native/core/src/parquet/parquet_support.rs:
##########
@@ -200,6 +200,9 @@ fn parquet_convert_array(
.with_timezone(Arc::clone(tz)),
))
}
+ // Keep scan-time nested parquet conversion aligned with Spark's legacy
+ // array<Date> -> array<Int> behavior without affecting scalar Date ->
Int casts.
+ (Date32, Int32) => Ok(new_null_array(to_type, array.len())),
Review Comment:
Is there a test that covers this (since it is not exercised by the cast
tests)?
##########
native/spark-expr/src/conversion_funcs/cast.rs:
##########
@@ -387,8 +369,23 @@ pub(crate) fn cast_array(
cast_options,
)?),
(List(_), Utf8) => Ok(cast_array_to_string(array.as_list(),
cast_options)?),
- (List(_), List(_)) if can_cast_types(&from_type, to_type) => {
- Ok(cast_with_options(&array, to_type, &CAST_OPTIONS)?)
+ (List(_), List(to)) => {
Review Comment:
Can you add a comment explaining why the `if can_cast_types(&from_type,
to_type)` guard is no longer needed?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]