This is an automated email from the ASF dual-hosted git repository. gengliang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 0515f49 [SPARK-34856][SQL] ANSI mode: Allow casting complex types as string type 0515f49 is described below commit 0515f490189466c5f13aa4f647e81aeb6c24d0bf Author: Gengliang Wang <ltn...@gmail.com> AuthorDate: Fri Mar 26 00:17:43 2021 +0800 [SPARK-34856][SQL] ANSI mode: Allow casting complex types as string type ### What changes were proposed in this pull request? Allow casting complex types as string type in ANSI mode. ### Why are the changes needed? Currently, complex types are not allowed to cast as string type. This breaks the DataFrame.show() API. E.g. ``` scala> sql("select array(1, 2, 2)").show(false) org.apache.spark.sql.AnalysisException: cannot resolve 'CAST(`array(1, 2, 2)` AS STRING)' due to data type mismatch: cannot cast array<int> to string with ANSI mode on. ``` We should allow the conversion as the extension of the ANSI SQL standard, so that the DataFrame.show() still works in ANSI mode. ### Does this PR introduce _any_ user-facing change? Yes, casting complex types as string type is now allowed in ANSI mode. ### How was this patch tested? Unit tests. Closes #31954 from gengliangwang/fixExplicitCast. 
Authored-by: Gengliang Wang <ltn...@gmail.com> Signed-off-by: Gengliang Wang <ltn...@gmail.com> --- docs/sql-ref-ansi-compliance.md | 9 +- .../spark/sql/catalyst/expressions/Cast.scala | 9 +- .../spark/sql/catalyst/expressions/CastSuite.scala | 228 ++++++++++----------- 3 files changed, 119 insertions(+), 127 deletions(-) diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index 557f27b..f4fd712 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -76,6 +76,9 @@ The type conversion of Spark ANSI mode follows the syntax rules of section 6.13 straightforward type conversions which are disallowed as per the ANSI standard: * NumericType <=> BooleanType * StringType <=> BinaryType +* ArrayType => String +* MapType => String +* StructType => String The valid combinations of target data type and source data type in a `CAST` expression are given by the following table. "Y" indicates that the combination is syntactically valid without restriction and "N" indicates that the combination is not valid. 
@@ -89,9 +92,9 @@ The type conversion of Spark ANSI mode follows the syntax rules of section 6.13 | Interval | N | Y | N | N | Y | N | N | N | N | N | | Boolean | Y | Y | N | N | N | Y | N | N | N | N | | Binary | N | Y | N | N | N | N | Y | N | N | N | -| Array | N | N | N | N | N | N | N | <span style="color:red">**Y**</span> | N | N | -| Map | N | N | N | N | N | N | N | N | <span style="color:red">**Y**</span> | N | -| Struct | N | N | N | N | N | N | N | N | N | <span style="color:red">**Y**</span> | +| Array | N | Y | N | N | N | N | N | <span style="color:red">**Y**</span> | N | N | +| Map | N | Y | N | N | N | N | N | N | <span style="color:red">**Y**</span> | N | +| Struct | N | Y | N | N | N | N | N | N | N | <span style="color:red">**Y**</span> | In the table above, all the `CAST`s that can cause runtime exceptions are marked as red <span style="color:red">**Y**</span>: * CAST(Numeric AS Numeric): raise an overflow exception if the value is out of the target data type's range. 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 9135e6c..7599947 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -1873,6 +1873,8 @@ object AnsiCast { case (NullType, _) => true + case (_, StringType) => true + case (StringType, _: BinaryType) => true case (StringType, BooleanType) => true @@ -1890,13 +1892,6 @@ object AnsiCast { case (StringType, _: NumericType) => true case (BooleanType, _: NumericType) => true - case (_: NumericType, StringType) => true - case (_: DateType, StringType) => true - case (_: TimestampType, StringType) => true - case (_: CalendarIntervalType, StringType) => true - case (BooleanType, StringType) => true - case (BinaryType, StringType) => true - case (ArrayType(fromType, fn), ArrayType(toType, tn)) => canCast(fromType, toType) && resolvableNullability(fn || forceNullable(fromType, toType), tn) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index 1ee5ce6..1ed8c46 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -686,6 +686,117 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(cast(value, DoubleType), Double.NaN) } } + + test("SPARK-22825 Cast array to string") { + val ret1 = cast(Literal.create(Array(1, 2, 3, 4, 5)), StringType) + checkEvaluation(ret1, "[1, 2, 3, 4, 5]") + val ret2 = cast(Literal.create(Array("ab", "cde", "f")), StringType) + checkEvaluation(ret2, "[ab, cde, f]") + Seq(false, true).foreach { omitNull => + 
withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> omitNull.toString) { + val ret3 = cast(Literal.create(Array("ab", null, "c")), StringType) + checkEvaluation(ret3, s"[ab,${if (omitNull) "" else " null"}, c]") + } + } + val ret4 = + cast(Literal.create(Array("ab".getBytes, "cde".getBytes, "f".getBytes)), StringType) + checkEvaluation(ret4, "[ab, cde, f]") + val ret5 = cast( + Literal.create(Array("2014-12-03", "2014-12-04", "2014-12-06").map(Date.valueOf)), + StringType) + checkEvaluation(ret5, "[2014-12-03, 2014-12-04, 2014-12-06]") + val ret6 = cast( + Literal.create(Array("2014-12-03 13:01:00", "2014-12-04 15:05:00") + .map(Timestamp.valueOf)), + StringType) + checkEvaluation(ret6, "[2014-12-03 13:01:00, 2014-12-04 15:05:00]") + val ret7 = cast(Literal.create(Array(Array(1, 2, 3), Array(4, 5))), StringType) + checkEvaluation(ret7, "[[1, 2, 3], [4, 5]]") + val ret8 = cast( + Literal.create(Array(Array(Array("a"), Array("b", "c")), Array(Array("d")))), + StringType) + checkEvaluation(ret8, "[[[a], [b, c]], [[d]]]") + } + + test("SPARK-33291: Cast array with null elements to string") { + Seq(false, true).foreach { omitNull => + withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> omitNull.toString) { + val ret1 = cast(Literal.create(Array(null, null)), StringType) + checkEvaluation( + ret1, + s"[${if (omitNull) "" else "null"},${if (omitNull) "" else " null"}]") + } + } + } + + test("SPARK-22973 Cast map to string") { + Seq( + false -> ("{", "}"), + true -> ("[", "]")).foreach { case (legacyCast, (lb, rb)) => + withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> legacyCast.toString) { + val ret1 = cast(Literal.create(Map(1 -> "a", 2 -> "b", 3 -> "c")), StringType) + checkEvaluation(ret1, s"${lb}1 -> a, 2 -> b, 3 -> c$rb") + val ret2 = cast( + Literal.create(Map("1" -> "a".getBytes, "2" -> null, "3" -> "c".getBytes)), + StringType) + checkEvaluation(ret2, s"${lb}1 -> a, 2 ->${if (legacyCast) "" else " null"}, 3 -> c$rb") + val ret3 = cast( + 
Literal.create(Map( + 1 -> Date.valueOf("2014-12-03"), + 2 -> Date.valueOf("2014-12-04"), + 3 -> Date.valueOf("2014-12-05"))), + StringType) + checkEvaluation(ret3, s"${lb}1 -> 2014-12-03, 2 -> 2014-12-04, 3 -> 2014-12-05$rb") + val ret4 = cast( + Literal.create(Map( + 1 -> Timestamp.valueOf("2014-12-03 13:01:00"), + 2 -> Timestamp.valueOf("2014-12-04 15:05:00"))), + StringType) + checkEvaluation(ret4, s"${lb}1 -> 2014-12-03 13:01:00, 2 -> 2014-12-04 15:05:00$rb") + val ret5 = cast( + Literal.create(Map( + 1 -> Array(1, 2, 3), + 2 -> Array(4, 5, 6))), + StringType) + checkEvaluation(ret5, s"${lb}1 -> [1, 2, 3], 2 -> [4, 5, 6]$rb") + } + } + } + + test("SPARK-22981 Cast struct to string") { + Seq( + false -> ("{", "}"), + true -> ("[", "]")).foreach { case (legacyCast, (lb, rb)) => + withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> legacyCast.toString) { + val ret1 = cast(Literal.create((1, "a", 0.1)), StringType) + checkEvaluation(ret1, s"${lb}1, a, 0.1$rb") + val ret2 = cast(Literal.create(Tuple3[Int, String, String](1, null, "a")), StringType) + checkEvaluation(ret2, s"${lb}1,${if (legacyCast) "" else " null"}, a$rb") + val ret3 = cast(Literal.create( + (Date.valueOf("2014-12-03"), Timestamp.valueOf("2014-12-03 15:05:00"))), StringType) + checkEvaluation(ret3, s"${lb}2014-12-03, 2014-12-03 15:05:00$rb") + val ret4 = cast(Literal.create(((1, "a"), 5, 0.1)), StringType) + checkEvaluation(ret4, s"$lb${lb}1, a$rb, 5, 0.1$rb") + val ret5 = cast(Literal.create((Seq(1, 2, 3), "a", 0.1)), StringType) + checkEvaluation(ret5, s"$lb[1, 2, 3], a, 0.1$rb") + val ret6 = cast(Literal.create((1, Map(1 -> "a", 2 -> "b", 3 -> "c"))), StringType) + checkEvaluation(ret6, s"${lb}1, ${lb}1 -> a, 2 -> b, 3 -> c$rb$rb") + } + } + } + + test("SPARK-33291: Cast struct with null elements to string") { + Seq( + false -> ("{", "}"), + true -> ("[", "]")).foreach { case (legacyCast, (lb, rb)) => + withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> legacyCast.toString) { + val 
ret1 = cast(Literal.create(Tuple2[String, String](null, null)), StringType) + checkEvaluation( + ret1, + s"$lb${if (legacyCast) "" else "null"},${if (legacyCast) "" else " null"}$rb") + } + } + } } abstract class AnsiCastSuiteBase extends CastSuiteBase { @@ -851,12 +962,6 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { assert(cast(booleanLiteral, DateType).checkInputDataTypes().isFailure) } - test("ANSI mode: disallow casting complex types as String type") { - verifyCastFailure(cast(Literal.create(Array(1, 2, 3, 4, 5)), StringType)) - verifyCastFailure(cast(Literal.create(Map(1 -> "a")), StringType)) - verifyCastFailure(cast(Literal.create((1, "a", 0.1)), StringType)) - } - test("cast from invalid string to numeric should throw NumberFormatException") { // cast to IntegerType Seq(IntegerType, ShortType, ByteType, LongType).foreach { dataType => @@ -1569,117 +1674,6 @@ class CastSuite extends CastSuiteBase { checkEvaluation(cast("abcd", DecimalType(38, 1)), null) } - test("SPARK-22825 Cast array to string") { - val ret1 = cast(Literal.create(Array(1, 2, 3, 4, 5)), StringType) - checkEvaluation(ret1, "[1, 2, 3, 4, 5]") - val ret2 = cast(Literal.create(Array("ab", "cde", "f")), StringType) - checkEvaluation(ret2, "[ab, cde, f]") - Seq(false, true).foreach { omitNull => - withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> omitNull.toString) { - val ret3 = cast(Literal.create(Array("ab", null, "c")), StringType) - checkEvaluation(ret3, s"[ab,${if (omitNull) "" else " null"}, c]") - } - } - val ret4 = - cast(Literal.create(Array("ab".getBytes, "cde".getBytes, "f".getBytes)), StringType) - checkEvaluation(ret4, "[ab, cde, f]") - val ret5 = cast( - Literal.create(Array("2014-12-03", "2014-12-04", "2014-12-06").map(Date.valueOf)), - StringType) - checkEvaluation(ret5, "[2014-12-03, 2014-12-04, 2014-12-06]") - val ret6 = cast( - Literal.create(Array("2014-12-03 13:01:00", "2014-12-04 15:05:00") - .map(Timestamp.valueOf)), - StringType) - 
checkEvaluation(ret6, "[2014-12-03 13:01:00, 2014-12-04 15:05:00]") - val ret7 = cast(Literal.create(Array(Array(1, 2, 3), Array(4, 5))), StringType) - checkEvaluation(ret7, "[[1, 2, 3], [4, 5]]") - val ret8 = cast( - Literal.create(Array(Array(Array("a"), Array("b", "c")), Array(Array("d")))), - StringType) - checkEvaluation(ret8, "[[[a], [b, c]], [[d]]]") - } - - test("SPARK-33291: Cast array with null elements to string") { - Seq(false, true).foreach { omitNull => - withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> omitNull.toString) { - val ret1 = cast(Literal.create(Array(null, null)), StringType) - checkEvaluation( - ret1, - s"[${if (omitNull) "" else "null"},${if (omitNull) "" else " null"}]") - } - } - } - - test("SPARK-22973 Cast map to string") { - Seq( - false -> ("{", "}"), - true -> ("[", "]")).foreach { case (legacyCast, (lb, rb)) => - withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> legacyCast.toString) { - val ret1 = cast(Literal.create(Map(1 -> "a", 2 -> "b", 3 -> "c")), StringType) - checkEvaluation(ret1, s"${lb}1 -> a, 2 -> b, 3 -> c$rb") - val ret2 = cast( - Literal.create(Map("1" -> "a".getBytes, "2" -> null, "3" -> "c".getBytes)), - StringType) - checkEvaluation(ret2, s"${lb}1 -> a, 2 ->${if (legacyCast) "" else " null"}, 3 -> c$rb") - val ret3 = cast( - Literal.create(Map( - 1 -> Date.valueOf("2014-12-03"), - 2 -> Date.valueOf("2014-12-04"), - 3 -> Date.valueOf("2014-12-05"))), - StringType) - checkEvaluation(ret3, s"${lb}1 -> 2014-12-03, 2 -> 2014-12-04, 3 -> 2014-12-05$rb") - val ret4 = cast( - Literal.create(Map( - 1 -> Timestamp.valueOf("2014-12-03 13:01:00"), - 2 -> Timestamp.valueOf("2014-12-04 15:05:00"))), - StringType) - checkEvaluation(ret4, s"${lb}1 -> 2014-12-03 13:01:00, 2 -> 2014-12-04 15:05:00$rb") - val ret5 = cast( - Literal.create(Map( - 1 -> Array(1, 2, 3), - 2 -> Array(4, 5, 6))), - StringType) - checkEvaluation(ret5, s"${lb}1 -> [1, 2, 3], 2 -> [4, 5, 6]$rb") - } - } - } - - test("SPARK-22981 Cast struct 
to string") { - Seq( - false -> ("{", "}"), - true -> ("[", "]")).foreach { case (legacyCast, (lb, rb)) => - withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> legacyCast.toString) { - val ret1 = cast(Literal.create((1, "a", 0.1)), StringType) - checkEvaluation(ret1, s"${lb}1, a, 0.1$rb") - val ret2 = cast(Literal.create(Tuple3[Int, String, String](1, null, "a")), StringType) - checkEvaluation(ret2, s"${lb}1,${if (legacyCast) "" else " null"}, a$rb") - val ret3 = cast(Literal.create( - (Date.valueOf("2014-12-03"), Timestamp.valueOf("2014-12-03 15:05:00"))), StringType) - checkEvaluation(ret3, s"${lb}2014-12-03, 2014-12-03 15:05:00$rb") - val ret4 = cast(Literal.create(((1, "a"), 5, 0.1)), StringType) - checkEvaluation(ret4, s"$lb${lb}1, a$rb, 5, 0.1$rb") - val ret5 = cast(Literal.create((Seq(1, 2, 3), "a", 0.1)), StringType) - checkEvaluation(ret5, s"$lb[1, 2, 3], a, 0.1$rb") - val ret6 = cast(Literal.create((1, Map(1 -> "a", 2 -> "b", 3 -> "c"))), StringType) - checkEvaluation(ret6, s"${lb}1, ${lb}1 -> a, 2 -> b, 3 -> c$rb$rb") - } - } - } - - test("SPARK-33291: Cast struct with null elements to string") { - Seq( - false -> ("{", "}"), - true -> ("[", "]")).foreach { case (legacyCast, (lb, rb)) => - withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> legacyCast.toString) { - val ret1 = cast(Literal.create(Tuple2[String, String](null, null)), StringType) - checkEvaluation( - ret1, - s"$lb${if (legacyCast) "" else "null"},${if (legacyCast) "" else " null"}$rb") - } - } - } - test("data type casting II") { checkEvaluation( cast(cast(cast(cast(cast(cast("5", ByteType), TimestampType), --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org