This is an automated email from the ASF dual-hosted git repository.

gengliang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 0515f49  [SPARK-34856][SQL] ANSI mode: Allow casting complex types as 
string type
0515f49 is described below

commit 0515f490189466c5f13aa4f647e81aeb6c24d0bf
Author: Gengliang Wang <ltn...@gmail.com>
AuthorDate: Fri Mar 26 00:17:43 2021 +0800

    [SPARK-34856][SQL] ANSI mode: Allow casting complex types as string type
    
    ### What changes were proposed in this pull request?
    
    Allow casting complex types as string type in ANSI mode.
    
    ### Why are the changes needed?
    
    Currently, complex types are not allowed to be cast as string type. This 
breaks the DataFrame.show() API. E.g.
    ```
    scala> sql("select array(1, 2, 2)").show(false)
    org.apache.spark.sql.AnalysisException: cannot resolve 'CAST(`array(1, 2, 
2)` AS STRING)' due to data type mismatch:
     cannot cast array<int> to string with ANSI mode on.
    ```
    We should allow the conversion as an extension of the ANSI SQL standard, 
so that the DataFrame.show() API still works in ANSI mode.
    ### Does this PR introduce _any_ user-facing change?
    
    Yes, casting complex types as string type is now allowed in ANSI mode.
    
    ### How was this patch tested?
    
    Unit tests.
    
    Closes #31954 from gengliangwang/fixExplicitCast.
    
    Authored-by: Gengliang Wang <ltn...@gmail.com>
    Signed-off-by: Gengliang Wang <ltn...@gmail.com>
---
 docs/sql-ref-ansi-compliance.md                    |   9 +-
 .../spark/sql/catalyst/expressions/Cast.scala      |   9 +-
 .../spark/sql/catalyst/expressions/CastSuite.scala | 228 ++++++++++-----------
 3 files changed, 119 insertions(+), 127 deletions(-)

diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md
index 557f27b..f4fd712 100644
--- a/docs/sql-ref-ansi-compliance.md
+++ b/docs/sql-ref-ansi-compliance.md
@@ -76,6 +76,9 @@ The type conversion of Spark ANSI mode follows the syntax 
rules of section 6.13
  straightforward type conversions which are disallowed as per the ANSI 
standard:
 * NumericType <=> BooleanType
 * StringType <=> BinaryType
+* ArrayType => String
+* MapType => String
+* StructType => String
 
  The valid combinations of target data type and source data type in a `CAST` 
expression are given by the following table.
 “Y” indicates that the combination is syntactically valid without restriction 
and “N” indicates that the combination is not valid.
@@ -89,9 +92,9 @@ The type conversion of Spark ANSI mode follows the syntax 
rules of section 6.13
 | Interval  | N       | Y      | N    | N         | Y        | N       | N     
 | N     | N   | N      |
 | Boolean   | Y       | Y      | N    | N         | N        | Y       | N     
 | N     | N   | N      |
 | Binary    | N       | Y      | N    | N         | N        | N       | Y     
 | N     | N   | N      |
-| Array     | N       | N      | N    | N         | N        | N       | N     
 | <span style="color:red">**Y**</span> | N   | N      |
-| Map       | N       | N      | N    | N         | N        | N       | N     
 | N     | <span style="color:red">**Y**</span> | N      |
-| Struct    | N       | N      | N    | N         | N        | N       | N     
 | N     | N   | <span style="color:red">**Y**</span> |
+| Array     | N       | Y      | N    | N         | N        | N       | N     
 | <span style="color:red">**Y**</span> | N   | N      |
+| Map       | N       | Y      | N    | N         | N        | N       | N     
 | N     | <span style="color:red">**Y**</span> | N      |
+| Struct    | N       | Y      | N    | N         | N        | N       | N     
 | N     | N   | <span style="color:red">**Y**</span> |
 
 In the table above, all the `CAST`s that can cause runtime exceptions are 
marked as red <span style="color:red">**Y**</span>:
 * CAST(Numeric AS Numeric): raise an overflow exception if the value is out of 
the target data type's range.
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index 9135e6c..7599947 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -1873,6 +1873,8 @@ object AnsiCast {
 
     case (NullType, _) => true
 
+    case (_, StringType) => true
+
     case (StringType, _: BinaryType) => true
 
     case (StringType, BooleanType) => true
@@ -1890,13 +1892,6 @@ object AnsiCast {
     case (StringType, _: NumericType) => true
     case (BooleanType, _: NumericType) => true
 
-    case (_: NumericType, StringType) => true
-    case (_: DateType, StringType) => true
-    case (_: TimestampType, StringType) => true
-    case (_: CalendarIntervalType, StringType) => true
-    case (BooleanType, StringType) => true
-    case (BinaryType, StringType) => true
-
     case (ArrayType(fromType, fn), ArrayType(toType, tn)) =>
       canCast(fromType, toType) &&
         resolvableNullability(fn || forceNullable(fromType, toType), tn)
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
index 1ee5ce6..1ed8c46 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
@@ -686,6 +686,117 @@ abstract class CastSuiteBase extends SparkFunSuite with 
ExpressionEvalHelper {
       checkEvaluation(cast(value, DoubleType), Double.NaN)
     }
   }
+
+  test("SPARK-22825 Cast array to string") {
+    val ret1 = cast(Literal.create(Array(1, 2, 3, 4, 5)), StringType)
+    checkEvaluation(ret1, "[1, 2, 3, 4, 5]")
+    val ret2 = cast(Literal.create(Array("ab", "cde", "f")), StringType)
+    checkEvaluation(ret2, "[ab, cde, f]")
+    Seq(false, true).foreach { omitNull =>
+      withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> 
omitNull.toString) {
+        val ret3 = cast(Literal.create(Array("ab", null, "c")), StringType)
+        checkEvaluation(ret3, s"[ab,${if (omitNull) "" else " null"}, c]")
+      }
+    }
+    val ret4 =
+      cast(Literal.create(Array("ab".getBytes, "cde".getBytes, "f".getBytes)), 
StringType)
+    checkEvaluation(ret4, "[ab, cde, f]")
+    val ret5 = cast(
+      Literal.create(Array("2014-12-03", "2014-12-04", 
"2014-12-06").map(Date.valueOf)),
+      StringType)
+    checkEvaluation(ret5, "[2014-12-03, 2014-12-04, 2014-12-06]")
+    val ret6 = cast(
+      Literal.create(Array("2014-12-03 13:01:00", "2014-12-04 15:05:00")
+        .map(Timestamp.valueOf)),
+      StringType)
+    checkEvaluation(ret6, "[2014-12-03 13:01:00, 2014-12-04 15:05:00]")
+    val ret7 = cast(Literal.create(Array(Array(1, 2, 3), Array(4, 5))), 
StringType)
+    checkEvaluation(ret7, "[[1, 2, 3], [4, 5]]")
+    val ret8 = cast(
+      Literal.create(Array(Array(Array("a"), Array("b", "c")), 
Array(Array("d")))),
+      StringType)
+    checkEvaluation(ret8, "[[[a], [b, c]], [[d]]]")
+  }
+
+  test("SPARK-33291: Cast array with null elements to string") {
+    Seq(false, true).foreach { omitNull =>
+      withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> 
omitNull.toString) {
+        val ret1 = cast(Literal.create(Array(null, null)), StringType)
+        checkEvaluation(
+          ret1,
+          s"[${if (omitNull) "" else "null"},${if (omitNull) "" else " 
null"}]")
+      }
+    }
+  }
+
+  test("SPARK-22973 Cast map to string") {
+    Seq(
+      false -> ("{", "}"),
+      true -> ("[", "]")).foreach { case (legacyCast, (lb, rb)) =>
+      withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> 
legacyCast.toString) {
+        val ret1 = cast(Literal.create(Map(1 -> "a", 2 -> "b", 3 -> "c")), 
StringType)
+        checkEvaluation(ret1, s"${lb}1 -> a, 2 -> b, 3 -> c$rb")
+        val ret2 = cast(
+          Literal.create(Map("1" -> "a".getBytes, "2" -> null, "3" -> 
"c".getBytes)),
+          StringType)
+        checkEvaluation(ret2, s"${lb}1 -> a, 2 ->${if (legacyCast) "" else " 
null"}, 3 -> c$rb")
+        val ret3 = cast(
+          Literal.create(Map(
+            1 -> Date.valueOf("2014-12-03"),
+            2 -> Date.valueOf("2014-12-04"),
+            3 -> Date.valueOf("2014-12-05"))),
+          StringType)
+        checkEvaluation(ret3, s"${lb}1 -> 2014-12-03, 2 -> 2014-12-04, 3 -> 
2014-12-05$rb")
+        val ret4 = cast(
+          Literal.create(Map(
+            1 -> Timestamp.valueOf("2014-12-03 13:01:00"),
+            2 -> Timestamp.valueOf("2014-12-04 15:05:00"))),
+          StringType)
+        checkEvaluation(ret4, s"${lb}1 -> 2014-12-03 13:01:00, 2 -> 2014-12-04 
15:05:00$rb")
+        val ret5 = cast(
+          Literal.create(Map(
+            1 -> Array(1, 2, 3),
+            2 -> Array(4, 5, 6))),
+          StringType)
+        checkEvaluation(ret5, s"${lb}1 -> [1, 2, 3], 2 -> [4, 5, 6]$rb")
+      }
+    }
+  }
+
+  test("SPARK-22981 Cast struct to string") {
+    Seq(
+      false -> ("{", "}"),
+      true -> ("[", "]")).foreach { case (legacyCast, (lb, rb)) =>
+      withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> 
legacyCast.toString) {
+        val ret1 = cast(Literal.create((1, "a", 0.1)), StringType)
+        checkEvaluation(ret1, s"${lb}1, a, 0.1$rb")
+        val ret2 = cast(Literal.create(Tuple3[Int, String, String](1, null, 
"a")), StringType)
+        checkEvaluation(ret2, s"${lb}1,${if (legacyCast) "" else " null"}, 
a$rb")
+        val ret3 = cast(Literal.create(
+          (Date.valueOf("2014-12-03"), Timestamp.valueOf("2014-12-03 
15:05:00"))), StringType)
+        checkEvaluation(ret3, s"${lb}2014-12-03, 2014-12-03 15:05:00$rb")
+        val ret4 = cast(Literal.create(((1, "a"), 5, 0.1)), StringType)
+        checkEvaluation(ret4, s"$lb${lb}1, a$rb, 5, 0.1$rb")
+        val ret5 = cast(Literal.create((Seq(1, 2, 3), "a", 0.1)), StringType)
+        checkEvaluation(ret5, s"$lb[1, 2, 3], a, 0.1$rb")
+        val ret6 = cast(Literal.create((1, Map(1 -> "a", 2 -> "b", 3 -> 
"c"))), StringType)
+        checkEvaluation(ret6, s"${lb}1, ${lb}1 -> a, 2 -> b, 3 -> c$rb$rb")
+      }
+    }
+  }
+
+  test("SPARK-33291: Cast struct with null elements to string") {
+    Seq(
+      false -> ("{", "}"),
+      true -> ("[", "]")).foreach { case (legacyCast, (lb, rb)) =>
+      withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> 
legacyCast.toString) {
+        val ret1 = cast(Literal.create(Tuple2[String, String](null, null)), 
StringType)
+        checkEvaluation(
+          ret1,
+          s"$lb${if (legacyCast) "" else "null"},${if (legacyCast) "" else " 
null"}$rb")
+      }
+    }
+  }
 }
 
 abstract class AnsiCastSuiteBase extends CastSuiteBase {
@@ -851,12 +962,6 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase {
     assert(cast(booleanLiteral, DateType).checkInputDataTypes().isFailure)
   }
 
-  test("ANSI mode: disallow casting complex types as String type") {
-    verifyCastFailure(cast(Literal.create(Array(1, 2, 3, 4, 5)), StringType))
-    verifyCastFailure(cast(Literal.create(Map(1 -> "a")), StringType))
-    verifyCastFailure(cast(Literal.create((1, "a", 0.1)), StringType))
-  }
-
   test("cast from invalid string to numeric should throw 
NumberFormatException") {
     // cast to IntegerType
     Seq(IntegerType, ShortType, ByteType, LongType).foreach { dataType =>
@@ -1569,117 +1674,6 @@ class CastSuite extends CastSuiteBase {
     checkEvaluation(cast("abcd", DecimalType(38, 1)), null)
   }
 
-  test("SPARK-22825 Cast array to string") {
-    val ret1 = cast(Literal.create(Array(1, 2, 3, 4, 5)), StringType)
-    checkEvaluation(ret1, "[1, 2, 3, 4, 5]")
-    val ret2 = cast(Literal.create(Array("ab", "cde", "f")), StringType)
-    checkEvaluation(ret2, "[ab, cde, f]")
-    Seq(false, true).foreach { omitNull =>
-      withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> 
omitNull.toString) {
-        val ret3 = cast(Literal.create(Array("ab", null, "c")), StringType)
-        checkEvaluation(ret3, s"[ab,${if (omitNull) "" else " null"}, c]")
-      }
-    }
-    val ret4 =
-      cast(Literal.create(Array("ab".getBytes, "cde".getBytes, "f".getBytes)), 
StringType)
-    checkEvaluation(ret4, "[ab, cde, f]")
-    val ret5 = cast(
-      Literal.create(Array("2014-12-03", "2014-12-04", 
"2014-12-06").map(Date.valueOf)),
-      StringType)
-    checkEvaluation(ret5, "[2014-12-03, 2014-12-04, 2014-12-06]")
-    val ret6 = cast(
-      Literal.create(Array("2014-12-03 13:01:00", "2014-12-04 15:05:00")
-        .map(Timestamp.valueOf)),
-      StringType)
-    checkEvaluation(ret6, "[2014-12-03 13:01:00, 2014-12-04 15:05:00]")
-    val ret7 = cast(Literal.create(Array(Array(1, 2, 3), Array(4, 5))), 
StringType)
-    checkEvaluation(ret7, "[[1, 2, 3], [4, 5]]")
-    val ret8 = cast(
-      Literal.create(Array(Array(Array("a"), Array("b", "c")), 
Array(Array("d")))),
-      StringType)
-    checkEvaluation(ret8, "[[[a], [b, c]], [[d]]]")
-  }
-
-  test("SPARK-33291: Cast array with null elements to string") {
-    Seq(false, true).foreach { omitNull =>
-      withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> 
omitNull.toString) {
-        val ret1 = cast(Literal.create(Array(null, null)), StringType)
-        checkEvaluation(
-          ret1,
-          s"[${if (omitNull) "" else "null"},${if (omitNull) "" else " 
null"}]")
-      }
-    }
-  }
-
-  test("SPARK-22973 Cast map to string") {
-    Seq(
-      false -> ("{", "}"),
-      true -> ("[", "]")).foreach { case (legacyCast, (lb, rb)) =>
-      withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> 
legacyCast.toString) {
-        val ret1 = cast(Literal.create(Map(1 -> "a", 2 -> "b", 3 -> "c")), 
StringType)
-        checkEvaluation(ret1, s"${lb}1 -> a, 2 -> b, 3 -> c$rb")
-        val ret2 = cast(
-          Literal.create(Map("1" -> "a".getBytes, "2" -> null, "3" -> 
"c".getBytes)),
-          StringType)
-        checkEvaluation(ret2, s"${lb}1 -> a, 2 ->${if (legacyCast) "" else " 
null"}, 3 -> c$rb")
-        val ret3 = cast(
-          Literal.create(Map(
-            1 -> Date.valueOf("2014-12-03"),
-            2 -> Date.valueOf("2014-12-04"),
-            3 -> Date.valueOf("2014-12-05"))),
-          StringType)
-        checkEvaluation(ret3, s"${lb}1 -> 2014-12-03, 2 -> 2014-12-04, 3 -> 
2014-12-05$rb")
-        val ret4 = cast(
-          Literal.create(Map(
-            1 -> Timestamp.valueOf("2014-12-03 13:01:00"),
-            2 -> Timestamp.valueOf("2014-12-04 15:05:00"))),
-          StringType)
-        checkEvaluation(ret4, s"${lb}1 -> 2014-12-03 13:01:00, 2 -> 2014-12-04 
15:05:00$rb")
-        val ret5 = cast(
-          Literal.create(Map(
-            1 -> Array(1, 2, 3),
-            2 -> Array(4, 5, 6))),
-          StringType)
-        checkEvaluation(ret5, s"${lb}1 -> [1, 2, 3], 2 -> [4, 5, 6]$rb")
-      }
-    }
-  }
-
-  test("SPARK-22981 Cast struct to string") {
-    Seq(
-      false -> ("{", "}"),
-      true -> ("[", "]")).foreach { case (legacyCast, (lb, rb)) =>
-      withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> 
legacyCast.toString) {
-        val ret1 = cast(Literal.create((1, "a", 0.1)), StringType)
-        checkEvaluation(ret1, s"${lb}1, a, 0.1$rb")
-        val ret2 = cast(Literal.create(Tuple3[Int, String, String](1, null, 
"a")), StringType)
-        checkEvaluation(ret2, s"${lb}1,${if (legacyCast) "" else " null"}, 
a$rb")
-        val ret3 = cast(Literal.create(
-          (Date.valueOf("2014-12-03"), Timestamp.valueOf("2014-12-03 
15:05:00"))), StringType)
-        checkEvaluation(ret3, s"${lb}2014-12-03, 2014-12-03 15:05:00$rb")
-        val ret4 = cast(Literal.create(((1, "a"), 5, 0.1)), StringType)
-        checkEvaluation(ret4, s"$lb${lb}1, a$rb, 5, 0.1$rb")
-        val ret5 = cast(Literal.create((Seq(1, 2, 3), "a", 0.1)), StringType)
-        checkEvaluation(ret5, s"$lb[1, 2, 3], a, 0.1$rb")
-        val ret6 = cast(Literal.create((1, Map(1 -> "a", 2 -> "b", 3 -> 
"c"))), StringType)
-        checkEvaluation(ret6, s"${lb}1, ${lb}1 -> a, 2 -> b, 3 -> c$rb$rb")
-      }
-    }
-  }
-
-  test("SPARK-33291: Cast struct with null elements to string") {
-    Seq(
-      false -> ("{", "}"),
-      true -> ("[", "]")).foreach { case (legacyCast, (lb, rb)) =>
-      withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> 
legacyCast.toString) {
-        val ret1 = cast(Literal.create(Tuple2[String, String](null, null)), 
StringType)
-        checkEvaluation(
-          ret1,
-          s"$lb${if (legacyCast) "" else "null"},${if (legacyCast) "" else " 
null"}$rb")
-      }
-    }
-  }
-
   test("data type casting II") {
     checkEvaluation(
       cast(cast(cast(cast(cast(cast("5", ByteType), TimestampType),

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to