This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new abba354  [SPARK-31166][SQL] UNION map<null, null> and other maps 
should not fail
abba354 is described below

commit abba354a6558bfba47f8621fa6e60bc1f3b00280
Author: Wenchen Fan <wenc...@databricks.com>
AuthorDate: Tue Mar 17 12:01:29 2020 +0800

    [SPARK-31166][SQL] UNION map<null, null> and other maps should not fail
    
    After https://github.com/apache/spark/pull/27542, `map()` returns 
`map<null, null>` instead of `map<string, string>`. However, this breaks 
queries which union `map()` and other maps.
    
    The reason is, `TypeCoercion` rules and `Cast` think it's illegal to cast 
null type map key to other types, as it makes the key nullable, but it's 
actually legal. This PR fixes it.
    
    To avoid breaking queries.
    
    Yes, now some queries that work in 2.x can work in 3.0 as well.
    
    new test
    
    Closes #27926 from cloud-fan/bug.
    
    Authored-by: Wenchen Fan <wenc...@databricks.com>
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
    (cherry picked from commit d7b97a1d0daf65710317321490a833f696a46f21)
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
---
 .../org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala     | 2 +-
 .../scala/org/apache/spark/sql/catalyst/expressions/Cast.scala    | 8 ++++++--
 sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala  | 6 ++++++
 3 files changed, 13 insertions(+), 3 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
index 0a0bef6..e149bf2 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
@@ -158,7 +158,7 @@ object TypeCoercion {
       }
     case (MapType(kt1, vt1, valueContainsNull1), MapType(kt2, vt2, 
valueContainsNull2)) =>
       findTypeFunc(kt1, kt2)
-        .filter { kt => !Cast.forceNullable(kt1, kt) && 
!Cast.forceNullable(kt2, kt) }
+        .filter { kt => Cast.canCastMapKeyNullSafe(kt1, kt) && 
Cast.canCastMapKeyNullSafe(kt2, kt) }
         .flatMap { kt =>
           findTypeFunc(vt1, vt2).map { vt =>
             MapType(kt, vt, valueContainsNull1 || valueContainsNull2 ||
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index 7c4316f..8177136 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -77,8 +77,7 @@ object Cast {
         resolvableNullability(fn || forceNullable(fromType, toType), tn)
 
     case (MapType(fromKey, fromValue, fn), MapType(toKey, toValue, tn)) =>
-      canCast(fromKey, toKey) &&
-        (!forceNullable(fromKey, toKey)) &&
+      canCast(fromKey, toKey) && canCastMapKeyNullSafe(fromKey, toKey) &&
         canCast(fromValue, toValue) &&
         resolvableNullability(fn || forceNullable(fromValue, toValue), tn)
 
@@ -98,6 +97,11 @@ object Cast {
     case _ => false
   }
 
+  def canCastMapKeyNullSafe(fromType: DataType, toType: DataType): Boolean = {
+    // If the original map key type is NullType, it's OK as the map must be 
empty.
+    fromType == NullType || !forceNullable(fromType, toType)
+  }
+
   /**
    * Return true if we need to use the `timeZone` information casting `from` 
type to `to` type.
    * The patterns matched reflect the current implementation in the Cast node.
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 81dfa798..feb1450 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -3456,6 +3456,12 @@ class SQLQuerySuite extends QueryTest with 
SharedSparkSession with AdaptiveSpark
       """.stripMargin)
     checkAnswer(df2, Row(1) :: Nil)
   }
+
+  test("SPARK-31166: UNION map<null, null> and other maps should not fail") {
+    checkAnswer(
+      sql("(SELECT map()) UNION ALL (SELECT map(1, 2))"),
+      Seq(Row(Map[Int, Int]()), Row(Map(1 -> 2))))
+  }
 }
 
 case class Foo(bar: Option[String])


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to