This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new d7b97a1  [SPARK-31166][SQL] UNION map<null, null> and other maps 
should not fail
d7b97a1 is described below

commit d7b97a1d0daf65710317321490a833f696a46f21
Author: Wenchen Fan <wenc...@databricks.com>
AuthorDate: Tue Mar 17 12:01:29 2020 +0800

    [SPARK-31166][SQL] UNION map<null, null> and other maps should not fail
    
    ### What changes were proposed in this pull request?
    
    After https://github.com/apache/spark/pull/27542, `map()` returns 
`map<null, null>` instead of `map<string, string>`. However, this breaks 
queries which union `map()` and other maps.
    
    The reason is, `TypeCoercion` rules and `Cast` think it's illegal to cast 
null type map key to other types, as it makes the key nullable, but it's 
actually legal. This PR fixes it.
    
    ### Why are the changes needed?
    
    To avoid breaking queries.
    
    ### Does this PR introduce any user-facing change?
    
    Yes, now some queries that work in 2.x can work in 3.0 as well.
    
    ### How was this patch tested?
    
    new test
    
    Closes #27926 from cloud-fan/bug.
    
    Authored-by: Wenchen Fan <wenc...@databricks.com>
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
---
 .../org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala     | 2 +-
 .../scala/org/apache/spark/sql/catalyst/expressions/Cast.scala    | 8 ++++++--
 sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala  | 6 ++++++
 3 files changed, 13 insertions(+), 3 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
index 0a0bef6..e149bf2 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
@@ -158,7 +158,7 @@ object TypeCoercion {
       }
     case (MapType(kt1, vt1, valueContainsNull1), MapType(kt2, vt2, 
valueContainsNull2)) =>
       findTypeFunc(kt1, kt2)
-        .filter { kt => !Cast.forceNullable(kt1, kt) && 
!Cast.forceNullable(kt2, kt) }
+        .filter { kt => Cast.canCastMapKeyNullSafe(kt1, kt) && 
Cast.canCastMapKeyNullSafe(kt2, kt) }
         .flatMap { kt =>
           findTypeFunc(vt1, vt2).map { vt =>
             MapType(kt, vt, valueContainsNull1 || valueContainsNull2 ||
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index 7c4316f..8177136 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -77,8 +77,7 @@ object Cast {
         resolvableNullability(fn || forceNullable(fromType, toType), tn)
 
     case (MapType(fromKey, fromValue, fn), MapType(toKey, toValue, tn)) =>
-      canCast(fromKey, toKey) &&
-        (!forceNullable(fromKey, toKey)) &&
+      canCast(fromKey, toKey) && canCastMapKeyNullSafe(fromKey, toKey) &&
         canCast(fromValue, toValue) &&
         resolvableNullability(fn || forceNullable(fromValue, toValue), tn)
 
@@ -98,6 +97,11 @@ object Cast {
     case _ => false
   }
 
+  def canCastMapKeyNullSafe(fromType: DataType, toType: DataType): Boolean = {
+    // If the original map key type is NullType, it's OK as the map must be 
empty.
+    fromType == NullType || !forceNullable(fromType, toType)
+  }
+
   /**
    * Return true if we need to use the `timeZone` information casting `from` 
type to `to` type.
    * The patterns matched reflect the current implementation in the Cast node.
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 823e432..de0f780 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -3487,6 +3487,12 @@ class SQLQuerySuite extends QueryTest with 
SharedSparkSession with AdaptiveSpark
       }
     }
   }
+
+  test("SPARK-31166: UNION map<null, null> and other maps should not fail") {
+    checkAnswer(
+      sql("(SELECT map()) UNION ALL (SELECT map(1, 2))"),
+      Seq(Row(Map[Int, Int]()), Row(Map(1 -> 2))))
+  }
 }
 
 case class Foo(bar: Option[String])


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to