This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new d7b97a1 [SPARK-31166][SQL] UNION map<null, null> and other maps should not fail d7b97a1 is described below commit d7b97a1d0daf65710317321490a833f696a46f21 Author: Wenchen Fan <wenc...@databricks.com> AuthorDate: Tue Mar 17 12:01:29 2020 +0800 [SPARK-31166][SQL] UNION map<null, null> and other maps should not fail ### What changes were proposed in this pull request? After https://github.com/apache/spark/pull/27542, `map()` returns `map<null, null>` instead of `map<string, string>`. However, this breaks queries which union `map()` and other maps. The reason is, `TypeCoercion` rules and `Cast` think it's illegal to cast null type map key to other types, as it makes the key nullable, but it's actually legal. This PR fixes it. ### Why are the changes needed? To avoid breaking queries. ### Does this PR introduce any user-facing change? Yes, now some queries that work in 2.x can work in 3.0 as well. ### How was this patch tested? new test Closes #27926 from cloud-fan/bug. Authored-by: Wenchen Fan <wenc...@databricks.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala | 2 +- .../scala/org/apache/spark/sql/catalyst/expressions/Cast.scala | 8 ++++++-- sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 6 ++++++ 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index 0a0bef6..e149bf2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -158,7 +158,7 @@ object TypeCoercion { } case (MapType(kt1, vt1, valueContainsNull1), MapType(kt2, vt2, valueContainsNull2)) => findTypeFunc(kt1, kt2) - .filter { kt => !Cast.forceNullable(kt1, kt) && !Cast.forceNullable(kt2, kt) } + .filter { kt => Cast.canCastMapKeyNullSafe(kt1, kt) && Cast.canCastMapKeyNullSafe(kt2, kt) } .flatMap { kt => findTypeFunc(vt1, vt2).map { vt => MapType(kt, vt, valueContainsNull1 || valueContainsNull2 || diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 7c4316f..8177136 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -77,8 +77,7 @@ object Cast { resolvableNullability(fn || forceNullable(fromType, toType), tn) case (MapType(fromKey, fromValue, fn), MapType(toKey, toValue, tn)) => - canCast(fromKey, toKey) && - (!forceNullable(fromKey, toKey)) && + canCast(fromKey, toKey) && canCastMapKeyNullSafe(fromKey, toKey) && canCast(fromValue, toValue) && resolvableNullability(fn || forceNullable(fromValue, toValue), tn) @@ -98,6 +97,11 @@ object Cast { case _ => false } + def canCastMapKeyNullSafe(fromType: DataType, toType: DataType): Boolean = { + // If the original map key type is NullType, it's OK as the map must be empty. + fromType == NullType || !forceNullable(fromType, toType) + } + /** * Return true if we need to use the `timeZone` information casting `from` type to `to` type. * The patterns matched reflect the current implementation in the Cast node. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 823e432..de0f780 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -3487,6 +3487,12 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark } } } + + test("SPARK-31166: UNION map<null, null> and other maps should not fail") { + checkAnswer( + sql("(SELECT map()) UNION ALL (SELECT map(1, 2))"), + Seq(Row(Map[Int, Int]()), Row(Map(1 -> 2)))) + } } case class Foo(bar: Option[String]) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org