This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch branch-2.4 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-2.4 by this push: new aae03ef [SPARK-27671][SQL] Fix error when casting from a nested null in a struct aae03ef is described below commit aae03ef5d9098dcce2cf590198774f507de72012 Author: Liang-Chi Hsieh <vii...@gmail.com> AuthorDate: Mon May 13 12:40:46 2019 -0700 [SPARK-27671][SQL] Fix error when casting from a nested null in a struct Currently, when a nested field in a struct is null, casting from the struct throws an error. ```scala scala> sql("select cast(struct(1, null) as struct<a:int,b:int>)").show scala.MatchError: NullType (of class org.apache.spark.sql.types.NullType$) at org.apache.spark.sql.catalyst.expressions.Cast.castToInt(Cast.scala:447) at org.apache.spark.sql.catalyst.expressions.Cast.cast(Cast.scala:635) at org.apache.spark.sql.catalyst.expressions.Cast.$anonfun$castStruct$1(Cast.scala:603) ``` Similarly, an inline table, which casts the null in a nested field under the hood, also throws an error. ```scala scala> sql("select * FROM VALUES (('a', (10, null))), (('b', (10, 50))), (('c', null)) AS tab(x, y)").show org.apache.spark.sql.AnalysisException: failed to evaluate expression named_struct('col1', 10, 'col2', NULL): NullType (of class org.apache.spark.sql.types.NullType$); line 1 pos 14 at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAnalysis(package.scala:47) at org.apache.spark.sql.catalyst.analysis.ResolveInlineTables.$anonfun$convert$6(ResolveInlineTables.scala:106) ``` This fixes the issue. Added tests. Closes #24576 from viirya/cast-null. 
Authored-by: Liang-Chi Hsieh <vii...@gmail.com> Signed-off-by: Dongjoon Hyun <dh...@apple.com> (cherry picked from commit 8b0bdaa8e018607f1c4e790d1c0eb8cd480dee24) Signed-off-by: Dongjoon Hyun <dh...@apple.com> --- .../org/apache/spark/sql/catalyst/expressions/Cast.scala | 6 ++++++ .../apache/spark/sql/catalyst/expressions/CastSuite.scala | 15 +++++++++++++++ .../test/scala/org/apache/spark/sql/DataFrameSuite.scala | 9 +++++++++ 3 files changed, 30 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index ac02dac..780db65 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -607,6 +607,12 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String // We can return what the children return. Same thing should happen in the codegen path. if (DataType.equalsStructurally(from, to)) { identity + } else if (from == NullType) { + // According to `canCast`, NullType can be casted to any type. + // For primitive types, we don't reach here because the guard of `nullSafeEval`. + // But for nested types like struct, we might reach here for nested null type field. + // We won't call the returned function actually, but returns a placeholder. 
+ _ => throw new SparkException(s"should not directly cast from NullType to $to.") } else { to match { case dt if dt == from => identity[Any] diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index b1531ba..c9a8c29 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -989,4 +989,19 @@ class CastSuite extends SparkFunSuite with ExpressionEvalHelper { } } } + + test("SPARK-27671: cast from nested null type in struct") { + import DataTypeTestUtils._ + + atomicTypes.foreach { atomicType => + val struct = Literal.create( + InternalRow(null), + StructType(Seq(StructField("a", NullType, nullable = true)))) + + val ret = cast(struct, StructType(Seq( + StructField("a", atomicType, nullable = true)))) + assert(ret.resolved) + checkEvaluation(ret, InternalRow(null)) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 5075209..3d74206 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -2622,4 +2622,13 @@ class DataFrameSuite extends QueryTest with SharedSQLContext { checkAnswer(res, Row("1-1", 6, 6)) } } + + test("SPARK-27671: Fix analysis exception when casting null in nested field in struct") { + val df = sql("SELECT * FROM VALUES (('a', (10, null))), (('b', (10, 50))), " + + "(('c', null)) AS tab(x, y)") + checkAnswer(df, Row("a", Row(10, null)) :: Row("b", Row(10, 50)) :: Row("c", null) :: Nil) + + val cast = sql("SELECT cast(struct(1, null) AS struct<a:int,b:int>)") + checkAnswer(cast, Row(Row(1, null)) :: Nil) + } } --------------------------------------------------------------------- To 
unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org