This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new 3433f2a77d3  [SPARK-41452][SQL] `to_char` should return null when format is null
3433f2a77d3 is described below

commit 3433f2a77d3dd665f42aa3d558152cf4c912c54c
Author: Bruce Robbins <bersprock...@gmail.com>
AuthorDate: Thu Dec 8 16:14:43 2022 -0800

    [SPARK-41452][SQL] `to_char` should return null when format is null

    ### What changes were proposed in this pull request?

    When a user specifies a null format in `to_char`, return null instead of throwing a `NullPointerException`.

    ### Why are the changes needed?

    `to_char` currently throws a `NullPointerException` when the format is null:
    ```
    spark-sql> select to_char(454, null);
    [INTERNAL_ERROR] The Spark SQL phase analysis failed with an internal error. You hit a bug in Spark or the Spark plugins
    you use. Please, report this bug to the corresponding communities or vendors, and provide the full stack trace.
    org.apache.spark.SparkException: [INTERNAL_ERROR] The Spark SQL phase analysis failed with an internal error. You hit a
    bug in Spark or the Spark plugins you use. Please, report this bug to the corresponding communities or vendors, and
    provide the full stack trace.
    ...
    Caused by: java.lang.NullPointerException
        at org.apache.spark.sql.catalyst.expressions.ToCharacter.numberFormat$lzycompute(numberFormatExpressions.scala:227)
        at org.apache.spark.sql.catalyst.expressions.ToCharacter.numberFormat(numberFormatExpressions.scala:227)
        at org.apache.spark.sql.catalyst.expressions.ToCharacter.numberFormatter$lzycompute(numberFormatExpressions.scala:228)
        at org.apache.spark.sql.catalyst.expressions.ToCharacter.numberFormatter(numberFormatExpressions.scala:228)
        at org.apache.spark.sql.catalyst.expressions.ToCharacter.checkInputDataTypes(numberFormatExpressions.scala:236)
    ```

    Compare to `to_binary`:
    ```
    spark-sql> SELECT to_binary('abc', null);
    NULL
    Time taken: 3.097 seconds, Fetched 1 row(s)
    spark-sql>
    ```

    Also compare to `to_char` in PostgreSQL 14.6:
    ```
    select to_char(454, null) is null as to_char_is_null;

     to_char_is_null
    -----------------
     t
    (1 row)
    ```

    ### Does this PR introduce _any_ user-facing change?

    No.

    ### How was this patch tested?

    New unit test.

    Closes #38986 from bersprockets/to_char_issue.
    Authored-by: Bruce Robbins <bersprock...@gmail.com>
    Signed-off-by: Dongjoon Hyun <dongj...@apache.org>
---
 .../expressions/numberFormatExpressions.scala | 20 ++++++++++++++------
 .../expressions/StringExpressionsSuite.scala  |  7 +++++++
 2 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala
index f5f86bfac19..2d4f0438db7 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala
@@ -224,17 +224,21 @@ case class TryToNumber(left: Expression, right: Expression)
   group = "string_funcs")
 case class ToCharacter(left: Expression, right: Expression)
   extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant {
-  private lazy val numberFormat = right.eval().toString.toUpperCase(Locale.ROOT)
-  private lazy val numberFormatter = new ToNumberParser(numberFormat, true)
+  private lazy val numberFormatter = {
+    val value = right.eval()
+    if (value != null) {
+      new ToNumberParser(value.toString.toUpperCase(Locale.ROOT), true)
+    } else {
+      null
+    }
+  }
 
   override def dataType: DataType = StringType
   override def inputTypes: Seq[AbstractDataType] = Seq(DecimalType, StringType)
   override def checkInputDataTypes(): TypeCheckResult = {
     val inputTypeCheck = super.checkInputDataTypes()
     if (inputTypeCheck.isSuccess) {
-      if (right.foldable) {
-        numberFormatter.checkInputDataTypes()
-      } else {
+      if (!right.foldable) {
         DataTypeMismatch(
           errorSubClass = "NON_FOLDABLE_INPUT",
           messageParameters = Map(
@@ -243,6 +247,10 @@ case class ToCharacter(left: Expression, right: Expression)
             "inputExpr" -> toSQLExpr(right)
           )
         )
+      } else if (numberFormatter == null) {
+        TypeCheckResult.TypeCheckSuccess
+      } else {
+        numberFormatter.checkInputDataTypes()
       }
     } else {
       inputTypeCheck
@@ -260,7 +268,7 @@ case class ToCharacter(left: Expression, right: Expression)
     val result =
       code"""
         |${eval.code}
-        |boolean ${ev.isNull} = ${eval.isNull};
+        |boolean ${ev.isNull} = ${eval.isNull} || ($builder == null);
         |${CodeGenerator.javaType(dataType)} ${ev.value} = ${CodeGenerator.defaultValue(dataType)};
         |if (!${ev.isNull}) {
         |  ${ev.value} = $builder.format(${eval.value});
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
index f0b320db3a5..8be732a52ce 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
@@ -1256,6 +1256,13 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
     )
   }
 
+  test("SPARK-41452: ToCharacter: null format string") {
+    // if null format, to_number should return null
+    val toCharacterExpr = ToCharacter(Literal(Decimal(454)), Literal(null, StringType))
+    assert(toCharacterExpr.checkInputDataTypes() == TypeCheckResult.TypeCheckSuccess)
+    checkEvaluation(toCharacterExpr, null)
+  }
+
   test("ToBinary: fails analysis if fmt is not foldable") {
     val wrongFmt = AttributeReference("invalidFormat", StringType)()
     val toBinaryExpr = ToBinary(Literal("abc"), Some(wrongFmt))
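For readers skimming the diff, the following is a minimal, self-contained sketch of the null-handling pattern the patch applies: build the formatter lazily, leave it null when the evaluated format is null, and have evaluation return null instead of dereferencing it. This is illustrative only and not Spark code; `SimpleFormatter` and `ToCharLike` are hypothetical stand-ins for `ToNumberParser` and `ToCharacter`.

```scala
import java.util.Locale

object NullSafeFormatSketch {
  // Hypothetical stand-in for ToNumberParser, used only for illustration.
  final class SimpleFormatter(format: String) {
    def render(value: BigDecimal): String = s"[$format] $value"
  }

  // Hypothetical stand-in for ToCharacter; evalFormat models right.eval().
  final class ToCharLike(value: BigDecimal, evalFormat: () => Any) {
    // Mirrors the patch: the lazy formatter stays null when the format is null,
    // so nothing throws a NullPointerException while it is being built.
    private lazy val formatter: SimpleFormatter = evalFormat() match {
      case null => null
      case f    => new SimpleFormatter(f.toString.toUpperCase(Locale.ROOT))
    }

    // Null format => null result, matching to_binary and PostgreSQL's to_char.
    def eval(): String = if (formatter == null) null else formatter.render(value)
  }

  def main(args: Array[String]): Unit = {
    println(new ToCharLike(BigDecimal(454), () => "999").eval()) // [999] 454
    println(new ToCharLike(BigDecimal(454), () => null).eval())  // null
  }
}
```

The same guard shows up in the generated code path of the diff, where `${ev.isNull}` is additionally set when the referenced formatter object is null.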