This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 3433f2a77d3 [SPARK-41452][SQL] `to_char` should return null when 
format is null
3433f2a77d3 is described below

commit 3433f2a77d3dd665f42aa3d558152cf4c912c54c
Author: Bruce Robbins <bersprock...@gmail.com>
AuthorDate: Thu Dec 8 16:14:43 2022 -0800

    [SPARK-41452][SQL] `to_char` should return null when format is null
    
    ### What changes were proposed in this pull request?
    
    When a user specifies a null format in `to_char`, return null instead of 
throwing a `NullPointerException`.
    
    ### Why are the changes needed?
    
    `to_char` currently throws a `NullPointerException` when the format is null:
    ```
    spark-sql> select to_char(454, null);
    [INTERNAL_ERROR] The Spark SQL phase analysis failed with an internal 
error. You hit a bug in Spark or the Spark plugins you use. Please, report this 
bug to the corresponding communities or vendors, and provide the full stack 
trace.
    org.apache.spark.SparkException: [INTERNAL_ERROR] The Spark SQL phase 
analysis failed with an internal error. You hit a bug in Spark or the Spark 
plugins you use. Please, report this bug to the corresponding communities or 
vendors, and provide the full stack trace.
    ...
    Caused by: java.lang.NullPointerException
            at 
org.apache.spark.sql.catalyst.expressions.ToCharacter.numberFormat$lzycompute(numberFormatExpressions.scala:227)
            at 
org.apache.spark.sql.catalyst.expressions.ToCharacter.numberFormat(numberFormatExpressions.scala:227)
            at 
org.apache.spark.sql.catalyst.expressions.ToCharacter.numberFormatter$lzycompute(numberFormatExpressions.scala:228)
            at 
org.apache.spark.sql.catalyst.expressions.ToCharacter.numberFormatter(numberFormatExpressions.scala:228)
            at 
org.apache.spark.sql.catalyst.expressions.ToCharacter.checkInputDataTypes(numberFormatExpressions.scala:236)
    ```
    Compare to `to_binary`:
    ```
    spark-sql> SELECT to_binary('abc', null);
    NULL
    Time taken: 3.097 seconds, Fetched 1 row(s)
    spark-sql>
    ```
    Also compare to `to_char` in PostgreSQL 14.6:
    ```
    select to_char(454, null) is null as to_char_is_null;
    
     to_char_is_null
    -----------------
     t
    (1 row)
    ```
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    New unit test.
    
    Closes #38986 from bersprockets/to_char_issue.
    
    Authored-by: Bruce Robbins <bersprock...@gmail.com>
    Signed-off-by: Dongjoon Hyun <dongj...@apache.org>
---
 .../expressions/numberFormatExpressions.scala        | 20 ++++++++++++++------
 .../expressions/StringExpressionsSuite.scala         |  7 +++++++
 2 files changed, 21 insertions(+), 6 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala
index f5f86bfac19..2d4f0438db7 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala
@@ -224,17 +224,21 @@ case class TryToNumber(left: Expression, right: 
Expression)
   group = "string_funcs")
 case class ToCharacter(left: Expression, right: Expression)
   extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant {
-  private lazy val numberFormat = 
right.eval().toString.toUpperCase(Locale.ROOT)
-  private lazy val numberFormatter = new ToNumberParser(numberFormat, true)
+  private lazy val numberFormatter = {
+    val value = right.eval()
+    if (value != null) {
+      new ToNumberParser(value.toString.toUpperCase(Locale.ROOT), true)
+    } else {
+      null
+    }
+  }
 
   override def dataType: DataType = StringType
   override def inputTypes: Seq[AbstractDataType] = Seq(DecimalType, StringType)
   override def checkInputDataTypes(): TypeCheckResult = {
     val inputTypeCheck = super.checkInputDataTypes()
     if (inputTypeCheck.isSuccess) {
-      if (right.foldable) {
-        numberFormatter.checkInputDataTypes()
-      } else {
+      if (!right.foldable) {
         DataTypeMismatch(
           errorSubClass = "NON_FOLDABLE_INPUT",
           messageParameters = Map(
@@ -243,6 +247,10 @@ case class ToCharacter(left: Expression, right: Expression)
             "inputExpr" -> toSQLExpr(right)
           )
         )
+      } else if (numberFormatter == null) {
+        TypeCheckResult.TypeCheckSuccess
+      } else {
+        numberFormatter.checkInputDataTypes()
       }
     } else {
       inputTypeCheck
@@ -260,7 +268,7 @@ case class ToCharacter(left: Expression, right: Expression)
     val result =
       code"""
          |${eval.code}
-         |boolean ${ev.isNull} = ${eval.isNull};
+         |boolean ${ev.isNull} = ${eval.isNull} || ($builder == null);
          |${CodeGenerator.javaType(dataType)} ${ev.value} = 
${CodeGenerator.defaultValue(dataType)};
          |if (!${ev.isNull}) {
          |  ${ev.value} = $builder.format(${eval.value});
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
index f0b320db3a5..8be732a52ce 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
@@ -1256,6 +1256,13 @@ class StringExpressionsSuite extends SparkFunSuite with 
ExpressionEvalHelper {
     )
   }
 
+  test("SPARK-41452: ToCharacter: null format string") {
+    // if null format, to_char should return null
+    val toCharacterExpr = ToCharacter(Literal(Decimal(454)), Literal(null, 
StringType))
+    assert(toCharacterExpr.checkInputDataTypes() == 
TypeCheckResult.TypeCheckSuccess)
+    checkEvaluation(toCharacterExpr, null)
+  }
+
   test("ToBinary: fails analysis if fmt is not foldable") {
     val wrongFmt = AttributeReference("invalidFormat", StringType)()
     val toBinaryExpr = ToBinary(Literal("abc"), Some(wrongFmt))


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to