This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch branch-3.4 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.4 by this push: new f16dd05620ab [SPARK-47646][SQL] Make try_to_number return NULL for malformed input f16dd05620ab is described below commit f16dd05620ab24357d2417458ab89c8ee51e67bf Author: Hyukjin Kwon <gurwls...@apache.org> AuthorDate: Fri Mar 29 17:38:10 2024 +0900 [SPARK-47646][SQL] Make try_to_number return NULL for malformed input What changes were proposed in this pull request? This PR proposes to add a NULL check after parsing the number so that the output can safely be NULL for the `try_to_number` expression. Before this change, the following snippet fails with the exception shown below it: ```scala import org.apache.spark.sql.functions._ val df = spark.createDataset(spark.sparkContext.parallelize(Seq("11"))) df.select(try_to_number($"value", lit("$99.99"))).show() ``` ``` java.lang.NullPointerException: Cannot invoke "org.apache.spark.sql.types.Decimal.toPlainString()" because "<local7>" is null at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.serializefromobject_doConsume_0$(Unknown Source) at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source) at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43) at org.apache.spark.sql.execution.WholeStageCodegenEvaluatorFactory$WholeStageCodegenPartitionEvaluator$$anon$1.hasNext(WholeStageCodegenEvaluatorFactory.scala:50) at org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:388) at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:894) at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:894) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:368) at org.apache.spark.rdd.RDD.iterator(RDD.scala:332) ``` Why are the changes needed? To fix the bug and let `try_to_number` return `NULL` for malformed input, as designed. Does this PR introduce any user-facing change? Yes, it fixes a bug. Previously, `try_to_number` failed with an NPE. How was this patch tested? A unit test was added. Was this patch authored or co-authored using generative AI tooling? No. 
Closes #45771 from HyukjinKwon/SPARK-47646. Authored-by: Hyukjin Kwon <gurwls...@apache.org> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- .../spark/sql/catalyst/expressions/numberFormatExpressions.scala | 1 + .../src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala index 2d4f0438db76..9dcca65efe5a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala @@ -86,6 +86,7 @@ abstract class ToNumberBase(left: Expression, right: Expression, errorOnFail: Bo |${CodeGenerator.javaType(dataType)} ${ev.value} = ${CodeGenerator.defaultValue(dataType)}; |if (!${ev.isNull}) { | ${ev.value} = $builder.parse(${eval.value}); + | ${ev.isNull} = ${ev.isNull} || (${ev.value} == null); |} """.stripMargin) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala index 23e71bb2f49f..404a1e742d19 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala @@ -714,4 +714,9 @@ class StringFunctionsSuite extends QueryTest with SharedSparkSession { Row("QqQQdddoooo") :: Row(null) :: Nil ) } + + test("SPARK-47646: try_to_number should return NULL for malformed input") { + val df = spark.createDataset(spark.sparkContext.parallelize(Seq("11"))) + checkAnswer(df.select(try_to_number($"value", lit("$99.99"))), Seq(Row(null))) + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org 
For additional commands, e-mail: commits-h...@spark.apache.org