This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.1 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.1 by this push: new fe2f646 [SPARK-36993][SQL] Fix json_tuple throw NPE if fields exist no foldable null value fe2f646 is described below commit fe2f646a7ee2d9c456d9aa1a6916cc478b73dd12 Author: ulysses-you <ulyssesyo...@gmail.com> AuthorDate: Wed Oct 13 19:36:16 2021 +0300 [SPARK-36993][SQL] Fix json_tuple throw NPE if fields exist no foldable null value ### What changes were proposed in this pull request? Wrap `expr.eval(input)` with Option in `json_tuple`. ### Why are the changes needed? If a `json_tuple` field-name argument is a non-foldable expression that evaluates to null, Spark throws an NPE when calling `toString` on the evaluated field. For example, the following query fails: ```SQL SELECT json_tuple('{"a":"1"}', if(c1 < 1, null, 'a')) FROM ( SELECT rand() AS c1 ); ``` ``` Caused by: java.lang.NullPointerException at org.apache.spark.sql.catalyst.expressions.JsonTuple.$anonfun$parseRow$2(jsonExpressions.scala:435) at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286) at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62) at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55) at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49) at scala.collection.TraversableLike.map(TraversableLike.scala:286) at scala.collection.TraversableLike.map$(TraversableLike.scala:279) at scala.collection.AbstractTraversable.map(Traversable.scala:108) at org.apache.spark.sql.catalyst.expressions.JsonTuple.parseRow(jsonExpressions.scala:435) at org.apache.spark.sql.catalyst.expressions.JsonTuple.$anonfun$eval$6(jsonExpressions.scala:413) ``` ### Does this PR introduce _any_ user-facing change? Yes, bug fix. ### How was this patch tested? Added tests in `json-functions.sql`. Closes #34268 from ulysses-you/SPARK-36993. 
Authored-by: ulysses-you <ulyssesyo...@gmail.com> Signed-off-by: Max Gekk <max.g...@gmail.com> (cherry picked from commit 7aedce44b73d9b0c56863f970257abf52ce551ce) Signed-off-by: Max Gekk <max.g...@gmail.com> --- .../sql/catalyst/expressions/jsonExpressions.scala | 7 +++++-- .../test/resources/sql-tests/inputs/json-functions.sql | 4 ++++ .../resources/sql-tests/results/json-functions.sql.out | 18 +++++++++++++++++- 3 files changed, 26 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala index a363615..5abac01 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala @@ -426,12 +426,15 @@ case class JsonTuple(children: Seq[Expression]) foldableFieldNames.map(_.orNull) } else if (constantFields == 0) { // none are foldable so all field names need to be evaluated from the input row - fieldExpressions.map(_.eval(input).asInstanceOf[UTF8String].toString) + fieldExpressions.map { expr => + Option(expr.eval(input)).map(_.asInstanceOf[UTF8String].toString).orNull + } } else { // if there is a mix of constant and non-constant expressions // prefer the cached copy when available foldableFieldNames.zip(fieldExpressions).map { - case (null, expr) => expr.eval(input).asInstanceOf[UTF8String].toString + case (null, expr) => + Option(expr.eval(input)).map(_.asInstanceOf[UTF8String].toString).orNull case (fieldName, _) => fieldName.orNull } } diff --git a/sql/core/src/test/resources/sql-tests/inputs/json-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/json-functions.sql index f6fa441..245a6a6 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/json-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/json-functions.sql @@ -25,6 +25,10 
@@ select from_json(); SELECT json_tuple('{"a" : 1, "b" : 2}', CAST(NULL AS STRING), 'b', CAST(NULL AS STRING), 'a'); CREATE TEMPORARY VIEW jsonTable(jsonField, a) AS SELECT * FROM VALUES ('{"a": 1, "b": 2}', 'a'); SELECT json_tuple(jsonField, 'b', CAST(NULL AS STRING), a) FROM jsonTable; +-- json_tuple exists no foldable null field +SELECT json_tuple('{"a":"1"}', if(c1 < 1, null, 'a')) FROM ( SELECT rand() AS c1 ); +SELECT json_tuple('{"a":"1"}', if(c1 < 1, null, 'a'), if(c2 < 1, null, 'a')) FROM ( SELECT 0 AS c1, rand() AS c2 ); + -- Clean up DROP VIEW IF EXISTS jsonTable; diff --git a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out index b14e3e1..138e70c 100644 --- a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 71 +-- Number of queries: 73 -- !query @@ -192,6 +192,22 @@ struct<c0:string,c1:string,c2:string> -- !query +SELECT json_tuple('{"a":"1"}', if(c1 < 1, null, 'a')) FROM ( SELECT rand() AS c1 ) +-- !query schema +struct<c0:string> +-- !query output +NULL + + +-- !query +SELECT json_tuple('{"a":"1"}', if(c1 < 1, null, 'a'), if(c2 < 1, null, 'a')) FROM ( SELECT 0 AS c1, rand() AS c2 ) +-- !query schema +struct<c0:string,c1:string> +-- !query output +NULL NULL + + +-- !query DROP VIEW IF EXISTS jsonTable -- !query schema struct<> --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org