This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new fe2f646  [SPARK-36993][SQL] Fix json_tuple throw NPE if fields exist 
no foldable null value
fe2f646 is described below

commit fe2f646a7ee2d9c456d9aa1a6916cc478b73dd12
Author: ulysses-you <ulyssesyo...@gmail.com>
AuthorDate: Wed Oct 13 19:36:16 2021 +0300

    [SPARK-36993][SQL] Fix json_tuple throw NPE if fields exist no foldable 
null value
    
    ### What changes were proposed in this pull request?
    
    Wrap `expr.eval(input)` in `Option` in `json_tuple`, so that a field name which evaluates to null is returned as null instead of being dereferenced.
    
    ### Why are the changes needed?
    
    If json_tuple has a non-foldable field that evaluates to null, Spark throws 
an NPE while evaluating field.toString.
    
    For example, the following query fails:
    ```SQL
    SELECT json_tuple('{"a":"1"}', if(c1 < 1, null, 'a')) FROM ( SELECT rand() 
AS c1 );
    ```
    
    ```
    Caused by: java.lang.NullPointerException
        at 
org.apache.spark.sql.catalyst.expressions.JsonTuple.$anonfun$parseRow$2(jsonExpressions.scala:435)
        at 
scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286)
        at 
scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
        at 
scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
        at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
        at scala.collection.TraversableLike.map(TraversableLike.scala:286)
        at scala.collection.TraversableLike.map$(TraversableLike.scala:279)
        at scala.collection.AbstractTraversable.map(Traversable.scala:108)
        at 
org.apache.spark.sql.catalyst.expressions.JsonTuple.parseRow(jsonExpressions.scala:435)
        at 
org.apache.spark.sql.catalyst.expressions.JsonTuple.$anonfun$eval$6(jsonExpressions.scala:413)
    ```
    
    ### Does this PR introduce _any_ user-facing change?
    
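    Yes, this is a bug fix: json_tuple now returns NULL for a non-foldable field that evaluates to null instead of failing with an NPE.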
    
    ### How was this patch tested?
    
    Added tests to `json-functions.sql`.
    
    Closes #34268 from ulysses-you/SPARK-36993.
    
    Authored-by: ulysses-you <ulyssesyo...@gmail.com>
    Signed-off-by: Max Gekk <max.g...@gmail.com>
    (cherry picked from commit 7aedce44b73d9b0c56863f970257abf52ce551ce)
    Signed-off-by: Max Gekk <max.g...@gmail.com>
---
 .../sql/catalyst/expressions/jsonExpressions.scala     |  7 +++++--
 .../test/resources/sql-tests/inputs/json-functions.sql |  4 ++++
 .../resources/sql-tests/results/json-functions.sql.out | 18 +++++++++++++++++-
 3 files changed, 26 insertions(+), 3 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala
index a363615..5abac01 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala
@@ -426,12 +426,15 @@ case class JsonTuple(children: Seq[Expression])
       foldableFieldNames.map(_.orNull)
     } else if (constantFields == 0) {
       // none are foldable so all field names need to be evaluated from the 
input row
-      fieldExpressions.map(_.eval(input).asInstanceOf[UTF8String].toString)
+      fieldExpressions.map { expr =>
+        
Option(expr.eval(input)).map(_.asInstanceOf[UTF8String].toString).orNull
+      }
     } else {
       // if there is a mix of constant and non-constant expressions
       // prefer the cached copy when available
       foldableFieldNames.zip(fieldExpressions).map {
-        case (null, expr) => expr.eval(input).asInstanceOf[UTF8String].toString
+        case (null, expr) =>
+          
Option(expr.eval(input)).map(_.asInstanceOf[UTF8String].toString).orNull
         case (fieldName, _) => fieldName.orNull
       }
     }
diff --git a/sql/core/src/test/resources/sql-tests/inputs/json-functions.sql 
b/sql/core/src/test/resources/sql-tests/inputs/json-functions.sql
index f6fa441..245a6a6 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/json-functions.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/json-functions.sql
@@ -25,6 +25,10 @@ select from_json();
 SELECT json_tuple('{"a" : 1, "b" : 2}', CAST(NULL AS STRING), 'b', CAST(NULL 
AS STRING), 'a');
 CREATE TEMPORARY VIEW jsonTable(jsonField, a) AS SELECT * FROM VALUES ('{"a": 
1, "b": 2}', 'a');
 SELECT json_tuple(jsonField, 'b', CAST(NULL AS STRING), a) FROM jsonTable;
+-- json_tuple exists no foldable null field
+SELECT json_tuple('{"a":"1"}', if(c1 < 1, null, 'a')) FROM ( SELECT rand() AS 
c1 );
+SELECT json_tuple('{"a":"1"}', if(c1 < 1, null, 'a'), if(c2 < 1, null, 'a')) 
FROM ( SELECT 0 AS c1, rand() AS c2 );
+
 -- Clean up
 DROP VIEW IF EXISTS jsonTable;
 
diff --git 
a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out 
b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out
index b14e3e1..138e70c 100644
--- a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 71
+-- Number of queries: 73
 
 
 -- !query
@@ -192,6 +192,22 @@ struct<c0:string,c1:string,c2:string>
 
 
 -- !query
+SELECT json_tuple('{"a":"1"}', if(c1 < 1, null, 'a')) FROM ( SELECT rand() AS 
c1 )
+-- !query schema
+struct<c0:string>
+-- !query output
+NULL
+
+
+-- !query
+SELECT json_tuple('{"a":"1"}', if(c1 < 1, null, 'a'), if(c2 < 1, null, 'a')) 
FROM ( SELECT 0 AS c1, rand() AS c2 )
+-- !query schema
+struct<c0:string,c1:string>
+-- !query output
+NULL   NULL
+
+
+-- !query
 DROP VIEW IF EXISTS jsonTable
 -- !query schema
 struct<>

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to