spark git commit: [SPARK-18295][SQL] Make to_json function null safe (matching it to from_json)
Repository: spark Updated Branches: refs/heads/branch-2.1 9873d57f2 -> 4af82d56f [SPARK-18295][SQL] Make to_json function null safe (matching it to from_json) ## What changes were proposed in this pull request? This PR proposes to match the behaviour of the `to_json` function to that of `from_json` with respect to null-safety. Currently, it throws `NullPointerException` but this PR fixes this to produce `null` instead. With the data below: ```scala import spark.implicits._ val df = Seq(Some(Tuple1(Tuple1(1))), None).toDF("a") df.show() ``` ``` +----+ | a| +----+ | [1]| |null| +----+ ``` the code below ```scala import org.apache.spark.sql.functions._ df.select(to_json($"a")).show() ``` produces: **Before** throws `NullPointerException` as below: ``` java.lang.NullPointerException at org.apache.spark.sql.catalyst.json.JacksonGenerator.org$apache$spark$sql$catalyst$json$JacksonGenerator$$writeFields(JacksonGenerator.scala:138) at org.apache.spark.sql.catalyst.json.JacksonGenerator$$anonfun$write$1.apply$mcV$sp(JacksonGenerator.scala:194) at org.apache.spark.sql.catalyst.json.JacksonGenerator.org$apache$spark$sql$catalyst$json$JacksonGenerator$$writeObject(JacksonGenerator.scala:131) at org.apache.spark.sql.catalyst.json.JacksonGenerator.write(JacksonGenerator.scala:193) at org.apache.spark.sql.catalyst.expressions.StructToJson.eval(jsonExpressions.scala:544) at org.apache.spark.sql.catalyst.expressions.Alias.eval(namedExpressions.scala:142) at org.apache.spark.sql.catalyst.expressions.InterpretedProjection.apply(Projection.scala:48) at org.apache.spark.sql.catalyst.expressions.InterpretedProjection.apply(Projection.scala:30) at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) ``` **After** ``` +---------------+ |structtojson(a)| +---------------+ | {"_1":1}| | null| +---------------+ ``` ## How was this patch tested? Unit test in `JsonExpressionsSuite.scala` and `JsonFunctionsSuite.scala`. Author: hyukjinkwon Closes #15792 from HyukjinKwon/SPARK-18295. 
(cherry picked from commit 3eda05703f02413540f180ade01f0f114e70b9cc) Signed-off-by: Michael Armbrust Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4af82d56 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4af82d56 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4af82d56 Branch: refs/heads/branch-2.1 Commit: 4af82d56f79ac3cceb08b702413ae2b35dfea48b Parents: 9873d57 Author: hyukjinkwon Authored: Mon Nov 7 16:54:40 2016 -0800 Committer: Michael Armbrust Committed: Mon Nov 7 16:54:57 2016 -0800 -- .../sql/catalyst/expressions/jsonExpressions.scala| 14 +- .../catalyst/expressions/JsonExpressionsSuite.scala | 13 +++-- .../org/apache/spark/sql/JsonFunctionsSuite.scala | 14 ++ 3 files changed, 30 insertions(+), 11 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4af82d56/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala index 89fe7c4..b61583d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala @@ -484,7 +484,7 @@ case class JsonTuple(children: Seq[Expression]) * Converts an json input string to a [[StructType]] with the specified schema. 
*/ case class JsonToStruct(schema: StructType, options: Map[String, String], child: Expression) - extends Expression with CodegenFallback with ExpectsInputTypes { + extends UnaryExpression with CodegenFallback with ExpectsInputTypes { override def nullable: Boolean = true @transient @@ -495,11 +495,8 @@ case class JsonToStruct(schema: StructType, options: Map[String, String], child: new JSONOptions(options ++ Map("mode" -> ParseModes.FAIL_FAST_MODE))) override def dataType: DataType = schema - override def children: Seq[Expression] = child :: Nil - override def eval(input: InternalRow): Any = { -val json = child.eval(input) -if (json == null) return null + override def nullSafeEval(json: Any): Any = { try parser.parse(json.toString).head catch { case _: SparkSQLJsonProcessingException => null } @@ -512,7 +509,7 @@ case class JsonToStruct(schema: StructType, options: Map[String, String], child: * Converts a [[StructType]] to a json output string. */ case
spark git commit: [SPARK-18295][SQL] Make to_json function null safe (matching it to from_json)
Repository: spark Updated Branches: refs/heads/master 3a710b94b -> 3eda05703 [SPARK-18295][SQL] Make to_json function null safe (matching it to from_json) ## What changes were proposed in this pull request? This PR proposes to match the behaviour of the `to_json` function to that of `from_json` with respect to null-safety. Currently, it throws `NullPointerException` but this PR fixes this to produce `null` instead. With the data below: ```scala import spark.implicits._ val df = Seq(Some(Tuple1(Tuple1(1))), None).toDF("a") df.show() ``` ``` +----+ | a| +----+ | [1]| |null| +----+ ``` the code below ```scala import org.apache.spark.sql.functions._ df.select(to_json($"a")).show() ``` produces: **Before** throws `NullPointerException` as below: ``` java.lang.NullPointerException at org.apache.spark.sql.catalyst.json.JacksonGenerator.org$apache$spark$sql$catalyst$json$JacksonGenerator$$writeFields(JacksonGenerator.scala:138) at org.apache.spark.sql.catalyst.json.JacksonGenerator$$anonfun$write$1.apply$mcV$sp(JacksonGenerator.scala:194) at org.apache.spark.sql.catalyst.json.JacksonGenerator.org$apache$spark$sql$catalyst$json$JacksonGenerator$$writeObject(JacksonGenerator.scala:131) at org.apache.spark.sql.catalyst.json.JacksonGenerator.write(JacksonGenerator.scala:193) at org.apache.spark.sql.catalyst.expressions.StructToJson.eval(jsonExpressions.scala:544) at org.apache.spark.sql.catalyst.expressions.Alias.eval(namedExpressions.scala:142) at org.apache.spark.sql.catalyst.expressions.InterpretedProjection.apply(Projection.scala:48) at org.apache.spark.sql.catalyst.expressions.InterpretedProjection.apply(Projection.scala:30) at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) ``` **After** ``` +---------------+ |structtojson(a)| +---------------+ | {"_1":1}| | null| +---------------+ ``` ## How was this patch tested? Unit test in `JsonExpressionsSuite.scala` and `JsonFunctionsSuite.scala`. Author: hyukjinkwon Closes #15792 from HyukjinKwon/SPARK-18295. 
Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3eda0570 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3eda0570 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3eda0570 Branch: refs/heads/master Commit: 3eda05703f02413540f180ade01f0f114e70b9cc Parents: 3a710b9 Author: hyukjinkwon Authored: Mon Nov 7 16:54:40 2016 -0800 Committer: Michael Armbrust Committed: Mon Nov 7 16:54:40 2016 -0800 -- .../sql/catalyst/expressions/jsonExpressions.scala| 14 +- .../catalyst/expressions/JsonExpressionsSuite.scala | 13 +++-- .../org/apache/spark/sql/JsonFunctionsSuite.scala | 14 ++ 3 files changed, 30 insertions(+), 11 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3eda0570/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala index 89fe7c4..b61583d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala @@ -484,7 +484,7 @@ case class JsonTuple(children: Seq[Expression]) * Converts an json input string to a [[StructType]] with the specified schema. 
*/ case class JsonToStruct(schema: StructType, options: Map[String, String], child: Expression) - extends Expression with CodegenFallback with ExpectsInputTypes { + extends UnaryExpression with CodegenFallback with ExpectsInputTypes { override def nullable: Boolean = true @transient @@ -495,11 +495,8 @@ case class JsonToStruct(schema: StructType, options: Map[String, String], child: new JSONOptions(options ++ Map("mode" -> ParseModes.FAIL_FAST_MODE))) override def dataType: DataType = schema - override def children: Seq[Expression] = child :: Nil - override def eval(input: InternalRow): Any = { -val json = child.eval(input) -if (json == null) return null + override def nullSafeEval(json: Any): Any = { try parser.parse(json.toString).head catch { case _: SparkSQLJsonProcessingException => null } @@ -512,7 +509,7 @@ case class JsonToStruct(schema: StructType, options: Map[String, String], child: * Converts a [[StructType]] to a json output string. */ case class StructToJson(options: Map[String, String], child: Expression) - extends Expression with CodegenFallback with ExpectsInputTypes