spark git commit: [SPARK-18295][SQL] Make to_json function null safe (matching it to from_json)

2016-11-07 Thread marmbrus
Repository: spark
Updated Branches:
  refs/heads/branch-2.1 9873d57f2 -> 4af82d56f


[SPARK-18295][SQL] Make to_json function null safe (matching it to from_json)

## What changes were proposed in this pull request?

This PR proposes to match up the behaviour of `to_json` to `from_json` function 
for null-safety.

Currently, it throws a `NullPointerException`; this PR changes it to produce 
`null` instead.

with the data below:

```scala
import spark.implicits._

val df = Seq(Some(Tuple1(Tuple1(1))), None).toDF("a")
df.show()
```

```
+----+
|   a|
+----+
| [1]|
|null|
+----+
```

the code below

```scala
import org.apache.spark.sql.functions._

df.select(to_json($"a")).show()
```

produces..

**Before**

throws `NullPointerException` as below:

```
java.lang.NullPointerException
  at 
org.apache.spark.sql.catalyst.json.JacksonGenerator.org$apache$spark$sql$catalyst$json$JacksonGenerator$$writeFields(JacksonGenerator.scala:138)
  at 
org.apache.spark.sql.catalyst.json.JacksonGenerator$$anonfun$write$1.apply$mcV$sp(JacksonGenerator.scala:194)
  at 
org.apache.spark.sql.catalyst.json.JacksonGenerator.org$apache$spark$sql$catalyst$json$JacksonGenerator$$writeObject(JacksonGenerator.scala:131)
  at 
org.apache.spark.sql.catalyst.json.JacksonGenerator.write(JacksonGenerator.scala:193)
  at 
org.apache.spark.sql.catalyst.expressions.StructToJson.eval(jsonExpressions.scala:544)
  at 
org.apache.spark.sql.catalyst.expressions.Alias.eval(namedExpressions.scala:142)
  at 
org.apache.spark.sql.catalyst.expressions.InterpretedProjection.apply(Projection.scala:48)
  at 
org.apache.spark.sql.catalyst.expressions.InterpretedProjection.apply(Projection.scala:30)
  at 
scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
```

**After**

```
+---------------+
|structtojson(a)|
+---------------+
|       {"_1":1}|
|           null|
+---------------+
```

## How was this patch tested?

Unit test in `JsonExpressionsSuite.scala` and `JsonFunctionsSuite.scala`.

Author: hyukjinkwon 

Closes #15792 from HyukjinKwon/SPARK-18295.

(cherry picked from commit 3eda05703f02413540f180ade01f0f114e70b9cc)
Signed-off-by: Michael Armbrust 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4af82d56
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4af82d56
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4af82d56

Branch: refs/heads/branch-2.1
Commit: 4af82d56f79ac3cceb08b702413ae2b35dfea48b
Parents: 9873d57
Author: hyukjinkwon 
Authored: Mon Nov 7 16:54:40 2016 -0800
Committer: Michael Armbrust 
Committed: Mon Nov 7 16:54:57 2016 -0800

--
 .../sql/catalyst/expressions/jsonExpressions.scala| 14 +-
 .../catalyst/expressions/JsonExpressionsSuite.scala   | 13 +++--
 .../org/apache/spark/sql/JsonFunctionsSuite.scala | 14 ++
 3 files changed, 30 insertions(+), 11 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/4af82d56/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala
index 89fe7c4..b61583d 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala
@@ -484,7 +484,7 @@ case class JsonTuple(children: Seq[Expression])
  * Converts an json input string to a [[StructType]] with the specified schema.
  */
 case class JsonToStruct(schema: StructType, options: Map[String, String], 
child: Expression)
-  extends Expression with CodegenFallback with ExpectsInputTypes {
+  extends UnaryExpression with CodegenFallback with ExpectsInputTypes {
   override def nullable: Boolean = true
 
   @transient
@@ -495,11 +495,8 @@ case class JsonToStruct(schema: StructType, options: 
Map[String, String], child:
   new JSONOptions(options ++ Map("mode" -> ParseModes.FAIL_FAST_MODE)))
 
   override def dataType: DataType = schema
-  override def children: Seq[Expression] = child :: Nil
 
-  override def eval(input: InternalRow): Any = {
-val json = child.eval(input)
-if (json == null) return null
+  override def nullSafeEval(json: Any): Any = {
 try parser.parse(json.toString).head catch {
   case _: SparkSQLJsonProcessingException => null
 }
@@ -512,7 +509,7 @@ case class JsonToStruct(schema: StructType, options: 
Map[String, String], child:
  * Converts a [[StructType]] to a json output string.
  */
 case 

spark git commit: [SPARK-18295][SQL] Make to_json function null safe (matching it to from_json)

2016-11-07 Thread marmbrus
Repository: spark
Updated Branches:
  refs/heads/master 3a710b94b -> 3eda05703


[SPARK-18295][SQL] Make to_json function null safe (matching it to from_json)

## What changes were proposed in this pull request?

This PR proposes to match up the behaviour of `to_json` to `from_json` function 
for null-safety.

Currently, it throws a `NullPointerException`; this PR changes it to produce 
`null` instead.

with the data below:

```scala
import spark.implicits._

val df = Seq(Some(Tuple1(Tuple1(1))), None).toDF("a")
df.show()
```

```
+----+
|   a|
+----+
| [1]|
|null|
+----+
```

the code below

```scala
import org.apache.spark.sql.functions._

df.select(to_json($"a")).show()
```

produces..

**Before**

throws `NullPointerException` as below:

```
java.lang.NullPointerException
  at 
org.apache.spark.sql.catalyst.json.JacksonGenerator.org$apache$spark$sql$catalyst$json$JacksonGenerator$$writeFields(JacksonGenerator.scala:138)
  at 
org.apache.spark.sql.catalyst.json.JacksonGenerator$$anonfun$write$1.apply$mcV$sp(JacksonGenerator.scala:194)
  at 
org.apache.spark.sql.catalyst.json.JacksonGenerator.org$apache$spark$sql$catalyst$json$JacksonGenerator$$writeObject(JacksonGenerator.scala:131)
  at 
org.apache.spark.sql.catalyst.json.JacksonGenerator.write(JacksonGenerator.scala:193)
  at 
org.apache.spark.sql.catalyst.expressions.StructToJson.eval(jsonExpressions.scala:544)
  at 
org.apache.spark.sql.catalyst.expressions.Alias.eval(namedExpressions.scala:142)
  at 
org.apache.spark.sql.catalyst.expressions.InterpretedProjection.apply(Projection.scala:48)
  at 
org.apache.spark.sql.catalyst.expressions.InterpretedProjection.apply(Projection.scala:30)
  at 
scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
```

**After**

```
+---------------+
|structtojson(a)|
+---------------+
|       {"_1":1}|
|           null|
+---------------+
```

## How was this patch tested?

Unit test in `JsonExpressionsSuite.scala` and `JsonFunctionsSuite.scala`.

Author: hyukjinkwon 

Closes #15792 from HyukjinKwon/SPARK-18295.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3eda0570
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3eda0570
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3eda0570

Branch: refs/heads/master
Commit: 3eda05703f02413540f180ade01f0f114e70b9cc
Parents: 3a710b9
Author: hyukjinkwon 
Authored: Mon Nov 7 16:54:40 2016 -0800
Committer: Michael Armbrust 
Committed: Mon Nov 7 16:54:40 2016 -0800

--
 .../sql/catalyst/expressions/jsonExpressions.scala| 14 +-
 .../catalyst/expressions/JsonExpressionsSuite.scala   | 13 +++--
 .../org/apache/spark/sql/JsonFunctionsSuite.scala | 14 ++
 3 files changed, 30 insertions(+), 11 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/3eda0570/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala
index 89fe7c4..b61583d 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala
@@ -484,7 +484,7 @@ case class JsonTuple(children: Seq[Expression])
  * Converts an json input string to a [[StructType]] with the specified schema.
  */
 case class JsonToStruct(schema: StructType, options: Map[String, String], 
child: Expression)
-  extends Expression with CodegenFallback with ExpectsInputTypes {
+  extends UnaryExpression with CodegenFallback with ExpectsInputTypes {
   override def nullable: Boolean = true
 
   @transient
@@ -495,11 +495,8 @@ case class JsonToStruct(schema: StructType, options: 
Map[String, String], child:
   new JSONOptions(options ++ Map("mode" -> ParseModes.FAIL_FAST_MODE)))
 
   override def dataType: DataType = schema
-  override def children: Seq[Expression] = child :: Nil
 
-  override def eval(input: InternalRow): Any = {
-val json = child.eval(input)
-if (json == null) return null
+  override def nullSafeEval(json: Any): Any = {
 try parser.parse(json.toString).head catch {
   case _: SparkSQLJsonProcessingException => null
 }
@@ -512,7 +509,7 @@ case class JsonToStruct(schema: StructType, options: 
Map[String, String], child:
  * Converts a [[StructType]] to a json output string.
  */
 case class StructToJson(options: Map[String, String], child: Expression)
-  extends Expression with CodegenFallback with ExpectsInputTypes