Repository: spark Updated Branches: refs/heads/master 2d87a415f -> 55c4831d6
SPARK-6245 [SQL] jsonRDD() of empty RDD results in exception Avoid `UnsupportedOperationException` from JsonRDD.inferSchema on empty RDD. Not sure if this is supposed to be an error (but a better one), but it seems like this case can come up if the input is down-sampled so much that nothing is sampled. Now stuff like this: ``` sqlContext.jsonRDD(sc.parallelize(List[String]())) ``` just results in ``` org.apache.spark.sql.DataFrame = [] ``` Author: Sean Owen <so...@cloudera.com> Closes #4971 from srowen/SPARK-6245 and squashes the following commits: 3699964 [Sean Owen] Set() -> Set.empty 3c619e1 [Sean Owen] Avoid UnsupportedOperationException from JsonRDD.inferSchema on empty RDD Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/55c4831d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/55c4831d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/55c4831d Branch: refs/heads/master Commit: 55c4831d68c8326380086b5540244f984ea9ec27 Parents: 2d87a41 Author: Sean Owen <so...@cloudera.com> Authored: Wed Mar 11 14:09:09 2015 +0000 Committer: Sean Owen <so...@cloudera.com> Committed: Wed Mar 11 14:09:09 2015 +0000 ---------------------------------------------------------------------- .../src/main/scala/org/apache/spark/sql/json/JsonRDD.scala | 6 +++++- .../src/test/scala/org/apache/spark/sql/json/JsonSuite.scala | 7 +++++++ .../test/scala/org/apache/spark/sql/json/TestJsonData.scala | 3 +++ 3 files changed, 15 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/55c4831d/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala index e54a2a3..2b0358c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala @@ -48,7 +48,11 @@ private[sql] object JsonRDD extends Logging { require(samplingRatio > 0, s"samplingRatio ($samplingRatio) should be greater than 0") val schemaData = if (samplingRatio > 0.99) json else json.sample(false, samplingRatio, 1) val allKeys = - parseJson(schemaData, columnNameOfCorruptRecords).map(allKeysWithValueTypes).reduce(_ ++ _) + if (schemaData.isEmpty()) { + Set.empty[(String,DataType)] + } else { + parseJson(schemaData, columnNameOfCorruptRecords).map(allKeysWithValueTypes).reduce(_ ++ _) + } createSchema(allKeys) } http://git-wip-us.apache.org/repos/asf/spark/blob/55c4831d/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala index 0c21f72..320b80d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala @@ -1033,4 +1033,11 @@ class JsonSuite extends QueryTest { assert(!logicalRelation2.sameResult(logicalRelation3), s"$logicalRelation2 and $logicalRelation3 should be considered not having the same result.") } + + test("SPARK-6245 JsonRDD.inferSchema on empty RDD") { + // This is really a test that it doesn't throw an exception + val emptySchema = JsonRDD.inferSchema(empty, 1.0, "") + assert(StructType(Seq()) === emptySchema) + } + } http://git-wip-us.apache.org/repos/asf/spark/blob/55c4831d/sql/core/src/test/scala/org/apache/spark/sql/json/TestJsonData.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/json/TestJsonData.scala b/sql/core/src/test/scala/org/apache/spark/sql/json/TestJsonData.scala index 15698f6..47a97a4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/json/TestJsonData.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/json/TestJsonData.scala @@ -185,4 +185,7 @@ object TestJsonData { """{"a":{, b:3}""" :: """{"b":"str_b_4", "a":"str_a_4", "c":"str_c_4"}""" :: """]""" :: Nil) + + val empty = + TestSQLContext.sparkContext.parallelize(Seq[String]()) } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org