spark git commit: [SPARK-9894] [SQL] Json writer should handle MapData.

marmbrus Wed, 12 Aug 2015 16:45:57 -0700

Repository: spark
Updated Branches:
  refs/heads/master ab7e721cf -> 7035d880a



[SPARK-9894] [SQL] Json writer should handle MapData.

https://issues.apache.org/jira/browse/SPARK-9894

Author: Yin Huai <yh...@databricks.com>

Closes #8137 from yhuai/jsonMapData.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7035d880
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7035d880
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7035d880

Branch: refs/heads/master
Commit: 7035d880a0cf06910c19b4afd49645124c620f14
Parents: ab7e721
Author: Yin Huai <yh...@databricks.com>
Authored: Wed Aug 12 16:45:15 2015 -0700
Committer: Michael Armbrust <mich...@databricks.com>
Committed: Wed Aug 12 16:45:15 2015 -0700

----------------------------------------------------------------------
 .../datasources/json/JacksonGenerator.scala     | 10 +--
 .../sql/sources/JsonHadoopFsRelationSuite.scala | 78 ++++++++++++++++++++
 .../SimpleTextHadoopFsRelationSuite.scala       | 30 --------
 3 files changed, 83 insertions(+), 35 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/7035d880/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JacksonGenerator.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JacksonGenerator.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JacksonGenerator.scala
index 37c2b5a..99ac773 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JacksonGenerator.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JacksonGenerator.scala
@@ -107,12 +107,12 @@ private[sql] object JacksonGenerator {
         v.foreach(ty, (_, value) => valWriter(ty, value))
         gen.writeEndArray()
 
-      case (MapType(kv, vv, _), v: Map[_, _]) =>
+      case (MapType(kt, vt, _), v: MapData) =>
         gen.writeStartObject()
-        v.foreach { p =>
-          gen.writeFieldName(p._1.toString)
-          valWriter(vv, p._2)
-        }
+        v.foreach(kt, vt, { (k, v) =>
+          gen.writeFieldName(k.toString)
+          valWriter(vt, v)
+        })
         gen.writeEndObject()
 
       case (StructType(ty), v: InternalRow) =>

http://git-wip-us.apache.org/repos/asf/spark/blob/7035d880/sql/hive/src/test/scala/org/apache/spark/sql/sources/JsonHadoopFsRelationSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/sources/JsonHadoopFsRelationSuite.scala
 
b/sql/hive/src/test/scala/org/apache/spark/sql/sources/JsonHadoopFsRelationSuite.scala
new file mode 100644
index 0000000..ed6d512
--- /dev/null
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/sources/JsonHadoopFsRelationSuite.scala
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.sources
+
+import org.apache.hadoop.fs.Path
+
+import org.apache.spark.deploy.SparkHadoopUtil
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.types._
+
+class JsonHadoopFsRelationSuite extends HadoopFsRelationTest {
+  override val dataSourceName: String = "json"
+
+  import sqlContext._
+
+  test("save()/load() - partitioned table - simple queries - partition columns 
in data") {
+    withTempDir { file =>
+      val basePath = new Path(file.getCanonicalPath)
+      val fs = basePath.getFileSystem(SparkHadoopUtil.get.conf)
+      val qualifiedBasePath = fs.makeQualified(basePath)
+
+      for (p1 <- 1 to 2; p2 <- Seq("foo", "bar")) {
+        val partitionDir = new Path(qualifiedBasePath, s"p1=$p1/p2=$p2")
+        sparkContext
+          .parallelize(for (i <- 1 to 3) yield s"""{"a":$i,"b":"val_$i"}""")
+          .saveAsTextFile(partitionDir.toString)
+      }
+
+      val dataSchemaWithPartition =
+        StructType(dataSchema.fields :+ StructField("p1", IntegerType, 
nullable = true))
+
+      checkQueries(
+        read.format(dataSourceName)
+          .option("dataSchema", dataSchemaWithPartition.json)
+          .load(file.getCanonicalPath))
+    }
+  }
+
+  test("SPARK-9894: save complex types to JSON") {
+    withTempDir { file =>
+      file.delete()
+
+      val schema =
+        new StructType()
+          .add("array", ArrayType(LongType))
+          .add("map", MapType(StringType, new StructType().add("innerField", 
LongType)))
+
+      val data =
+        Row(Seq(1L, 2L, 3L), Map("m1" -> Row(4L))) ::
+          Row(Seq(5L, 6L, 7L), Map("m2" -> Row(10L))) :: Nil
+      val df = createDataFrame(sparkContext.parallelize(data), schema)
+
+      // Write the data out.
+      df.write.format(dataSourceName).save(file.getCanonicalPath)
+
+      // Read it back and check the result.
+      checkAnswer(
+        read.format(dataSourceName).schema(schema).load(file.getCanonicalPath),
+        df
+      )
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/7035d880/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextHadoopFsRelationSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextHadoopFsRelationSuite.scala
 
b/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextHadoopFsRelationSuite.scala
index 48c37a1..e8975e5 100644
--- 
a/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextHadoopFsRelationSuite.scala
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextHadoopFsRelationSuite.scala
@@ -50,33 +50,3 @@ class SimpleTextHadoopFsRelationSuite extends 
HadoopFsRelationTest {
     }
   }
 }
-
-class JsonHadoopFsRelationSuite extends HadoopFsRelationTest {
-  override val dataSourceName: String =
-    
classOf[org.apache.spark.sql.execution.datasources.json.DefaultSource].getCanonicalName
-
-  import sqlContext._
-
-  test("save()/load() - partitioned table - simple queries - partition columns 
in data") {
-    withTempDir { file =>
-      val basePath = new Path(file.getCanonicalPath)
-      val fs = basePath.getFileSystem(SparkHadoopUtil.get.conf)
-      val qualifiedBasePath = fs.makeQualified(basePath)
-
-      for (p1 <- 1 to 2; p2 <- Seq("foo", "bar")) {
-        val partitionDir = new Path(qualifiedBasePath, s"p1=$p1/p2=$p2")
-        sparkContext
-          .parallelize(for (i <- 1 to 3) yield s"""{"a":$i,"b":"val_$i"}""")
-          .saveAsTextFile(partitionDir.toString)
-      }
-
-      val dataSchemaWithPartition =
-        StructType(dataSchema.fields :+ StructField("p1", IntegerType, 
nullable = true))
-
-      checkQueries(
-        read.format(dataSourceName)
-          .option("dataSchema", dataSchemaWithPartition.json)
-          .load(file.getCanonicalPath))
-    }
-  }
-}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-9894] [SQL] Json writer should handle MapData.

Reply via email to