Repository: spark
Updated Branches:
  refs/heads/master c939c70ac -> 139c15b62


[SPARK-11694][SQL] Parquet logical types are not being tested properly

All the physical types are properly tested in `ParquetIOSuite`, but the mapping of
Parquet logical types to Spark SQL types is not being tested.
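
For background, a minimal sketch (illustrative only, not part of this commit) of the
distinction: Parquet stores values using a small set of physical types, and a logical
type annotation tells readers how to interpret them. For example, INT_8 annotates a
physical int32, and Spark is expected to surface it as ByteType:

  import org.apache.parquet.schema.MessageTypeParser

  // Physical type: int32. Logical annotation: INT_8. Spark's reader should
  // map this column to ByteType rather than IntegerType.
  val schema = MessageTypeParser.parseMessageType(
    """message root {
      |  required int32 a(INT_8);
      |}
    """.stripMargin)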

Author: hyukjinkwon <gurwls...@gmail.com>
Author: Hyukjin Kwon <gurwls...@gmail.com>

Closes #9660 from HyukjinKwon/SPARK-11694.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/139c15b6
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/139c15b6
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/139c15b6

Branch: refs/heads/master
Commit: 139c15b624c88b376ffdd05d78795295c8c4fc17
Parents: c939c70
Author: hyukjinkwon <gurwls...@gmail.com>
Authored: Sat Nov 14 18:36:01 2015 +0800
Committer: Cheng Lian <l...@databricks.com>
Committed: Sat Nov 14 18:36:01 2015 +0800

----------------------------------------------------------------------
 .../datasources/parquet/ParquetIOSuite.scala    | 39 +++++++++++++++-----
 .../datasources/parquet/ParquetTest.scala       | 17 +++++++++
 2 files changed, 47 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/139c15b6/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
index 82a42d6..78df363 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
@@ -91,6 +91,33 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSQLContext {
     }
   }
 
+  test("SPARK-11694 Parquet logical types are not being tested properly") {
+    val parquetSchema = MessageTypeParser.parseMessageType(
+      """message root {
+        |  required int32 a(INT_8);
+        |  required int32 b(INT_16);
+        |  required int32 c(DATE);
+        |  required int32 d(DECIMAL(1,0));
+        |  required int64 e(DECIMAL(10,0));
+        |  required binary f(UTF8);
+        |  required binary g(ENUM);
+        |  required binary h(DECIMAL(32,0));
+        |  required fixed_len_byte_array(32) i(DECIMAL(32,0));
+        |}
+      """.stripMargin)
+
+    val expectedSparkTypes = Seq(ByteType, ShortType, DateType, DecimalType(1, 0),
+      DecimalType(10, 0), StringType, StringType, DecimalType(32, 0), DecimalType(32, 0))
+
+    withTempPath { location =>
+      val path = new Path(location.getCanonicalPath)
+      val conf = sparkContext.hadoopConfiguration
+      writeMetadata(parquetSchema, path, conf)
+      val sparkTypes = sqlContext.read.parquet(path.toString).schema.map(_.dataType)
+      assert(sparkTypes === expectedSparkTypes)
+    }
+  }
+
   test("string") {
     val data = (1 to 4).map(i => Tuple1(i.toString))
    // Property spark.sql.parquet.binaryAsString shouldn't affect Parquet files written by Spark SQL
@@ -374,16 +401,10 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSQLContext {
       """.stripMargin)
 
     withTempPath { location =>
-      val extraMetadata = Collections.singletonMap(
-        CatalystReadSupport.SPARK_METADATA_KEY, sparkSchema.toString)
-      val fileMetadata = new FileMetaData(parquetSchema, extraMetadata, "Spark")
+      val extraMetadata = Map(CatalystReadSupport.SPARK_METADATA_KEY -> sparkSchema.toString)
       val path = new Path(location.getCanonicalPath)
-
-      ParquetFileWriter.writeMetadataFile(
-        sparkContext.hadoopConfiguration,
-        path,
-        Collections.singletonList(
-          new Footer(path, new ParquetMetadata(fileMetadata, Collections.emptyList()))))
+      val conf = sparkContext.hadoopConfiguration
+      writeMetadata(parquetSchema, path, conf, extraMetadata)
 
       assertResult(sqlContext.read.parquet(path.toString).schema) {
         StructType(
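
The hunk above swaps hand-built Java metadata plumbing for the shared helper added
below. A condensed before/after sketch, with types and names exactly as in the diff:

  // Before: Java collections constructed inline at the call site.
  val fileMetadata = new FileMetaData(
    parquetSchema,
    Collections.singletonMap(CatalystReadSupport.SPARK_METADATA_KEY, sparkSchema.toString),
    "Spark")
  ParquetFileWriter.writeMetadataFile(
    sparkContext.hadoopConfiguration, path,
    Collections.singletonList(
      new Footer(path, new ParquetMetadata(fileMetadata, Collections.emptyList()))))

  // After: one helper call, with an idiomatic Scala Map for the extra metadata.
  val conf = sparkContext.hadoopConfiguration
  writeMetadata(parquetSchema, path, conf,
    Map(CatalystReadSupport.SPARK_METADATA_KEY -> sparkSchema.toString))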

http://git-wip-us.apache.org/repos/asf/spark/blob/139c15b6/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetTest.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetTest.scala
index 8ffb01f..fdd7697 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetTest.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetTest.scala
@@ -19,6 +19,8 @@ package org.apache.spark.sql.execution.datasources.parquet
 
 import java.io.File
 
+import org.apache.parquet.schema.MessageType
+
 import scala.collection.JavaConverters._
 import scala.reflect.ClassTag
 import scala.reflect.runtime.universe.TypeTag
@@ -117,6 +119,21 @@ private[sql] trait ParquetTest extends SQLTestUtils {
    ParquetFileWriter.writeMetadataFile(configuration, path, Seq(footer).asJava)
   }
 
+  /**
+   * This is an overloaded version of `writeMetadata` above that allows writing a
+   * customized Parquet schema.
+   */
+  protected def writeMetadata(
+      parquetSchema: MessageType, path: Path, configuration: Configuration,
+      extraMetadata: Map[String, String] = Map.empty[String, String]): Unit = {
+    val extraMetadataAsJava = extraMetadata.asJava
+    val createdBy = s"Apache Spark ${org.apache.spark.SPARK_VERSION}"
+    val fileMetadata = new FileMetaData(parquetSchema, extraMetadataAsJava, createdBy)
+    val parquetMetadata = new ParquetMetadata(fileMetadata, Seq.empty[BlockMetaData].asJava)
+    val footer = new Footer(path, parquetMetadata)
+    ParquetFileWriter.writeMetadataFile(configuration, path, Seq(footer).asJava)
+  }
+
   protected def readAllFootersWithoutSummaryFiles(
       path: Path, configuration: Configuration): Seq[Footer] = {
     val fs = path.getFileSystem(configuration)
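
A hedged usage sketch of the new overload, assuming a suite that mixes in ParquetTest
with SharedSQLContext (it mirrors the SPARK-11694 test added above):

  withTempPath { location =>
    val path = new Path(location.getCanonicalPath)
    val conf = sparkContext.hadoopConfiguration

    // Write only a Parquet summary file carrying the given schema; no data rows.
    writeMetadata(parquetSchema, path, conf)

    // Reading the path back shows how Spark maps each Parquet logical type.
    val sparkTypes = sqlContext.read.parquet(path.toString).schema.map(_.dataType)
    assert(sparkTypes === expectedSparkTypes)
  }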

