Repository: spark
Updated Branches:
  refs/heads/branch-1.6 ecf027edd -> 68fa5c713


[SPARK-10113][SQL] Explicit error message for unsigned Parquet logical types

Parquet supports some unsigned datatypes. However, since Spark does not support 
unsigned datatypes, it needs to emit an exception with a clear message rather 
than one saying the datatype is illegal.

Author: hyukjinkwon <gurwls...@gmail.com>

Closes #9646 from HyukjinKwon/SPARK-10113.

(cherry picked from commit f5a9526fec284cccd0755d190c91e8d9999f7877)
Signed-off-by: Michael Armbrust <mich...@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/68fa5c71
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/68fa5c71
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/68fa5c71

Branch: refs/heads/branch-1.6
Commit: 68fa5c7133f511de16a3bde3e4614405ff49019b
Parents: ecf027e
Author: hyukjinkwon <gurwls...@gmail.com>
Authored: Thu Nov 12 12:29:50 2015 -0800
Committer: Michael Armbrust <mich...@databricks.com>
Committed: Thu Nov 12 12:30:03 2015 -0800

----------------------------------------------------------------------
 .../parquet/CatalystSchemaConverter.scala       |  7 ++++++
 .../datasources/parquet/ParquetIOSuite.scala    | 24 ++++++++++++++++++++
 2 files changed, 31 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/68fa5c71/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/CatalystSchemaConverter.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/CatalystSchemaConverter.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/CatalystSchemaConverter.scala
index 7f3394c..f28a18e 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/CatalystSchemaConverter.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/CatalystSchemaConverter.scala
@@ -108,6 +108,9 @@ private[parquet] class CatalystSchemaConverter(
     def typeString =
       if (originalType == null) s"$typeName" else s"$typeName ($originalType)"
 
+    def typeNotSupported() =
+      throw new AnalysisException(s"Parquet type not supported: $typeString")
+
     def typeNotImplemented() =
       throw new AnalysisException(s"Parquet type not yet supported: 
$typeString")
 
@@ -142,6 +145,9 @@ private[parquet] class CatalystSchemaConverter(
           case INT_32 | null => IntegerType
           case DATE => DateType
           case DECIMAL => makeDecimalType(MAX_PRECISION_FOR_INT32)
+          case UINT_8 => typeNotSupported()
+          case UINT_16 => typeNotSupported()
+          case UINT_32 => typeNotSupported()
           case TIME_MILLIS => typeNotImplemented()
           case _ => illegalType()
         }
@@ -150,6 +156,7 @@ private[parquet] class CatalystSchemaConverter(
         originalType match {
           case INT_64 | null => LongType
           case DECIMAL => makeDecimalType(MAX_PRECISION_FOR_INT64)
+          case UINT_64 => typeNotSupported()
           case TIMESTAMP_MILLIS => typeNotImplemented()
           case _ => illegalType()
         }

http://git-wip-us.apache.org/repos/asf/spark/blob/68fa5c71/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
index 7274479..82a42d6 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
@@ -206,6 +206,30 @@ class ParquetIOSuite extends QueryTest with ParquetTest 
with SharedSQLContext {
     }
   }
 
+  test("SPARK-10113 Support for unsigned Parquet logical types") {
+    val parquetSchema = MessageTypeParser.parseMessageType(
+      """message root {
+        |  required int32 c(UINT_32);
+        |}
+      """.stripMargin)
+
+    withTempPath { location =>
+      val extraMetadata = Map.empty[String, String].asJava
+      val fileMetadata = new FileMetaData(parquetSchema, extraMetadata, 
"Spark")
+      val path = new Path(location.getCanonicalPath)
+      val footer = List(
+        new Footer(path, new ParquetMetadata(fileMetadata, 
Collections.emptyList()))
+      ).asJava
+
+      ParquetFileWriter.writeMetadataFile(sparkContext.hadoopConfiguration, 
path, footer)
+
+      val errorMessage = intercept[Throwable] {
+        sqlContext.read.parquet(path.toString).printSchema()
+      }.toString
+      assert(errorMessage.contains("Parquet type not supported"))
+    }
+  }
+
   test("compression codec") {
     def compressionCodecFor(path: String, codecName: String): String = {
       val codecs = for {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to