Repository: spark
Updated Branches:
  refs/heads/master 912634b00 -> 2a4dd6f06


[SPARK-24681][SQL] Verify nested column names in Hive metastore

## What changes were proposed in this pull request?
This pr added code to check if nested column names do not include ',', ':', and 
';' because Hive metastore can't handle these characters in nested column names;
ref: 
https://github.com/apache/hive/blob/release-1.2.1/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java#L239

## How was this patch tested?
Added tests in `HiveDDLSuite`.

Author: Takeshi Yamamuro <yamam...@apache.org>

Closes #21711 from maropu/SPARK-24681.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2a4dd6f0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2a4dd6f0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2a4dd6f0

Branch: refs/heads/master
Commit: 2a4dd6f06cfd2f58fda9786c88809e6de695444e
Parents: 912634b
Author: Takeshi Yamamuro <yamam...@apache.org>
Authored: Tue Jul 17 14:15:30 2018 -0700
Committer: Xiao Li <gatorsm...@gmail.com>
Committed: Tue Jul 17 14:15:30 2018 -0700

----------------------------------------------------------------------
 .../spark/sql/hive/HiveExternalCatalog.scala    | 34 ++++++++++++++++----
 .../spark/sql/hive/execution/HiveDDLSuite.scala | 19 +++++++++++
 2 files changed, 46 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/2a4dd6f0/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
index 44480ce..7f28fc4 100644
--- 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
+++ 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
@@ -138,17 +138,37 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, 
hadoopConf: Configurat
   }
 
   /**
-   * Checks the validity of data column names. Hive metastore disallows the 
table to use comma in
-   * data column names. Partition columns do not have such a restriction. 
Views do not have such
-   * a restriction.
+   * Checks the validity of data column names. Hive metastore disallows the 
table to use some
+   * special characters (',', ':', and ';') in data column names, including 
nested column names.
+   * Partition columns do not have such a restriction. Views do not have such 
a restriction.
    */
   private def verifyDataSchema(
       tableName: TableIdentifier, tableType: CatalogTableType, dataSchema: 
StructType): Unit = {
     if (tableType != VIEW) {
-      dataSchema.map(_.name).foreach { colName =>
-        if (colName.contains(",")) {
-          throw new AnalysisException("Cannot create a table having a column 
whose name contains " +
-            s"commas in Hive metastore. Table: $tableName; Column: $colName")
+      val invalidChars = Seq(",", ":", ";")
+      def verifyNestedColumnNames(schema: StructType): Unit = schema.foreach { 
f =>
+        f.dataType match {
+          case st: StructType => verifyNestedColumnNames(st)
+          case _ if invalidChars.exists(f.name.contains) =>
+            val invalidCharsString = invalidChars.map(c => 
s"'$c'").mkString(", ")
+            val errMsg = "Cannot create a table having a nested column whose 
name contains " +
+              s"invalid characters ($invalidCharsString) in Hive metastore. 
Table: $tableName; " +
+              s"Column: ${f.name}"
+            throw new AnalysisException(errMsg)
+          case _ =>
+        }
+      }
+
+      dataSchema.foreach { f =>
+        f.dataType match {
+          // Checks top-level column names
+          case _ if f.name.contains(",") =>
+            throw new AnalysisException("Cannot create a table having a column 
whose name " +
+              s"contains commas in Hive metastore. Table: $tableName; Column: 
${f.name}")
+          // Checks nested column names
+          case st: StructType =>
+            verifyNestedColumnNames(st)
+          case _ =>
         }
       }
     }

http://git-wip-us.apache.org/repos/asf/spark/blob/2a4dd6f0/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index 0341c3b..31fd4c5 100644
--- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -33,6 +33,7 @@ import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.analysis.{NoSuchPartitionException, 
TableAlreadyExistsException}
 import org.apache.spark.sql.catalyst.catalog._
 import org.apache.spark.sql.execution.command.{DDLSuite, DDLUtils}
+import org.apache.spark.sql.functions._
 import org.apache.spark.sql.hive.HiveExternalCatalog
 import org.apache.spark.sql.hive.HiveUtils.{CONVERT_METASTORE_ORC, 
CONVERT_METASTORE_PARQUET}
 import org.apache.spark.sql.hive.orc.OrcFileOperator
@@ -2248,4 +2249,22 @@ class HiveDDLSuite
       checkAnswer(spark.table("t4"), Row(0, 0))
     }
   }
+
+  test("SPARK-24681 checks if nested column names do not include ',', ':', and 
';'") {
+    val expectedMsg = "Cannot create a table having a nested column whose name 
contains invalid " +
+      "characters (',', ':', ';') in Hive metastore."
+
+    Seq("nested,column", "nested:column", "nested;column").foreach { 
nestedColumnName =>
+      withTable("t") {
+        val e = intercept[AnalysisException] {
+          spark.range(1)
+            .select(struct(lit(0).as(nestedColumnName)).as("toplevel"))
+            .write
+            .format("hive")
+            .saveAsTable("t")
+        }.getMessage
+        assert(e.contains(expectedMsg))
+      }
+    }
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to