spark git commit: [SPARK-25884][SQL] Add TBLPROPERTIES and COMMENT, and use LOCATION when SHOW CREATE TABLE.

wenchen Wed, 31 Oct 2018 19:00:41 -0700

Repository: spark
Updated Branches:
  refs/heads/master c9667aff4 -> cc82b9fed



[SPARK-25884][SQL] Add TBLPROPERTIES and COMMENT, and use LOCATION when SHOW 
CREATE TABLE.

## What changes were proposed in this pull request?

When `SHOW CREATE TABLE` for Datasource tables, we are missing `TBLPROPERTIES` 
and `COMMENT`, and we should use `LOCATION` instead of path in `OPTION`.

## How was this patch tested?

Splitted `ShowCreateTableSuite` to confirm to work with both `InMemoryCatalog` 
and `HiveExternalCatalog`, and  added some tests.

Closes #22892 from ueshin/issues/SPARK-25884/show_create_table.

Authored-by: Takuya UESHIN <ues...@databricks.com>
Signed-off-by: Wenchen Fan <wenc...@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/cc82b9fe
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/cc82b9fe
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/cc82b9fe

Branch: refs/heads/master
Commit: cc82b9fed857503448c9ae6bd74ee2fe1ba9ba0b
Parents: c9667aff
Author: Takuya UESHIN <ues...@databricks.com>
Authored: Thu Nov 1 10:00:14 2018 +0800
Committer: Wenchen Fan <wenc...@databricks.com>
Committed: Thu Nov 1 10:00:14 2018 +0800

----------------------------------------------------------------------
 .../spark/sql/execution/command/tables.scala    |  36 +-
 sql/core/src/test/resources/sample.json         |   2 +
 .../sql-tests/inputs/show-create-table.sql      |  61 ++++
 .../sql-tests/results/show-create-table.sql.out | 222 ++++++++++++
 .../apache/spark/sql/ShowCreateTableSuite.scala | 229 ++++++++++++
 .../spark/sql/hive/HiveExternalCatalog.scala    |   5 +-
 sql/hive/src/test/resources/sample.json         |   2 -
 .../sql/hive/HiveShowCreateTableSuite.scala     | 198 ++++++++++
 .../spark/sql/hive/ShowCreateTableSuite.scala   | 360 -------------------
 9 files changed, 734 insertions(+), 381 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/cc82b9fe/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
index 64831e5..871eba4 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -957,9 +957,11 @@ case class ShowCreateTableCommand(table: TableIdentifier) 
extends RunnableComman
       builder ++= metadata.viewText.mkString(" AS\n", "", "\n")
     } else {
       showHiveTableHeader(metadata, builder)
+      showTableComment(metadata, builder)
       showHiveTableNonDataColumns(metadata, builder)
       showHiveTableStorageInfo(metadata, builder)
-      showHiveTableProperties(metadata, builder)
+      showTableLocation(metadata, builder)
+      showTableProperties(metadata, builder)
     }
 
     builder.toString()
@@ -973,14 +975,8 @@ case class ShowCreateTableCommand(table: TableIdentifier) 
extends RunnableComman
     if (columns.nonEmpty) {
       builder ++= columns.mkString("(", ", ", ")\n")
     }
-
-    metadata
-      .comment
-      .map("COMMENT '" + escapeSingleQuotedString(_) + "'\n")
-      .foreach(builder.append)
   }
 
-
   private def showHiveTableNonDataColumns(metadata: CatalogTable, builder: 
StringBuilder): Unit = {
     if (metadata.partitionColumnNames.nonEmpty) {
       val partCols = metadata.partitionSchema.map(_.toDDL)
@@ -1023,15 +1019,24 @@ case class ShowCreateTableCommand(table: 
TableIdentifier) extends RunnableComman
         builder ++= s"  OUTPUTFORMAT '${escapeSingleQuotedString(format)}'\n"
       }
     }
+  }
 
+  private def showTableLocation(metadata: CatalogTable, builder: 
StringBuilder): Unit = {
     if (metadata.tableType == EXTERNAL) {
-      storage.locationUri.foreach { uri =>
-        builder ++= s"LOCATION '$uri'\n"
+      metadata.storage.locationUri.foreach { location =>
+        builder ++= s"LOCATION 
'${escapeSingleQuotedString(CatalogUtils.URIToString(location))}'\n"
       }
     }
   }
 
-  private def showHiveTableProperties(metadata: CatalogTable, builder: 
StringBuilder): Unit = {
+  private def showTableComment(metadata: CatalogTable, builder: 
StringBuilder): Unit = {
+    metadata
+      .comment
+      .map("COMMENT '" + escapeSingleQuotedString(_) + "'\n")
+      .foreach(builder.append)
+  }
+
+  private def showTableProperties(metadata: CatalogTable, builder: 
StringBuilder): Unit = {
     if (metadata.properties.nonEmpty) {
       val props = metadata.properties.map { case (key, value) =>
         s"'${escapeSingleQuotedString(key)}' = 
'${escapeSingleQuotedString(value)}'"
@@ -1048,6 +1053,9 @@ case class ShowCreateTableCommand(table: TableIdentifier) 
extends RunnableComman
     showDataSourceTableDataColumns(metadata, builder)
     showDataSourceTableOptions(metadata, builder)
     showDataSourceTableNonDataColumns(metadata, builder)
+    showTableComment(metadata, builder)
+    showTableLocation(metadata, builder)
+    showTableProperties(metadata, builder)
 
     builder.toString()
   }
@@ -1063,14 +1071,6 @@ case class ShowCreateTableCommand(table: 
TableIdentifier) extends RunnableComman
 
     val dataSourceOptions = metadata.storage.properties.map {
       case (key, value) => s"${quoteIdentifier(key)} 
'${escapeSingleQuotedString(value)}'"
-    } ++ metadata.storage.locationUri.flatMap { location =>
-      if (metadata.tableType == MANAGED) {
-        // If it's a managed table, omit PATH option. Spark SQL always creates 
external table
-        // when the table creation DDL contains the PATH option.
-        None
-      } else {
-        Some(s"path 
'${escapeSingleQuotedString(CatalogUtils.URIToString(location))}'")
-      }
     }
 
     if (dataSourceOptions.nonEmpty) {

http://git-wip-us.apache.org/repos/asf/spark/blob/cc82b9fe/sql/core/src/test/resources/sample.json
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sample.json 
b/sql/core/src/test/resources/sample.json
new file mode 100644
index 0000000..a2c2ffd
--- /dev/null
+++ b/sql/core/src/test/resources/sample.json
@@ -0,0 +1,2 @@
+{"a" : "2" ,"b" : "blah", "c_!@(3)":1}
+{"<d>" : {"d!" : [4, 5], "=" : [{"Dd2": null}, {"Dd2" : true}]}}

http://git-wip-us.apache.org/repos/asf/spark/blob/cc82b9fe/sql/core/src/test/resources/sql-tests/inputs/show-create-table.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/inputs/show-create-table.sql 
b/sql/core/src/test/resources/sql-tests/inputs/show-create-table.sql
new file mode 100644
index 0000000..852bfbd
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/show-create-table.sql
@@ -0,0 +1,61 @@
+-- simple
+CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet;
+
+SHOW CREATE TABLE tbl;
+DROP TABLE tbl;
+
+
+-- options
+CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet
+OPTIONS ('a' 1);
+
+SHOW CREATE TABLE tbl;
+DROP TABLE tbl;
+
+
+-- path option
+CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet
+OPTIONS ('path' '/path/to/table');
+
+SHOW CREATE TABLE tbl;
+DROP TABLE tbl;
+
+
+-- location
+CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet
+LOCATION '/path/to/table';
+
+SHOW CREATE TABLE tbl;
+DROP TABLE tbl;
+
+
+-- partition by
+CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet
+PARTITIONED BY (a);
+
+SHOW CREATE TABLE tbl;
+DROP TABLE tbl;
+
+
+-- clustered by
+CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet
+CLUSTERED BY (a) SORTED BY (b ASC) INTO 2 BUCKETS;
+
+SHOW CREATE TABLE tbl;
+DROP TABLE tbl;
+
+
+-- comment
+CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet
+COMMENT 'This is a comment';
+
+SHOW CREATE TABLE tbl;
+DROP TABLE tbl;
+
+
+-- tblproperties
+CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet
+TBLPROPERTIES ('a' = '1');
+
+SHOW CREATE TABLE tbl;
+DROP TABLE tbl;

http://git-wip-us.apache.org/repos/asf/spark/blob/cc82b9fe/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out 
b/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out
new file mode 100644
index 0000000..1faf16c
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out
@@ -0,0 +1,222 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 24
+
+
+-- !query 0
+CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet
+-- !query 0 schema
+struct<>
+-- !query 0 output
+
+
+
+-- !query 1
+SHOW CREATE TABLE tbl
+-- !query 1 schema
+struct<createtab_stmt:string>
+-- !query 1 output
+CREATE TABLE `tbl` (`a` INT, `b` STRING, `c` INT)
+USING parquet
+
+
+-- !query 2
+DROP TABLE tbl
+-- !query 2 schema
+struct<>
+-- !query 2 output
+
+
+
+-- !query 3
+CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet
+OPTIONS ('a' 1)
+-- !query 3 schema
+struct<>
+-- !query 3 output
+
+
+
+-- !query 4
+SHOW CREATE TABLE tbl
+-- !query 4 schema
+struct<createtab_stmt:string>
+-- !query 4 output
+CREATE TABLE `tbl` (`a` INT, `b` STRING, `c` INT)
+USING parquet
+OPTIONS (
+  `a` '1'
+)
+
+
+-- !query 5
+DROP TABLE tbl
+-- !query 5 schema
+struct<>
+-- !query 5 output
+
+
+
+-- !query 6
+CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet
+OPTIONS ('path' '/path/to/table')
+-- !query 6 schema
+struct<>
+-- !query 6 output
+
+
+
+-- !query 7
+SHOW CREATE TABLE tbl
+-- !query 7 schema
+struct<createtab_stmt:string>
+-- !query 7 output
+CREATE TABLE `tbl` (`a` INT, `b` STRING, `c` INT)
+USING parquet
+LOCATION 'file:/path/to/table'
+
+
+-- !query 8
+DROP TABLE tbl
+-- !query 8 schema
+struct<>
+-- !query 8 output
+
+
+
+-- !query 9
+CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet
+LOCATION '/path/to/table'
+-- !query 9 schema
+struct<>
+-- !query 9 output
+
+
+
+-- !query 10
+SHOW CREATE TABLE tbl
+-- !query 10 schema
+struct<createtab_stmt:string>
+-- !query 10 output
+CREATE TABLE `tbl` (`a` INT, `b` STRING, `c` INT)
+USING parquet
+LOCATION 'file:/path/to/table'
+
+
+-- !query 11
+DROP TABLE tbl
+-- !query 11 schema
+struct<>
+-- !query 11 output
+
+
+
+-- !query 12
+CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet
+PARTITIONED BY (a)
+-- !query 12 schema
+struct<>
+-- !query 12 output
+
+
+
+-- !query 13
+SHOW CREATE TABLE tbl
+-- !query 13 schema
+struct<createtab_stmt:string>
+-- !query 13 output
+CREATE TABLE `tbl` (`b` STRING, `c` INT, `a` INT)
+USING parquet
+PARTITIONED BY (a)
+
+
+-- !query 14
+DROP TABLE tbl
+-- !query 14 schema
+struct<>
+-- !query 14 output
+
+
+
+-- !query 15
+CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet
+CLUSTERED BY (a) SORTED BY (b ASC) INTO 2 BUCKETS
+-- !query 15 schema
+struct<>
+-- !query 15 output
+
+
+
+-- !query 16
+SHOW CREATE TABLE tbl
+-- !query 16 schema
+struct<createtab_stmt:string>
+-- !query 16 output
+CREATE TABLE `tbl` (`a` INT, `b` STRING, `c` INT)
+USING parquet
+CLUSTERED BY (a)
+SORTED BY (b)
+INTO 2 BUCKETS
+
+
+-- !query 17
+DROP TABLE tbl
+-- !query 17 schema
+struct<>
+-- !query 17 output
+
+
+
+-- !query 18
+CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet
+COMMENT 'This is a comment'
+-- !query 18 schema
+struct<>
+-- !query 18 output
+
+
+
+-- !query 19
+SHOW CREATE TABLE tbl
+-- !query 19 schema
+struct<createtab_stmt:string>
+-- !query 19 output
+CREATE TABLE `tbl` (`a` INT, `b` STRING, `c` INT)
+USING parquet
+COMMENT 'This is a comment'
+
+
+-- !query 20
+DROP TABLE tbl
+-- !query 20 schema
+struct<>
+-- !query 20 output
+
+
+
+-- !query 21
+CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet
+TBLPROPERTIES ('a' = '1')
+-- !query 21 schema
+struct<>
+-- !query 21 output
+
+
+
+-- !query 22
+SHOW CREATE TABLE tbl
+-- !query 22 schema
+struct<createtab_stmt:string>
+-- !query 22 output
+CREATE TABLE `tbl` (`a` INT, `b` STRING, `c` INT)
+USING parquet
+TBLPROPERTIES (
+  'a' = '1'
+)
+
+
+-- !query 23
+DROP TABLE tbl
+-- !query 23 schema
+struct<>
+-- !query 23 output
+

http://git-wip-us.apache.org/repos/asf/spark/blob/cc82b9fe/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala
new file mode 100644
index 0000000..5c347d2
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala
@@ -0,0 +1,229 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql
+
+import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.catalyst.catalog.CatalogTable
+import org.apache.spark.sql.test.{SharedSQLContext, SQLTestUtils}
+import org.apache.spark.util.Utils
+
+class SimpleShowCreateTableSuite extends ShowCreateTableSuite with 
SharedSQLContext
+
+abstract class ShowCreateTableSuite extends QueryTest with SQLTestUtils {
+  import testImplicits._
+
+  test("data source table with user specified schema") {
+    withTable("ddl_test") {
+      val jsonFilePath = 
Utils.getSparkClassLoader.getResource("sample.json").getFile
+
+      sql(
+        s"""CREATE TABLE ddl_test (
+           |  a STRING,
+           |  b STRING,
+           |  `extra col` ARRAY<INT>,
+           |  `<another>` STRUCT<x: INT, y: ARRAY<BOOLEAN>>
+           |)
+           |USING json
+           |OPTIONS (
+           | PATH '$jsonFilePath'
+           |)
+         """.stripMargin
+      )
+
+      checkCreateTable("ddl_test")
+    }
+  }
+
+  test("data source table CTAS") {
+    withTable("ddl_test") {
+      sql(
+        s"""CREATE TABLE ddl_test
+           |USING json
+           |AS SELECT 1 AS a, "foo" AS b
+         """.stripMargin
+      )
+
+      checkCreateTable("ddl_test")
+    }
+  }
+
+  test("partitioned data source table") {
+    withTable("ddl_test") {
+      sql(
+        s"""CREATE TABLE ddl_test
+           |USING json
+           |PARTITIONED BY (b)
+           |AS SELECT 1 AS a, "foo" AS b
+         """.stripMargin
+      )
+
+      checkCreateTable("ddl_test")
+    }
+  }
+
+  test("bucketed data source table") {
+    withTable("ddl_test") {
+      sql(
+        s"""CREATE TABLE ddl_test
+           |USING json
+           |CLUSTERED BY (a) SORTED BY (b) INTO 2 BUCKETS
+           |AS SELECT 1 AS a, "foo" AS b
+         """.stripMargin
+      )
+
+      checkCreateTable("ddl_test")
+    }
+  }
+
+  test("partitioned bucketed data source table") {
+    withTable("ddl_test") {
+      sql(
+        s"""CREATE TABLE ddl_test
+           |USING json
+           |PARTITIONED BY (c)
+           |CLUSTERED BY (a) SORTED BY (b) INTO 2 BUCKETS
+           |AS SELECT 1 AS a, "foo" AS b, 2.5 AS c
+         """.stripMargin
+      )
+
+      checkCreateTable("ddl_test")
+    }
+  }
+
+  test("data source table with a comment") {
+    withTable("ddl_test") {
+      sql(
+        s"""CREATE TABLE ddl_test
+           |USING json
+           |COMMENT 'This is a comment'
+           |AS SELECT 1 AS a, "foo" AS b, 2.5 AS c
+         """.stripMargin
+      )
+
+      checkCreateTable("ddl_test")
+    }
+  }
+
+  test("data source table with table properties") {
+    withTable("ddl_test") {
+      sql(
+        s"""CREATE TABLE ddl_test
+           |USING json
+           |TBLPROPERTIES ('a' = '1')
+           |AS SELECT 1 AS a, "foo" AS b, 2.5 AS c
+         """.stripMargin
+      )
+
+      checkCreateTable("ddl_test")
+    }
+  }
+
+  test("data source table using Dataset API") {
+    withTable("ddl_test") {
+      spark
+        .range(3)
+        .select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd, 'id as 'e)
+        .write
+        .mode("overwrite")
+        .partitionBy("a", "b")
+        .bucketBy(2, "c", "d")
+        .saveAsTable("ddl_test")
+
+      checkCreateTable("ddl_test")
+    }
+  }
+
+  test("view") {
+    withView("v1") {
+      sql("CREATE VIEW v1 AS SELECT 1 AS a")
+      checkCreateView("v1")
+    }
+  }
+
+  test("view with output columns") {
+    withView("v1") {
+      sql("CREATE VIEW v1 (b) AS SELECT 1 AS a")
+      checkCreateView("v1")
+    }
+  }
+
+  test("SPARK-24911: keep quotes for nested fields") {
+    withTable("t1") {
+      val createTable = "CREATE TABLE `t1` (`a` STRUCT<`b`: STRING>)"
+      sql(s"$createTable USING json")
+      val shownDDL = sql(s"SHOW CREATE TABLE t1")
+        .head()
+        .getString(0)
+        .split("\n")
+        .head
+      assert(shownDDL == createTable)
+
+      checkCreateTable("t1")
+    }
+  }
+
+  protected def checkCreateTable(table: String): Unit = {
+    checkCreateTableOrView(TableIdentifier(table, Some("default")), "TABLE")
+  }
+
+  protected def checkCreateView(table: String): Unit = {
+    checkCreateTableOrView(TableIdentifier(table, Some("default")), "VIEW")
+  }
+
+  private def checkCreateTableOrView(table: TableIdentifier, checkType: 
String): Unit = {
+    val db = table.database.getOrElse("default")
+    val expected = spark.sharedState.externalCatalog.getTable(db, table.table)
+    val shownDDL = sql(s"SHOW CREATE TABLE 
${table.quotedString}").head().getString(0)
+    sql(s"DROP $checkType ${table.quotedString}")
+
+    try {
+      sql(shownDDL)
+      val actual = spark.sharedState.externalCatalog.getTable(db, table.table)
+      checkCatalogTables(expected, actual)
+    } finally {
+      sql(s"DROP $checkType IF EXISTS ${table.table}")
+    }
+  }
+
+  private def checkCatalogTables(expected: CatalogTable, actual: 
CatalogTable): Unit = {
+    def normalize(table: CatalogTable): CatalogTable = {
+      val nondeterministicProps = Set(
+        "CreateTime",
+        "transient_lastDdlTime",
+        "grantTime",
+        "lastUpdateTime",
+        "last_modified_by",
+        "last_modified_time",
+        "Owner:",
+        // The following are hive specific schema parameters which we do not 
need to match exactly.
+        "totalNumberFiles",
+        "maxFileSize",
+        "minFileSize"
+      )
+
+      table.copy(
+        createTime = 0L,
+        lastAccessTime = 0L,
+        properties = 
table.properties.filterKeys(!nondeterministicProps.contains(_)),
+        stats = None,
+        ignoredProperties = Map.empty
+      )
+    }
+    assert(normalize(actual) == normalize(expected))
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/cc82b9fe/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
index 445161d..c1178ad 100644
--- 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
+++ 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
@@ -27,6 +27,7 @@ import scala.util.control.NonFatal
 
 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.{FileSystem, Path}
+import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.DDL_TIME
 import org.apache.hadoop.hive.ql.metadata.HiveException
 import org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_FORMAT
 import org.apache.thrift.TException
@@ -821,7 +822,8 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, 
hadoopConf: Configurat
       schema = reorderedSchema,
       partitionColumnNames = partColumnNames,
       bucketSpec = getBucketSpecFromTableProperties(table),
-      tracksPartitionsInCatalog = partitionProvider == 
Some(TABLE_PARTITION_PROVIDER_CATALOG))
+      tracksPartitionsInCatalog = partitionProvider == 
Some(TABLE_PARTITION_PROVIDER_CATALOG),
+      properties = 
table.properties.filterKeys(!HIVE_GENERATED_TABLE_PROPERTIES(_)))
   }
 
   override def tableExists(db: String, table: String): Boolean = withClient {
@@ -1328,6 +1330,7 @@ object HiveExternalCatalog {
 
   val CREATED_SPARK_VERSION = SPARK_SQL_PREFIX + "create.version"
 
+  val HIVE_GENERATED_TABLE_PROPERTIES = Set(DDL_TIME)
   val HIVE_GENERATED_STORAGE_PROPERTIES = Set(SERIALIZATION_FORMAT)
 
   // When storing data source tables in hive metastore, we need to set data 
schema to empty if the

http://git-wip-us.apache.org/repos/asf/spark/blob/cc82b9fe/sql/hive/src/test/resources/sample.json
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/sample.json 
b/sql/hive/src/test/resources/sample.json
deleted file mode 100644
index a2c2ffd..0000000
--- a/sql/hive/src/test/resources/sample.json
+++ /dev/null
@@ -1,2 +0,0 @@
-{"a" : "2" ,"b" : "blah", "c_!@(3)":1}
-{"<d>" : {"d!" : [4, 5], "=" : [{"Dd2": null}, {"Dd2" : true}]}}

http://git-wip-us.apache.org/repos/asf/spark/blob/cc82b9fe/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala
 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala
new file mode 100644
index 0000000..0386dc7
--- /dev/null
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala
@@ -0,0 +1,198 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.hive
+
+import org.apache.spark.sql.{AnalysisException, ShowCreateTableSuite}
+import org.apache.spark.sql.hive.test.TestHiveSingleton
+
+class HiveShowCreateTableSuite extends ShowCreateTableSuite with 
TestHiveSingleton {
+
+  test("simple hive table") {
+    withTable("t1") {
+      sql(
+        s"""CREATE TABLE t1 (
+           |  c1 INT COMMENT 'bla',
+           |  c2 STRING
+           |)
+           |TBLPROPERTIES (
+           |  'prop1' = 'value1',
+           |  'prop2' = 'value2'
+           |)
+         """.stripMargin
+      )
+
+      checkCreateTable("t1")
+    }
+  }
+
+  test("simple external hive table") {
+    withTempDir { dir =>
+      withTable("t1") {
+        sql(
+          s"""CREATE TABLE t1 (
+             |  c1 INT COMMENT 'bla',
+             |  c2 STRING
+             |)
+             |LOCATION '${dir.toURI}'
+             |TBLPROPERTIES (
+             |  'prop1' = 'value1',
+             |  'prop2' = 'value2'
+             |)
+           """.stripMargin
+        )
+
+        checkCreateTable("t1")
+      }
+    }
+  }
+
+  test("partitioned hive table") {
+    withTable("t1") {
+      sql(
+        s"""CREATE TABLE t1 (
+           |  c1 INT COMMENT 'bla',
+           |  c2 STRING
+           |)
+           |COMMENT 'bla'
+           |PARTITIONED BY (
+           |  p1 BIGINT COMMENT 'bla',
+           |  p2 STRING
+           |)
+         """.stripMargin
+      )
+
+      checkCreateTable("t1")
+    }
+  }
+
+  test("hive table with explicit storage info") {
+    withTable("t1") {
+      sql(
+        s"""CREATE TABLE t1 (
+           |  c1 INT COMMENT 'bla',
+           |  c2 STRING
+           |)
+           |ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
+           |COLLECTION ITEMS TERMINATED BY '@'
+           |MAP KEYS TERMINATED BY '#'
+           |NULL DEFINED AS 'NaN'
+         """.stripMargin
+      )
+
+      checkCreateTable("t1")
+    }
+  }
+
+  test("hive table with STORED AS clause") {
+    withTable("t1") {
+      sql(
+        s"""CREATE TABLE t1 (
+           |  c1 INT COMMENT 'bla',
+           |  c2 STRING
+           |)
+           |STORED AS PARQUET
+         """.stripMargin
+      )
+
+      checkCreateTable("t1")
+    }
+  }
+
+  test("hive table with serde info") {
+    withTable("t1") {
+      sql(
+        s"""CREATE TABLE t1 (
+           |  c1 INT COMMENT 'bla',
+           |  c2 STRING
+           |)
+           |ROW FORMAT SERDE 
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+           |WITH SERDEPROPERTIES (
+           |  'mapkey.delim' = ',',
+           |  'field.delim' = ','
+           |)
+           |STORED AS
+           |  INPUTFORMAT 
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+           |  OUTPUTFORMAT 
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+         """.stripMargin
+      )
+
+      checkCreateTable("t1")
+    }
+  }
+
+  test("hive bucketing is supported") {
+    withTable("t1") {
+      sql(
+        s"""CREATE TABLE t1 (a INT, b STRING)
+           |CLUSTERED BY (a)
+           |SORTED BY (b)
+           |INTO 2 BUCKETS
+         """.stripMargin
+      )
+      checkCreateTable("t1")
+    }
+  }
+
+  test("hive partitioned view is not supported") {
+    withTable("t1") {
+      withView("v1") {
+        sql(
+          s"""
+             |CREATE TABLE t1 (c1 INT, c2 STRING)
+             |PARTITIONED BY (
+             |  p1 BIGINT COMMENT 'bla',
+             |  p2 STRING )
+           """.stripMargin)
+
+        createRawHiveTable(
+          s"""
+             |CREATE VIEW v1
+             |PARTITIONED ON (p1, p2)
+             |AS SELECT * from t1
+           """.stripMargin
+        )
+
+        val cause = intercept[AnalysisException] {
+          sql("SHOW CREATE TABLE v1")
+        }
+
+        assert(cause.getMessage.contains(" - partitioned view"))
+      }
+    }
+  }
+
+  test("SPARK-24911: keep quotes for nested fields in hive") {
+    withTable("t1") {
+      val createTable = "CREATE TABLE `t1`(`a` STRUCT<`b`: STRING>)"
+      sql(createTable)
+      val shownDDL = sql(s"SHOW CREATE TABLE t1")
+        .head()
+        .getString(0)
+        .split("\n")
+        .head
+      assert(shownDDL == createTable)
+
+      checkCreateTable("t1")
+    }
+  }
+
+  private def createRawHiveTable(ddl: String): Unit = {
+    
hiveContext.sharedState.externalCatalog.unwrapped.asInstanceOf[HiveExternalCatalog]
+      .client.runSqlHive(ddl)
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/cc82b9fe/sql/hive/src/test/scala/org/apache/spark/sql/hive/ShowCreateTableSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ShowCreateTableSuite.scala 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ShowCreateTableSuite.scala
deleted file mode 100644
index 34ca790..0000000
--- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ShowCreateTableSuite.scala
+++ /dev/null
@@ -1,360 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.hive
-
-import org.apache.spark.sql.{AnalysisException, QueryTest}
-import org.apache.spark.sql.catalyst.TableIdentifier
-import org.apache.spark.sql.catalyst.catalog.CatalogTable
-import org.apache.spark.sql.hive.test.TestHiveSingleton
-import org.apache.spark.sql.test.SQLTestUtils
-import org.apache.spark.util.Utils
-
-class ShowCreateTableSuite extends QueryTest with SQLTestUtils with 
TestHiveSingleton {
-  import testImplicits._
-
-  test("data source table with user specified schema") {
-    withTable("ddl_test") {
-      val jsonFilePath = 
Utils.getSparkClassLoader.getResource("sample.json").getFile
-
-      sql(
-        s"""CREATE TABLE ddl_test (
-           |  a STRING,
-           |  b STRING,
-           |  `extra col` ARRAY<INT>,
-           |  `<another>` STRUCT<x: INT, y: ARRAY<BOOLEAN>>
-           |)
-           |USING json
-           |OPTIONS (
-           | PATH '$jsonFilePath'
-           |)
-         """.stripMargin
-      )
-
-      checkCreateTable("ddl_test")
-    }
-  }
-
-  test("data source table CTAS") {
-    withTable("ddl_test") {
-      sql(
-        s"""CREATE TABLE ddl_test
-           |USING json
-           |AS SELECT 1 AS a, "foo" AS b
-         """.stripMargin
-      )
-
-      checkCreateTable("ddl_test")
-    }
-  }
-
-  test("partitioned data source table") {
-    withTable("ddl_test") {
-      sql(
-        s"""CREATE TABLE ddl_test
-           |USING json
-           |PARTITIONED BY (b)
-           |AS SELECT 1 AS a, "foo" AS b
-         """.stripMargin
-      )
-
-      checkCreateTable("ddl_test")
-    }
-  }
-
-  test("bucketed data source table") {
-    withTable("ddl_test") {
-      sql(
-        s"""CREATE TABLE ddl_test
-           |USING json
-           |CLUSTERED BY (a) SORTED BY (b) INTO 2 BUCKETS
-           |AS SELECT 1 AS a, "foo" AS b
-         """.stripMargin
-      )
-
-      checkCreateTable("ddl_test")
-    }
-  }
-
-  test("partitioned bucketed data source table") {
-    withTable("ddl_test") {
-      sql(
-        s"""CREATE TABLE ddl_test
-           |USING json
-           |PARTITIONED BY (c)
-           |CLUSTERED BY (a) SORTED BY (b) INTO 2 BUCKETS
-           |AS SELECT 1 AS a, "foo" AS b, 2.5 AS c
-         """.stripMargin
-      )
-
-      checkCreateTable("ddl_test")
-    }
-  }
-
-  test("data source table using Dataset API") {
-    withTable("ddl_test") {
-      spark
-        .range(3)
-        .select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd, 'id as 'e)
-        .write
-        .mode("overwrite")
-        .partitionBy("a", "b")
-        .bucketBy(2, "c", "d")
-        .saveAsTable("ddl_test")
-
-      checkCreateTable("ddl_test")
-    }
-  }
-
-  test("simple hive table") {
-    withTable("t1") {
-      sql(
-        s"""CREATE TABLE t1 (
-           |  c1 INT COMMENT 'bla',
-           |  c2 STRING
-           |)
-           |TBLPROPERTIES (
-           |  'prop1' = 'value1',
-           |  'prop2' = 'value2'
-           |)
-         """.stripMargin
-      )
-
-      checkCreateTable("t1")
-    }
-  }
-
-  test("simple external hive table") {
-    withTempDir { dir =>
-      withTable("t1") {
-        sql(
-          s"""CREATE TABLE t1 (
-             |  c1 INT COMMENT 'bla',
-             |  c2 STRING
-             |)
-             |LOCATION '${dir.toURI}'
-             |TBLPROPERTIES (
-             |  'prop1' = 'value1',
-             |  'prop2' = 'value2'
-             |)
-           """.stripMargin
-        )
-
-        checkCreateTable("t1")
-      }
-    }
-  }
-
-  test("partitioned hive table") {
-    withTable("t1") {
-      sql(
-        s"""CREATE TABLE t1 (
-           |  c1 INT COMMENT 'bla',
-           |  c2 STRING
-           |)
-           |COMMENT 'bla'
-           |PARTITIONED BY (
-           |  p1 BIGINT COMMENT 'bla',
-           |  p2 STRING
-           |)
-         """.stripMargin
-      )
-
-      checkCreateTable("t1")
-    }
-  }
-
-  test("hive table with explicit storage info") {
-    withTable("t1") {
-      sql(
-        s"""CREATE TABLE t1 (
-           |  c1 INT COMMENT 'bla',
-           |  c2 STRING
-           |)
-           |ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
-           |COLLECTION ITEMS TERMINATED BY '@'
-           |MAP KEYS TERMINATED BY '#'
-           |NULL DEFINED AS 'NaN'
-         """.stripMargin
-      )
-
-      checkCreateTable("t1")
-    }
-  }
-
-  test("hive table with STORED AS clause") {
-    withTable("t1") {
-      sql(
-        s"""CREATE TABLE t1 (
-           |  c1 INT COMMENT 'bla',
-           |  c2 STRING
-           |)
-           |STORED AS PARQUET
-         """.stripMargin
-      )
-
-      checkCreateTable("t1")
-    }
-  }
-
-  test("hive table with serde info") {
-    withTable("t1") {
-      sql(
-        s"""CREATE TABLE t1 (
-           |  c1 INT COMMENT 'bla',
-           |  c2 STRING
-           |)
-           |ROW FORMAT SERDE 
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
-           |WITH SERDEPROPERTIES (
-           |  'mapkey.delim' = ',',
-           |  'field.delim' = ','
-           |)
-           |STORED AS
-           |  INPUTFORMAT 
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
-           |  OUTPUTFORMAT 
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
-         """.stripMargin
-      )
-
-      checkCreateTable("t1")
-    }
-  }
-
-  test("hive view") {
-    withView("v1") {
-      sql("CREATE VIEW v1 AS SELECT 1 AS a")
-      checkCreateView("v1")
-    }
-  }
-
-  test("hive view with output columns") {
-    withView("v1") {
-      sql("CREATE VIEW v1 (b) AS SELECT 1 AS a")
-      checkCreateView("v1")
-    }
-  }
-
-  test("hive bucketing is supported") {
-    withTable("t1") {
-      sql(
-        s"""CREATE TABLE t1 (a INT, b STRING)
-           |CLUSTERED BY (a)
-           |SORTED BY (b)
-           |INTO 2 BUCKETS
-         """.stripMargin
-      )
-      checkCreateTable("t1")
-    }
-  }
-
-  test("hive partitioned view is not supported") {
-    withTable("t1") {
-      withView("v1") {
-        sql(
-          s"""
-             |CREATE TABLE t1 (c1 INT, c2 STRING)
-             |PARTITIONED BY (
-             |  p1 BIGINT COMMENT 'bla',
-             |  p2 STRING )
-           """.stripMargin)
-
-        createRawHiveTable(
-          s"""
-             |CREATE VIEW v1
-             |PARTITIONED ON (p1, p2)
-             |AS SELECT * from t1
-           """.stripMargin
-        )
-
-        val cause = intercept[AnalysisException] {
-          sql("SHOW CREATE TABLE v1")
-        }
-
-        assert(cause.getMessage.contains(" - partitioned view"))
-      }
-    }
-  }
-
-  test("SPARK-24911: keep quotes for nested fields") {
-    withTable("t1") {
-      val createTable = "CREATE TABLE `t1`(`a` STRUCT<`b`: STRING>)"
-      sql(createTable)
-      val shownDDL = sql(s"SHOW CREATE TABLE t1")
-        .head()
-        .getString(0)
-        .split("\n")
-        .head
-      assert(shownDDL == createTable)
-
-      checkCreateTable("t1")
-    }
-  }
-
-  private def createRawHiveTable(ddl: String): Unit = {
-    
hiveContext.sharedState.externalCatalog.unwrapped.asInstanceOf[HiveExternalCatalog]
-      .client.runSqlHive(ddl)
-  }
-
-  private def checkCreateTable(table: String): Unit = {
-    checkCreateTableOrView(TableIdentifier(table, Some("default")), "TABLE")
-  }
-
-  private def checkCreateView(table: String): Unit = {
-    checkCreateTableOrView(TableIdentifier(table, Some("default")), "VIEW")
-  }
-
-  private def checkCreateTableOrView(table: TableIdentifier, checkType: 
String): Unit = {
-    val db = table.database.getOrElse("default")
-    val expected = spark.sharedState.externalCatalog.getTable(db, table.table)
-    val shownDDL = sql(s"SHOW CREATE TABLE 
${table.quotedString}").head().getString(0)
-    sql(s"DROP $checkType ${table.quotedString}")
-
-    try {
-      sql(shownDDL)
-      val actual = spark.sharedState.externalCatalog.getTable(db, table.table)
-      checkCatalogTables(expected, actual)
-    } finally {
-      sql(s"DROP $checkType IF EXISTS ${table.table}")
-    }
-  }
-
-  private def checkCatalogTables(expected: CatalogTable, actual: 
CatalogTable): Unit = {
-    def normalize(table: CatalogTable): CatalogTable = {
-      val nondeterministicProps = Set(
-        "CreateTime",
-        "transient_lastDdlTime",
-        "grantTime",
-        "lastUpdateTime",
-        "last_modified_by",
-        "last_modified_time",
-        "Owner:",
-        // The following are hive specific schema parameters which we do not 
need to match exactly.
-        "totalNumberFiles",
-        "maxFileSize",
-        "minFileSize"
-      )
-
-      table.copy(
-        createTime = 0L,
-        lastAccessTime = 0L,
-        properties = 
table.properties.filterKeys(!nondeterministicProps.contains(_)),
-        stats = None,
-        ignoredProperties = Map.empty
-      )
-    }
-    assert(normalize(actual) == normalize(expected))
-  }
-}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-25884][SQL] Add TBLPROPERTIES and COMMENT, and use LOCATION when SHOW CREATE TABLE.

Reply via email to