Repository: spark Updated Branches: refs/heads/master c9667aff4 -> cc82b9fed
[SPARK-25884][SQL] Add TBLPROPERTIES and COMMENT, and use LOCATION when SHOW CREATE TABLE. ## What changes were proposed in this pull request? When `SHOW CREATE TABLE` for Datasource tables, we are missing `TBLPROPERTIES` and `COMMENT`, and we should use `LOCATION` instead of path in `OPTION`. ## How was this patch tested? Splitted `ShowCreateTableSuite` to confirm to work with both `InMemoryCatalog` and `HiveExternalCatalog`, and added some tests. Closes #22892 from ueshin/issues/SPARK-25884/show_create_table. Authored-by: Takuya UESHIN <ues...@databricks.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/cc82b9fe Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/cc82b9fe Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/cc82b9fe Branch: refs/heads/master Commit: cc82b9fed857503448c9ae6bd74ee2fe1ba9ba0b Parents: c9667aff Author: Takuya UESHIN <ues...@databricks.com> Authored: Thu Nov 1 10:00:14 2018 +0800 Committer: Wenchen Fan <wenc...@databricks.com> Committed: Thu Nov 1 10:00:14 2018 +0800 ---------------------------------------------------------------------- .../spark/sql/execution/command/tables.scala | 36 +- sql/core/src/test/resources/sample.json | 2 + .../sql-tests/inputs/show-create-table.sql | 61 ++++ .../sql-tests/results/show-create-table.sql.out | 222 ++++++++++++ .../apache/spark/sql/ShowCreateTableSuite.scala | 229 ++++++++++++ .../spark/sql/hive/HiveExternalCatalog.scala | 5 +- sql/hive/src/test/resources/sample.json | 2 - .../sql/hive/HiveShowCreateTableSuite.scala | 198 ++++++++++ .../spark/sql/hive/ShowCreateTableSuite.scala | 360 ------------------- 9 files changed, 734 insertions(+), 381 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/cc82b9fe/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index 64831e5..871eba4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -957,9 +957,11 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableComman builder ++= metadata.viewText.mkString(" AS\n", "", "\n") } else { showHiveTableHeader(metadata, builder) + showTableComment(metadata, builder) showHiveTableNonDataColumns(metadata, builder) showHiveTableStorageInfo(metadata, builder) - showHiveTableProperties(metadata, builder) + showTableLocation(metadata, builder) + showTableProperties(metadata, builder) } builder.toString() @@ -973,14 +975,8 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableComman if (columns.nonEmpty) { builder ++= columns.mkString("(", ", ", ")\n") } - - metadata - .comment - .map("COMMENT '" + escapeSingleQuotedString(_) + "'\n") - .foreach(builder.append) } - private def showHiveTableNonDataColumns(metadata: CatalogTable, builder: StringBuilder): Unit = { if (metadata.partitionColumnNames.nonEmpty) { val partCols = metadata.partitionSchema.map(_.toDDL) @@ -1023,15 +1019,24 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableComman builder ++= s" OUTPUTFORMAT '${escapeSingleQuotedString(format)}'\n" } } + } + private def showTableLocation(metadata: CatalogTable, builder: StringBuilder): Unit = { if (metadata.tableType == EXTERNAL) { - storage.locationUri.foreach { uri => - builder ++= s"LOCATION '$uri'\n" + metadata.storage.locationUri.foreach { location => + builder ++= s"LOCATION '${escapeSingleQuotedString(CatalogUtils.URIToString(location))}'\n" } } } - private def showHiveTableProperties(metadata: CatalogTable, builder: StringBuilder): Unit = { + private def showTableComment(metadata: CatalogTable, builder: StringBuilder): Unit = { + metadata + .comment + .map("COMMENT '" + escapeSingleQuotedString(_) + "'\n") + .foreach(builder.append) + } + + private def showTableProperties(metadata: CatalogTable, builder: StringBuilder): Unit = { if (metadata.properties.nonEmpty) { val props = metadata.properties.map { case (key, value) => s"'${escapeSingleQuotedString(key)}' = '${escapeSingleQuotedString(value)}'" @@ -1048,6 +1053,9 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableComman showDataSourceTableDataColumns(metadata, builder) showDataSourceTableOptions(metadata, builder) showDataSourceTableNonDataColumns(metadata, builder) + showTableComment(metadata, builder) + showTableLocation(metadata, builder) + showTableProperties(metadata, builder) builder.toString() } @@ -1063,14 +1071,6 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableComman val dataSourceOptions = metadata.storage.properties.map { case (key, value) => s"${quoteIdentifier(key)} '${escapeSingleQuotedString(value)}'" - } ++ metadata.storage.locationUri.flatMap { location => - if (metadata.tableType == MANAGED) { - // If it's a managed table, omit PATH option. Spark SQL always creates external table - // when the table creation DDL contains the PATH option. - None - } else { - Some(s"path '${escapeSingleQuotedString(CatalogUtils.URIToString(location))}'") - } } if (dataSourceOptions.nonEmpty) { http://git-wip-us.apache.org/repos/asf/spark/blob/cc82b9fe/sql/core/src/test/resources/sample.json ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/sample.json b/sql/core/src/test/resources/sample.json new file mode 100644 index 0000000..a2c2ffd --- /dev/null +++ b/sql/core/src/test/resources/sample.json @@ -0,0 +1,2 @@ +{"a" : "2" ,"b" : "blah", "c_!@(3)":1} +{"<d>" : {"d!" : [4, 5], "=" : [{"Dd2": null}, {"Dd2" : true}]}} http://git-wip-us.apache.org/repos/asf/spark/blob/cc82b9fe/sql/core/src/test/resources/sql-tests/inputs/show-create-table.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/sql-tests/inputs/show-create-table.sql b/sql/core/src/test/resources/sql-tests/inputs/show-create-table.sql new file mode 100644 index 0000000..852bfbd --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/show-create-table.sql @@ -0,0 +1,61 @@ +-- simple +CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet; + +SHOW CREATE TABLE tbl; +DROP TABLE tbl; + + +-- options +CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet +OPTIONS ('a' 1); + +SHOW CREATE TABLE tbl; +DROP TABLE tbl; + + +-- path option +CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet +OPTIONS ('path' '/path/to/table'); + +SHOW CREATE TABLE tbl; +DROP TABLE tbl; + + +-- location +CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet +LOCATION '/path/to/table'; + +SHOW CREATE TABLE tbl; +DROP TABLE tbl; + + +-- partition by +CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet +PARTITIONED BY (a); + +SHOW CREATE TABLE tbl; +DROP TABLE tbl; + + +-- clustered by +CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet +CLUSTERED BY (a) SORTED BY (b ASC) INTO 2 BUCKETS; + +SHOW CREATE TABLE tbl; +DROP TABLE tbl; + + +-- comment +CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet +COMMENT 'This is a comment'; + +SHOW CREATE TABLE tbl; +DROP TABLE tbl; + + +-- tblproperties +CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet +TBLPROPERTIES ('a' = '1'); + +SHOW CREATE TABLE tbl; +DROP TABLE tbl; http://git-wip-us.apache.org/repos/asf/spark/blob/cc82b9fe/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out b/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out new file mode 100644 index 0000000..1faf16c --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out @@ -0,0 +1,222 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 24 + + +-- !query 0 +CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet +-- !query 0 schema +struct<> +-- !query 0 output + + + +-- !query 1 +SHOW CREATE TABLE tbl +-- !query 1 schema +struct<createtab_stmt:string> +-- !query 1 output +CREATE TABLE `tbl` (`a` INT, `b` STRING, `c` INT) +USING parquet + + +-- !query 2 +DROP TABLE tbl +-- !query 2 schema +struct<> +-- !query 2 output + + + +-- !query 3 +CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet +OPTIONS ('a' 1) +-- !query 3 schema +struct<> +-- !query 3 output + + + +-- !query 4 +SHOW CREATE TABLE tbl +-- !query 4 schema +struct<createtab_stmt:string> +-- !query 4 output +CREATE TABLE `tbl` (`a` INT, `b` STRING, `c` INT) +USING parquet +OPTIONS ( + `a` '1' +) + + +-- !query 5 +DROP TABLE tbl +-- !query 5 schema +struct<> +-- !query 5 output + + + +-- !query 6 +CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet +OPTIONS ('path' '/path/to/table') +-- !query 6 schema +struct<> +-- !query 6 output + + + +-- !query 7 +SHOW CREATE TABLE tbl +-- !query 7 schema +struct<createtab_stmt:string> +-- !query 7 output +CREATE TABLE `tbl` (`a` INT, `b` STRING, `c` INT) +USING parquet +LOCATION 'file:/path/to/table' + + +-- !query 8 +DROP TABLE tbl +-- !query 8 schema +struct<> +-- !query 8 output + + + +-- !query 9 +CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet +LOCATION '/path/to/table' +-- !query 9 schema +struct<> +-- !query 9 output + + + +-- !query 10 +SHOW CREATE TABLE tbl +-- !query 10 schema +struct<createtab_stmt:string> +-- !query 10 output +CREATE TABLE `tbl` (`a` INT, `b` STRING, `c` INT) +USING parquet +LOCATION 'file:/path/to/table' + + +-- !query 11 +DROP TABLE tbl +-- !query 11 schema +struct<> +-- !query 11 output + + + +-- !query 12 +CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet +PARTITIONED BY (a) +-- !query 12 schema +struct<> +-- !query 12 output + + + +-- !query 13 +SHOW CREATE TABLE tbl +-- !query 13 schema +struct<createtab_stmt:string> +-- !query 13 output +CREATE TABLE `tbl` (`b` STRING, `c` INT, `a` INT) +USING parquet +PARTITIONED BY (a) + + +-- !query 14 +DROP TABLE tbl +-- !query 14 schema +struct<> +-- !query 14 output + + + +-- !query 15 +CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet +CLUSTERED BY (a) SORTED BY (b ASC) INTO 2 BUCKETS +-- !query 15 schema +struct<> +-- !query 15 output + + + +-- !query 16 +SHOW CREATE TABLE tbl +-- !query 16 schema +struct<createtab_stmt:string> +-- !query 16 output +CREATE TABLE `tbl` (`a` INT, `b` STRING, `c` INT) +USING parquet +CLUSTERED BY (a) +SORTED BY (b) +INTO 2 BUCKETS + + +-- !query 17 +DROP TABLE tbl +-- !query 17 schema +struct<> +-- !query 17 output + + + +-- !query 18 +CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet +COMMENT 'This is a comment' +-- !query 18 schema +struct<> +-- !query 18 output + + + +-- !query 19 +SHOW CREATE TABLE tbl +-- !query 19 schema +struct<createtab_stmt:string> +-- !query 19 output +CREATE TABLE `tbl` (`a` INT, `b` STRING, `c` INT) +USING parquet +COMMENT 'This is a comment' + + +-- !query 20 +DROP TABLE tbl +-- !query 20 schema +struct<> +-- !query 20 output + + + +-- !query 21 +CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet +TBLPROPERTIES ('a' = '1') +-- !query 21 schema +struct<> +-- !query 21 output + + + +-- !query 22 +SHOW CREATE TABLE tbl +-- !query 22 schema +struct<createtab_stmt:string> +-- !query 22 output +CREATE TABLE `tbl` (`a` INT, `b` STRING, `c` INT) +USING parquet +TBLPROPERTIES ( + 'a' = '1' +) + + +-- !query 23 +DROP TABLE tbl +-- !query 23 schema +struct<> +-- !query 23 output + http://git-wip-us.apache.org/repos/asf/spark/blob/cc82b9fe/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala new file mode 100644 index 0000000..5c347d2 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala @@ -0,0 +1,229 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.catalog.CatalogTable +import org.apache.spark.sql.test.{SharedSQLContext, SQLTestUtils} +import org.apache.spark.util.Utils + +class SimpleShowCreateTableSuite extends ShowCreateTableSuite with SharedSQLContext + +abstract class ShowCreateTableSuite extends QueryTest with SQLTestUtils { + import testImplicits._ + + test("data source table with user specified schema") { + withTable("ddl_test") { + val jsonFilePath = Utils.getSparkClassLoader.getResource("sample.json").getFile + + sql( + s"""CREATE TABLE ddl_test ( + | a STRING, + | b STRING, + | `extra col` ARRAY<INT>, + | `<another>` STRUCT<x: INT, y: ARRAY<BOOLEAN>> + |) + |USING json + |OPTIONS ( + | PATH '$jsonFilePath' + |) + """.stripMargin + ) + + checkCreateTable("ddl_test") + } + } + + test("data source table CTAS") { + withTable("ddl_test") { + sql( + s"""CREATE TABLE ddl_test + |USING json + |AS SELECT 1 AS a, "foo" AS b + """.stripMargin + ) + + checkCreateTable("ddl_test") + } + } + + test("partitioned data source table") { + withTable("ddl_test") { + sql( + s"""CREATE TABLE ddl_test + |USING json + |PARTITIONED BY (b) + |AS SELECT 1 AS a, "foo" AS b + """.stripMargin + ) + + checkCreateTable("ddl_test") + } + } + + test("bucketed data source table") { + withTable("ddl_test") { + sql( + s"""CREATE TABLE ddl_test + |USING json + |CLUSTERED BY (a) SORTED BY (b) INTO 2 BUCKETS + |AS SELECT 1 AS a, "foo" AS b + """.stripMargin + ) + + checkCreateTable("ddl_test") + } + } + + test("partitioned bucketed data source table") { + withTable("ddl_test") { + sql( + s"""CREATE TABLE ddl_test + |USING json + |PARTITIONED BY (c) + |CLUSTERED BY (a) SORTED BY (b) INTO 2 BUCKETS + |AS SELECT 1 AS a, "foo" AS b, 2.5 AS c + """.stripMargin + ) + + checkCreateTable("ddl_test") + } + } + + test("data source table with a comment") { + withTable("ddl_test") { + sql( + s"""CREATE TABLE ddl_test + |USING json + |COMMENT 'This is a comment' + |AS SELECT 1 AS a, "foo" AS b, 2.5 AS c + """.stripMargin + ) + + checkCreateTable("ddl_test") + } + } + + test("data source table with table properties") { + withTable("ddl_test") { + sql( + s"""CREATE TABLE ddl_test + |USING json + |TBLPROPERTIES ('a' = '1') + |AS SELECT 1 AS a, "foo" AS b, 2.5 AS c + """.stripMargin + ) + + checkCreateTable("ddl_test") + } + } + + test("data source table using Dataset API") { + withTable("ddl_test") { + spark + .range(3) + .select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd, 'id as 'e) + .write + .mode("overwrite") + .partitionBy("a", "b") + .bucketBy(2, "c", "d") + .saveAsTable("ddl_test") + + checkCreateTable("ddl_test") + } + } + + test("view") { + withView("v1") { + sql("CREATE VIEW v1 AS SELECT 1 AS a") + checkCreateView("v1") + } + } + + test("view with output columns") { + withView("v1") { + sql("CREATE VIEW v1 (b) AS SELECT 1 AS a") + checkCreateView("v1") + } + } + + test("SPARK-24911: keep quotes for nested fields") { + withTable("t1") { + val createTable = "CREATE TABLE `t1` (`a` STRUCT<`b`: STRING>)" + sql(s"$createTable USING json") + val shownDDL = sql(s"SHOW CREATE TABLE t1") + .head() + .getString(0) + .split("\n") + .head + assert(shownDDL == createTable) + + checkCreateTable("t1") + } + } + + protected def checkCreateTable(table: String): Unit = { + checkCreateTableOrView(TableIdentifier(table, Some("default")), "TABLE") + } + + protected def checkCreateView(table: String): Unit = { + checkCreateTableOrView(TableIdentifier(table, Some("default")), "VIEW") + } + + private def checkCreateTableOrView(table: TableIdentifier, checkType: String): Unit = { + val db = table.database.getOrElse("default") + val expected = spark.sharedState.externalCatalog.getTable(db, table.table) + val shownDDL = sql(s"SHOW CREATE TABLE ${table.quotedString}").head().getString(0) + sql(s"DROP $checkType ${table.quotedString}") + + try { + sql(shownDDL) + val actual = spark.sharedState.externalCatalog.getTable(db, table.table) + checkCatalogTables(expected, actual) + } finally { + sql(s"DROP $checkType IF EXISTS ${table.table}") + } + } + + private def checkCatalogTables(expected: CatalogTable, actual: CatalogTable): Unit = { + def normalize(table: CatalogTable): CatalogTable = { + val nondeterministicProps = Set( + "CreateTime", + "transient_lastDdlTime", + "grantTime", + "lastUpdateTime", + "last_modified_by", + "last_modified_time", + "Owner:", + // The following are hive specific schema parameters which we do not need to match exactly. + "totalNumberFiles", + "maxFileSize", + "minFileSize" + ) + + table.copy( + createTime = 0L, + lastAccessTime = 0L, + properties = table.properties.filterKeys(!nondeterministicProps.contains(_)), + stats = None, + ignoredProperties = Map.empty + ) + } + assert(normalize(actual) == normalize(expected)) + } +} http://git-wip-us.apache.org/repos/asf/spark/blob/cc82b9fe/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index 445161d..c1178ad 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -27,6 +27,7 @@ import scala.util.control.NonFatal import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} +import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.DDL_TIME import org.apache.hadoop.hive.ql.metadata.HiveException import org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_FORMAT import org.apache.thrift.TException @@ -821,7 +822,8 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat schema = reorderedSchema, partitionColumnNames = partColumnNames, bucketSpec = getBucketSpecFromTableProperties(table), - tracksPartitionsInCatalog = partitionProvider == Some(TABLE_PARTITION_PROVIDER_CATALOG)) + tracksPartitionsInCatalog = partitionProvider == Some(TABLE_PARTITION_PROVIDER_CATALOG), + properties = table.properties.filterKeys(!HIVE_GENERATED_TABLE_PROPERTIES(_))) } override def tableExists(db: String, table: String): Boolean = withClient { @@ -1328,6 +1330,7 @@ object HiveExternalCatalog { val CREATED_SPARK_VERSION = SPARK_SQL_PREFIX + "create.version" + val HIVE_GENERATED_TABLE_PROPERTIES = Set(DDL_TIME) val HIVE_GENERATED_STORAGE_PROPERTIES = Set(SERIALIZATION_FORMAT) // When storing data source tables in hive metastore, we need to set data schema to empty if the http://git-wip-us.apache.org/repos/asf/spark/blob/cc82b9fe/sql/hive/src/test/resources/sample.json ---------------------------------------------------------------------- diff --git a/sql/hive/src/test/resources/sample.json b/sql/hive/src/test/resources/sample.json deleted file mode 100644 index a2c2ffd..0000000 --- a/sql/hive/src/test/resources/sample.json +++ /dev/null @@ -1,2 +0,0 @@ -{"a" : "2" ,"b" : "blah", "c_!@(3)":1} -{"<d>" : {"d!" : [4, 5], "=" : [{"Dd2": null}, {"Dd2" : true}]}} http://git-wip-us.apache.org/repos/asf/spark/blob/cc82b9fe/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala new file mode 100644 index 0000000..0386dc7 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala @@ -0,0 +1,198 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive + +import org.apache.spark.sql.{AnalysisException, ShowCreateTableSuite} +import org.apache.spark.sql.hive.test.TestHiveSingleton + +class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSingleton { + + test("simple hive table") { + withTable("t1") { + sql( + s"""CREATE TABLE t1 ( + | c1 INT COMMENT 'bla', + | c2 STRING + |) + |TBLPROPERTIES ( + | 'prop1' = 'value1', + | 'prop2' = 'value2' + |) + """.stripMargin + ) + + checkCreateTable("t1") + } + } + + test("simple external hive table") { + withTempDir { dir => + withTable("t1") { + sql( + s"""CREATE TABLE t1 ( + | c1 INT COMMENT 'bla', + | c2 STRING + |) + |LOCATION '${dir.toURI}' + |TBLPROPERTIES ( + | 'prop1' = 'value1', + | 'prop2' = 'value2' + |) + """.stripMargin + ) + + checkCreateTable("t1") + } + } + } + + test("partitioned hive table") { + withTable("t1") { + sql( + s"""CREATE TABLE t1 ( + | c1 INT COMMENT 'bla', + | c2 STRING + |) + |COMMENT 'bla' + |PARTITIONED BY ( + | p1 BIGINT COMMENT 'bla', + | p2 STRING + |) + """.stripMargin + ) + + checkCreateTable("t1") + } + } + + test("hive table with explicit storage info") { + withTable("t1") { + sql( + s"""CREATE TABLE t1 ( + | c1 INT COMMENT 'bla', + | c2 STRING + |) + |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' + |COLLECTION ITEMS TERMINATED BY '@' + |MAP KEYS TERMINATED BY '#' + |NULL DEFINED AS 'NaN' + """.stripMargin + ) + + checkCreateTable("t1") + } + } + + test("hive table with STORED AS clause") { + withTable("t1") { + sql( + s"""CREATE TABLE t1 ( + | c1 INT COMMENT 'bla', + | c2 STRING + |) + |STORED AS PARQUET + """.stripMargin + ) + + checkCreateTable("t1") + } + } + + test("hive table with serde info") { + withTable("t1") { + sql( + s"""CREATE TABLE t1 ( + | c1 INT COMMENT 'bla', + | c2 STRING + |) + |ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' + |WITH SERDEPROPERTIES ( + | 'mapkey.delim' = ',', + | 'field.delim' = ',' + |) + |STORED AS + | INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' + | OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' + """.stripMargin + ) + + checkCreateTable("t1") + } + } + + test("hive bucketing is supported") { + withTable("t1") { + sql( + s"""CREATE TABLE t1 (a INT, b STRING) + |CLUSTERED BY (a) + |SORTED BY (b) + |INTO 2 BUCKETS + """.stripMargin + ) + checkCreateTable("t1") + } + } + + test("hive partitioned view is not supported") { + withTable("t1") { + withView("v1") { + sql( + s""" + |CREATE TABLE t1 (c1 INT, c2 STRING) + |PARTITIONED BY ( + | p1 BIGINT COMMENT 'bla', + | p2 STRING ) + """.stripMargin) + + createRawHiveTable( + s""" + |CREATE VIEW v1 + |PARTITIONED ON (p1, p2) + |AS SELECT * from t1 + """.stripMargin + ) + + val cause = intercept[AnalysisException] { + sql("SHOW CREATE TABLE v1") + } + + assert(cause.getMessage.contains(" - partitioned view")) + } + } + } + + test("SPARK-24911: keep quotes for nested fields in hive") { + withTable("t1") { + val createTable = "CREATE TABLE `t1`(`a` STRUCT<`b`: STRING>)" + sql(createTable) + val shownDDL = sql(s"SHOW CREATE TABLE t1") + .head() + .getString(0) + .split("\n") + .head + assert(shownDDL == createTable) + + checkCreateTable("t1") + } + } + + private def createRawHiveTable(ddl: String): Unit = { + hiveContext.sharedState.externalCatalog.unwrapped.asInstanceOf[HiveExternalCatalog] + .client.runSqlHive(ddl) + } +} http://git-wip-us.apache.org/repos/asf/spark/blob/cc82b9fe/sql/hive/src/test/scala/org/apache/spark/sql/hive/ShowCreateTableSuite.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ShowCreateTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ShowCreateTableSuite.scala deleted file mode 100644 index 34ca790..0000000 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ShowCreateTableSuite.scala +++ /dev/null @@ -1,360 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.hive - -import org.apache.spark.sql.{AnalysisException, QueryTest} -import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.catalog.CatalogTable -import org.apache.spark.sql.hive.test.TestHiveSingleton -import org.apache.spark.sql.test.SQLTestUtils -import org.apache.spark.util.Utils - -class ShowCreateTableSuite extends QueryTest with SQLTestUtils with TestHiveSingleton { - import testImplicits._ - - test("data source table with user specified schema") { - withTable("ddl_test") { - val jsonFilePath = Utils.getSparkClassLoader.getResource("sample.json").getFile - - sql( - s"""CREATE TABLE ddl_test ( - | a STRING, - | b STRING, - | `extra col` ARRAY<INT>, - | `<another>` STRUCT<x: INT, y: ARRAY<BOOLEAN>> - |) - |USING json - |OPTIONS ( - | PATH '$jsonFilePath' - |) - """.stripMargin - ) - - checkCreateTable("ddl_test") - } - } - - test("data source table CTAS") { - withTable("ddl_test") { - sql( - s"""CREATE TABLE ddl_test - |USING json - |AS SELECT 1 AS a, "foo" AS b - """.stripMargin - ) - - checkCreateTable("ddl_test") - } - } - - test("partitioned data source table") { - withTable("ddl_test") { - sql( - s"""CREATE TABLE ddl_test - |USING json - |PARTITIONED BY (b) - |AS SELECT 1 AS a, "foo" AS b - """.stripMargin - ) - - checkCreateTable("ddl_test") - } - } - - test("bucketed data source table") { - withTable("ddl_test") { - sql( - s"""CREATE TABLE ddl_test - |USING json - |CLUSTERED BY (a) SORTED BY (b) INTO 2 BUCKETS - |AS SELECT 1 AS a, "foo" AS b - """.stripMargin - ) - - checkCreateTable("ddl_test") - } - } - - test("partitioned bucketed data source table") { - withTable("ddl_test") { - sql( - s"""CREATE TABLE ddl_test - |USING json - |PARTITIONED BY (c) - |CLUSTERED BY (a) SORTED BY (b) INTO 2 BUCKETS - |AS SELECT 1 AS a, "foo" AS b, 2.5 AS c - """.stripMargin - ) - - checkCreateTable("ddl_test") - } - } - - test("data source table using Dataset API") { - withTable("ddl_test") { - spark - .range(3) - .select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd, 'id as 'e) - .write - .mode("overwrite") - .partitionBy("a", "b") - .bucketBy(2, "c", "d") - .saveAsTable("ddl_test") - - checkCreateTable("ddl_test") - } - } - - test("simple hive table") { - withTable("t1") { - sql( - s"""CREATE TABLE t1 ( - | c1 INT COMMENT 'bla', - | c2 STRING - |) - |TBLPROPERTIES ( - | 'prop1' = 'value1', - | 'prop2' = 'value2' - |) - """.stripMargin - ) - - checkCreateTable("t1") - } - } - - test("simple external hive table") { - withTempDir { dir => - withTable("t1") { - sql( - s"""CREATE TABLE t1 ( - | c1 INT COMMENT 'bla', - | c2 STRING - |) - |LOCATION '${dir.toURI}' - |TBLPROPERTIES ( - | 'prop1' = 'value1', - | 'prop2' = 'value2' - |) - """.stripMargin - ) - - checkCreateTable("t1") - } - } - } - - test("partitioned hive table") { - withTable("t1") { - sql( - s"""CREATE TABLE t1 ( - | c1 INT COMMENT 'bla', - | c2 STRING - |) - |COMMENT 'bla' - |PARTITIONED BY ( - | p1 BIGINT COMMENT 'bla', - | p2 STRING - |) - """.stripMargin - ) - - checkCreateTable("t1") - } - } - - test("hive table with explicit storage info") { - withTable("t1") { - sql( - s"""CREATE TABLE t1 ( - | c1 INT COMMENT 'bla', - | c2 STRING - |) - |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' - |COLLECTION ITEMS TERMINATED BY '@' - |MAP KEYS TERMINATED BY '#' - |NULL DEFINED AS 'NaN' - """.stripMargin - ) - - checkCreateTable("t1") - } - } - - test("hive table with STORED AS clause") { - withTable("t1") { - sql( - s"""CREATE TABLE t1 ( - | c1 INT COMMENT 'bla', - | c2 STRING - |) - |STORED AS PARQUET - """.stripMargin - ) - - checkCreateTable("t1") - } - } - - test("hive table with serde info") { - withTable("t1") { - sql( - s"""CREATE TABLE t1 ( - | c1 INT COMMENT 'bla', - | c2 STRING - |) - |ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' - |WITH SERDEPROPERTIES ( - | 'mapkey.delim' = ',', - | 'field.delim' = ',' - |) - |STORED AS - | INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' - | OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' - """.stripMargin - ) - - checkCreateTable("t1") - } - } - - test("hive view") { - withView("v1") { - sql("CREATE VIEW v1 AS SELECT 1 AS a") - checkCreateView("v1") - } - } - - test("hive view with output columns") { - withView("v1") { - sql("CREATE VIEW v1 (b) AS SELECT 1 AS a") - checkCreateView("v1") - } - } - - test("hive bucketing is supported") { - withTable("t1") { - sql( - s"""CREATE TABLE t1 (a INT, b STRING) - |CLUSTERED BY (a) - |SORTED BY (b) - |INTO 2 BUCKETS - """.stripMargin - ) - checkCreateTable("t1") - } - } - - test("hive partitioned view is not supported") { - withTable("t1") { - withView("v1") { - sql( - s""" - |CREATE TABLE t1 (c1 INT, c2 STRING) - |PARTITIONED BY ( - | p1 BIGINT COMMENT 'bla', - | p2 STRING ) - """.stripMargin) - - createRawHiveTable( - s""" - |CREATE VIEW v1 - |PARTITIONED ON (p1, p2) - |AS SELECT * from t1 - """.stripMargin - ) - - val cause = intercept[AnalysisException] { - sql("SHOW CREATE TABLE v1") - } - - assert(cause.getMessage.contains(" - partitioned view")) - } - } - } - - test("SPARK-24911: keep quotes for nested fields") { - withTable("t1") { - val createTable = "CREATE TABLE `t1`(`a` STRUCT<`b`: STRING>)" - sql(createTable) - val shownDDL = sql(s"SHOW CREATE TABLE t1") - .head() - .getString(0) - .split("\n") - .head - assert(shownDDL == createTable) - - checkCreateTable("t1") - } - } - - private def createRawHiveTable(ddl: String): Unit = { - hiveContext.sharedState.externalCatalog.unwrapped.asInstanceOf[HiveExternalCatalog] - .client.runSqlHive(ddl) - } - - private def checkCreateTable(table: String): Unit = { - checkCreateTableOrView(TableIdentifier(table, Some("default")), "TABLE") - } - - private def checkCreateView(table: String): Unit = { - checkCreateTableOrView(TableIdentifier(table, Some("default")), "VIEW") - } - - private def checkCreateTableOrView(table: TableIdentifier, checkType: String): Unit = { - val db = table.database.getOrElse("default") - val expected = spark.sharedState.externalCatalog.getTable(db, table.table) - val shownDDL = sql(s"SHOW CREATE TABLE ${table.quotedString}").head().getString(0) - sql(s"DROP $checkType ${table.quotedString}") - - try { - sql(shownDDL) - val actual = spark.sharedState.externalCatalog.getTable(db, table.table) - checkCatalogTables(expected, actual) - } finally { - sql(s"DROP $checkType IF EXISTS ${table.table}") - } - } - - private def checkCatalogTables(expected: CatalogTable, actual: CatalogTable): Unit = { - def normalize(table: CatalogTable): CatalogTable = { - val nondeterministicProps = Set( - "CreateTime", - "transient_lastDdlTime", - "grantTime", - "lastUpdateTime", - "last_modified_by", - "last_modified_time", - "Owner:", - // The following are hive specific schema parameters which we do not need to match exactly. - "totalNumberFiles", - "maxFileSize", - "minFileSize" - ) - - table.copy( - createTime = 0L, - lastAccessTime = 0L, - properties = table.properties.filterKeys(!nondeterministicProps.contains(_)), - stats = None, - ignoredProperties = Map.empty - ) - } - assert(normalize(actual) == normalize(expected)) - } -} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org