Repository: spark Updated Branches: refs/heads/master e02ac303c -> 6e45b547c
[SPARK-19115][SQL] Supporting Create Table Like Location What changes were proposed in this pull request? Support CREATE [EXTERNAL] TABLE LIKE LOCATION... syntax for Hive serde and datasource tables. In this PR, we follow SparkSQL design rules: supporting CREATE TABLE LIKE a view, a physical table, or a temporary view with a location. When creating a table with a location, this table will be an external table rather than a managed table. How was this patch tested? Add new test cases and update existing test cases Author: ouyangxiaochen <ou.yangxiaoc...@zte.com.cn> Closes #16868 from ouyangxiaochen/spark19115. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6e45b547 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6e45b547 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6e45b547 Branch: refs/heads/master Commit: 6e45b547ceadbbe8394bf149945b7942df82660a Parents: e02ac30 Author: ouyangxiaochen <ou.yangxiaoc...@zte.com.cn> Authored: Mon Feb 13 19:41:44 2017 -0800 Committer: Xiao Li <gatorsm...@gmail.com> Committed: Mon Feb 13 19:41:44 2017 -0800 ---------------------------------------------------------------------- .../apache/spark/sql/catalyst/parser/SqlBase.g4 | 2 +- .../spark/sql/execution/SparkSqlParser.scala | 5 +- .../spark/sql/execution/command/tables.scala | 14 +- .../spark/sql/hive/HiveDDLCommandSuite.scala | 32 ++++- .../spark/sql/hive/execution/HiveDDLSuite.scala | 143 +++++++++++++++---- 5 files changed, 159 insertions(+), 37 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/6e45b547/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index
c95c1f5..3123998 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -82,7 +82,7 @@ statement (TBLPROPERTIES tablePropertyList)? (AS? query)? #createHiveTable | CREATE TABLE (IF NOT EXISTS)? target=tableIdentifier - LIKE source=tableIdentifier #createTableLike + LIKE source=tableIdentifier locationSpec? #createTableLike | ANALYZE TABLE tableIdentifier partitionSpec? COMPUTE STATISTICS (identifier | FOR COLUMNS identifierSeq)? #analyze | ALTER (TABLE | VIEW) from=tableIdentifier http://git-wip-us.apache.org/repos/asf/spark/blob/6e45b547/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 41768d4..ca76a10 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -1141,13 +1141,14 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { * For example: * {{{ * CREATE TABLE [IF NOT EXISTS] [db_name.]table_name - * LIKE [other_db_name.]existing_table_name + * LIKE [other_db_name.]existing_table_name [locationSpec] * }}} */ override def visitCreateTableLike(ctx: CreateTableLikeContext): LogicalPlan = withOrigin(ctx) { val targetTable = visitTableIdentifier(ctx.target) val sourceTable = visitTableIdentifier(ctx.source) - CreateTableLikeCommand(targetTable, sourceTable, ctx.EXISTS != null) + val location = Option(ctx.locationSpec).map(visitLocationSpec) + CreateTableLikeCommand(targetTable, sourceTable, location, ctx.EXISTS != null) } /** 
http://git-wip-us.apache.org/repos/asf/spark/blob/6e45b547/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index bc4b5b6..d646a21 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -42,7 +42,7 @@ import org.apache.spark.sql.types._ import org.apache.spark.util.Utils /** - * A command to create a MANAGED table with the same definition of the given existing table. + * A command to create a table with the same definition of the given existing table. * In the target table definition, the table comment is always empty but the column comments * are identical to the ones defined in the source table. * @@ -52,12 +52,13 @@ import org.apache.spark.util.Utils * The syntax of using this command in SQL is: * {{{ * CREATE TABLE [IF NOT EXISTS] [db_name.]table_name - * LIKE [other_db_name.]existing_table_name + * LIKE [other_db_name.]existing_table_name [locationSpec] * }}} */ case class CreateTableLikeCommand( targetTable: TableIdentifier, sourceTable: TableIdentifier, + location: Option[String], ifNotExists: Boolean) extends RunnableCommand { override def run(sparkSession: SparkSession): Seq[Row] = { @@ -70,12 +71,15 @@ case class CreateTableLikeCommand( sourceTableDesc.provider } + // If the location is specified, we create an external table internally. + // Otherwise create a managed table. + val tblType = if (location.isEmpty) CatalogTableType.MANAGED else CatalogTableType.EXTERNAL + val newTableDesc = CatalogTable( identifier = targetTable, - tableType = CatalogTableType.MANAGED, - // We are creating a new managed table, which should not have custom table location. 
- storage = sourceTableDesc.storage.copy(locationUri = None), + tableType = tblType, + storage = sourceTableDesc.storage.copy(locationUri = location), schema = sourceTableDesc.schema, provider = newProvider, partitionColumnNames = sourceTableDesc.partitionColumnNames, http://git-wip-us.apache.org/repos/asf/spark/blob/6e45b547/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala index 0bd0887..6d7a1c3 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala @@ -524,24 +524,48 @@ class HiveDDLCommandSuite extends PlanTest with SQLTestUtils with TestHiveSingle test("create table like") { val v1 = "CREATE TABLE table1 LIKE table2" - val (target, source, exists) = parser.parsePlan(v1).collect { - case CreateTableLikeCommand(t, s, allowExisting) => (t, s, allowExisting) + val (target, source, location, exists) = parser.parsePlan(v1).collect { + case CreateTableLikeCommand(t, s, l, allowExisting) => (t, s, l, allowExisting) }.head assert(exists == false) assert(target.database.isEmpty) assert(target.table == "table1") assert(source.database.isEmpty) assert(source.table == "table2") + assert(location.isEmpty) val v2 = "CREATE TABLE IF NOT EXISTS table1 LIKE table2" - val (target2, source2, exists2) = parser.parsePlan(v2).collect { - case CreateTableLikeCommand(t, s, allowExisting) => (t, s, allowExisting) + val (target2, source2, location2, exists2) = parser.parsePlan(v2).collect { + case CreateTableLikeCommand(t, s, l, allowExisting) => (t, s, l, allowExisting) }.head assert(exists2) assert(target2.database.isEmpty) assert(target2.table == "table1") assert(source2.database.isEmpty) assert(source2.table 
== "table2") + assert(location2.isEmpty) + + val v3 = "CREATE TABLE table1 LIKE table2 LOCATION '/spark/warehouse'" + val (target3, source3, location3, exists3) = parser.parsePlan(v3).collect { + case CreateTableLikeCommand(t, s, l, allowExisting) => (t, s, l, allowExisting) + }.head + assert(!exists3) + assert(target3.database.isEmpty) + assert(target3.table == "table1") + assert(source3.database.isEmpty) + assert(source3.table == "table2") + assert(location3 == Some("/spark/warehouse")) + + val v4 = "CREATE TABLE IF NOT EXISTS table1 LIKE table2 LOCATION '/spark/warehouse'" + val (target4, source4, location4, exists4) = parser.parsePlan(v4).collect { + case CreateTableLikeCommand(t, s, l, allowExisting) => (t, s, l, allowExisting) + }.head + assert(exists4) + assert(target4.database.isEmpty) + assert(target4.table == "table1") + assert(source4.database.isEmpty) + assert(source4.table == "table2") + assert(location4 == Some("/spark/warehouse")) } test("load data") { http://git-wip-us.apache.org/repos/asf/spark/blob/6e45b547/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 9d9f3a6..c04b9ee 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -833,54 +833,95 @@ class HiveDDLSuite } test("CREATE TABLE LIKE a temporary view") { + // CREATE TABLE LIKE a temporary view. + withCreateTableLikeTempView(location = None) + + // CREATE TABLE LIKE a temporary view location ... 
+ withTempDir { tmpDir => + withCreateTableLikeTempView(Some(tmpDir.toURI.toString)) + } + } + + private def withCreateTableLikeTempView(location : Option[String]): Unit = { val sourceViewName = "tab1" val targetTabName = "tab2" + val tableType = if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED withTempView(sourceViewName) { withTable(targetTabName) { spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd) .createTempView(sourceViewName) - sql(s"CREATE TABLE $targetTabName LIKE $sourceViewName") + + val locationClause = if (location.nonEmpty) s"LOCATION '${location.getOrElse("")}'" else "" + sql(s"CREATE TABLE $targetTabName LIKE $sourceViewName $locationClause") val sourceTable = spark.sessionState.catalog.getTempViewOrPermanentTableMetadata( TableIdentifier(sourceViewName)) val targetTable = spark.sessionState.catalog.getTableMetadata( TableIdentifier(targetTabName, Some("default"))) - checkCreateTableLike(sourceTable, targetTable) + checkCreateTableLike(sourceTable, targetTable, tableType) } } } test("CREATE TABLE LIKE a data source table") { + // CREATE TABLE LIKE a data source table. + withCreateTableLikeDSTable(location = None) + + // CREATE TABLE LIKE a data source table location ... 
+ withTempDir { tmpDir => + withCreateTableLikeDSTable(Some(tmpDir.toURI.toString)) + } + } + + private def withCreateTableLikeDSTable(location : Option[String]): Unit = { val sourceTabName = "tab1" val targetTabName = "tab2" + val tableType = if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED withTable(sourceTabName, targetTabName) { spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd) .write.format("json").saveAsTable(sourceTabName) - sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName") + + val locationClause = if (location.nonEmpty) s"LOCATION '${location.getOrElse("")}'" else "" + sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName $locationClause") val sourceTable = - spark.sessionState.catalog.getTableMetadata(TableIdentifier(sourceTabName, Some("default"))) + spark.sessionState.catalog.getTableMetadata( + TableIdentifier(sourceTabName, Some("default"))) val targetTable = - spark.sessionState.catalog.getTableMetadata(TableIdentifier(targetTabName, Some("default"))) + spark.sessionState.catalog.getTableMetadata( + TableIdentifier(targetTabName, Some("default"))) // The table type of the source table should be a Hive-managed data source table assert(DDLUtils.isDatasourceTable(sourceTable)) assert(sourceTable.tableType == CatalogTableType.MANAGED) - checkCreateTableLike(sourceTable, targetTable) + checkCreateTableLike(sourceTable, targetTable, tableType) } } test("CREATE TABLE LIKE an external data source table") { + // CREATE TABLE LIKE an external data source table. + withCreateTableLikeExtDSTable(location = None) + + // CREATE TABLE LIKE an external data source table location ... 
+ withTempDir { tmpDir => + withCreateTableLikeExtDSTable(Some(tmpDir.toURI.toString)) + } + } + + private def withCreateTableLikeExtDSTable(location : Option[String]): Unit = { val sourceTabName = "tab1" val targetTabName = "tab2" + val tableType = if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED withTable(sourceTabName, targetTabName) { withTempPath { dir => val path = dir.getCanonicalPath spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd) .write.format("parquet").save(path) sql(s"CREATE TABLE $sourceTabName USING parquet OPTIONS (PATH '${dir.toURI}')") - sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName") + + val locationClause = if (location.nonEmpty) s"LOCATION '${location.getOrElse("")}'" else "" + sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName $locationClause") // The source table should be an external data source table val sourceTable = spark.sessionState.catalog.getTableMetadata( @@ -891,30 +932,56 @@ class HiveDDLSuite assert(DDLUtils.isDatasourceTable(sourceTable)) assert(sourceTable.tableType == CatalogTableType.EXTERNAL) - checkCreateTableLike(sourceTable, targetTable) + checkCreateTableLike(sourceTable, targetTable, tableType) } } } test("CREATE TABLE LIKE a managed Hive serde table") { - val catalog = spark.sessionState.catalog + // CREATE TABLE LIKE a managed Hive serde table. + withCreateTableLikeManagedHiveTable(location = None) + + // CREATE TABLE LIKE a managed Hive serde table location ... 
+ withTempDir { tmpDir => + withCreateTableLikeManagedHiveTable(Some(tmpDir.toURI.toString)) + } + } + + private def withCreateTableLikeManagedHiveTable(location : Option[String]): Unit = { val sourceTabName = "tab1" val targetTabName = "tab2" + val tableType = if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED + val catalog = spark.sessionState.catalog withTable(sourceTabName, targetTabName) { sql(s"CREATE TABLE $sourceTabName TBLPROPERTIES('prop1'='value1') AS SELECT 1 key, 'a'") - sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName") - val sourceTable = catalog.getTableMetadata(TableIdentifier(sourceTabName, Some("default"))) + val locationClause = if (location.nonEmpty) s"LOCATION '${location.getOrElse("")}'" else "" + sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName $locationClause") + + val sourceTable = catalog.getTableMetadata( + TableIdentifier(sourceTabName, Some("default"))) assert(sourceTable.tableType == CatalogTableType.MANAGED) assert(sourceTable.properties.get("prop1").nonEmpty) - val targetTable = catalog.getTableMetadata(TableIdentifier(targetTabName, Some("default"))) + val targetTable = catalog.getTableMetadata( + TableIdentifier(targetTabName, Some("default"))) - checkCreateTableLike(sourceTable, targetTable) + checkCreateTableLike(sourceTable, targetTable, tableType) } } test("CREATE TABLE LIKE an external Hive serde table") { + // CREATE TABLE LIKE an external Hive serde table. + withCreateTableLikeExtHiveTable(location = None) + + // CREATE TABLE LIKE an external Hive serde table location ... 
+ withTempDir { tmpDir => + withCreateTableLikeExtHiveTable(Some(tmpDir.toURI.toString)) + } + } + + private def withCreateTableLikeExtHiveTable(location : Option[String]): Unit = { val catalog = spark.sessionState.catalog + val tableType = if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED withTempDir { tmpDir => val basePath = tmpDir.toURI val sourceTabName = "tab1" @@ -936,28 +1003,45 @@ class HiveDDLSuite |SELECT 1, 'a' """.stripMargin) } - sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName") - val sourceTable = catalog.getTableMetadata(TableIdentifier(sourceTabName, Some("default"))) + val locationClause = if (location.nonEmpty) s"LOCATION '${location.getOrElse("")}'" else "" + sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName $locationClause") + + val sourceTable = catalog.getTableMetadata( + TableIdentifier(sourceTabName, Some("default"))) assert(sourceTable.tableType == CatalogTableType.EXTERNAL) assert(sourceTable.comment == Option("Apache Spark")) - val targetTable = catalog.getTableMetadata(TableIdentifier(targetTabName, Some("default"))) + val targetTable = catalog.getTableMetadata( + TableIdentifier(targetTabName, Some("default"))) - checkCreateTableLike(sourceTable, targetTable) + checkCreateTableLike(sourceTable, targetTable, tableType) } } } test("CREATE TABLE LIKE a view") { + // CREATE TABLE LIKE a view. + withCreateTableLikeView(location = None) + + // CREATE TABLE LIKE a view location ... 
+ withTempDir { tmpDir => + withCreateTableLikeView(Some(tmpDir.toURI.toString)) + } + } + + private def withCreateTableLikeView(location : Option[String]): Unit = { val sourceTabName = "tab1" val sourceViewName = "view" val targetTabName = "tab2" + val tableType = if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED withTable(sourceTabName, targetTabName) { withView(sourceViewName) { spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd) .write.format("json").saveAsTable(sourceTabName) sql(s"CREATE VIEW $sourceViewName AS SELECT * FROM $sourceTabName") - sql(s"CREATE TABLE $targetTabName LIKE $sourceViewName") + + val locationClause = if (location.nonEmpty) s"LOCATION '${location.getOrElse("")}'" else "" + sql(s"CREATE TABLE $targetTabName LIKE $sourceViewName $locationClause") val sourceView = spark.sessionState.catalog.getTableMetadata( TableIdentifier(sourceViewName, Some("default"))) @@ -969,15 +1053,19 @@ class HiveDDLSuite val targetTable = spark.sessionState.catalog.getTableMetadata( TableIdentifier(targetTabName, Some("default"))) - checkCreateTableLike(sourceView, targetTable) + checkCreateTableLike(sourceView, targetTable, tableType) } } } - private def checkCreateTableLike(sourceTable: CatalogTable, targetTable: CatalogTable): Unit = { - // The created table should be a MANAGED table with empty view text and original text. - assert(targetTable.tableType == CatalogTableType.MANAGED, - "the created table must be a Hive managed table") + private def checkCreateTableLike( + sourceTable: CatalogTable, + targetTable: CatalogTable, + tableType: CatalogTableType): Unit = { + // The created table should be a MANAGED table or EXTERNAL table with empty view text + // and original text. 
+ assert(targetTable.tableType == tableType, + s"the created table must be a/an ${tableType.name} table") assert(targetTable.viewText.isEmpty, "the view text in the created table must be empty") assert(targetTable.viewDefaultDatabase.isEmpty, @@ -1027,8 +1115,13 @@ class HiveDDLSuite } assert(targetTable.storage.locationUri.nonEmpty, "target table path should not be empty") - assert(sourceTable.storage.locationUri != targetTable.storage.locationUri, - "source table/view path should be different from target table path") + + // User-specified location and sourceTable's location can be same or different, + // when we creating an external table. So we don't need to do this check + if (tableType != CatalogTableType.EXTERNAL) { + assert(sourceTable.storage.locationUri != targetTable.storage.locationUri, + "source table/view path should be different from target table path") + } // The source table contents should not been seen in the target table. assert(spark.table(sourceTable.identifier).count() != 0, "the source table should be nonempty") --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org