Repository: spark
Updated Branches:
  refs/heads/master e02ac303c -> 6e45b547c


[SPARK-19115][SQL] Supporting Create Table Like Location

What changes were proposed in this pull request?

Support the CREATE [EXTERNAL] TABLE ... LIKE ... LOCATION ... syntax for Hive
serde and data source tables.
In this PR, we follow Spark SQL's design rules:

    CREATE TABLE ... LIKE supports a view, a physical table, or a temporary
view as the source, with an optional location.
    A table created with a location becomes an external table rather than a
managed table (see the sketch below).
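
As a minimal sketch of the new syntax (the table names and the path are
invented for illustration, and an active SparkSession named spark is assumed):

    // Source table whose definition will be copied.
    spark.sql("CREATE TABLE src (key INT, value STRING)")

    // Without LOCATION, the copy is a MANAGED table, as before.
    spark.sql("CREATE TABLE copy_managed LIKE src")

    // With LOCATION, the copy becomes an EXTERNAL table at the given path.
    spark.sql("CREATE TABLE copy_external LIKE src LOCATION '/tmp/copy_external'")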

How was this patch tested?

Add new test cases and update existing test cases

Author: ouyangxiaochen <ou.yangxiaoc...@zte.com.cn>

Closes #16868 from ouyangxiaochen/spark19115.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6e45b547
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6e45b547
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6e45b547

Branch: refs/heads/master
Commit: 6e45b547ceadbbe8394bf149945b7942df82660a
Parents: e02ac30
Author: ouyangxiaochen <ou.yangxiaoc...@zte.com.cn>
Authored: Mon Feb 13 19:41:44 2017 -0800
Committer: Xiao Li <gatorsm...@gmail.com>
Committed: Mon Feb 13 19:41:44 2017 -0800

----------------------------------------------------------------------
 .../apache/spark/sql/catalyst/parser/SqlBase.g4 |   2 +-
 .../spark/sql/execution/SparkSqlParser.scala    |   5 +-
 .../spark/sql/execution/command/tables.scala    |  14 +-
 .../spark/sql/hive/HiveDDLCommandSuite.scala    |  32 ++++-
 .../spark/sql/hive/execution/HiveDDLSuite.scala | 143 +++++++++++++++----
 5 files changed, 159 insertions(+), 37 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/6e45b547/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index c95c1f5..3123998 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -82,7 +82,7 @@ statement
         (TBLPROPERTIES tablePropertyList)?
         (AS? query)?                                                   #createHiveTable
     | CREATE TABLE (IF NOT EXISTS)? target=tableIdentifier
-        LIKE source=tableIdentifier                                    #createTableLike
+        LIKE source=tableIdentifier locationSpec?                      #createTableLike
     | ANALYZE TABLE tableIdentifier partitionSpec? COMPUTE STATISTICS
         (identifier | FOR COLUMNS identifierSeq)?                      #analyze
     | ALTER (TABLE | VIEW) from=tableIdentifier

http://git-wip-us.apache.org/repos/asf/spark/blob/6e45b547/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index 41768d4..ca76a10 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -1141,13 +1141,14 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
    * For example:
    * {{{
    *   CREATE TABLE [IF NOT EXISTS] [db_name.]table_name
-   *   LIKE [other_db_name.]existing_table_name
+   *   LIKE [other_db_name.]existing_table_name [locationSpec]
    * }}}
    */
   override def visitCreateTableLike(ctx: CreateTableLikeContext): LogicalPlan = withOrigin(ctx) {
     val targetTable = visitTableIdentifier(ctx.target)
     val sourceTable = visitTableIdentifier(ctx.source)
-    CreateTableLikeCommand(targetTable, sourceTable, ctx.EXISTS != null)
+    val location = Option(ctx.locationSpec).map(visitLocationSpec)
+    CreateTableLikeCommand(targetTable, sourceTable, location, ctx.EXISTS != null)
   }
 
   /**
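For context, ANTLR generates a null child accessor when an optional subrule
such as locationSpec? is absent, so wrapping it in Option maps a missing
clause to None. A self-contained sketch of the same idiom (Ctx and
parseLocation are stand-ins invented for this example, not Spark's classes):

    // Stand-in for a generated ANTLR context: the optional child is null when absent.
    final class Ctx(val locationSpec: String)

    def parseLocation(raw: String): String = raw.stripPrefix("LOCATION ")

    val withClause    = new Ctx("LOCATION '/spark/warehouse'")
    val withoutClause = new Ctx(null)

    assert(Option(withClause.locationSpec).map(parseLocation).contains("'/spark/warehouse'"))
    assert(Option(withoutClause.locationSpec).map(parseLocation).isEmpty)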

http://git-wip-us.apache.org/repos/asf/spark/blob/6e45b547/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
index bc4b5b6..d646a21 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -42,7 +42,7 @@ import org.apache.spark.sql.types._
 import org.apache.spark.util.Utils
 
 /**
- * A command to create a MANAGED table with the same definition of the given existing table.
+ * A command to create a table with the same definition of the given existing table.
  * In the target table definition, the table comment is always empty but the column comments
  * are identical to the ones defined in the source table.
  *
@@ -52,12 +52,13 @@ import org.apache.spark.util.Utils
  * The syntax of using this command in SQL is:
  * {{{
  *   CREATE TABLE [IF NOT EXISTS] [db_name.]table_name
- *   LIKE [other_db_name.]existing_table_name
+ *   LIKE [other_db_name.]existing_table_name [locationSpec]
  * }}}
  */
 case class CreateTableLikeCommand(
     targetTable: TableIdentifier,
     sourceTable: TableIdentifier,
+    location: Option[String],
     ifNotExists: Boolean) extends RunnableCommand {
 
   override def run(sparkSession: SparkSession): Seq[Row] = {
@@ -70,12 +71,15 @@ case class CreateTableLikeCommand(
       sourceTableDesc.provider
     }
 
+    // If the location is specified, we create an external table internally.
+    // Otherwise create a managed table.
+    val tblType = if (location.isEmpty) CatalogTableType.MANAGED else CatalogTableType.EXTERNAL
+
     val newTableDesc =
       CatalogTable(
         identifier = targetTable,
-        tableType = CatalogTableType.MANAGED,
-        // We are creating a new managed table, which should not have custom table location.
-        storage = sourceTableDesc.storage.copy(locationUri = None),
+        tableType = tblType,
+        storage = sourceTableDesc.storage.copy(locationUri = location),
         schema = sourceTableDesc.schema,
         provider = newProvider,
         partitionColumnNames = sourceTableDesc.partitionColumnNames,
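The managed-versus-external decision above is the core of the change. Isolated
as a runnable sketch (tableTypeFor is a name invented here for illustration,
not part of the patch):

    import org.apache.spark.sql.catalyst.catalog.CatalogTableType

    // No user-supplied location => managed table; a location => external table.
    def tableTypeFor(location: Option[String]): CatalogTableType =
      if (location.isEmpty) CatalogTableType.MANAGED else CatalogTableType.EXTERNAL

    assert(tableTypeFor(None) == CatalogTableType.MANAGED)
    assert(tableTypeFor(Some("/spark/warehouse")) == CatalogTableType.EXTERNAL)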

http://git-wip-us.apache.org/repos/asf/spark/blob/6e45b547/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala
index 0bd0887..6d7a1c3 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala
@@ -524,24 +524,48 @@ class HiveDDLCommandSuite extends PlanTest with SQLTestUtils with TestHiveSingle
 
   test("create table like") {
     val v1 = "CREATE TABLE table1 LIKE table2"
-    val (target, source, exists) = parser.parsePlan(v1).collect {
-      case CreateTableLikeCommand(t, s, allowExisting) => (t, s, allowExisting)
+    val (target, source, location, exists) = parser.parsePlan(v1).collect {
+      case CreateTableLikeCommand(t, s, l, allowExisting) => (t, s, l, allowExisting)
     }.head
     assert(exists == false)
     assert(target.database.isEmpty)
     assert(target.table == "table1")
     assert(source.database.isEmpty)
     assert(source.table == "table2")
+    assert(location.isEmpty)
 
     val v2 = "CREATE TABLE IF NOT EXISTS table1 LIKE table2"
-    val (target2, source2, exists2) = parser.parsePlan(v2).collect {
-      case CreateTableLikeCommand(t, s, allowExisting) => (t, s, allowExisting)
+    val (target2, source2, location2, exists2) = parser.parsePlan(v2).collect {
+      case CreateTableLikeCommand(t, s, l, allowExisting) => (t, s, l, allowExisting)
     }.head
     assert(exists2)
     assert(target2.database.isEmpty)
     assert(target2.table == "table1")
     assert(source2.database.isEmpty)
     assert(source2.table == "table2")
+    assert(location2.isEmpty)
+
+    val v3 = "CREATE TABLE table1 LIKE table2 LOCATION '/spark/warehouse'"
+    val (target3, source3, location3, exists3) = parser.parsePlan(v3).collect {
+      case CreateTableLikeCommand(t, s, l, allowExisting) => (t, s, l, allowExisting)
+    }.head
+    assert(!exists3)
+    assert(target3.database.isEmpty)
+    assert(target3.table == "table1")
+    assert(source3.database.isEmpty)
+    assert(source3.table == "table2")
+    assert(location3 == Some("/spark/warehouse"))
+
+    val v4 = "CREATE TABLE IF NOT EXISTS table1 LIKE table2  LOCATION 
'/spark/warehouse'"
+    val (target4, source4, location4, exists4) = parser.parsePlan(v4).collect {
+      case CreateTableLikeCommand(t, s, l, allowExisting) => (t, s, l, allowExisting)
+    }.head
+    assert(exists4)
+    assert(target4.database.isEmpty)
+    assert(target4.table == "table1")
+    assert(source4.database.isEmpty)
+    assert(source4.table == "table2")
+    assert(location4 == Some("/spark/warehouse"))
   }
 
   test("load data") {

http://git-wip-us.apache.org/repos/asf/spark/blob/6e45b547/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index 9d9f3a6..c04b9ee 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -833,54 +833,95 @@ class HiveDDLSuite
   }
 
   test("CREATE TABLE LIKE a temporary view") {
+    // CREATE TABLE LIKE a temporary view.
+    withCreateTableLikeTempView(location = None)
+
+    // CREATE TABLE LIKE a temporary view location ...
+    withTempDir { tmpDir =>
+      withCreateTableLikeTempView(Some(tmpDir.toURI.toString))
+    }
+  }
+
+  private def withCreateTableLikeTempView(location : Option[String]): Unit = {
     val sourceViewName = "tab1"
     val targetTabName = "tab2"
+    val tableType = if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED
     withTempView(sourceViewName) {
       withTable(targetTabName) {
         spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd)
           .createTempView(sourceViewName)
-        sql(s"CREATE TABLE $targetTabName LIKE $sourceViewName")
+
+        val locationClause = if (location.nonEmpty) s"LOCATION '${location.getOrElse("")}'" else ""
+        sql(s"CREATE TABLE $targetTabName LIKE $sourceViewName $locationClause")
 
        val sourceTable = spark.sessionState.catalog.getTempViewOrPermanentTableMetadata(
           TableIdentifier(sourceViewName))
         val targetTable = spark.sessionState.catalog.getTableMetadata(
           TableIdentifier(targetTabName, Some("default")))
 
-        checkCreateTableLike(sourceTable, targetTable)
+        checkCreateTableLike(sourceTable, targetTable, tableType)
       }
     }
   }
 
   test("CREATE TABLE LIKE a data source table") {
+    // CREATE TABLE LIKE a data source table.
+    withCreateTableLikeDSTable(location = None)
+
+    // CREATE TABLE LIKE a data source table location ...
+    withTempDir { tmpDir =>
+      withCreateTableLikeDSTable(Some(tmpDir.toURI.toString))
+    }
+  }
+
+  private def withCreateTableLikeDSTable(location : Option[String]): Unit = {
     val sourceTabName = "tab1"
     val targetTabName = "tab2"
+    val tableType = if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED
     withTable(sourceTabName, targetTabName) {
       spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd)
         .write.format("json").saveAsTable(sourceTabName)
-      sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName")
+
+      val locationClause = if (location.nonEmpty) s"LOCATION '${location.getOrElse("")}'" else ""
+      sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName $locationClause")
 
       val sourceTable =
-        spark.sessionState.catalog.getTableMetadata(TableIdentifier(sourceTabName, Some("default")))
+        spark.sessionState.catalog.getTableMetadata(
+          TableIdentifier(sourceTabName, Some("default")))
       val targetTable =
-        spark.sessionState.catalog.getTableMetadata(TableIdentifier(targetTabName, Some("default")))
+        spark.sessionState.catalog.getTableMetadata(
+          TableIdentifier(targetTabName, Some("default")))
      // The table type of the source table should be a Hive-managed data source table
       assert(DDLUtils.isDatasourceTable(sourceTable))
       assert(sourceTable.tableType == CatalogTableType.MANAGED)
 
-      checkCreateTableLike(sourceTable, targetTable)
+      checkCreateTableLike(sourceTable, targetTable, tableType)
     }
   }
 
   test("CREATE TABLE LIKE an external data source table") {
+    // CREATE TABLE LIKE an external data source table.
+    withCreateTableLikeExtDSTable(location = None)
+
+    // CREATE TABLE LIKE an external data source table location ...
+    withTempDir { tmpDir =>
+      withCreateTableLikeExtDSTable(Some(tmpDir.toURI.toString))
+    }
+  }
+
+  private def withCreateTableLikeExtDSTable(location : Option[String]): Unit = {
     val sourceTabName = "tab1"
     val targetTabName = "tab2"
+    val tableType = if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED
     withTable(sourceTabName, targetTabName) {
       withTempPath { dir =>
         val path = dir.getCanonicalPath
         spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd)
           .write.format("parquet").save(path)
         sql(s"CREATE TABLE $sourceTabName USING parquet OPTIONS (PATH 
'${dir.toURI}')")
-        sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName")
+
+        val locationClause = if (location.nonEmpty) s"LOCATION '${location.getOrElse("")}'" else ""
+        sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName $locationClause")
 
         // The source table should be an external data source table
         val sourceTable = spark.sessionState.catalog.getTableMetadata(
@@ -891,30 +932,56 @@ class HiveDDLSuite
         assert(DDLUtils.isDatasourceTable(sourceTable))
         assert(sourceTable.tableType == CatalogTableType.EXTERNAL)
 
-        checkCreateTableLike(sourceTable, targetTable)
+        checkCreateTableLike(sourceTable, targetTable, tableType)
       }
     }
   }
 
   test("CREATE TABLE LIKE a managed Hive serde table") {
-    val catalog = spark.sessionState.catalog
+    // CREATE TABLE LIKE a managed Hive serde table.
+    withCreateTableLikeManagedHiveTable(location = None)
+
+    // CREATE TABLE LIKE a managed Hive serde table location ...
+    withTempDir { tmpDir =>
+      withCreateTableLikeManagedHiveTable(Some(tmpDir.toURI.toString))
+    }
+  }
+
+  private def withCreateTableLikeManagedHiveTable(location : Option[String]): Unit = {
     val sourceTabName = "tab1"
     val targetTabName = "tab2"
+    val tableType = if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED
+    val catalog = spark.sessionState.catalog
     withTable(sourceTabName, targetTabName) {
       sql(s"CREATE TABLE $sourceTabName TBLPROPERTIES('prop1'='value1') AS 
SELECT 1 key, 'a'")
-      sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName")
 
-      val sourceTable = catalog.getTableMetadata(TableIdentifier(sourceTabName, Some("default")))
+      val locationClause = if (location.nonEmpty) s"LOCATION '${location.getOrElse("")}'" else ""
+      sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName $locationClause")
+
+      val sourceTable = catalog.getTableMetadata(
+        TableIdentifier(sourceTabName, Some("default")))
       assert(sourceTable.tableType == CatalogTableType.MANAGED)
       assert(sourceTable.properties.get("prop1").nonEmpty)
-      val targetTable = catalog.getTableMetadata(TableIdentifier(targetTabName, Some("default")))
+      val targetTable = catalog.getTableMetadata(
+        TableIdentifier(targetTabName, Some("default")))
 
-      checkCreateTableLike(sourceTable, targetTable)
+      checkCreateTableLike(sourceTable, targetTable, tableType)
     }
   }
 
   test("CREATE TABLE LIKE an external Hive serde table") {
+    // CREATE TABLE LIKE an external Hive serde table.
+    withCreateTableLikeExtHiveTable(location = None)
+
+    // CREATE TABLE LIKE an external Hive serde table location ...
+    withTempDir { tmpDir =>
+      withCreateTableLikeExtHiveTable(Some(tmpDir.toURI.toString))
+    }
+  }
+
+  private def withCreateTableLikeExtHiveTable(location : Option[String]): Unit = {
     val catalog = spark.sessionState.catalog
+    val tableType = if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED
     withTempDir { tmpDir =>
       val basePath = tmpDir.toURI
       val sourceTabName = "tab1"
@@ -936,28 +1003,45 @@ class HiveDDLSuite
                |SELECT 1, 'a'
              """.stripMargin)
         }
-        sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName")
 
-        val sourceTable = catalog.getTableMetadata(TableIdentifier(sourceTabName, Some("default")))
+        val locationClause = if (location.nonEmpty) s"LOCATION '${location.getOrElse("")}'" else ""
+        sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName $locationClause")
+
+        val sourceTable = catalog.getTableMetadata(
+          TableIdentifier(sourceTabName, Some("default")))
         assert(sourceTable.tableType == CatalogTableType.EXTERNAL)
         assert(sourceTable.comment == Option("Apache Spark"))
-        val targetTable = catalog.getTableMetadata(TableIdentifier(targetTabName, Some("default")))
+        val targetTable = catalog.getTableMetadata(
+          TableIdentifier(targetTabName, Some("default")))
 
-        checkCreateTableLike(sourceTable, targetTable)
+        checkCreateTableLike(sourceTable, targetTable, tableType)
       }
     }
   }
 
   test("CREATE TABLE LIKE a view") {
+    // CREATE TABLE LIKE a view.
+    withCreateTableLikeView(location = None)
+
+    // CREATE TABLE LIKE a view location ...
+    withTempDir { tmpDir =>
+      withCreateTableLikeView(Some(tmpDir.toURI.toString))
+    }
+  }
+
+  private def withCreateTableLikeView(location : Option[String]): Unit = {
     val sourceTabName = "tab1"
     val sourceViewName = "view"
     val targetTabName = "tab2"
+    val tableType = if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED
     withTable(sourceTabName, targetTabName) {
       withView(sourceViewName) {
         spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd)
           .write.format("json").saveAsTable(sourceTabName)
         sql(s"CREATE VIEW $sourceViewName AS SELECT * FROM $sourceTabName")
-        sql(s"CREATE TABLE $targetTabName LIKE $sourceViewName")
+
+        val locationClause = if (location.nonEmpty) s"LOCATION '${location.getOrElse("")}'" else ""
+        sql(s"CREATE TABLE $targetTabName LIKE $sourceViewName $locationClause")
 
         val sourceView = spark.sessionState.catalog.getTableMetadata(
           TableIdentifier(sourceViewName, Some("default")))
@@ -969,15 +1053,19 @@ class HiveDDLSuite
         val targetTable = spark.sessionState.catalog.getTableMetadata(
           TableIdentifier(targetTabName, Some("default")))
 
-        checkCreateTableLike(sourceView, targetTable)
+        checkCreateTableLike(sourceView, targetTable, tableType)
       }
     }
   }
 
-  private def checkCreateTableLike(sourceTable: CatalogTable, targetTable: CatalogTable): Unit = {
-    // The created table should be a MANAGED table with empty view text and original text.
-    assert(targetTable.tableType == CatalogTableType.MANAGED,
-      "the created table must be a Hive managed table")
+  private def checkCreateTableLike(
+    sourceTable: CatalogTable,
+    targetTable: CatalogTable,
+    tableType: CatalogTableType): Unit = {
+    // The created table should be a MANAGED table or EXTERNAL table with empty view text
+    // and original text.
+    assert(targetTable.tableType == tableType,
+      s"the created table must be a/an ${tableType.name} table")
     assert(targetTable.viewText.isEmpty,
       "the view text in the created table must be empty")
     assert(targetTable.viewDefaultDatabase.isEmpty,
@@ -1027,8 +1115,13 @@ class HiveDDLSuite
     }
 
    assert(targetTable.storage.locationUri.nonEmpty, "target table path should not be empty")
-    assert(sourceTable.storage.locationUri != targetTable.storage.locationUri,
-      "source table/view path should be different from target table path")
+
+    // The user-specified location and the source table's location can be the same or
+    // different when we create an external table, so we skip this check for external tables.
+    if (tableType != CatalogTableType.EXTERNAL) {
+      assert(sourceTable.storage.locationUri != targetTable.storage.locationUri,
+        "source table/view path should be different from target table path")
+    }
 
     // The source table contents should not been seen in the target table.
    assert(spark.table(sourceTable.identifier).count() != 0, "the source table should be nonempty")
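
To spot-check the behavior outside the test suite, one can inspect the catalog
metadata directly (a sketch; the table name copy_external comes from the
invented example earlier, and an active SparkSession named spark is assumed):

    import org.apache.spark.sql.catalyst.TableIdentifier
    import org.apache.spark.sql.catalyst.catalog.CatalogTableType

    val meta = spark.sessionState.catalog
      .getTableMetadata(TableIdentifier("copy_external", Some("default")))
    assert(meta.tableType == CatalogTableType.EXTERNAL)  // LOCATION was given
    assert(meta.storage.locationUri.nonEmpty)            // points at the user-supplied path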

