Repository: spark
Updated Branches:
  refs/heads/master b9cf617a6 -> f14c4ba00


[SPARK-15276][SQL] CREATE TABLE with LOCATION should imply EXTERNAL

## What changes were proposed in this pull request?

Before:
```sql
-- creates a MANAGED table at that location but issues a warning
CREATE TABLE my_tab LOCATION '/some/path'
-- deletes any existing data at the specified location
DROP TABLE my_tab
```

After:
```sql
-- creates an EXTERNAL table at that location instead
CREATE TABLE my_tab LOCATION '/some/path'
-- does not delete the data at /some/path
DROP TABLE my_tab
```

This patch essentially makes the `EXTERNAL` keyword optional: specifying a `LOCATION` now implies an external table, so dropping the table no longer deletes the underlying data. This is related to #13032.
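
For illustration, here is a minimal sketch of the end-to-end effect from a spark-shell (this snippet is not part of the patch; it assumes a build with Hive support, and the table name and path are made up):

```scala
// Create a table with an explicit LOCATION but without the EXTERNAL keyword.
spark.sql("CREATE TABLE my_tab (id INT) LOCATION '/tmp/my_tab_data'")

// The catalog should now report tableType = EXTERNAL even though the
// EXTERNAL keyword was never written.
spark.catalog.listTables().filter(_.name == "my_tab").show()

// Dropping the table removes only the metadata; the files under
// /tmp/my_tab_data are left in place.
spark.sql("DROP TABLE my_tab")
```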

## How was this patch tested?

New test in `DDLCommandSuite`.

Author: Andrew Or <and...@databricks.com>

Closes #13060 from andrewor14/location-implies-external.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f14c4ba0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f14c4ba0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f14c4ba0

Branch: refs/heads/master
Commit: f14c4ba001fbdbcc9faa46896f1f9d08a7d06609
Parents: b9cf617
Author: Andrew Or <and...@databricks.com>
Authored: Wed May 11 17:29:58 2016 -0700
Committer: Andrew Or <and...@databricks.com>
Committed: Wed May 11 17:29:58 2016 -0700

----------------------------------------------------------------------
 .../org/apache/spark/sql/execution/SparkSqlParser.scala | 12 +++++++-----
 .../spark/sql/execution/command/DDLCommandSuite.scala   | 12 ++++++++++++
 .../apache/spark/sql/hive/execution/HiveDDLSuite.scala  |  8 +++-----
 .../apache/spark/sql/hive/execution/SQLQuerySuite.scala |  5 +----
 4 files changed, 23 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/f14c4ba0/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index a51665f..53aba1f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -745,11 +745,6 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
     if (ctx.bucketSpec != null) {
       throw operationNotAllowed("CREATE TABLE ... CLUSTERED BY", ctx)
     }
-    val tableType = if (external) {
-      CatalogTableType.EXTERNAL
-    } else {
-      CatalogTableType.MANAGED
-    }
     val comment = Option(ctx.STRING).map(string)
     val partitionCols = Option(ctx.partitionColumns).toSeq.flatMap(visitCatalogColumns)
     val cols = Option(ctx.columns).toSeq.flatMap(visitCatalogColumns)
@@ -791,6 +786,13 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
      serde = rowStorage.serde.orElse(fileStorage.serde).orElse(defaultStorage.serde),
       compressed = false,
      serdeProperties = rowStorage.serdeProperties ++ fileStorage.serdeProperties)
+    // If location is defined, we'll assume this is an external table.
+    // Otherwise, we may accidentally delete existing data.
+    val tableType = if (external || location.isDefined) {
+      CatalogTableType.EXTERNAL
+    } else {
+      CatalogTableType.MANAGED
+    }
 
     // TODO support the sql text - have a proper location for this!
     val tableDesc = CatalogTable(

http://git-wip-us.apache.org/repos/asf/spark/blob/f14c4ba0/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala
index fa8dabf..aeb613a 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala
@@ -227,6 +227,18 @@ class DDLCommandSuite extends PlanTest {
     }
   }
 
+  test("create table - location implies external") {
+    val query = "CREATE TABLE my_tab LOCATION '/something/anything'"
+    parser.parsePlan(query) match {
+      case ct: CreateTable =>
+        assert(ct.table.tableType == CatalogTableType.EXTERNAL)
+        assert(ct.table.storage.locationUri == Some("/something/anything"))
+      case other =>
+        fail(s"Expected to parse ${classOf[CreateTable].getClass.getName} from 
query," +
+            s"got ${other.getClass.getName}: $query")
+    }
+  }
+
   // ALTER TABLE table_name RENAME TO new_table_name;
   // ALTER VIEW view_name RENAME TO new_view_name;
   test("alter table/view: rename table/view") {

http://git-wip-us.apache.org/repos/asf/spark/blob/f14c4ba0/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index 8b60802..ae61322 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -72,7 +72,7 @@ class HiveDDLSuite
     }
   }
 
-  test("drop managed tables in default database") {
+  test("drop external tables in default database") {
     withTempDir { tmpDir =>
       val tabName = "tab1"
       withTable(tabName) {
@@ -88,13 +88,11 @@ class HiveDDLSuite
         val hiveTable =
           hiveContext.sessionState.catalog
             .getTableMetadata(TableIdentifier(tabName, Some("default")))
-        // It is a managed table, although it uses external in SQL
-        assert(hiveTable.tableType == CatalogTableType.MANAGED)
+        assert(hiveTable.tableType == CatalogTableType.EXTERNAL)
 
         assert(tmpDir.listFiles.nonEmpty)
         sql(s"DROP TABLE $tabName")
-        // The data are deleted since the table type is not EXTERNAL
-        assert(tmpDir.listFiles == null)
+        assert(tmpDir.listFiles.nonEmpty)
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/f14c4ba0/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index 6ce5051..ac9a393 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -1534,10 +1534,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
     assert(fs.listStatus(new Path(path, "part=1")).nonEmpty)
 
     sql("drop table test_table")
-    assert(
-      !fs.exists(path),
-      "Once a managed table has been dropped, " +
-        "dirs of this table should also have been deleted.")
+    assert(fs.exists(path), "This is an external table, so the data should not have been dropped")
   }
 
   test("SPARK-14981: DESC not supported for sorting columns") {

