Repository: spark
Updated Branches:
  refs/heads/branch-2.0 e11c27918 -> 5b003c9bc


[SPARK-15977][SQL] Fix TRUNCATE TABLE for Spark specific datasource tables

## What changes were proposed in this pull request?
`TRUNCATE TABLE` is currently broken for Spark-specific datasource tables 
(json, csv, ...). This PR correctly sets the location for these datasources, 
which allows them to be truncated.

## How was this patch tested?
Extended the datasources `TRUNCATE TABLE` tests in `DDLSuite`.

Author: Herman van Hovell <hvanhov...@databricks.com>

Closes #13697 from hvanhovell/SPARK-15977.

(cherry picked from commit f9bf15d9bde4df2178f7a8f932c883bb77c46149)
Signed-off-by: Herman van Hovell <hvanhov...@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5b003c9b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5b003c9b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5b003c9b

Branch: refs/heads/branch-2.0
Commit: 5b003c9bcf43709408ed8f68d17b249675f50fbc
Parents: e11c279
Author: Herman van Hovell <hvanhov...@databricks.com>
Authored: Thu Jun 16 13:47:36 2016 -0700
Committer: Herman van Hovell <hvanhov...@databricks.com>
Committed: Thu Jun 16 13:47:55 2016 -0700

----------------------------------------------------------------------
 .../spark/sql/execution/command/tables.scala    |  4 ++-
 .../spark/sql/execution/command/DDLSuite.scala  | 28 +++++++++++++-------
 2 files changed, 21 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/5b003c9b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
index 58bb5cd..3eb93a2 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -348,7 +348,9 @@ case class TruncateTableCommand(
         s"for tables that are not partitioned: '$tableName'")
     }
     val locations =
-      if (isDatasourceTable || table.partitionColumnNames.isEmpty) {
+      if (isDatasourceTable) {
+        Seq(table.storage.serdeProperties.get("path"))
+      } else if (table.partitionColumnNames.isEmpty) {
         Seq(table.storage.locationUri)
       } else {
         catalog.listPartitions(tableName, 
partitionSpec).map(_.storage.locationUri)

http://git-wip-us.apache.org/repos/asf/spark/blob/5b003c9b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
index e15fcf4..7eb2fff 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
@@ -1280,17 +1280,25 @@ class DDLSuite extends QueryTest with SharedSQLContext 
with BeforeAndAfterEach {
   test("truncate table - datasource table") {
     import testImplicits._
     val data = (1 to 10).map { i => (i, i) }.toDF("width", "length")
-    data.write.saveAsTable("rectangles")
-    spark.catalog.cacheTable("rectangles")
-    assume(spark.table("rectangles").collect().nonEmpty, "bad test; table was 
empty to begin with")
-    assume(spark.catalog.isCached("rectangles"), "bad test; table was not 
cached to begin with")
-    sql("TRUNCATE TABLE rectangles")
-    assert(spark.table("rectangles").collect().isEmpty)
-    assert(!spark.catalog.isCached("rectangles"))
+
+    // Test both a Hive compatible and incompatible code path.
+    Seq("json", "parquet").foreach { format =>
+      withTable("rectangles") {
+        data.write.format(format).saveAsTable("rectangles")
+        assume(spark.table("rectangles").collect().nonEmpty,
+          "bad test; table was empty to begin with")
+        sql("TRUNCATE TABLE rectangles")
+        assert(spark.table("rectangles").collect().isEmpty)
+      }
+    }
+
     // truncating partitioned data source tables is not supported
-    data.write.partitionBy("length").saveAsTable("rectangles2")
-    assertUnsupported("TRUNCATE TABLE rectangles PARTITION (width=1)")
-    assertUnsupported("TRUNCATE TABLE rectangles2 PARTITION (width=1)")
+    withTable("rectangles", "rectangles2") {
+      data.write.saveAsTable("rectangles")
+      data.write.partitionBy("length").saveAsTable("rectangles2")
+      assertUnsupported("TRUNCATE TABLE rectangles PARTITION (width=1)")
+      assertUnsupported("TRUNCATE TABLE rectangles2 PARTITION (width=1)")
+    }
   }
 
   test("truncate table - external table, temporary table, view (not allowed)") 
{


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to