This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 35a8e1e  [SPARK-31061][SQL] Provide ability to alter the provider of a table
35a8e1e is described below

commit 35a8e1ed7bb2f9493742d7401e32eab4621fd2d0
Author: Burak Yavuz <brk...@gmail.com>
AuthorDate: Fri Mar 6 15:40:44 2020 +0800

    [SPARK-31061][SQL] Provide ability to alter the provider of a table
    
    This PR adds functionality to HiveExternalCatalog to be able to change the provider of a table.
    
    This is useful for catalogs in Spark 3.0 to be able to use alterTable to change the provider of a table as part of an atomic REPLACE TABLE function.
    
    No
    
    Unit tests
    
    Closes #27822 from brkyvz/externalCat.
    
    Authored-by: Burak Yavuz <brk...@gmail.com>
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
    (cherry picked from commit 2e3adadc6a53fe044b286a6a59529a94e7eeda6c)
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
---
 .../spark/sql/hive/HiveExternalCatalog.scala       | 10 +++++-
 .../spark/sql/hive/HiveExternalCatalogSuite.scala  | 40 ++++++++++++++++++++++
 2 files changed, 49 insertions(+), 1 deletion(-)

diff --git 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
index ca292f6..be6d824 100644
--- 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
+++ 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
@@ -634,7 +634,15 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, 
hadoopConf: Configurat
         k.startsWith(DATASOURCE_PREFIX) || k.startsWith(STATISTICS_PREFIX) ||
           k.startsWith(CREATED_SPARK_VERSION)
       }
-      val newTableProps = propsFromOldTable ++ tableDefinition.properties + 
partitionProviderProp
+      val newFormatIfExists = tableDefinition.provider.flatMap { p =>
+        if (DDLUtils.isDatasourceTable(tableDefinition)) {
+          Some(DATASOURCE_PROVIDER -> p)
+        } else {
+          None
+        }
+      }
+      val newTableProps =
+        propsFromOldTable ++ tableDefinition.properties + 
partitionProviderProp ++ newFormatIfExists
 
       // // Add old table's owner if we need to restore
       val owner = 
Option(tableDefinition.owner).filter(_.nonEmpty).getOrElse(oldTableDef.owner)
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala
 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala
index 0a88898..473a93b 100644
--- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala
@@ -17,6 +17,8 @@
 
 package org.apache.spark.sql.hive
 
+import java.net.URI
+
 import org.apache.hadoop.conf.Configuration
 
 import org.apache.spark.SparkConf
@@ -178,4 +180,42 @@ class HiveExternalCatalogSuite extends 
ExternalCatalogSuite {
     assertThrows[QueryExecutionException](client.runSqlHive(
       "INSERT overwrite directory \"fs://localhost/tmp\" select 1 as a"))
   }
+
+  test("SPARK-31061: alterTable should be able to change table provider") {
+    val catalog = newBasicCatalog()
+    val parquetTable = CatalogTable(
+      identifier = TableIdentifier("parq_tbl", Some("db1")),
+      tableType = CatalogTableType.MANAGED,
+      storage = storageFormat.copy(locationUri = Some(new 
URI("file:/some/path"))),
+      schema = new StructType().add("col1", "int").add("col2", "string"),
+      provider = Some("parquet"))
+    catalog.createTable(parquetTable, ignoreIfExists = false)
+
+    val rawTable = externalCatalog.getTable("db1", "parq_tbl")
+    assert(rawTable.provider === Some("parquet"))
+
+    val fooTable = parquetTable.copy(provider = Some("foo"))
+    catalog.alterTable(fooTable)
+    val alteredTable = externalCatalog.getTable("db1", "parq_tbl")
+    assert(alteredTable.provider === Some("foo"))
+  }
+
+  test("SPARK-31061: alterTable should be able to change table provider from 
hive") {
+    val catalog = newBasicCatalog()
+    val hiveTable = CatalogTable(
+      identifier = TableIdentifier("parq_tbl", Some("db1")),
+      tableType = CatalogTableType.MANAGED,
+      storage = storageFormat,
+      schema = new StructType().add("col1", "int").add("col2", "string"),
+      provider = Some("hive"))
+    catalog.createTable(hiveTable, ignoreIfExists = false)
+
+    val rawTable = externalCatalog.getTable("db1", "parq_tbl")
+    assert(rawTable.provider === Some("hive"))
+
+    val fooTable = rawTable.copy(provider = Some("foo"))
+    catalog.alterTable(fooTable)
+    val alteredTable = externalCatalog.getTable("db1", "parq_tbl")
+    assert(alteredTable.provider === Some("foo"))
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to