This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new 35a8e1e [SPARK-31061][SQL] Provide ability to alter the provider of a table 35a8e1e is described below commit 35a8e1ed7bb2f9493742d7401e32eab4621fd2d0 Author: Burak Yavuz <brk...@gmail.com> AuthorDate: Fri Mar 6 15:40:44 2020 +0800 [SPARK-31061][SQL] Provide ability to alter the provider of a table This PR adds functionality to HiveExternalCatalog to be able to change the provider of a table. This is useful for catalogs in Spark 3.0 to be able to use alterTable to change the provider of a table as part of an atomic REPLACE TABLE function. No user-facing change. Tested with unit tests. Closes #27822 from brkyvz/externalCat. Authored-by: Burak Yavuz <brk...@gmail.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> (cherry picked from commit 2e3adadc6a53fe044b286a6a59529a94e7eeda6c) Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../spark/sql/hive/HiveExternalCatalog.scala | 10 +++++- .../spark/sql/hive/HiveExternalCatalogSuite.scala | 40 ++++++++++++++++++++++ 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index ca292f6..be6d824 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -634,7 +634,15 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat k.startsWith(DATASOURCE_PREFIX) || k.startsWith(STATISTICS_PREFIX) || k.startsWith(CREATED_SPARK_VERSION) } - val newTableProps = propsFromOldTable ++ tableDefinition.properties + partitionProviderProp + val newFormatIfExists = tableDefinition.provider.flatMap { p => + if (DDLUtils.isDatasourceTable(tableDefinition)) { + Some(DATASOURCE_PROVIDER -> p) + } else { + None + } + } + val newTableProps = + propsFromOldTable ++ tableDefinition.properties + 
partitionProviderProp ++ newFormatIfExists // // Add old table's owner if we need to restore val owner = Option(tableDefinition.owner).filter(_.nonEmpty).getOrElse(oldTableDef.owner) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala index 0a88898..473a93b 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.hive +import java.net.URI + import org.apache.hadoop.conf.Configuration import org.apache.spark.SparkConf @@ -178,4 +180,42 @@ class HiveExternalCatalogSuite extends ExternalCatalogSuite { assertThrows[QueryExecutionException](client.runSqlHive( "INSERT overwrite directory \"fs://localhost/tmp\" select 1 as a")) } + + test("SPARK-31061: alterTable should be able to change table provider") { + val catalog = newBasicCatalog() + val parquetTable = CatalogTable( + identifier = TableIdentifier("parq_tbl", Some("db1")), + tableType = CatalogTableType.MANAGED, + storage = storageFormat.copy(locationUri = Some(new URI("file:/some/path"))), + schema = new StructType().add("col1", "int").add("col2", "string"), + provider = Some("parquet")) + catalog.createTable(parquetTable, ignoreIfExists = false) + + val rawTable = externalCatalog.getTable("db1", "parq_tbl") + assert(rawTable.provider === Some("parquet")) + + val fooTable = parquetTable.copy(provider = Some("foo")) + catalog.alterTable(fooTable) + val alteredTable = externalCatalog.getTable("db1", "parq_tbl") + assert(alteredTable.provider === Some("foo")) + } + + test("SPARK-31061: alterTable should be able to change table provider from hive") { + val catalog = newBasicCatalog() + val hiveTable = CatalogTable( + identifier = TableIdentifier("parq_tbl", Some("db1")), + tableType = CatalogTableType.MANAGED, + storage = 
storageFormat, + schema = new StructType().add("col1", "int").add("col2", "string"), + provider = Some("hive")) + catalog.createTable(hiveTable, ignoreIfExists = false) + + val rawTable = externalCatalog.getTable("db1", "parq_tbl") + assert(rawTable.provider === Some("hive")) + + val fooTable = rawTable.copy(provider = Some("foo")) + catalog.alterTable(fooTable) + val alteredTable = externalCatalog.getTable("db1", "parq_tbl") + assert(alteredTable.provider === Some("foo")) + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org