This is an automated email from the ASF dual-hosted git repository. yao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new fd8d17a136eb [SPARK-46469][SQL] Clean up useless local variables in `InsertIntoHiveTable` fd8d17a136eb is described below commit fd8d17a136eb785d642faba3cc8c198a727c0563 Author: yangjie01 <yangji...@baidu.com> AuthorDate: Thu Dec 21 12:06:10 2023 +0800 [SPARK-46469][SQL] Clean up useless local variables in `InsertIntoHiveTable` ### What changes were proposed in this pull request? This PR aims to clean up the unused local variables `partitionPath`, `hiveVersion`, and `doHiveOverwrite` in `InsertIntoHiveTable`. The code that used these variables has already been cleaned up in SPARK-45309 | https://github.com/apache/spark/pull/43098. ### Why are the changes needed? Code cleanup. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GitHub Actions ### Was this patch authored or co-authored using generative AI tooling? No Closes #44433 from LuciferYang/insert-into-hive-table. Lead-authored-by: yangjie01 <yangji...@baidu.com> Co-authored-by: YangJie <yangji...@baidu.com> Signed-off-by: Kent Yao <y...@apache.org> --- .../sql/hive/execution/InsertIntoHiveTable.scala | 30 +--------------------- 1 file changed, 1 insertion(+), 29 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala index ee4a6a3e1eb9..74d131d6664f 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala @@ -33,7 +33,6 @@ import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.execution.command.CommandUtils import org.apache.spark.sql.execution.datasources.{FileFormat, V1WriteCommand, V1WritesUtils} -import 
org.apache.spark.sql.hive.HiveExternalCatalog import org.apache.spark.sql.hive.client.HiveClientImpl @@ -207,34 +206,7 @@ case class InsertIntoHiveTable( table.database, table.identifier.table, partitionSpec) - - var doHiveOverwrite = overwrite - if (oldPart.isEmpty || !ifPartitionNotExists) { - // SPARK-29295: When insert overwrite to a Hive external table partition, if the - // partition does not exist, Hive will not check if the external partition directory - // exists or not before copying files. So if users drop the partition, and then do - // insert overwrite to the same partition, the partition will have both old and new - // data. We construct partition path. If the path exists, we delete it manually. - val partitionPath = if (oldPart.isEmpty && overwrite - && table.tableType == CatalogTableType.EXTERNAL) { - val partitionColumnNames = table.partitionColumnNames - val tablePath = new Path(table.location) - Some(ExternalCatalogUtils.generatePartitionPath(partitionSpec, - partitionColumnNames, tablePath)) - } else { - oldPart.flatMap(_.storage.locationUri.map(uri => new Path(uri))) - } - - // SPARK-18107: Insert overwrite runs much slower than hive-client. - // Newer Hive largely improves insert overwrite performance. As Spark uses older Hive - // version and we may not want to catch up new Hive version every time. We delete the - // Hive partition first and then load data file into the Hive partition. - val hiveVersion = externalCatalog.asInstanceOf[ExternalCatalogWithListener] - .unwrapped.asInstanceOf[HiveExternalCatalog] - .client - .version - // inheritTableSpecs is set to true. It should be set to false for an IMPORT query // which is currently considered as a Hive native command. 
val inheritTableSpecs = true @@ -243,7 +215,7 @@ case class InsertIntoHiveTable( table.identifier.table, tmpLocation.toString, partitionSpec, - isOverwrite = doHiveOverwrite, + isOverwrite = overwrite, inheritTableSpecs = inheritTableSpecs, isSrcLocal = false) } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org