This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new c66ec43  [SPARK-27555][SQL] HiveSerDe should fall back to hadoopconf if hive.default.fileformat is not found in SQLConf
c66ec43 is described below

commit c66ec439456c5a160e3849e23c2ce3970d4c6ec7
Author: sandeep katta <sandeep.katta2...@gmail.com>
AuthorDate: Sat May 4 09:02:12 2019 +0900

    [SPARK-27555][SQL] HiveSerDe should fall back to hadoopconf if hive.default.fileformat is not found in SQLConf

    ## What changes were proposed in this pull request?

    SQLConf does not load hive-site.xml, so HiveSerDe should fall back to the Hadoop configuration if hive.default.fileformat is not found in SQLConf.

    ## How was this patch tested?

    Tested manually, and added a UT.

    Closes #24489 from sandeep-katta/spark-27555.

    Authored-by: sandeep katta <sandeep.katta2...@gmail.com>
    Signed-off-by: HyukjinKwon <gurwls...@apache.org>
---
 docs/sql-migration-guide-upgrade.md                |  2 ++
 .../org/apache/spark/sql/internal/HiveSerDe.scala  | 12 +++++++++++-
 .../spark/sql/hive/execution/HiveSerDeSuite.scala  | 22 +++++++++++++++++++++-
 3 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/docs/sql-migration-guide-upgrade.md b/docs/sql-migration-guide-upgrade.md
index 54512ae..5fe7c7c 100644
--- a/docs/sql-migration-guide-upgrade.md
+++ b/docs/sql-migration-guide-upgrade.md
@@ -126,6 +126,8 @@ license: |
 
   - Since Spark 3.0, parquet logical type `TIMESTAMP_MICROS` is used by default while saving `TIMESTAMP` columns. In Spark version 2.4 and earlier, `TIMESTAMP` columns are saved as `INT96` in parquet files. To set `INT96` to `spark.sql.parquet.outputTimestampType` restores the previous behavior.
 
+  - Since Spark 3.0, if `hive.default.fileformat` is not found in the Spark SQL configuration, it falls back to the `hive-site.xml` present in the Hadoop configuration of `SparkContext`.
+
 ## Upgrading from Spark SQL 2.4 to 2.4.1
 
   - The value of `spark.executor.heartbeatInterval`, when specified without units like "30" rather than "30s", was

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala
index bd25a64..4921e3c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.internal
 
 import java.util.Locale
 
+import org.apache.spark.sql.SparkSession
 import org.apache.spark.sql.catalyst.catalog.CatalogStorageFormat
 
 case class HiveSerDe(
@@ -88,7 +89,16 @@ object HiveSerDe {
   }
 
   def getDefaultStorage(conf: SQLConf): CatalogStorageFormat = {
-    val defaultStorageType = conf.getConfString("hive.default.fileformat", "textfile")
+    // To respect hive-site.xml, this peeks at the Hadoop configuration of the
+    // active Spark session as an easy workaround. See SPARK-27555.
+    val defaultFormatKey = "hive.default.fileformat"
+    val defaultValue = {
+      val defaultFormatValue = "textfile"
+      SparkSession.getActiveSession.map { session =>
+        session.sessionState.newHadoopConf().get(defaultFormatKey, defaultFormatValue)
+      }.getOrElse(defaultFormatValue)
+    }
+    val defaultStorageType = conf.getConfString("hive.default.fileformat", defaultValue)
     val defaultHiveSerde = sourceToSerDe(defaultStorageType)
     CatalogStorageFormat.empty.copy(
       inputFormat = defaultHiveSerde.flatMap(_.inputFormat)
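In effect, the lookup order for the default file format is now: `hive.default.fileformat` in SQLConf first, then the same key in the active session's Hadoop configuration (which does load hive-site.xml), and finally the hard-coded "textfile". A minimal sketch of the user-visible behavior, assuming a local session with Hive support on the classpath; the app and table names here are illustrative, not taken from the patch:

    import org.apache.spark.sql.SparkSession

    // Sketch only: requires a Spark build with Hive support.
    val spark = SparkSession.builder()
      .appName("spark-27555-demo")
      .master("local[1]")
      .enableHiveSupport()
      .getOrCreate()

    // Simulate hive-site.xml: the key is present only in the Hadoop
    // configuration, not in SQLConf, so before this patch it was ignored
    // and "textfile" won.
    spark.sparkContext.hadoopConfiguration.set("hive.default.fileformat", "parquetfile")

    // No explicit STORED AS clause, so the default file format applies; with
    // the fallback in place the table should get the Parquet SerDe.
    spark.sql("CREATE TABLE t27555 (id INT)")
    spark.sql("DESC FORMATTED t27555").show(50, truncate = false)

A value set explicitly in SQLConf still wins; the Hadoop configuration is consulted only when the key is absent there, as the test below exercises with the parquetfile/orc pair.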
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala
index d7752e9..ed4304b 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala
@@ -21,13 +21,14 @@ import java.net.URI
 
 import org.scalatest.BeforeAndAfterAll
 
-import org.apache.spark.sql.{AnalysisException, SaveMode}
+import org.apache.spark.sql.{AnalysisException, SaveMode, SparkSession}
 import org.apache.spark.sql.catalyst.catalog.CatalogTable
 import org.apache.spark.sql.catalyst.plans.PlanTest
 import org.apache.spark.sql.execution.command.{CreateTableCommand, DDLUtils}
 import org.apache.spark.sql.execution.datasources.CreateTable
 import org.apache.spark.sql.execution.metric.InputOutputMetricsHelper
 import org.apache.spark.sql.hive.test.TestHive
+import org.apache.spark.sql.internal.{HiveSerDe, SQLConf}
 import org.apache.spark.sql.types.StructType
 
 /**
@@ -210,4 +211,23 @@ class HiveSerDeSuite extends HiveComparisonTest with PlanTest with BeforeAndAfte
     val e8 = intercept[IllegalArgumentException](analyzeCreateTable(v8))
     assert(e8.getMessage.contains("invalid fileFormat: 'wrong'"))
   }
+
+  test("SPARK-27555: fall back to hive-site.xml if hive.default.fileformat " +
+    "is not found in SQLConf") {
+    val testSession = SparkSession.getActiveSession.get
+    try {
+      testSession.sparkContext.hadoopConfiguration.set("hive.default.fileformat", "parquetfile")
+      val sqlConf = new SQLConf()
+      var storageFormat = HiveSerDe.getDefaultStorage(sqlConf)
+      assert(storageFormat.serde.
+        contains("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe"))
+      // should take orc as it is present in sqlConf
+      sqlConf.setConfString("hive.default.fileformat", "orc")
+      storageFormat = HiveSerDe.getDefaultStorage(sqlConf)
+      assert(storageFormat.serde.contains("org.apache.hadoop.hive.ql.io.orc.OrcSerde"))
+    }
+    finally {
+      testSession.sparkContext.hadoopConfiguration.unset("hive.default.fileformat")
+    }
+  }
 }

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org