This is an automated email from the ASF dual-hosted git repository. lixiao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 2926890 [SPARK-27970][SQL] Support Hive 3.0 metastore 2926890 is described below commit 2926890ffbcf3a92c7e0863c69e31c3d22191112 Author: Yuming Wang <yumw...@ebay.com> AuthorDate: Fri Jun 7 15:24:07 2019 -0700 [SPARK-27970][SQL] Support Hive 3.0 metastore ## What changes were proposed in this pull request? It seems that some users are using Hive 3.0.0. This pr makes it support Hive 3.0 metastore. ## How was this patch tested? unit tests Closes #24688 from wangyum/SPARK-26145. Authored-by: Yuming Wang <yumw...@ebay.com> Signed-off-by: gatorsmile <gatorsm...@gmail.com> --- docs/sql-data-sources-hive-tables.md | 2 +- docs/sql-migration-guide-hive-compatibility.md | 2 +- .../main/scala/org/apache/spark/sql/hive/HiveUtils.scala | 2 +- .../org/apache/spark/sql/hive/client/HiveClientImpl.scala | 3 ++- .../scala/org/apache/spark/sql/hive/client/HiveShim.scala | 4 +++- .../spark/sql/hive/client/IsolatedClientLoader.scala | 1 + .../scala/org/apache/spark/sql/hive/client/package.scala | 13 ++++++++++++- .../apache/spark/sql/hive/execution/SaveAsHiveFile.scala | 2 +- .../apache/spark/sql/hive/client/HiveClientVersions.scala | 3 ++- .../apache/spark/sql/hive/client/HiveVersionSuite.scala | 4 ++-- .../org/apache/spark/sql/hive/client/VersionsSuite.scala | 15 +++++++-------- 11 files changed, 33 insertions(+), 18 deletions(-) diff --git a/docs/sql-data-sources-hive-tables.md b/docs/sql-data-sources-hive-tables.md index 3d58e94..5688011 100644 --- a/docs/sql-data-sources-hive-tables.md +++ b/docs/sql-data-sources-hive-tables.md @@ -130,7 +130,7 @@ The following options can be used to configure the version of Hive that is used <td><code>1.2.1</code></td> <td> Version of the Hive metastore. Available - options are <code>0.12.0</code> through <code>2.3.5</code> and <code>3.1.0</code> through <code>3.1.1</code>. + options are <code>0.12.0</code> through <code>2.3.5</code> and <code>3.0.0</code> through <code>3.1.1</code>. </td> </tr> <tr> diff --git a/docs/sql-migration-guide-hive-compatibility.md b/docs/sql-migration-guide-hive-compatibility.md index 4a8076d..f955e31 100644 --- a/docs/sql-migration-guide-hive-compatibility.md +++ b/docs/sql-migration-guide-hive-compatibility.md @@ -25,7 +25,7 @@ license: | Spark SQL is designed to be compatible with the Hive Metastore, SerDes and UDFs. Currently, Hive SerDes and UDFs are based on Hive 1.2.1, and Spark SQL can be connected to different versions of Hive Metastore -(from 0.12.0 to 2.3.5 and 3.1.0 to 3.1.1. Also see [Interacting with Different Versions of Hive Metastore](sql-data-sources-hive-tables.html#interacting-with-different-versions-of-hive-metastore)). +(from 0.12.0 to 2.3.5 and 3.0.0 to 3.1.1. Also see [Interacting with Different Versions of Hive Metastore](sql-data-sources-hive-tables.html#interacting-with-different-versions-of-hive-metastore)). #### Deploying in Existing Hive Warehouses diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala index 38ad061..c3ae3d5 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala @@ -64,7 +64,7 @@ private[spark] object HiveUtils extends Logging { val HIVE_METASTORE_VERSION = buildConf("spark.sql.hive.metastore.version") .doc("Version of the Hive metastore. Available options are " + "<code>0.12.0</code> through <code>2.3.5</code> and " + - "<code>3.1.0</code> through <code>3.1.1</code>.") + "<code>3.0.0</code> through <code>3.1.1</code>.") .stringConf .createWithDefault(builtinHiveVersion) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index b8d5f21..2b80165 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -107,6 +107,7 @@ private[hive] class HiveClientImpl( case hive.v2_1 => new Shim_v2_1() case hive.v2_2 => new Shim_v2_2() case hive.v2_3 => new Shim_v2_3() + case hive.v3_0 => new Shim_v3_0() case hive.v3_1 => new Shim_v3_1() } @@ -744,7 +745,7 @@ private[hive] class HiveClientImpl( // Since HIVE-18238(Hive 3.0.0), the Driver.close function's return type changed // and the CommandProcessorFactory.clean function removed. driver.getClass.getMethod("close").invoke(driver) - if (version != hive.v3_1) { + if (version != hive.v3_0 && version != hive.v3_1) { CommandProcessorFactory.clean(conf) } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala index 18f8c53..203bd2b 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala @@ -1181,7 +1181,7 @@ private[client] class Shim_v2_2 extends Shim_v2_1 private[client] class Shim_v2_3 extends Shim_v2_1 -private[client] class Shim_v3_1 extends Shim_v2_3 { +private[client] class Shim_v3_0 extends Shim_v2_3 { // Spark supports only non-ACID operations protected lazy val isAcidIUDoperation = JBoolean.FALSE @@ -1305,3 +1305,5 @@ private[client] class Shim_v3_1 extends Shim_v2_3 { replace: JBoolean) } } + +private[client] class Shim_v3_1 extends Shim_v3_0 diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala index f5268ad..3217880 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala @@ -102,6 +102,7 @@ private[hive] object IsolatedClientLoader extends Logging { case "2.1" | "2.1.0" | "2.1.1" => hive.v2_1 case "2.2" | "2.2.0" => hive.v2_2 case "2.3" | "2.3.0" | "2.3.1" | "2.3.2" | "2.3.3" | "2.3.4" | "2.3.5" => hive.v2_3 + case "3.0" | "3.0.0" => hive.v3_0 case "3.1" | "3.1.0" | "3.1.1" => hive.v3_1 case version => throw new UnsupportedOperationException(s"Unsupported Hive Metastore version ($version). " + diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala index e9c13e8..31a060f 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala @@ -97,6 +97,16 @@ package object client { // Since Hive 3.0, HookUtils uses org.apache.logging.log4j.util.Strings // Since HIVE-14496, Hive.java uses calcite-core + case object v3_0 extends HiveVersion("3.0.0", + extraDeps = Seq("org.apache.logging.log4j:log4j-api:2.10.0", + "org.apache.derby:derby:10.14.1.0"), + exclusions = Seq("org.apache.calcite:calcite-druid", + "org.apache.calcite.avatica:avatica", + "org.apache.curator:*", + "org.pentaho:pentaho-aggdesigner-algorithm")) + + // Since Hive 3.0, HookUtils uses org.apache.logging.log4j.util.Strings + // Since HIVE-14496, Hive.java uses calcite-core case object v3_1 extends HiveVersion("3.1.1", extraDeps = Seq("org.apache.logging.log4j:log4j-api:2.10.0", "org.apache.derby:derby:10.14.1.0"), @@ -105,7 +115,8 @@ package object client { "org.apache.curator:*", "org.pentaho:pentaho-aggdesigner-algorithm")) - val allSupportedHiveVersions = Set(v12, v13, v14, v1_0, v1_1, v1_2, v2_0, v2_1, v2_2, v2_3, v3_1) + val allSupportedHiveVersions = + Set(v12, v13, v14, v1_0, v1_1, v1_2, v2_0, v2_1, v2_2, v2_3, v3_0, v3_1) } // scalastyle:on diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/SaveAsHiveFile.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/SaveAsHiveFile.scala index 22b1117..4be3cd4 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/SaveAsHiveFile.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/SaveAsHiveFile.scala @@ -114,7 +114,7 @@ private[hive] trait SaveAsHiveFile extends DataWritingCommand { // be removed by Hive when Hive is trying to empty the table directory. val hiveVersionsUsingOldExternalTempPath: Set[HiveVersion] = Set(v12, v13, v14, v1_0) val hiveVersionsUsingNewExternalTempPath: Set[HiveVersion] = - Set(v1_1, v1_2, v2_0, v2_1, v2_2, v2_3, v3_1) + Set(v1_1, v1_2, v2_0, v2_1, v2_2, v2_3, v3_0, v3_1) // Ensure all the supported versions are considered here. assert(hiveVersionsUsingNewExternalTempPath ++ hiveVersionsUsingOldExternalTempPath == diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientVersions.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientVersions.scala index 9b9af79..e9eebb4 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientVersions.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientVersions.scala @@ -23,5 +23,6 @@ import org.apache.spark.SparkFunSuite private[client] trait HiveClientVersions { protected val versions = - IndexedSeq("0.12", "0.13", "0.14", "1.0", "1.1", "1.2", "2.0", "2.1", "2.2", "2.3", "3.1") + IndexedSeq("0.12", "0.13", "0.14", "1.0", "1.1", "1.2", "2.0", "2.1", "2.2", "2.3", "3.0", + "3.1") } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveVersionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveVersionSuite.scala index a45ad1f..dd58c30 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveVersionSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveVersionSuite.scala @@ -35,12 +35,12 @@ private[client] abstract class HiveVersionSuite(version: String) extends SparkFu // hive.metastore.schema.verification from false to true since 2.0 // For details, see the JIRA HIVE-6113 and HIVE-12463 if (version == "2.0" || version == "2.1" || version == "2.2" || version == "2.3" || - version == "3.1") { + version == "3.0" || version == "3.1") { hadoopConf.set("datanucleus.schema.autoCreateAll", "true") hadoopConf.set("hive.metastore.schema.verification", "false") } // Since Hive 3.0, HIVE-19310 skipped `ensureDbInit` if `hive.in.test=false`. - if (version == "3.1") { + if (version == "3.0" || version == "3.1") { hadoopConf.set("hive.in.test", "true") } HiveClientBuilder.buildClient( diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala index 9861a0a..5a624a0 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala @@ -103,7 +103,7 @@ class VersionsSuite extends SparkFunSuite with Logging { } private val versions = - Seq("0.12", "0.13", "0.14", "1.0", "1.1", "1.2", "2.0", "2.1", "2.2", "2.3", "3.1") + Seq("0.12", "0.13", "0.14", "1.0", "1.1", "1.2", "2.0", "2.1", "2.2", "2.3", "3.0", "3.1") private var client: HiveClient = null @@ -119,11 +119,11 @@ class VersionsSuite extends SparkFunSuite with Logging { // hive.metastore.schema.verification from false to true since 2.0 // For details, see the JIRA HIVE-6113 and HIVE-12463 if (version == "2.0" || version == "2.1" || version == "2.2" || version == "2.3" || - version == "3.1") { + version == "3.0" || version == "3.1") { hadoopConf.set("datanucleus.schema.autoCreateAll", "true") hadoopConf.set("hive.metastore.schema.verification", "false") } - if (version == "3.1") { + if (version == "3.0" || version == "3.1") { // Since Hive 3.0, HIVE-19310 skipped `ensureDbInit` if `hive.in.test=false`. hadoopConf.set("hive.in.test", "true") // Since HIVE-17626(Hive 3.0.0), need to set hive.query.reexecution.enabled=false. @@ -577,7 +577,7 @@ class VersionsSuite extends SparkFunSuite with Logging { test(s"$version: sql create index and reset") { // HIVE-18448 Since Hive 3.0, INDEX is not supported. - if (version != "3.1") { + if (version != "3.0" && version != "3.1") { client.runSqlHive("CREATE TABLE indexed_table (key INT)") client.runSqlHive("CREATE INDEX index_1 ON TABLE indexed_table(key) " + "as 'COMPACT' WITH DEFERRED REBUILD") @@ -586,10 +586,9 @@ class VersionsSuite extends SparkFunSuite with Logging { test(s"$version: sql read hive materialized view") { // HIVE-14249 Since Hive 2.3.0, materialized view is supported. - if (version == "2.3" || version == "3.1") { - // Since HIVE-14498(Hive 3.0), Automatic rewriting for materialized view cannot be enabled - // if the materialized view uses non-transactional tables. - val disableRewrite = if (version == "2.3") "" else "DISABLE REWRITE" + if (version == "2.3" || version == "3.0" || version == "3.1") { + // Since HIVE-18394(Hive 3.1), "Create Materialized View" should default to rewritable ones + val disableRewrite = if (version == "2.3" || version == "3.0") "" else "DISABLE REWRITE" client.runSqlHive("CREATE TABLE materialized_view_tbl (c1 INT)") client.runSqlHive( s"CREATE MATERIALIZED VIEW mv1 $disableRewrite AS SELECT * FROM materialized_view_tbl") --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org