This is an automated email from the ASF dual-hosted git repository. sunchao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 20e07bf51a9 [SPARK-43211][HIVE] Remove Hadoop2 support in IsolatedClientLoader 20e07bf51a9 is described below commit 20e07bf51a9b797be76e7921297ac0d4319a4be8 Author: Cheng Pan <cheng...@apache.org> AuthorDate: Thu Apr 20 13:10:15 2023 -0700 [SPARK-43211][HIVE] Remove Hadoop2 support in IsolatedClientLoader ### What changes were proposed in this pull request? Remove Hadoop2 support in `IsolatedClientLoader`. ### Why are the changes needed? Clean up Hadoop2 related code since SPARK-42452 removed support for Hadoop2. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Updated test cases introduced in SPARK-32256, pass GA. Closes #40870 from pan3793/SPARK-43211. Authored-by: Cheng Pan <cheng...@apache.org> Signed-off-by: Chao Sun <sunc...@apple.com> --- .../scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala | 3 ++- .../org/apache/spark/sql/hive/client/IsolatedClientLoader.scala | 8 +------- .../apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala | 9 ++++----- 3 files changed, 7 insertions(+), 13 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index 90e8f9b9d0e..5cd3b9c3abf 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -62,7 +62,8 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat import CatalogTableType._ // SPARK-32256: Make sure `VersionInfo` is initialized before touching the isolated classloader. - // This is to ensure Hive can get the Hadoop version when using the isolated classloader. + // This is a workaround for HADOOP-14067, to ensure Hive can get the Hadoop version when using + // the isolated classloader. 
org.apache.hadoop.util.VersionInfo.getVersion() /** diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala index 7a122f22e3d..64718a9d35c 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala @@ -55,8 +55,6 @@ private[hive] object IsolatedClientLoader extends Logging { sharedPrefixes: Seq[String] = Seq.empty, barrierPrefixes: Seq[String] = Seq.empty): IsolatedClientLoader = synchronized { val resolvedVersion = hiveVersion(hiveMetastoreVersion) - // We will use Hadoop 3.x if we can't resolve the hadoop artifact - // when builtin hadoop is Hadoop 3. Otherwise we will use Hadoop 2.x. val files = if (resolvedVersions.contains((resolvedVersion, hadoopVersion))) { resolvedVersions((resolvedVersion, hadoopVersion)) } else { @@ -68,11 +66,7 @@ private[hive] object IsolatedClientLoader extends Logging { case e: RuntimeException if e.getMessage.contains("hadoop") => // If the error message contains hadoop, it is probably because the hadoop // version cannot be resolved. - val fallbackVersion = if (VersionUtils.isHadoop3) { - "3.3.5" - } else { - "2.7.4" - } + val fallbackVersion = "3.3.5" logWarning(s"Failed to resolve Hadoop artifacts for the version $hadoopVersion. We " + s"will change the hadoop version from $hadoopVersion to $fallbackVersion and try " + "again. 
It is recommended to set jars used by Hive metastore client through " + diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala index 6ada46412bf..2a921c3fd85 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala @@ -46,7 +46,8 @@ class HadoopVersionInfoSuite extends SparkFunSuite { // Download jars for Hive 2.0 val client = IsolatedClientLoader.forVersion( hiveMetastoreVersion = "2.0", - hadoopVersion = "2.7.4", + // 3.0.x is chosen because HADOOP-14067 was fixed in 3.1.0 + hadoopVersion = "3.0.3", sparkConf = new SparkConf(), hadoopConf = hadoopConf, config = HiveClientBuilder.buildConf(Map.empty), @@ -81,10 +82,8 @@ class HadoopVersionInfoSuite extends SparkFunSuite { } } - test("SPARK-32212: built-in Hadoop version should support shaded client if it is not hadoop 2") { + test("SPARK-32212: built-in Hadoop version should support shaded client") { val hadoopVersion = VersionInfo.getVersion - if (!hadoopVersion.startsWith("2")) { - assert(IsolatedClientLoader.supportsHadoopShadedClient(hadoopVersion)) - } + assert(IsolatedClientLoader.supportsHadoopShadedClient(hadoopVersion)) } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org