[spark] branch master updated: [SPARK-43211][HIVE] Remove Hadoop2 support in IsolatedClientLoader

sunchao Thu, 20 Apr 2023 13:10:37 -0700

This is an automated email from the ASF dual-hosted git repository.

sunchao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/master by this push:
     new 20e07bf51a9 [SPARK-43211][HIVE] Remove Hadoop2 support in IsolatedClientLoader
20e07bf51a9 is described below

commit 20e07bf51a9b797be76e7921297ac0d4319a4be8
Author: Cheng Pan <cheng...@apache.org>
AuthorDate: Thu Apr 20 13:10:15 2023 -0700

    [SPARK-43211][HIVE] Remove Hadoop2 support in IsolatedClientLoader
    
    ### What changes were proposed in this pull request?
    
    Remove Hadoop2 support in `IsolatedClientLoader`.
    
    ### Why are the changes needed?
    
    Clean up Hadoop2 related code since SPARK-42452 removed support for Hadoop2.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Updated test cases introduced in SPARK-32256, pass GA.
    
    Closes #40870 from pan3793/SPARK-43211.
    
    Authored-by: Cheng Pan <cheng...@apache.org>
    Signed-off-by: Chao Sun <sunc...@apple.com>
---
 .../scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala    | 3 ++-
 .../org/apache/spark/sql/hive/client/IsolatedClientLoader.scala  | 8 +-------
 .../apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala    | 9 ++++-----
 3 files changed, 7 insertions(+), 13 deletions(-)

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
index 90e8f9b9d0e..5cd3b9c3abf 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
@@ -62,7 +62,8 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, 
hadoopConf: Configurat
   import CatalogTableType._
 
   // SPARK-32256: Make sure `VersionInfo` is initialized before touching the 
isolated classloader.
-  // This is to ensure Hive can get the Hadoop version when using the isolated 
classloader.
+  // This is a workaround for HADOOP-14067, to ensure Hive can get the Hadoop 
version when using
+  // the isolated classloader.
   org.apache.hadoop.util.VersionInfo.getVersion()
 
   /**
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
index 7a122f22e3d..64718a9d35c 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
@@ -55,8 +55,6 @@ private[hive] object IsolatedClientLoader extends Logging {
       sharedPrefixes: Seq[String] = Seq.empty,
       barrierPrefixes: Seq[String] = Seq.empty): IsolatedClientLoader = 
synchronized {
     val resolvedVersion = hiveVersion(hiveMetastoreVersion)
-    // We will use Hadoop 3.x if we can't resolve the hadoop artifact
-    // when builtin hadoop is Hadoop 3. Otherwise we will use Hadoop 2.x.
     val files = if (resolvedVersions.contains((resolvedVersion, 
hadoopVersion))) {
       resolvedVersions((resolvedVersion, hadoopVersion))
     } else {
@@ -68,11 +66,7 @@ private[hive] object IsolatedClientLoader extends Logging {
           case e: RuntimeException if e.getMessage.contains("hadoop") =>
             // If the error message contains hadoop, it is probably because 
the hadoop
             // version cannot be resolved.
-            val fallbackVersion = if (VersionUtils.isHadoop3) {
-              "3.3.5"
-            } else {
-              "2.7.4"
-            }
+            val fallbackVersion = "3.3.5"
             logWarning(s"Failed to resolve Hadoop artifacts for the version 
$hadoopVersion. We " +
               s"will change the hadoop version from $hadoopVersion to 
$fallbackVersion and try " +
               "again. It is recommended to set jars used by Hive metastore 
client through " +
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala
index 6ada46412bf..2a921c3fd85 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala
@@ -46,7 +46,8 @@ class HadoopVersionInfoSuite extends SparkFunSuite {
       // Download jars for Hive 2.0
       val client = IsolatedClientLoader.forVersion(
         hiveMetastoreVersion = "2.0",
-        hadoopVersion = "2.7.4",
+        // 3.0.x is chosen because that HADOOP-14067 got fixed in 3.1.0
+        hadoopVersion = "3.0.3",
         sparkConf = new SparkConf(),
         hadoopConf = hadoopConf,
         config = HiveClientBuilder.buildConf(Map.empty),
@@ -81,10 +82,8 @@ class HadoopVersionInfoSuite extends SparkFunSuite {
     }
   }
 
-  test("SPARK-32212: built-in Hadoop version should support shaded client if 
it is not hadoop 2") {
+  test("SPARK-32212: built-in Hadoop version should support shaded client") {
     val hadoopVersion = VersionInfo.getVersion
-    if (!hadoopVersion.startsWith("2")) {
-      assert(IsolatedClientLoader.supportsHadoopShadedClient(hadoopVersion))
-    }
+    assert(IsolatedClientLoader.supportsHadoopShadedClient(hadoopVersion))
   }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

[spark] branch master updated: [SPARK-43211][HIVE] Remove Hadoop2 support in IsolatedClientLoader

Reply via email to