This is an automated email from the ASF dual-hosted git repository.

srowen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new c65f9b2  [SPARK-26839][SQL] Work around classloader changes in Java 9 for Hive isolation
c65f9b2 is described below

commit c65f9b2bc35c2926bb3658f65fe4f8a0b8e9fe4a
Author: Sean Owen <sean.o...@databricks.com>
AuthorDate: Wed Mar 20 09:12:52 2019 -0500

    [SPARK-26839][SQL] Work around classloader changes in Java 9 for Hive isolation
    
    Note: this doesn't fully resolve the JIRA, but it makes the changes we can make so far that would be required to resolve it.
    
    ## What changes were proposed in this pull request?
    
    Java 9+ changed how ClassLoaders work. The two most salient points:
    - The boot classloader no longer 'sees' the platform classes. A new 'platform classloader' does, and it should be the parent of new ClassLoaders.
    - The system classloader is no longer a URLClassLoader, so we can't get the URLs of JARs in its classpath.
    
    ## How was this patch tested?
    
    We'll see whether Java 8 tests still pass here. Java 11 tests do not fully pass at this point; more notes below. This does make progress on the failures, though.
    
    (NB: to test with Java 11, you need to build with Java 8 first, setting JAVA_HOME and java's executable correctly, then switch both to Java 11 for testing.)
    
    Closes #24057 from srowen/SPARK-26839.
    
    Authored-by: Sean Owen <sean.o...@databricks.com>
    Signed-off-by: Sean Owen <sean.o...@databricks.com>
---
 .../org/apache/spark/sql/hive/HiveUtils.scala      | 20 +++---
 .../sql/hive/client/IsolatedClientLoader.scala     | 77 +++++++++++++---------
 2 files changed, 59 insertions(+), 38 deletions(-)

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala
index 38bbe64..a7f40c6 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala
@@ -19,8 +19,6 @@ package org.apache.spark.sql.hive
 
 import java.io.File
 import java.net.{URL, URLClassLoader}
-import java.nio.charset.StandardCharsets
-import java.sql.Timestamp
 import java.util.Locale
 import java.util.concurrent.TimeUnit
 
@@ -28,12 +26,11 @@ import scala.collection.JavaConverters._
 import scala.collection.mutable.HashMap
 import scala.language.implicitConversions
 
+import org.apache.commons.lang3.{JavaVersion, SystemUtils}
 import org.apache.hadoop.conf.Configuration
-import org.apache.hadoop.hive.common.`type`.HiveDecimal
 import org.apache.hadoop.hive.conf.HiveConf
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars
 import org.apache.hadoop.hive.ql.session.SessionState
-import org.apache.hadoop.hive.serde2.io.{DateWritable, TimestampWritable}
 import org.apache.hadoop.util.VersionInfo
 
 import org.apache.spark.{SparkConf, SparkContext}
@@ -329,10 +326,17 @@ private[spark] object HiveUtils extends Logging {
 
       val classLoader = Utils.getContextOrSparkClassLoader
       val jars = allJars(classLoader)
-      if (jars.length == 0) {
-        throw new IllegalArgumentException(
-          "Unable to locate hive jars to connect to metastore. " +
-            s"Please set ${HIVE_METASTORE_JARS.key}.")
+      if (SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_9)) {
+        // Do nothing. The system classloader is no longer a URLClassLoader in 
Java 9,
+        // so it won't match the case in allJars above. It no longer exposes 
URLs of
+        // the system classpath
+      } else {
+        // Verify at least one jar was found
+        if (jars.length == 0) {
+          throw new IllegalArgumentException(
+            "Unable to locate hive jars to connect to metastore. " +
+              s"Please set ${HIVE_METASTORE_JARS.key}.")
+        }
       }
 
       logInfo(
diff --git 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
index efa97b2..98999eb 100644
--- 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
+++ 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
@@ -25,6 +25,7 @@ import java.util
 import scala.util.Try
 
 import org.apache.commons.io.{FileUtils, IOUtils}
+import org.apache.commons.lang3.{JavaVersion, SystemUtils}
 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars
 
@@ -157,7 +158,6 @@ private[hive] object IsolatedClientLoader extends Logging {
  * @param isolationOn When true, custom versions of barrier classes will be 
constructed.  Must be
  *                    true unless loading the version of hive that is on 
Sparks classloader.
  * @param sharesHadoopClasses When true, we will share Hadoop classes between 
Spark and
- * @param rootClassLoader The system root classloader. Must not know about 
Hive classes.
  * @param baseClassLoader The spark classloader that is used to load shared 
classes.
  */
 private[hive] class IsolatedClientLoader(
@@ -168,15 +168,11 @@ private[hive] class IsolatedClientLoader(
     val config: Map[String, String] = Map.empty,
     val isolationOn: Boolean = true,
     val sharesHadoopClasses: Boolean = true,
-    val rootClassLoader: ClassLoader = 
ClassLoader.getSystemClassLoader.getParent.getParent,
     val baseClassLoader: ClassLoader = 
Thread.currentThread().getContextClassLoader,
     val sharedPrefixes: Seq[String] = Seq.empty,
     val barrierPrefixes: Seq[String] = Seq.empty)
   extends Logging {
 
-  // Check to make sure that the root classloader does not know about Hive.
-  
assert(Try(rootClassLoader.loadClass("org.apache.hadoop.hive.conf.HiveConf")).isFailure)
-
   /** All jars used by the hive specific classloader. */
   protected def allJars = execJars.toArray
 
@@ -191,8 +187,8 @@ private[hive] class IsolatedClientLoader(
     (sharesHadoopClasses && isHadoopClass) ||
     name.startsWith("scala.") ||
     (name.startsWith("com.google") && !name.startsWith("com.google.cloud")) ||
-    name.startsWith("java.lang.") ||
-    name.startsWith("java.net") ||
+    name.startsWith("java.") ||
+    name.startsWith("javax.sql.") ||
     sharedPrefixes.exists(name.startsWith)
   }
 
@@ -214,30 +210,51 @@ private[hive] class IsolatedClientLoader(
   private[hive] val classLoader: MutableURLClassLoader = {
     val isolatedClassLoader =
       if (isolationOn) {
-        new URLClassLoader(allJars, rootClassLoader) {
-          override def loadClass(name: String, resolve: Boolean): Class[_] = {
-            val loaded = findLoadedClass(name)
-            if (loaded == null) doLoadClass(name, resolve) else loaded
-          }
-          def doLoadClass(name: String, resolve: Boolean): Class[_] = {
-            val classFileName = name.replaceAll("\\.", "/") + ".class"
-            if (isBarrierClass(name)) {
-              // For barrier classes, we construct a new copy of the class.
-              val bytes = 
IOUtils.toByteArray(baseClassLoader.getResourceAsStream(classFileName))
-              logDebug(s"custom defining: $name - 
${util.Arrays.hashCode(bytes)}")
-              defineClass(name, bytes, 0, bytes.length)
-            } else if (!isSharedClass(name)) {
-              logDebug(s"hive class: $name - 
${getResource(classToPath(name))}")
-              super.loadClass(name, resolve)
+        if (allJars.isEmpty) {
+          // See HiveUtils; this is the Java 9+ + builtin mode scenario
+          baseClassLoader
+        } else {
+          val rootClassLoader: ClassLoader =
+            if (SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_9)) {
+              // In Java 9, the boot classloader can see few JDK classes. The 
intended parent
+              // classloader for delegation is now the platform classloader.
+              // See http://java9.wtf/class-loading/
+              val platformCL =
+              classOf[ClassLoader].getMethod("getPlatformClassLoader").
+                invoke(null).asInstanceOf[ClassLoader]
+              // Check to make sure that the root classloader does not know 
about Hive.
+              
assert(Try(platformCL.loadClass("org.apache.hadoop.hive.conf.HiveConf")).isFailure)
+              platformCL
             } else {
-              // For shared classes, we delegate to baseClassLoader, but fall 
back in case the
-              // class is not found.
-              logDebug(s"shared class: $name")
-              try {
-                baseClassLoader.loadClass(name)
-              } catch {
-                case _: ClassNotFoundException =>
-                  super.loadClass(name, resolve)
+              // The boot classloader is represented by null (the instance 
itself isn't accessible)
+              // and before Java 9 can see all JDK classes
+              null
+            }
+          new URLClassLoader(allJars, rootClassLoader) {
+            override def loadClass(name: String, resolve: Boolean): Class[_] = 
{
+              val loaded = findLoadedClass(name)
+              if (loaded == null) doLoadClass(name, resolve) else loaded
+            }
+            def doLoadClass(name: String, resolve: Boolean): Class[_] = {
+              val classFileName = name.replaceAll("\\.", "/") + ".class"
+              if (isBarrierClass(name)) {
+                // For barrier classes, we construct a new copy of the class.
+                val bytes = 
IOUtils.toByteArray(baseClassLoader.getResourceAsStream(classFileName))
+                logDebug(s"custom defining: $name - 
${util.Arrays.hashCode(bytes)}")
+                defineClass(name, bytes, 0, bytes.length)
+              } else if (!isSharedClass(name)) {
+                logDebug(s"hive class: $name - 
${getResource(classToPath(name))}")
+                super.loadClass(name, resolve)
+              } else {
+                // For shared classes, we delegate to baseClassLoader, but 
fall back in case the
+                // class is not found.
+                logDebug(s"shared class: $name")
+                try {
+                  baseClassLoader.loadClass(name)
+                } catch {
+                  case _: ClassNotFoundException =>
+                    super.loadClass(name, resolve)
+                }
               }
             }
           }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to