This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new c65f9b2 [SPARK-26839][SQL] Work around classloader changes in Java 9 for Hive isolation c65f9b2 is described below commit c65f9b2bc35c2926bb3658f65fe4f8a0b8e9fe4a Author: Sean Owen <sean.o...@databricks.com> AuthorDate: Wed Mar 20 09:12:52 2019 -0500 [SPARK-26839][SQL] Work around classloader changes in Java 9 for Hive isolation Note, this doesn't really resolve the JIRA, but makes the changes we can make so far that would be required to solve it. ## What changes were proposed in this pull request? Java 9+ changed how ClassLoaders work. The two most salient points: - The boot classloader no longer 'sees' the platform classes. A new 'platform classloader' does and should be the parent of new ClassLoaders - The system classloader is no longer a URLClassLoader, so we can't get the URLs of JARs in its classpath ## How was this patch tested? We'll see whether Java 8 tests still pass here. Java 11 tests do not fully pass at this point; more notes below. This does make progress on the failures though. (NB: to test with Java 11, you need to build with Java 8 first, setting JAVA_HOME and java's executable correctly, then switch both to Java 11 for testing.) Closes #24057 from srowen/SPARK-26839. Authored-by: Sean Owen <sean.o...@databricks.com> Signed-off-by: Sean Owen <sean.o...@databricks.com> --- .../org/apache/spark/sql/hive/HiveUtils.scala | 20 +++--- .../sql/hive/client/IsolatedClientLoader.scala | 77 +++++++++++++--------- 2 files changed, 59 insertions(+), 38 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala index 38bbe64..a7f40c6 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala @@ -19,8 +19,6 @@ package org.apache.spark.sql.hive import java.io.File import java.net.{URL, URLClassLoader} -import java.nio.charset.StandardCharsets -import java.sql.Timestamp import java.util.Locale import java.util.concurrent.TimeUnit @@ -28,12 +26,11 @@ import scala.collection.JavaConverters._ import scala.collection.mutable.HashMap import scala.language.implicitConversions +import org.apache.commons.lang3.{JavaVersion, SystemUtils} import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.hive.common.`type`.HiveDecimal import org.apache.hadoop.hive.conf.HiveConf import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.apache.hadoop.hive.ql.session.SessionState -import org.apache.hadoop.hive.serde2.io.{DateWritable, TimestampWritable} import org.apache.hadoop.util.VersionInfo import org.apache.spark.{SparkConf, SparkContext} @@ -329,10 +326,17 @@ private[spark] object HiveUtils extends Logging { val classLoader = Utils.getContextOrSparkClassLoader val jars = allJars(classLoader) - if (jars.length == 0) { - throw new IllegalArgumentException( - "Unable to locate hive jars to connect to metastore. " + - s"Please set ${HIVE_METASTORE_JARS.key}.") + if (SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_9)) { + // Do nothing. The system classloader is no longer a URLClassLoader in Java 9, + // so it won't match the case in allJars above. It no longer exposes URLs of + // the system classpath + } else { + // Verify at least one jar was found + if (jars.length == 0) { + throw new IllegalArgumentException( + "Unable to locate hive jars to connect to metastore. " + + s"Please set ${HIVE_METASTORE_JARS.key}.") + } } logInfo( diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala index efa97b2..98999eb 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala @@ -25,6 +25,7 @@ import java.util import scala.util.Try import org.apache.commons.io.{FileUtils, IOUtils} +import org.apache.commons.lang3.{JavaVersion, SystemUtils} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.hive.conf.HiveConf.ConfVars @@ -157,7 +158,6 @@ private[hive] object IsolatedClientLoader extends Logging { * @param isolationOn When true, custom versions of barrier classes will be constructed. Must be * true unless loading the version of hive that is on Sparks classloader. * @param sharesHadoopClasses When true, we will share Hadoop classes between Spark and - * @param rootClassLoader The system root classloader. Must not know about Hive classes. * @param baseClassLoader The spark classloader that is used to load shared classes. */ private[hive] class IsolatedClientLoader( @@ -168,15 +168,11 @@ private[hive] class IsolatedClientLoader( val config: Map[String, String] = Map.empty, val isolationOn: Boolean = true, val sharesHadoopClasses: Boolean = true, - val rootClassLoader: ClassLoader = ClassLoader.getSystemClassLoader.getParent.getParent, val baseClassLoader: ClassLoader = Thread.currentThread().getContextClassLoader, val sharedPrefixes: Seq[String] = Seq.empty, val barrierPrefixes: Seq[String] = Seq.empty) extends Logging { - // Check to make sure that the root classloader does not know about Hive. - assert(Try(rootClassLoader.loadClass("org.apache.hadoop.hive.conf.HiveConf")).isFailure) - /** All jars used by the hive specific classloader. */ protected def allJars = execJars.toArray @@ -191,8 +187,8 @@ private[hive] class IsolatedClientLoader( (sharesHadoopClasses && isHadoopClass) || name.startsWith("scala.") || (name.startsWith("com.google") && !name.startsWith("com.google.cloud")) || - name.startsWith("java.lang.") || - name.startsWith("java.net") || + name.startsWith("java.") || + name.startsWith("javax.sql.") || sharedPrefixes.exists(name.startsWith) } @@ -214,30 +210,51 @@ private[hive] class IsolatedClientLoader( private[hive] val classLoader: MutableURLClassLoader = { val isolatedClassLoader = if (isolationOn) { - new URLClassLoader(allJars, rootClassLoader) { - override def loadClass(name: String, resolve: Boolean): Class[_] = { - val loaded = findLoadedClass(name) - if (loaded == null) doLoadClass(name, resolve) else loaded - } - def doLoadClass(name: String, resolve: Boolean): Class[_] = { - val classFileName = name.replaceAll("\\.", "/") + ".class" - if (isBarrierClass(name)) { - // For barrier classes, we construct a new copy of the class. - val bytes = IOUtils.toByteArray(baseClassLoader.getResourceAsStream(classFileName)) - logDebug(s"custom defining: $name - ${util.Arrays.hashCode(bytes)}") - defineClass(name, bytes, 0, bytes.length) - } else if (!isSharedClass(name)) { - logDebug(s"hive class: $name - ${getResource(classToPath(name))}") - super.loadClass(name, resolve) + if (allJars.isEmpty) { + // See HiveUtils; this is the Java 9+ + builtin mode scenario + baseClassLoader + } else { + val rootClassLoader: ClassLoader = + if (SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_9)) { + // In Java 9, the boot classloader can see few JDK classes. The intended parent + // classloader for delegation is now the platform classloader. + // See http://java9.wtf/class-loading/ + val platformCL = + classOf[ClassLoader].getMethod("getPlatformClassLoader"). + invoke(null).asInstanceOf[ClassLoader] + // Check to make sure that the root classloader does not know about Hive. + assert(Try(platformCL.loadClass("org.apache.hadoop.hive.conf.HiveConf")).isFailure) + platformCL } else { - // For shared classes, we delegate to baseClassLoader, but fall back in case the - // class is not found. - logDebug(s"shared class: $name") - try { - baseClassLoader.loadClass(name) - } catch { - case _: ClassNotFoundException => - super.loadClass(name, resolve) + // The boot classloader is represented by null (the instance itself isn't accessible) + // and before Java 9 can see all JDK classes + null + } + new URLClassLoader(allJars, rootClassLoader) { + override def loadClass(name: String, resolve: Boolean): Class[_] = { + val loaded = findLoadedClass(name) + if (loaded == null) doLoadClass(name, resolve) else loaded + } + def doLoadClass(name: String, resolve: Boolean): Class[_] = { + val classFileName = name.replaceAll("\\.", "/") + ".class" + if (isBarrierClass(name)) { + // For barrier classes, we construct a new copy of the class. + val bytes = IOUtils.toByteArray(baseClassLoader.getResourceAsStream(classFileName)) + logDebug(s"custom defining: $name - ${util.Arrays.hashCode(bytes)}") + defineClass(name, bytes, 0, bytes.length) + } else if (!isSharedClass(name)) { + logDebug(s"hive class: $name - ${getResource(classToPath(name))}") + super.loadClass(name, resolve) + } else { + // For shared classes, we delegate to baseClassLoader, but fall back in case the + // class is not found. + logDebug(s"shared class: $name") + try { + baseClassLoader.loadClass(name) + } catch { + case _: ClassNotFoundException => + super.loadClass(name, resolve) + } } } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org