Github user vanzin commented on a diff in the pull request:

    https://github.com/apache/spark/pull/5876#discussion_r29906552
  
    --- Diff: 
sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala ---
    @@ -93,9 +100,113 @@ class HiveContext(sc: SparkContext) extends 
SQLContext(sc) {
       protected[sql] def convertCTAS: Boolean =
         getConf("spark.sql.hive.convertCTAS", "false").toBoolean
     
    +  /**
    +   * The version of the hive client that will be used to communicate with 
the metastore.  Note that
    +   * this does not necessarily need to be the same version of Hive that is 
used internally by
    +   * Spark SQL for execution.
    +   */
    +  protected[hive] def hiveMetastoreVersion: String =
    +    getConf(HIVE_METASTORE_VERSION, hiveExecutionVersion)
    +
    +  /**
    +   * The location of the jars that should be used to instantiate the 
HiveMetastoreClient.  This
    +   * property can be one of three options:
    +   *  - a classpath in the standard format for both hive and hadoop.
    +   *  - builtin - attempt to discover the jars that were used to load 
Spark SQL and use those. This
    +   *              option is only valid when using the execution version of 
Hive.
    +   *  - maven - download the correct version of hive on demand from maven.
    +   */
    +  protected[hive] def hiveMetastoreJars: String =
    +    getConf(HIVE_METASTORE_JARS, "builtin")
    +
       @transient
       protected[sql] lazy val substitutor = new VariableSubstitution()
     
    +  /**
    +   * The copy of the hive client that is used for execution.  Currently 
this must always be
    +   * Hive 13 as this is the version of Hive that is packaged with Spark 
SQL.  This copy of the
    +   * client is used for execution related tasks like registering temporary 
functions or ensuring
    +   * that the ThreadLocal SessionState is correctly populated.  This copy 
of Hive is *not* used
    +   * for storing persistent metadata, and only points to a dummy metastore 
in a temporary directory.
    +   */
    +  @transient
    +  protected[hive] lazy val executionHive: ClientWrapper = {
    +    logInfo(s"Initilizing execution hive, version $hiveExecutionVersion")
    +    new ClientWrapper(
    +      version = IsolatedClientLoader.hiveVersion(hiveExecutionVersion),
    +      config = newTemporaryConfiguration())
    +  }
    +  SessionState.setCurrentSessionState(executionHive.state)
    +
    +  /**
    +   * The copy of the Hive client that is used to retrieve metadata from 
the Hive MetaStore.
    +   * The version of the Hive client that is used here must match the 
metastore that is configured
    +   * in the hive-site.xml file.
    +   */
    +  @transient
    +  protected[hive] lazy val metadataHive: ClientInterface = {
    +    val metaVersion = 
IsolatedClientLoader.hiveVersion(hiveMetastoreVersion)
    +
    +    // We instantiate a HiveConf here to read in the hive-site.xml file 
and then pass the options
    +    // into the isolated client loader
    +    val metadataConf = new HiveConf()
    +    // `configure` goes second to override other settings.
    +    val allConfig = metadataConf.iterator.map(e => e.getKey -> 
e.getValue).toMap ++ configure
    +
    +    val isolatedLoader = if (hiveMetastoreJars == "builtin") {
    +      if (hiveExecutionVersion != hiveMetastoreVersion) {
    +        throw new IllegalArgumentException(
    +          "Builtin jars can only be used when hive execution version == 
hive metastore version. " +
    +          s"Execution: ${hiveExecutionVersion} != Metastore: 
${hiveMetastoreVersion}. " +
    +          "Specify a vaild path to the correct hive jars using 
$HIVE_METASTORE_JARS " +
    +          s"or change $HIVE_METASTORE_VERSION to $hiveExecutionVersion.")
    +      }
    +      val jars = getClass.getClassLoader match {
    +        case urlClassLoader: java.net.URLClassLoader => 
urlClassLoader.getURLs
    +        case other =>
    +          throw new IllegalArgumentException(
    +            "Unable to locate hive jars to connect to metastore " +
    +            s"using classloader ${other.getClass.getName}. " +
    +            "Please set spark.sql.hive.metastore.jars")
    +      }
    +
    +      logInfo(
    +        s"Initializing HiveMetastoreConnection version 
$hiveMetastoreVersion using Spark classes.")
    +      new IsolatedClientLoader(
    +        version = metaVersion,
    +        execJars = jars.toSeq,
    +        config = allConfig,
    +        isolationOn = true)
    +    } else if (hiveMetastoreJars == "maven") {
    +      // TODO: Support for loading the jars from an already downloaded 
location.
    +      logInfo(
    +        s"Initializing HiveMetastoreConnection version 
$hiveMetastoreVersion using maven.")
    +      IsolatedClientLoader.forVersion(hiveMetastoreVersion, allConfig )
    +    } else {
    +      // Convert to files and expand any directories.
    +      val jars =
    +        hiveMetastoreJars
    +          .split(File.pathSeparator)
    +          .flatMap {
    +            case path if path.endsWith("*") =>
    --- End diff --
    
    If you want to be really, really correct here, it should probably be:
    
        case path if new File(path).getName() == "*" =>
          new File(path).getParentFile().listFiles().filter(_.getName().toLowerCase().endsWith(".jar"))
    
    Also, you should probably check whether the parent directory actually exists; 
otherwise you'll get an NPE, since `listFiles()` returns `null` when the path is not an existing directory.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to