Repository: spark
Updated Branches:
  refs/heads/branch-1.4 9c2c6b4a6 -> 8f4a86eaa


[SPARK-7853] [SQL] Fix HiveContext in Spark Shell

https://issues.apache.org/jira/browse/SPARK-7853

This fixes a problem introduced by my change in 
https://github.com/apache/spark/pull/6435, which caused HiveContext creation 
to fail in the Spark shell because of a class loader issue.
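
For context, the core of the fix is the class loader walk in the diff below.
Here is a minimal, self-contained sketch of the patched logic (the standalone
object and main method are illustrative scaffolding, not part of the patch):
instead of requiring every loader in the chain to be a URLClassLoader and
failing otherwise, it collects jar URLs where it finds them and skips over
loaders of other types, such as the loader the Spark REPL installs.

import java.net.{URL, URLClassLoader}

object ClassLoaderJars {
  // Walk the class loader chain, collecting jar URLs from every
  // URLClassLoader and skipping (rather than failing on) loaders
  // of any other type.
  def allJars(classLoader: ClassLoader): Array[URL] = classLoader match {
    case null => Array.empty[URL]
    case urlClassLoader: URLClassLoader =>
      urlClassLoader.getURLs ++ allJars(urlClassLoader.getParent)
    case other => allJars(other.getParent)
  }

  def main(args: Array[String]): Unit = {
    allJars(Thread.currentThread().getContextClassLoader).foreach(println)
  }
}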

Author: Yin Huai <yh...@databricks.com>

Closes #6459 from yhuai/SPARK-7853 and squashes the following commits:

37ad33e [Yin Huai] Do not use hiveQlTable at all.
47cdb6d [Yin Huai] Move hiveconf.set to the end of setConf.
005649b [Yin Huai] Update comment.
35d86f3 [Yin Huai] Access TTable directly to make sure Hive will not internally use any metastore utility functions.
3737766 [Yin Huai] Recursively find all jars.

(cherry picked from commit 572b62cafe4bc7b1d464c9dcfb449c9d53456826)
Signed-off-by: Yin Huai <yh...@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8f4a86ea
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8f4a86ea
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8f4a86ea

Branch: refs/heads/branch-1.4
Commit: 8f4a86eaa1cad9a2a7607fd5446105c93e5e424e
Parents: 9c2c6b4
Author: Yin Huai <yh...@databricks.com>
Authored: Thu May 28 17:12:30 2015 -0700
Committer: Yin Huai <yh...@databricks.com>
Committed: Thu May 28 17:12:38 2015 -0700

----------------------------------------------------------------------
 .../org/apache/spark/sql/hive/HiveContext.scala | 35 +++++++++++---------
 .../spark/sql/hive/HiveMetastoreCatalog.scala   | 12 +++----
 2 files changed, 25 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/8f4a86ea/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
index 9ab98fd..2ed71d3 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
@@ -189,24 +189,22 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
           "Specify a vaild path to the correct hive jars using 
$HIVE_METASTORE_JARS " +
           s"or change $HIVE_METASTORE_VERSION to $hiveExecutionVersion.")
       }
-      // We recursively add all jars in the class loader chain,
-      // starting from the given urlClassLoader.
-      def addJars(urlClassLoader: URLClassLoader): Array[URL] = {
-        val jarsInParent = urlClassLoader.getParent match {
-          case parent: URLClassLoader => addJars(parent)
-          case other => Array.empty[URL]
-        }
 
-        urlClassLoader.getURLs ++ jarsInParent
+      // We recursively find all jars in the class loader chain,
+      // starting from the given classLoader.
+      def allJars(classLoader: ClassLoader): Array[URL] = classLoader match {
+        case null => Array.empty[URL]
+        case urlClassLoader: URLClassLoader =>
+          urlClassLoader.getURLs ++ allJars(urlClassLoader.getParent)
+        case other => allJars(other.getParent)
       }
 
-      val jars = Utils.getContextOrSparkClassLoader match {
-        case urlClassLoader: URLClassLoader => addJars(urlClassLoader)
-        case other =>
-          throw new IllegalArgumentException(
-            "Unable to locate hive jars to connect to metastore " +
-            s"using classloader ${other.getClass.getName}. " +
-            "Please set spark.sql.hive.metastore.jars")
+      val classLoader = Utils.getContextOrSparkClassLoader
+      val jars = allJars(classLoader)
+      if (jars.length == 0) {
+        throw new IllegalArgumentException(
+          "Unable to locate hive jars to connect to metastore. " +
+            "Please set spark.sql.hive.metastore.jars.")
       }
 
       logInfo(
@@ -356,9 +354,14 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
 
   override def setConf(key: String, value: String): Unit = {
     super.setConf(key, value)
-    hiveconf.set(key, value)
     executionHive.runSqlHive(s"SET $key=$value")
     metadataHive.runSqlHive(s"SET $key=$value")
+    // If users put any Spark SQL setting in the spark conf (e.g. spark-defaults.conf),
+    // this setConf will be called in the constructor of the SQLContext.
+    // Also, calling hiveconf will create a default session containing a HiveConf, which
+    // will interfere with the creation of executionHive (which is a lazy val). So,
+    // we put hiveconf.set at the end of this method.
+    hiveconf.set(key, value)
   }
 
   /* A catalyst metadata catalog that points to the Hive Metastore. */
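
The reordering in setConf above is subtle enough to deserve a note: executionHive
is a lazy val, and merely touching hiveconf makes Hive create a default session
(with its own HiveConf) as a side effect, so hiveconf.set must run last. A
dependency-free sketch of that initialization-order hazard, with made-up names
(globalState, service) purely for illustration:

object LazyInitOrder {
  // Stands in for the default-session state that touching hiveconf creates.
  var globalState: Option[String] = None

  // Stands in for executionHive: a lazy val whose result depends on what
  // has already happened by the time something first forces it.
  lazy val service: String = globalState match {
    case None    => "initialized cleanly"
    case Some(s) => s"initialized after '$s' already existed"
  }

  def main(args: Array[String]): Unit = {
    // Mutating the shared state before the lazy val is forced changes what
    // it sees -- the same reason the patch moves hiveconf.set to the end.
    globalState = Some("default session")
    println(service) // prints: initialized after 'default session' already existed
  }
}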

http://git-wip-us.apache.org/repos/asf/spark/blob/8f4a86ea/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
index 425a400..95117f7 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
@@ -707,20 +707,20 @@ private[hive] case class MetastoreRelation
     hiveQlTable.getMetadata
   )
 
-  implicit class SchemaAttribute(f: FieldSchema) {
+  implicit class SchemaAttribute(f: HiveColumn) {
     def toAttribute: AttributeReference = AttributeReference(
-      f.getName,
-      HiveMetastoreTypes.toDataType(f.getType),
+      f.name,
+      HiveMetastoreTypes.toDataType(f.hiveType),
      // Since data can be dumped in randomly with no validation, everything is nullable.
       nullable = true
     )(qualifiers = Seq(alias.getOrElse(tableName)))
   }
 
-  // Must be a stable value since new attributes are born here.
-  val partitionKeys = hiveQlTable.getPartitionKeys.map(_.toAttribute)
+  /** PartitionKey attributes */
+  val partitionKeys = table.partitionColumns.map(_.toAttribute)
 
   /** Non-partitionKey attributes */
-  val attributes = hiveQlTable.getCols.map(_.toAttribute)
+  val attributes = table.schema.map(_.toAttribute)
 
   val output = attributes ++ partitionKeys
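
Taken together, the HiveMetastoreCatalog change is the same theme as the commit
message's "Access TTable directly": the relation's attributes are now built from
Spark's own table description (HiveColumn values exposing name and hiveType)
rather than from hiveQlTable, so no Hive metastore utility functions run along
the way. A simplified, dependency-free sketch of that shape, using stand-in case
classes rather than Spark's real types:

// Stand-ins for Spark's internal descriptions (illustrative only).
case class HiveColumn(name: String, hiveType: String)
case class HiveTable(schema: Seq[HiveColumn], partitionColumns: Seq[HiveColumn])
case class Attribute(name: String, dataType: String, nullable: Boolean)

object MetastoreRelationSketch {
  implicit class SchemaAttribute(f: HiveColumn) {
    // Since data can be dumped in with no validation, everything is nullable.
    def toAttribute: Attribute = Attribute(f.name, f.hiveType, nullable = true)
  }

  def main(args: Array[String]): Unit = {
    val table = HiveTable(
      schema = Seq(HiveColumn("id", "int"), HiveColumn("name", "string")),
      partitionColumns = Seq(HiveColumn("ds", "string")))

    // Built straight from the table description; no Hive calls involved.
    val partitionKeys = table.partitionColumns.map(_.toAttribute)
    val attributes = table.schema.map(_.toAttribute)
    val output = attributes ++ partitionKeys
    output.foreach(println)
  }
}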
 

