Repository: spark
Updated Branches:
  refs/heads/master 85d6b0db9 -> 04f925ede


[SPARK-15236][SQL][SPARK SHELL] Add spark-defaults property to switch to use InMemoryCatalog

## What changes were proposed in this pull request?
This PR changes REPL/Main to check the property `spark.sql.catalogImplementation` to decide whether `enableHiveSupport` should be called.

If `spark.sql.catalogImplementation` is set to `hive` and the Hive classes are built, Spark will use Hive support.
Otherwise, Spark will create a SparkSession with in-memory catalog support.
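
For illustration, here is a minimal sketch of requesting the in-memory catalog through the builder API, which mirrors what a user can now get by putting `spark.sql.catalogImplementation in-memory` in `spark-defaults.conf` (the `local` master is an assumption for the example, not part of this patch):

```scala
import org.apache.spark.sql.SparkSession

// Ask for the in-memory catalog explicitly; enableHiveSupport() is roughly
// the equivalent of setting this property to "hive".
val spark = SparkSession.builder()
  .master("local")  // assumed master, just to keep the example self-contained
  .config("spark.sql.catalogImplementation", "in-memory")
  .getOrCreate()
```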

## How was this patch tested?
Run the REPL component test.

Author: xin Wu <xi...@us.ibm.com>
Author: Xin Wu <xi...@us.ibm.com>

Closes #13088 from xwu0226/SPARK-15236.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/04f925ed
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/04f925ed
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/04f925ed

Branch: refs/heads/master
Commit: 04f925ede851fc77add9ef1cacb79fb3a617f650
Parents: 85d6b0d
Author: xin Wu <xi...@us.ibm.com>
Authored: Tue May 31 17:42:47 2016 -0700
Committer: Andrew Or <and...@databricks.com>
Committed: Tue May 31 17:42:47 2016 -0700

----------------------------------------------------------------------
 .../main/scala/org/apache/spark/repl/Main.scala | 20 ++++++--
 .../scala/org/apache/spark/repl/ReplSuite.scala | 50 +++++++++++++++++++-
 2 files changed, 66 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/04f925ed/repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala
----------------------------------------------------------------------
diff --git a/repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala
index 005edda..771670f 100644
--- a/repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala
+++ b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala
@@ -22,6 +22,7 @@ import java.io.File
 import scala.tools.nsc.GenericRunnerSettings
 
 import org.apache.spark._
+import org.apache.spark.internal.config.CATALOG_IMPLEMENTATION
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.SparkSession
 import org.apache.spark.util.Utils
@@ -88,10 +89,23 @@ object Main extends Logging {
     }
 
     val builder = SparkSession.builder.config(conf)
-    if (SparkSession.hiveClassesArePresent) {
-      sparkSession = builder.enableHiveSupport().getOrCreate()
-      logInfo("Created Spark session with Hive support")
+    if (conf.get(CATALOG_IMPLEMENTATION.key, "hive").toLowerCase == "hive") {
+      if (SparkSession.hiveClassesArePresent) {
+        // When the property is not set at all, the builder's config does not
+        // have this value set to 'hive' yet. The original default behavior
+        // is to use the Hive catalog when the Hive classes are present.
+        sparkSession = builder.enableHiveSupport().getOrCreate()
+        logInfo("Created Spark session with Hive support")
+      } else {
+        // Fall back to 'in-memory' when no Hive classes are found, even if
+        // the property was set to 'hive' in spark-defaults.conf.
+        builder.config(CATALOG_IMPLEMENTATION.key, "in-memory")
+        sparkSession = builder.getOrCreate()
+        logInfo("Created Spark session")
+      }
     } else {
+      // In the case that the property is set but not to 'hive', the internal
+      // default is 'in-memory', so the SparkSession will use the in-memory catalog.
       sparkSession = builder.getOrCreate()
       logInfo("Created Spark session")
     }
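
Distilled, the new decision flow amounts to the following standalone sketch (the helper name is illustrative, not part of the patch; note that `SparkSession.hiveClassesArePresent` is package-private, so this only compiles inside an `org.apache.spark` package, as REPL Main is):

```scala
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

// Illustrative distillation of the catalog selection above.
def createSparkSession(conf: SparkConf): SparkSession = {
  val builder = SparkSession.builder.config(conf)
  // Default to "hive" when the property is absent, preserving old behavior.
  val wantHive =
    conf.get("spark.sql.catalogImplementation", "hive").toLowerCase == "hive"
  if (wantHive && SparkSession.hiveClassesArePresent) {
    builder.enableHiveSupport().getOrCreate()
  } else {
    // Fall back to the in-memory catalog when the Hive classes are missing
    // (a no-op when the property already says "in-memory").
    builder.config("spark.sql.catalogImplementation", "in-memory").getOrCreate()
  }
}
```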

http://git-wip-us.apache.org/repos/asf/spark/blob/04f925ed/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala
----------------------------------------------------------------------
diff --git a/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala b/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala
index af82e7a..1256860 100644
--- a/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala
+++ b/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala
@@ -21,9 +21,11 @@ import java.io._
 import java.net.URLClassLoader
 
 import scala.collection.mutable.ArrayBuffer
-
 import org.apache.commons.lang3.StringEscapeUtils
+import org.apache.log4j.{Level, LogManager}
 import org.apache.spark.{SparkContext, SparkFunSuite}
+import org.apache.spark.internal.config._
+import org.apache.spark.sql.SparkSession
 import org.apache.spark.util.Utils
 
 class ReplSuite extends SparkFunSuite {
@@ -99,6 +101,52 @@ class ReplSuite extends SparkFunSuite {
     System.clearProperty("spark.driver.port")
   }
 
+  test("SPARK-15236: use Hive catalog") {
+    // enable INFO logging so that the log output will contain the
+    // "HiveMetaStore" entries when the Hive catalog is used
+    val rootLogger = LogManager.getRootLogger()
+    val logLevel = rootLogger.getLevel
+    rootLogger.setLevel(Level.INFO)
+    try {
+      Main.conf.set(CATALOG_IMPLEMENTATION.key, "hive")
+      val output = runInterpreter("local",
+        """
+      |spark.sql("drop table if exists t_15236")
+    """.stripMargin)
+      assertDoesNotContain("error:", output)
+      assertDoesNotContain("Exception", output)
+      // The Hive catalog is used only when the config is set to hive and
+      // the Hive classes are built; in that case the INFO log will contain
+      // "HiveMetaStore" entries.
+      if (SparkSession.hiveClassesArePresent) {
+        assertContains("HiveMetaStore", output)
+      } else {
+        // If the Hive classes are not built, the in-memory catalog is used
+        assertDoesNotContain("HiveMetaStore", output)
+      }
+    } finally {
+      rootLogger.setLevel(logLevel)
+    }
+  }
+
+  test("SPARK-15236: use in-memory catalog") {
+    val rootLogger = LogManager.getRootLogger()
+    val logLevel = rootLogger.getLevel
+    rootLogger.setLevel(Level.INFO)
+    try {
+      Main.conf.set(CATALOG_IMPLEMENTATION.key, "in-memory")
+      val output = runInterpreter("local",
+        """
+          |spark.sql("drop table if exists t_16236")
+        """.stripMargin)
+      assertDoesNotContain("error:", output)
+      assertDoesNotContain("Exception", output)
+      assertDoesNotContain("HiveMetaStore", output)
+    } finally {
+      rootLogger.setLevel(logLevel)
+    }
+  }
+
   test("simple foreach with accumulator") {
     val output = runInterpreter("local",
       """

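One quick way to confirm from a running spark-shell which catalog the session ended up with (the two-argument `conf.get` supplies the internal default in case the key was never explicitly set):

```scala
// Returns "hive" or "in-memory"; "in-memory" is the internal default.
spark.conf.get("spark.sql.catalogImplementation", "in-memory")
```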
