Repository: spark
Updated Branches:
  refs/heads/master 3072b9602 -> 72cfb1398
[SPARK-2397][SQL] Deprecate LocalHiveContext

LocalHiveContext is redundant with HiveContext.  The only difference is it
creates `./metastore` instead of `./metastore_db`.

Author: Michael Armbrust <mich...@databricks.com>

Closes #1641 from marmbrus/localHiveContext and squashes the following commits:

e5ec497 [Michael Armbrust] Add deprecation version
626e056 [Michael Armbrust] Don't remove from imports yet
905cc5f [Michael Armbrust] Merge remote-tracking branch 'apache/master' into localHiveContext
1c2727e [Michael Armbrust] Deprecate LocalHiveContext


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/72cfb139
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/72cfb139
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/72cfb139

Branch: refs/heads/master
Commit: 72cfb13987bab07461266905930f84619b3a0068
Parents: 3072b96
Author: Michael Armbrust <mich...@databricks.com>
Authored: Thu Jul 31 11:26:43 2014 -0700
Committer: Michael Armbrust <mich...@databricks.com>
Committed: Thu Jul 31 11:26:43 2014 -0700

----------------------------------------------------------------------
 .../sbt_app_hive/src/main/scala/HiveApp.scala       |  4 ++--
 docs/sql-programming-guide.md                       |  6 +++---
 .../spark/examples/sql/hive/HiveFromSpark.scala     |  4 ++--
 python/pyspark/sql.py                               |  6 ++++++
 .../org/apache/spark/sql/hive/HiveContext.scala     |  7 +++++--
 .../scala/org/apache/spark/sql/hive/TestHive.scala  | 15 ++++++++++++---
 6 files changed, 30 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/72cfb139/dev/audit-release/sbt_app_hive/src/main/scala/HiveApp.scala
----------------------------------------------------------------------
diff --git a/dev/audit-release/sbt_app_hive/src/main/scala/HiveApp.scala b/dev/audit-release/sbt_app_hive/src/main/scala/HiveApp.scala
index 7257d17..a21410f 100644
--- a/dev/audit-release/sbt_app_hive/src/main/scala/HiveApp.scala
+++ b/dev/audit-release/sbt_app_hive/src/main/scala/HiveApp.scala
@@ -22,7 +22,7 @@ import scala.collection.mutable.{ListBuffer, Queue}
 
 import org.apache.spark.SparkConf
 import org.apache.spark.SparkContext
 import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.hive.LocalHiveContext
+import org.apache.spark.sql.hive.HiveContext
 
 case class Person(name: String, age: Int)
 
@@ -34,7 +34,7 @@ object SparkSqlExample {
       case None => new SparkConf().setAppName("Simple Sql App")
     }
     val sc = new SparkContext(conf)
-    val hiveContext = new LocalHiveContext(sc)
+    val hiveContext = new HiveContext(sc)
     import hiveContext._
 
     hql("DROP TABLE IF EXISTS src")
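For applications making this migration, the swap is one line. Below is a minimal
before/after sketch (assuming the hql API of this era; the app name and query are
illustrative):

    import org.apache.spark.{SparkConf, SparkContext}
    import org.apache.spark.sql.hive.HiveContext

    object MigrationSketch {
      def main(args: Array[String]) {
        val sc = new SparkContext(new SparkConf().setAppName("Migration Sketch"))
        // Before this patch: val hiveContext = new LocalHiveContext(sc)
        // HiveContext falls back to a local Derby metastore when no hive-site.xml is
        // present, creating ./metastore_db and ./warehouse in the current directory.
        val hiveContext = new HiveContext(sc)
        import hiveContext._
        hql("SHOW TABLES").collect().foreach(println)
      }
    }

The one behavioral difference, called out in the commit message, is that code
relying on LocalHiveContext's ./metastore directory will now see ./metastore_db.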
http://git-wip-us.apache.org/repos/asf/spark/blob/72cfb139/docs/sql-programming-guide.md
----------------------------------------------------------------------
diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md
index 156e0ae..a047d32 100644
--- a/docs/sql-programming-guide.md
+++ b/docs/sql-programming-guide.md
@@ -487,9 +487,9 @@ Configuration of Hive is done by placing your `hive-site.xml` file in `conf/`.
 
 When working with Hive one must construct a `HiveContext`, which inherits from `SQLContext`, and
 adds support for finding tables in in the MetaStore and writing queries using HiveQL. Users who do
-not have an existing Hive deployment can also experiment with the `LocalHiveContext`,
-which is similar to `HiveContext`, but creates a local copy of the `metastore` and `warehouse`
-automatically.
+not have an existing Hive deployment can still create a HiveContext. When not configured by the
+hive-site.xml, the context automatically creates `metastore_db` and `warehouse` in the current
+directory.
 
 {% highlight scala %}
 // sc is an existing SparkContext.


http://git-wip-us.apache.org/repos/asf/spark/blob/72cfb139/examples/src/main/scala/org/apache/spark/examples/sql/hive/HiveFromSpark.scala
----------------------------------------------------------------------
diff --git a/examples/src/main/scala/org/apache/spark/examples/sql/hive/HiveFromSpark.scala b/examples/src/main/scala/org/apache/spark/examples/sql/hive/HiveFromSpark.scala
index 66a23fa..dc5290f 100644
--- a/examples/src/main/scala/org/apache/spark/examples/sql/hive/HiveFromSpark.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/sql/hive/HiveFromSpark.scala
@@ -19,7 +19,7 @@ package org.apache.spark.examples.sql.hive
 
 import org.apache.spark.{SparkConf, SparkContext}
 import org.apache.spark.sql._
-import org.apache.spark.sql.hive.LocalHiveContext
+import org.apache.spark.sql.hive.HiveContext
 
 object HiveFromSpark {
   case class Record(key: Int, value: String)
@@ -31,7 +31,7 @@ object HiveFromSpark {
     // A local hive context creates an instance of the Hive Metastore in process, storing the
     // the warehouse data in the current directory. This location can be overridden by
     // specifying a second parameter to the constructor.
-    val hiveContext = new LocalHiveContext(sc)
+    val hiveContext = new HiveContext(sc)
     import hiveContext._
 
     hql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING)")


http://git-wip-us.apache.org/repos/asf/spark/blob/72cfb139/python/pyspark/sql.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql.py b/python/pyspark/sql.py
index 13f0ed4..9388ead 100644
--- a/python/pyspark/sql.py
+++ b/python/pyspark/sql.py
@@ -15,6 +15,8 @@
 # limitations under the License.
 #
 
+import warnings
+
 from pyspark.rdd import RDD, PipelinedRDD
 from pyspark.serializers import BatchedSerializer, PickleSerializer
 
@@ -813,6 +815,10 @@ class LocalHiveContext(HiveContext):
         130091
         """
 
+    def __init__(self, sparkContext, sqlContext=None):
+        HiveContext.__init__(self, sparkContext, sqlContext)
+        warnings.warn("LocalHiveContext is deprecated. Use HiveContext instead.", DeprecationWarning)
+
     def _get_hive_ctx(self):
         return self._jvm.LocalHiveContext(self._jsc.sc())


http://git-wip-us.apache.org/repos/asf/spark/blob/72cfb139/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
index b413373..27b444d 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
@@ -42,9 +42,12 @@ import org.apache.spark.sql.execution.{Command => PhysicalCommand}
 import org.apache.spark.sql.hive.execution.DescribeHiveTableCommand
 
 /**
- * Starts up an instance of hive where metadata is stored locally. An in-process metadata data is
- * created with data stored in ./metadata. Warehouse data is stored in in ./warehouse.
+ * DEPRECATED: Use HiveContext instead.
  */
+@deprecated("""
+  Use HiveContext instead. It will still create a local metastore if one is not specified.
+  However, note that the default directory is ./metastore_db, not ./metastore
+  """, "1.1")
 class LocalHiveContext(sc: SparkContext) extends HiveContext(sc) {
 
   lazy val metastorePath = new File("metastore").getCanonicalPath
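The @deprecated annotation used above takes a message and the version in which
deprecation began, and makes scalac warn at every use site. A standalone sketch of
the mechanism (class names are illustrative):

    object DeprecationSketch {
      @deprecated("Use NewThing instead.", "1.1")
      class OldThing
      class NewThing

      def main(args: Array[String]) {
        // Compiling this line produces a warning along the lines of:
        //   class OldThing is deprecated: Use NewThing instead.
        val old = new OldThing
        println(old)
      }
    }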
http://git-wip-us.apache.org/repos/asf/spark/blob/72cfb139/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala
index 9386008..c50e8c4 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala
@@ -53,15 +53,24 @@ object TestHive
  * hive metastore seems to lead to weird non-deterministic failures. Therefore, the execution of
  * test cases that rely on TestHive must be serialized.
  */
-class TestHiveContext(sc: SparkContext) extends LocalHiveContext(sc) {
+class TestHiveContext(sc: SparkContext) extends HiveContext(sc) {
   self =>
 
   // By clearing the port we force Spark to pick a new one.  This allows us to rerun tests
   // without restarting the JVM.
   System.clearProperty("spark.hostPort")
 
-  override lazy val warehousePath = getTempFilePath("sparkHiveWarehouse").getCanonicalPath
-  override lazy val metastorePath = getTempFilePath("sparkHiveMetastore").getCanonicalPath
+  lazy val warehousePath = getTempFilePath("sparkHiveWarehouse").getCanonicalPath
+  lazy val metastorePath = getTempFilePath("sparkHiveMetastore").getCanonicalPath
+
+  /** Sets up the system initially or after a RESET command */
+  protected def configure() {
+    set("javax.jdo.option.ConnectionURL",
+      s"jdbc:derby:;databaseName=$metastorePath;create=true")
+    set("hive.metastore.warehouse.dir", warehousePath)
+  }
+
+  configure() // Must be called before initializing the catalog below.
 
   /** The location of the compiled hive distribution */
   lazy val hiveHome = envVarToFile("HIVE_HOME")
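The configure() method added above amounts to pointing Hive's embedded Derby
metastore, and the warehouse directory, at throwaway paths so repeated runs never
collide on ./metastore_db. A hypothetical subclass sketching the same idea outside
the test harness (class name and path handling are illustrative; assumes the
set(key, value) setter used by configure() is likewise visible to subclasses):

    import java.io.File
    import org.apache.spark.SparkContext
    import org.apache.spark.sql.hive.HiveContext

    class TempMetastoreContext(sc: SparkContext) extends HiveContext(sc) {
      private def freshPath(prefix: String): String = {
        val f = File.createTempFile(prefix, "")
        f.delete() // Derby wants a nonexistent path when create=true
        f.getCanonicalPath
      }

      // Mirrors TestHiveContext.configure(): redirect the metastore database and
      // the warehouse directory away from the current working directory.
      set("javax.jdo.option.ConnectionURL",
        s"jdbc:derby:;databaseName=${freshPath("sparkHiveMetastore")};create=true")
      set("hive.metastore.warehouse.dir", freshPath("sparkHiveWarehouse"))
    }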