spark git commit: [SPARK-5193][SQL] Tighten up HiveContext API

rxin Wed, 14 Jan 2015 20:31:52 -0800

Repository: spark
Updated Branches:
  refs/heads/master 6abc45e34 -> 4b325c77a



[SPARK-5193][SQL] Tighten up HiveContext API

1. Removed the deprecated LocalHiveContext
2. Made private[spark] fields protected[sql] so they don't show up in javadoc.
3. Added javadoc to refreshTable.
4. Added Experimental tag to analyze command.

Author: Reynold Xin <r...@databricks.com>

Closes #4054 from rxin/hivecontext-api and squashes the following commits:

25cc00a [Reynold Xin] Add implicit conversion back.
cbca886 [Reynold Xin] [SPARK-5193][SQL] Tighten up HiveContext API


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4b325c77
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4b325c77
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4b325c77

Branch: refs/heads/master
Commit: 4b325c77a270ec32d6858d204313d4f161774fae
Parents: 6abc45e
Author: Reynold Xin <r...@databricks.com>
Authored: Wed Jan 14 20:31:02 2015 -0800
Committer: Reynold Xin <r...@databricks.com>
Committed: Wed Jan 14 20:31:02 2015 -0800

----------------------------------------------------------------------
 .../org/apache/spark/sql/hive/HiveContext.scala | 48 ++++++--------------
 1 file changed, 13 insertions(+), 35 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/4b325c77/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
index a9a20a5..4246b8b 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.sql.hive
 
-import java.io.{BufferedReader, File, InputStreamReader, PrintStream}
+import java.io.{BufferedReader, InputStreamReader, PrintStream}
 import java.sql.{Date, Timestamp}
 
 import scala.collection.JavaConversions._
@@ -33,6 +33,7 @@ import org.apache.hadoop.hive.ql.session.SessionState
 import org.apache.hadoop.hive.serde2.io.{DateWritable, TimestampWritable}
 
 import org.apache.spark.SparkContext
+import org.apache.spark.annotation.Experimental
 import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.ScalaReflection
 import org.apache.spark.sql.catalyst.analysis.{Analyzer, 
EliminateAnalysisOperators, OverrideCatalog, OverrideFunctionRegistry}
@@ -43,28 +44,6 @@ import org.apache.spark.sql.sources.DataSourceStrategy
 import org.apache.spark.sql.types._
 
 /**
- * DEPRECATED: Use HiveContext instead.
- */
-@deprecated("""
-  Use HiveContext instead.  It will still create a local metastore if one is 
not specified.
-  However, note that the default directory is ./metastore_db, not ./metastore
-  """, "1.1")
-class LocalHiveContext(sc: SparkContext) extends HiveContext(sc) {
-
-  lazy val metastorePath = new File("metastore").getCanonicalPath
-  lazy val warehousePath: String = new File("warehouse").getCanonicalPath
-
-  /** Sets up the system initially or after a RESET command */
-  protected def configure() {
-    setConf("javax.jdo.option.ConnectionURL",
-      s"jdbc:derby:;databaseName=$metastorePath;create=true")
-    setConf("hive.metastore.warehouse.dir", warehousePath)
-  }
-
-  configure() // Must be called before initializing the catalog below.
-}
-
-/**
  * An instance of the Spark SQL execution engine that integrates with data 
stored in Hive.
  * Configuration for Hive is read from hive-site.xml on the classpath.
  */
@@ -80,7 +59,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
    * are automatically converted to use the Spark SQL parquet table scan, 
instead of the Hive
    * SerDe.
    */
-  private[spark] def convertMetastoreParquet: Boolean =
+  protected[sql] def convertMetastoreParquet: Boolean =
     getConf("spark.sql.hive.convertMetastoreParquet", "true") == "true"
 
   override protected[sql] def executePlan(plan: LogicalPlan): 
this.QueryExecution =
@@ -97,14 +76,6 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
     }
   }
 
-  @deprecated("hiveql() is deprecated as the sql function now parses using 
HiveQL by default. " +
-             s"The SQL dialect for parsing can be set using 
${SQLConf.DIALECT}", "1.1")
-  def hiveql(hqlQuery: String): SchemaRDD = new SchemaRDD(this, 
HiveQl.parseSql(hqlQuery))
-
-  @deprecated("hql() is deprecated as the sql function now parses using HiveQL 
by default. " +
-             s"The SQL dialect for parsing can be set using 
${SQLConf.DIALECT}", "1.1")
-  def hql(hqlQuery: String): SchemaRDD = hiveql(hqlQuery)
-
   /**
    * Creates a table using the schema of the given class.
    *
@@ -116,6 +87,12 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
     catalog.createTable("default", tableName, 
ScalaReflection.attributesFor[A], allowExisting)
   }
 
+  /**
+   * Invalidate and refresh all the cached metadata of the given table. 
For performance reasons,
+   * Spark SQL or the external data source library it uses might cache certain 
metadata about a
+   * table, such as the location of blocks. When those change outside of Spark 
SQL, users should
+   * call this function to invalidate the cache.
+   */
   def refreshTable(tableName: String): Unit = {
     // TODO: Database support...
     catalog.refreshTable("default", tableName)
@@ -133,6 +110,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
    * Right now, it only supports Hive tables and it only updates the size of a 
Hive table
    * in the Hive metastore.
    */
+  @Experimental
   def analyze(tableName: String) {
     val relation = 
EliminateAnalysisOperators(catalog.lookupRelation(Seq(tableName)))
 
@@ -289,7 +267,6 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
     results
   }
 
-
   /**
    * Execute the command using Hive and return the results as a sequence. Each 
element
    * in the sequence is one row.
@@ -345,7 +322,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
   }
 
   @transient
-  val hivePlanner = new SparkPlanner with HiveStrategies {
+  private val hivePlanner = new SparkPlanner with HiveStrategies {
     val hiveContext = self
 
     override def strategies: Seq[Strategy] = experimental.extraStrategies ++ 
Seq(
@@ -410,7 +387,8 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
   }
 }
 
-object HiveContext {
+
+private object HiveContext {
   protected val primitiveTypes =
     Seq(StringType, IntegerType, LongType, DoubleType, FloatType, BooleanType, 
ByteType,
       ShortType, DateType, TimestampType, BinaryType)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-5193][SQL] Tighten up HiveContext API

Reply via email to