Repository: spark Updated Branches: refs/heads/master 6abc45e34 -> 4b325c77a
[SPARK-5193][SQL] Tighten up HiveContext API 1. Removed the deprecated LocalHiveContext 2. Made private[sql] fields protected[sql] so they don't show up in javadoc. 3. Added javadoc to refreshTable. 4. Added Experimental tag to analyze command. Author: Reynold Xin <r...@databricks.com> Closes #4054 from rxin/hivecontext-api and squashes the following commits: 25cc00a [Reynold Xin] Add implicit conversion back. cbca886 [Reynold Xin] [SPARK-5193][SQL] Tighten up HiveContext API Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4b325c77 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4b325c77 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4b325c77 Branch: refs/heads/master Commit: 4b325c77a270ec32d6858d204313d4f161774fae Parents: 6abc45e Author: Reynold Xin <r...@databricks.com> Authored: Wed Jan 14 20:31:02 2015 -0800 Committer: Reynold Xin <r...@databricks.com> Committed: Wed Jan 14 20:31:02 2015 -0800 ---------------------------------------------------------------------- .../org/apache/spark/sql/hive/HiveContext.scala | 48 ++++++-------------- 1 file changed, 13 insertions(+), 35 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/4b325c77/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala index a9a20a5..4246b8b 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.hive -import java.io.{BufferedReader, File, InputStreamReader, PrintStream} +import java.io.{BufferedReader, InputStreamReader, PrintStream} import 
java.sql.{Date, Timestamp} import scala.collection.JavaConversions._ @@ -33,6 +33,7 @@ import org.apache.hadoop.hive.ql.session.SessionState import org.apache.hadoop.hive.serde2.io.{DateWritable, TimestampWritable} import org.apache.spark.SparkContext +import org.apache.spark.annotation.Experimental import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.ScalaReflection import org.apache.spark.sql.catalyst.analysis.{Analyzer, EliminateAnalysisOperators, OverrideCatalog, OverrideFunctionRegistry} @@ -43,28 +44,6 @@ import org.apache.spark.sql.sources.DataSourceStrategy import org.apache.spark.sql.types._ /** - * DEPRECATED: Use HiveContext instead. - */ -@deprecated(""" - Use HiveContext instead. It will still create a local metastore if one is not specified. - However, note that the default directory is ./metastore_db, not ./metastore - """, "1.1") -class LocalHiveContext(sc: SparkContext) extends HiveContext(sc) { - - lazy val metastorePath = new File("metastore").getCanonicalPath - lazy val warehousePath: String = new File("warehouse").getCanonicalPath - - /** Sets up the system initially or after a RESET command */ - protected def configure() { - setConf("javax.jdo.option.ConnectionURL", - s"jdbc:derby:;databaseName=$metastorePath;create=true") - setConf("hive.metastore.warehouse.dir", warehousePath) - } - - configure() // Must be called before initializing the catalog below. -} - -/** * An instance of the Spark SQL execution engine that integrates with data stored in Hive. * Configuration for Hive is read from hive-site.xml on the classpath. */ @@ -80,7 +59,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { * are automatically converted to use the Spark SQL parquet table scan, instead of the Hive * SerDe. 
*/ - private[spark] def convertMetastoreParquet: Boolean = + protected[sql] def convertMetastoreParquet: Boolean = getConf("spark.sql.hive.convertMetastoreParquet", "true") == "true" override protected[sql] def executePlan(plan: LogicalPlan): this.QueryExecution = @@ -97,14 +76,6 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { } } - @deprecated("hiveql() is deprecated as the sql function now parses using HiveQL by default. " + - s"The SQL dialect for parsing can be set using ${SQLConf.DIALECT}", "1.1") - def hiveql(hqlQuery: String): SchemaRDD = new SchemaRDD(this, HiveQl.parseSql(hqlQuery)) - - @deprecated("hql() is deprecated as the sql function now parses using HiveQL by default. " + - s"The SQL dialect for parsing can be set using ${SQLConf.DIALECT}", "1.1") - def hql(hqlQuery: String): SchemaRDD = hiveql(hqlQuery) - /** * Creates a table using the schema of the given class. * @@ -116,6 +87,12 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { catalog.createTable("default", tableName, ScalaReflection.attributesFor[A], allowExisting) } + /** + * Invalidate and refresh all the cached metadata of the given table. For performance reasons, + * Spark SQL or the external data source library it uses might cache certain metadata about a + * table, such as the location of blocks. When those change outside of Spark SQL, users should + * call this function to invalidate the cache. + */ def refreshTable(tableName: String): Unit = { // TODO: Database support... catalog.refreshTable("default", tableName) @@ -133,6 +110,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { * Right now, it only supports Hive tables and it only updates the size of a Hive table * in the Hive metastore. 
*/ + @Experimental def analyze(tableName: String) { val relation = EliminateAnalysisOperators(catalog.lookupRelation(Seq(tableName))) @@ -289,7 +267,6 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { results } - /** * Execute the command using Hive and return the results as a sequence. Each element * in the sequence is one row. @@ -345,7 +322,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { } @transient - val hivePlanner = new SparkPlanner with HiveStrategies { + private val hivePlanner = new SparkPlanner with HiveStrategies { val hiveContext = self override def strategies: Seq[Strategy] = experimental.extraStrategies ++ Seq( @@ -410,7 +387,8 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { } } -object HiveContext { + +private object HiveContext { protected val primitiveTypes = Seq(StringType, IntegerType, LongType, DoubleType, FloatType, BooleanType, ByteType, ShortType, DateType, TimestampType, BinaryType) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org