Repository: spark
Updated Branches:
  refs/heads/branch-2.0 f208a9dcd -> 2b35ce818
[SPARK-15638][SQL] Audit Dataset, SparkSession, and SQLContext ## What changes were proposed in this pull request? This patch contains a list of changes as a result of my auditing Dataset, SparkSession, and SQLContext. The patch audits the categorization of experimental APIs, function groups, and deprecations. For the detailed list of changes, please see the diff. ## How was this patch tested? N/A Author: Reynold Xin <r...@databricks.com> Closes #13370 from rxin/SPARK-15638. (cherry picked from commit 675921040ee4802aa9914457de62af746bc3657d) Signed-off-by: Reynold Xin <r...@databricks.com> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2b35ce81 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2b35ce81 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2b35ce81 Branch: refs/heads/branch-2.0 Commit: 2b35ce818650d91a25a14cb40121ae22521de4b5 Parents: f208a9d Author: Reynold Xin <r...@databricks.com> Authored: Mon May 30 22:47:58 2016 -0700 Committer: Reynold Xin <r...@databricks.com> Committed: Mon May 30 22:48:03 2016 -0700 ---------------------------------------------------------------------- .../apache/spark/memory/TaskMemoryManager.java | 2 +- .../spark/scheduler/SchedulableBuilder.scala | 3 +- .../org/apache/spark/storage/BlockManager.scala | 6 +-- .../spark/storage/BlockManagerMaster.scala | 4 +- .../org/apache/spark/util/ListenerBus.scala | 4 +- .../scala/org/apache/spark/util/Utils.scala | 2 +- .../spark/sql/catalyst/catalog/interface.scala | 2 +- .../scala/org/apache/spark/sql/Column.scala | 35 +++++++-------- .../scala/org/apache/spark/sql/Dataset.scala | 45 +++++++++----------- .../spark/sql/KeyValueGroupedDataset.scala | 8 ++-- .../spark/sql/RelationalGroupedDataset.scala | 4 +- .../scala/org/apache/spark/sql/SQLContext.scala | 18 +------- .../spark/sql/execution/SparkSqlParser.scala | 16 +++---- .../spark/sql/execution/command/views.scala | 4 +- .../org/apache/spark/sql/SQLContextSuite.scala | 1 + .../spark/sql/hive/HiveDDLCommandSuite.scala | 8 ++-- 16 files changed, 69 insertions(+), 93 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/2b35ce81/core/src/main/java/org/apache/spark/memory/TaskMemoryManager.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/spark/memory/TaskMemoryManager.java b/core/src/main/java/org/apache/spark/memory/TaskMemoryManager.java index a4a571f..867c4a1 100644 --- a/core/src/main/java/org/apache/spark/memory/TaskMemoryManager.java +++ b/core/src/main/java/org/apache/spark/memory/TaskMemoryManager.java @@ -182,7 +182,7 @@ public class TaskMemoryManager { } consumers.add(consumer); - logger.debug("Task {} acquire {} for {}", taskAttemptId, Utils.bytesToString(got), consumer); + logger.debug("Task {} acquired {} for {}", taskAttemptId, Utils.bytesToString(got), consumer); return got; } } http://git-wip-us.apache.org/repos/asf/spark/blob/2b35ce81/core/src/main/scala/org/apache/spark/scheduler/SchedulableBuilder.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/scheduler/SchedulableBuilder.scala b/core/src/main/scala/org/apache/spark/scheduler/SchedulableBuilder.scala index 100ed76..96325a0 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/SchedulableBuilder.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/SchedulableBuilder.scala 
@@ -112,7 +112,8 @@ private[spark] class FairSchedulableBuilder(val rootPool: Pool, conf: SparkConf) schedulingMode = SchedulingMode.withName(xmlSchedulingMode) } catch { case e: NoSuchElementException => - logWarning("Error xml schedulingMode, using default schedulingMode") + logWarning(s"Unsupported schedulingMode: $xmlSchedulingMode, " + + s"using the default schedulingMode: $schedulingMode") } } http://git-wip-us.apache.org/repos/asf/spark/blob/2b35ce81/core/src/main/scala/org/apache/spark/storage/BlockManager.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index c56e451..2f9473a 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -92,9 +92,9 @@ private[spark] class BlockManager( private[spark] val diskStore = new DiskStore(conf, diskBlockManager) memoryManager.setMemoryStore(memoryStore) - // Note: depending on the memory manager, `maxStorageMemory` may actually vary over time. + // Note: depending on the memory manager, `maxMemory` may actually vary over time. // However, since we use this only for reporting and logging, what we actually want here is - // the absolute maximum value that `maxStorageMemory` can ever possibly reach. We may need + // the absolute maximum value that `maxMemory` can ever possibly reach. We may need // to revisit whether reporting this value as the "max" is intuitive to the user. private val maxMemory = memoryManager.maxOnHeapStorageMemory @@ -231,7 +231,7 @@ private[spark] class BlockManager( */ def reregister(): Unit = { // TODO: We might need to rate limit re-registering. - logInfo("BlockManager re-registering with master") + logInfo(s"BlockManager $blockManagerId re-registering with master") master.registerBlockManager(blockManagerId, maxMemory, slaveEndpoint) reportAllBlocks() } http://git-wip-us.apache.org/repos/asf/spark/blob/2b35ce81/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala index c22d2e0f..52db45b 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala @@ -45,9 +45,9 @@ class BlockManagerMaster( /** Register the BlockManager's id with the driver. 
*/ def registerBlockManager( blockManagerId: BlockManagerId, maxMemSize: Long, slaveEndpoint: RpcEndpointRef): Unit = { - logInfo("Trying to register BlockManager") + logInfo(s"Registering BlockManager $blockManagerId") tell(RegisterBlockManager(blockManagerId, maxMemSize, slaveEndpoint)) - logInfo("Registered BlockManager") + logInfo(s"Registered BlockManager $blockManagerId") } def updateBlockInfo( http://git-wip-us.apache.org/repos/asf/spark/blob/2b35ce81/core/src/main/scala/org/apache/spark/util/ListenerBus.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/util/ListenerBus.scala b/core/src/main/scala/org/apache/spark/util/ListenerBus.scala index 436c195..79fc2e9 100644 --- a/core/src/main/scala/org/apache/spark/util/ListenerBus.scala +++ b/core/src/main/scala/org/apache/spark/util/ListenerBus.scala @@ -54,7 +54,7 @@ private[spark] trait ListenerBus[L <: AnyRef, E] extends Logging { */ final def postToAll(event: E): Unit = { // JavaConverters can create a JIterableWrapper if we use asScala. - // However, this method will be called frequently. To avoid the wrapper cost, here ewe use + // However, this method will be called frequently. To avoid the wrapper cost, here we use // Java Iterator directly. val iter = listeners.iterator while (iter.hasNext) { @@ -70,7 +70,7 @@ private[spark] trait ListenerBus[L <: AnyRef, E] extends Logging { /** * Post an event to the specified listener. `onPostEvent` is guaranteed to be called in the same - * thread. + * thread for all listeners. */ protected def doPostEvent(listener: L, event: E): Unit http://git-wip-us.apache.org/repos/asf/spark/blob/2b35ce81/core/src/main/scala/org/apache/spark/util/Utils.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index a8bb000..7e204fa 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -1169,7 +1169,7 @@ private[spark] object Utils extends Logging { } /** - * Execute a block of code that evaluates to Unit, stop SparkContext is there is any uncaught + * Execute a block of code that evaluates to Unit, stop SparkContext if there is any uncaught * exception * * NOTE: This method is to be called by the driver-side components to avoid stopping the http://git-wip-us.apache.org/repos/asf/spark/blob/2b35ce81/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index 77731b1..b591957 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -52,7 +52,7 @@ case class CatalogStorageFormat( object CatalogStorageFormat { /** Empty storage format for default values and copies. 
*/ - val EmptyStorageFormat = CatalogStorageFormat(locationUri = None, inputFormat = None, + val empty = CatalogStorageFormat(locationUri = None, inputFormat = None, outputFormat = None, serde = None, compressed = false, serdeProperties = Map.empty) } http://git-wip-us.apache.org/repos/asf/spark/blob/2b35ce81/sql/core/src/main/scala/org/apache/spark/sql/Column.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index 204af71..713f794 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -92,7 +92,6 @@ class TypedColumn[-T, U]( } /** - * :: Experimental :: * A column that will be computed based on the data in a [[DataFrame]]. * * A new column is constructed based on the input columns present in a dataframe: @@ -121,7 +120,6 @@ class TypedColumn[-T, U]( * * @since 1.3.0 */ -@Experimental class Column(protected[sql] val expr: Expression) extends Logging { def this(name: String) = this(name match { @@ -132,6 +130,15 @@ class Column(protected[sql] val expr: Expression) extends Logging { case _ => UnresolvedAttribute.quotedString(name) }) + override def toString: String = usePrettyExpression(expr).sql + + override def equals(that: Any): Boolean = that match { + case that: Column => that.expr.equals(this.expr) + case _ => false + } + + override def hashCode: Int = this.expr.hashCode() + /** Creates a column based on the given expression. */ private def withExpr(newExpr: Expression): Column = new Column(newExpr) @@ -158,12 +165,13 @@ class Column(protected[sql] val expr: Expression) extends Logging { // If we have a top level Cast, there is a chance to give it a better alias, if there is a // NamedExpression under this Cast. - case c: Cast => c.transformUp { - case Cast(ne: NamedExpression, to) => UnresolvedAlias(Cast(ne, to)) - } match { - case ne: NamedExpression => ne - case other => Alias(expr, usePrettyExpression(expr).sql)() - } + case c: Cast => + c.transformUp { + case Cast(ne: NamedExpression, to) => UnresolvedAlias(Cast(ne, to)) + } match { + case ne: NamedExpression => ne + case other => Alias(expr, usePrettyExpression(expr).sql)() + } case a: AggregateExpression if a.aggregateFunction.isInstanceOf[TypedAggregateExpression] => UnresolvedAlias(a, Some(Column.generateAlias)) @@ -171,17 +179,6 @@ class Column(protected[sql] val expr: Expression) extends Logging { case expr: Expression => Alias(expr, usePrettyExpression(expr).sql)() } - - - override def toString: String = usePrettyExpression(expr).sql - - override def equals(that: Any): Boolean = that match { - case that: Column => that.expr.equals(this.expr) - case _ => false - } - - override def hashCode: Int = this.expr.hashCode - /** * Provides a type hint about the expected return value of this column. 
This information can * be used by operations such as `select` on a [[Dataset]] to automatically convert the http://git-wip-us.apache.org/repos/asf/spark/blob/2b35ce81/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 7aeec20..8b6662a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -146,11 +146,8 @@ private[sql] object Dataset { * * @groupname basic Basic Dataset functions * @groupname action Actions - * @groupname untypedrel Untyped Language Integrated Relational Queries - * @groupname typedrel Typed Language Integrated Relational Queries - * @groupname func Functional Transformations - * @groupname rdd RDD Operations - * @groupname output Output Operations + * @groupname untypedrel Untyped transformations + * @groupname typedrel Typed transformations * * @since 1.6.0 */ @@ -1892,7 +1889,7 @@ class Dataset[T] private[sql]( * .transform(...) * }}} * - * @group func + * @group typedrel * @since 1.6.0 */ def transform[U](t: Dataset[T] => Dataset[U]): Dataset[U] = t(this) @@ -1902,7 +1899,7 @@ class Dataset[T] private[sql]( * (Scala-specific) * Returns a new [[Dataset]] that only contains elements where `func` returns `true`. * - * @group func + * @group typedrel * @since 1.6.0 */ @Experimental @@ -1919,7 +1916,7 @@ class Dataset[T] private[sql]( * (Java-specific) * Returns a new [[Dataset]] that only contains elements where `func` returns `true`. * - * @group func + * @group typedrel * @since 1.6.0 */ @Experimental @@ -1936,7 +1933,7 @@ class Dataset[T] private[sql]( * (Scala-specific) * Returns a new [[Dataset]] that contains the result of applying `func` to each element. * - * @group func + * @group typedrel * @since 1.6.0 */ @Experimental @@ -1949,7 +1946,7 @@ class Dataset[T] private[sql]( * (Java-specific) * Returns a new [[Dataset]] that contains the result of applying `func` to each element. * - * @group func + * @group typedrel * @since 1.6.0 */ @Experimental @@ -1963,7 +1960,7 @@ class Dataset[T] private[sql]( * (Scala-specific) * Returns a new [[Dataset]] that contains the result of applying `func` to each partition. * - * @group func + * @group typedrel * @since 1.6.0 */ @Experimental @@ -1979,7 +1976,7 @@ class Dataset[T] private[sql]( * (Java-specific) * Returns a new [[Dataset]] that contains the result of applying `f` to each partition. * - * @group func + * @group typedrel * @since 1.6.0 */ @Experimental @@ -1991,8 +1988,6 @@ class Dataset[T] private[sql]( /** * Returns a new [[DataFrame]] that contains the result of applying a serialized R function * `func` to each partition. - * - * @group func */ private[sql] def mapPartitionsInR( func: Array[Byte], @@ -2011,7 +2006,7 @@ class Dataset[T] private[sql]( * Returns a new [[Dataset]] by first applying a function to all elements of this [[Dataset]], * and then flattening the results. * - * @group func + * @group typedrel * @since 1.6.0 */ @Experimental @@ -2024,7 +2019,7 @@ class Dataset[T] private[sql]( * Returns a new [[Dataset]] by first applying a function to all elements of this [[Dataset]], * and then flattening the results. 
* - * @group func + * @group typedrel * @since 1.6.0 */ @Experimental @@ -2207,7 +2202,7 @@ class Dataset[T] private[sql]( * if you go from 1000 partitions to 100 partitions, there will not be a shuffle, instead each of * the 100 new partitions will claim 10 of the current partitions. * - * @group rdd + * @group typedrel * @since 1.6.0 */ def coalesce(numPartitions: Int): Dataset[T] = withTypedPlan { @@ -2283,7 +2278,7 @@ class Dataset[T] private[sql]( /** * Represents the content of the [[Dataset]] as an [[RDD]] of [[T]]. * - * @group rdd + * @group basic * @since 1.6.0 */ lazy val rdd: RDD[T] = { @@ -2296,14 +2291,14 @@ class Dataset[T] private[sql]( /** * Returns the content of the [[Dataset]] as a [[JavaRDD]] of [[Row]]s. - * @group rdd + * @group basic * @since 1.6.0 */ def toJavaRDD: JavaRDD[T] = rdd.toJavaRDD() /** * Returns the content of the [[Dataset]] as a [[JavaRDD]] of [[Row]]s. - * @group rdd + * @group basic * @since 1.6.0 */ def javaRDD: JavaRDD[T] = toJavaRDD @@ -2335,9 +2330,9 @@ class Dataset[T] private[sql]( identifier = sparkSession.sessionState.sqlParser.parseTableIdentifier(viewName), tableType = CatalogTableType.VIEW, schema = Seq.empty[CatalogColumn], - storage = CatalogStorageFormat.EmptyStorageFormat) + storage = CatalogStorageFormat.empty) CreateViewCommand(tableDesc, logicalPlan, allowExisting = false, replace = false, - isTemporary = true, sql = "") + isTemporary = true) } /** @@ -2352,16 +2347,16 @@ class Dataset[T] private[sql]( identifier = sparkSession.sessionState.sqlParser.parseTableIdentifier(viewName), tableType = CatalogTableType.VIEW, schema = Seq.empty[CatalogColumn], - storage = CatalogStorageFormat.EmptyStorageFormat) + storage = CatalogStorageFormat.empty) CreateViewCommand(tableDesc, logicalPlan, allowExisting = false, replace = true, - isTemporary = true, sql = "") + isTemporary = true) } /** * :: Experimental :: * Interface for saving the content of the [[Dataset]] out into external storage or streams. * - * @group output + * @group basic * @since 1.6.0 */ @Experimental http://git-wip-us.apache.org/repos/asf/spark/blob/2b35ce81/sql/core/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala index 3a5ea19..53f4ea6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala @@ -29,8 +29,8 @@ import org.apache.spark.sql.execution.QueryExecution /** * :: Experimental :: * A [[Dataset]] has been logically grouped by a user specified grouping key. Users should not - * construct a [[KeyValueGroupedDataset]] directly, but should instead call `groupBy` on an existing - * [[Dataset]]. + * construct a [[KeyValueGroupedDataset]] directly, but should instead call `groupByKey` on + * an existing [[Dataset]]. * * @since 2.0.0 */ @@ -73,7 +73,8 @@ class KeyValueGroupedDataset[K, V] private[sql]( groupingAttributes) /** - * Returns a [[Dataset]] that contains each unique key. + * Returns a [[Dataset]] that contains each unique key. This is equivalent to doing mapping + * over the Dataset to extract the keys and then running a distinct operation on those. 
* * @since 1.6.0 */ @@ -204,7 +205,6 @@ class KeyValueGroupedDataset[K, V] private[sql]( * Internal helper function for building typed aggregations that return tuples. For simplicity * and code reuse, we do this without the help of the type system and then use helper functions * that cast appropriately for the user facing interface. - * TODO: does not handle aggregations that return nonflat results, */ protected def aggUntyped(columns: TypedColumn[_, _]*): Dataset[_] = { val encoders = columns.map(_.encoder) http://git-wip-us.apache.org/repos/asf/spark/blob/2b35ce81/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala index b0e48a6..58850a7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala @@ -35,6 +35,8 @@ import org.apache.spark.sql.types.NumericType * The main method is the agg function, which has multiple variants. This class also contains * convenience some first order statistics such as mean, sum for convenience. * + * This class was named `GroupedData` in Spark 1.x. + * * @since 2.0.0 */ class RelationalGroupedDataset protected[sql]( @@ -74,7 +76,7 @@ class RelationalGroupedDataset protected[sql]( private[this] def alias(expr: Expression): NamedExpression = expr match { case u: UnresolvedAttribute => UnresolvedAlias(u) case expr: NamedExpression => expr - case a: AggregateExpression if (a.aggregateFunction.isInstanceOf[TypedAggregateExpression]) => + case a: AggregateExpression if a.aggregateFunction.isInstanceOf[TypedAggregateExpression] => UnresolvedAlias(a, Some(Column.generateAlias)) case expr: Expression => Alias(expr, usePrettyExpression(expr).sql)() } http://git-wip-us.apache.org/repos/asf/spark/blob/2b35ce81/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index af419fc..0dc70c0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -41,7 +41,7 @@ import org.apache.spark.sql.types._ import org.apache.spark.sql.util.ExecutionListenerManager /** - * The entry point for working with structured data (rows and columns) in Spark, in Spark 1.x. + * The entry point for working with structured data (rows and columns) in Spark 1.x. * * As of Spark 2.0, this is replaced by [[SparkSession]]. However, we are keeping the class * here for backward compatibility. @@ -171,13 +171,11 @@ class SQLContext private[sql](val sparkSession: SparkSession) def experimental: ExperimentalMethods = sparkSession.experimental /** - * :: Experimental :: * Returns a [[DataFrame]] with no rows or columns. * * @group basic * @since 1.3.0 */ - @Experimental def emptyDataFrame: DataFrame = sparkSession.emptyDataFrame /** @@ -437,7 +435,6 @@ class SQLContext private[sql](val sparkSession: SparkSession) } /** - * :: Experimental :: * Returns a [[DataFrameReader]] that can be used to read data and streams in as a [[DataFrame]]. 
* {{{ * sqlContext.read.parquet("/path/to/file.parquet") @@ -447,31 +444,26 @@ class SQLContext private[sql](val sparkSession: SparkSession) * @group genericdata * @since 1.4.0 */ - @Experimental def read: DataFrameReader = sparkSession.read /** - * :: Experimental :: * Creates an external table from the given path and returns the corresponding DataFrame. * It will use the default data source configured by spark.sql.sources.default. * * @group ddl_ops * @since 1.3.0 */ - @Experimental def createExternalTable(tableName: String, path: String): DataFrame = { sparkSession.catalog.createExternalTable(tableName, path) } /** - * :: Experimental :: * Creates an external table from the given path based on a data source * and returns the corresponding DataFrame. * * @group ddl_ops * @since 1.3.0 */ - @Experimental def createExternalTable( tableName: String, path: String, @@ -480,14 +472,12 @@ class SQLContext private[sql](val sparkSession: SparkSession) } /** - * :: Experimental :: * Creates an external table from the given path based on a data source and a set of options. * Then, returns the corresponding DataFrame. * * @group ddl_ops * @since 1.3.0 */ - @Experimental def createExternalTable( tableName: String, source: String, @@ -496,7 +486,6 @@ class SQLContext private[sql](val sparkSession: SparkSession) } /** - * :: Experimental :: * (Scala-specific) * Creates an external table from the given path based on a data source and a set of options. * Then, returns the corresponding DataFrame. @@ -504,7 +493,6 @@ class SQLContext private[sql](val sparkSession: SparkSession) * @group ddl_ops * @since 1.3.0 */ - @Experimental def createExternalTable( tableName: String, source: String, @@ -513,14 +501,12 @@ class SQLContext private[sql](val sparkSession: SparkSession) } /** - * :: Experimental :: * Create an external table from the given path based on a data source, a schema and * a set of options. Then, returns the corresponding DataFrame. * * @group ddl_ops * @since 1.3.0 */ - @Experimental def createExternalTable( tableName: String, source: String, @@ -530,7 +516,6 @@ class SQLContext private[sql](val sparkSession: SparkSession) } /** - * :: Experimental :: * (Scala-specific) * Create an external table from the given path based on a data source, a schema and * a set of options. Then, returns the corresponding DataFrame. 
@@ -538,7 +523,6 @@ class SQLContext private[sql](val sparkSession: SparkSession) * @group ddl_ops * @since 1.3.0 */ - @Experimental def createExternalTable( tableName: String, source: String, http://git-wip-us.apache.org/repos/asf/spark/blob/2b35ce81/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 48fb95b..6c19bf0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -902,9 +902,9 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { } validateRowFormatFileFormat(ctx.rowFormat, ctx.createFileFormat, ctx) val fileStorage = Option(ctx.createFileFormat).map(visitCreateFileFormat) - .getOrElse(CatalogStorageFormat.EmptyStorageFormat) + .getOrElse(CatalogStorageFormat.empty) val rowStorage = Option(ctx.rowFormat).map(visitRowFormat) - .getOrElse(CatalogStorageFormat.EmptyStorageFormat) + .getOrElse(CatalogStorageFormat.empty) val location = Option(ctx.locationSpec).map(visitLocationSpec) // If we are creating an EXTERNAL table, then the LOCATION field is required if (external && location.isEmpty) { @@ -982,7 +982,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { */ override def visitTableFileFormat( ctx: TableFileFormatContext): CatalogStorageFormat = withOrigin(ctx) { - CatalogStorageFormat.EmptyStorageFormat.copy( + CatalogStorageFormat.empty.copy( inputFormat = Option(string(ctx.inFmt)), outputFormat = Option(string(ctx.outFmt))) } @@ -995,7 +995,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { val source = ctx.identifier.getText HiveSerDe.sourceToSerDe(source, conf) match { case Some(s) => - CatalogStorageFormat.EmptyStorageFormat.copy( + CatalogStorageFormat.empty.copy( inputFormat = s.inputFormat, outputFormat = s.outputFormat, serde = s.serde) @@ -1035,7 +1035,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { override def visitRowFormatSerde( ctx: RowFormatSerdeContext): CatalogStorageFormat = withOrigin(ctx) { import ctx._ - CatalogStorageFormat.EmptyStorageFormat.copy( + CatalogStorageFormat.empty.copy( serde = Option(string(name)), serdeProperties = Option(tablePropertyList).map(visitPropertyKeyValues).getOrElse(Map.empty)) } @@ -1065,7 +1065,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { ctx) "line.delim" -> value } - CatalogStorageFormat.EmptyStorageFormat.copy(serdeProperties = entries.toMap) + CatalogStorageFormat.empty.copy(serdeProperties = entries.toMap) } /** @@ -1179,12 +1179,12 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { identifier = visitTableIdentifier(name), tableType = CatalogTableType.VIEW, schema = schema, - storage = CatalogStorageFormat.EmptyStorageFormat, + storage = CatalogStorageFormat.empty, properties = properties, viewOriginalText = sql, viewText = sql, comment = comment) - CreateViewCommand(tableDesc, plan(query), allowExist, replace, isTemporary, command(ctx)) + CreateViewCommand(tableDesc, plan(query), allowExist, replace, isTemporary) } /** http://git-wip-us.apache.org/repos/asf/spark/blob/2b35ce81/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala ---------------------------------------------------------------------- diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala index 6468916..20c0278 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala @@ -41,15 +41,13 @@ import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} * at the end of current Spark session. Existing permanent relations with the same * name are not visible to the current session while the temporary view exists, * unless they are specified with full qualified table name with database prefix. - * @param sql the original sql */ case class CreateViewCommand( tableDesc: CatalogTable, child: LogicalPlan, allowExisting: Boolean, replace: Boolean, - isTemporary: Boolean, - sql: String) + isTemporary: Boolean) extends RunnableCommand { // TODO: Note that this class can NOT canonicalize the view SQL string entirely, which is http://git-wip-us.apache.org/repos/asf/spark/blob/2b35ce81/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala index 417d09e..e57c171 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala @@ -24,6 +24,7 @@ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{BooleanType, StringType, StructField, StructType} +@deprecated("This suite is deprecated to silent compiler deprecation warnings", "2.0.0") class SQLContextSuite extends SparkFunSuite with SharedSparkContext { object DummyRule extends Rule[LogicalPlan] { http://git-wip-us.apache.org/repos/asf/spark/blob/2b35ce81/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala index 96c8fa6..3297a6f 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala @@ -17,8 +17,6 @@ package org.apache.spark.sql.hive -import org.apache.hadoop.hive.serde.serdeConstants - import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute import org.apache.spark.sql.catalyst.catalog.{CatalogColumn, CatalogTable, CatalogTableType} @@ -37,9 +35,9 @@ class HiveDDLCommandSuite extends PlanTest { private def extractTableDesc(sql: String): (CatalogTable, Boolean) = { parser.parsePlan(sql).collect { - case CreateTableCommand(desc, allowExisting) => (desc, allowExisting) - case CreateTableAsSelectLogicalPlan(desc, _, allowExisting) => (desc, allowExisting) - case CreateViewCommand(desc, _, allowExisting, _, _, _) => (desc, allowExisting) + case c: CreateTableCommand => (c.table, c.ifNotExists) + case c: CreateTableAsSelectLogicalPlan => (c.tableDesc, c.allowExisting) + case c: CreateViewCommand => (c.tableDesc, c.allowExisting) }.head }
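----------------------------------------------------------------------

One user-facing documentation change above is worth illustrating: the KeyValueGroupedDataset scaladoc now states that `keys` is equivalent to mapping out the grouping key and running a distinct over the result. The sketch below is illustrative only and is not part of the patch; the object name, local master setting, and sample data are assumptions.

import org.apache.spark.sql.SparkSession

// Illustrative sketch only: object name, master setting, and sample data are assumptions.
object KeysEquivalenceSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("keys-sketch")
      .getOrCreate()
    import spark.implicits._

    val ds = Seq(("a", 1), ("b", 2), ("a", 3)).toDS()

    // `keys` on a KeyValueGroupedDataset returns a Dataset of the unique grouping keys ...
    val viaKeys = ds.groupByKey(_._1).keys

    // ... which the updated scaladoc describes as equivalent to map-then-distinct.
    val viaMapDistinct = ds.map(_._1).distinct()

    viaKeys.show()         // rows: a, b (order not guaranteed)
    viaMapDistinct.show()  // rows: a, b (order not guaranteed)

    spark.stop()
  }
}

Both Datasets end up with the same two rows ("a" and "b", in no particular order); the patch only clarifies the documentation of `keys`, its behavior is unchanged.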