Repository: spark
Updated Branches:
  refs/heads/branch-2.0 f208a9dcd -> 2b35ce818
[SPARK-15638][SQL] Audit Dataset, SparkSession, and SQLContext ## What changes were proposed in this pull request? This patch contains a list of changes as a result of my auditing Dataset, SparkSession, and SQLContext. The patch audits the categorization of experimental APIs, function groups, and deprecations. For the detailed list of changes, please see the diff. ## How was this patch tested? N/A Author: Reynold Xin <r...@databricks.com> Closes #13370 from rxin/SPARK-15638. (cherry picked from commit 675921040ee4802aa9914457de62af746bc3657d) Signed-off-by: Reynold Xin <r...@databricks.com> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2b35ce81 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2b35ce81 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2b35ce81 Branch: refs/heads/branch-2.0 Commit: 2b35ce818650d91a25a14cb40121ae22521de4b5 Parents: f208a9d Author: Reynold Xin <r...@databricks.com> Authored: Mon May 30 22:47:58 2016 -0700 Committer: Reynold Xin <r...@databricks.com> Committed: Mon May 30 22:48:03 2016 -0700 ---------------------------------------------------------------------- .../apache/spark/memory/TaskMemoryManager.java | 2 +- .../spark/scheduler/SchedulableBuilder.scala | 3 +- .../org/apache/spark/storage/BlockManager.scala | 6 +-- .../spark/storage/BlockManagerMaster.scala | 4 +- .../org/apache/spark/util/ListenerBus.scala | 4 +- .../scala/org/apache/spark/util/Utils.scala | 2 +- .../spark/sql/catalyst/catalog/interface.scala | 2 +- .../scala/org/apache/spark/sql/Column.scala | 35 +++++++-------- .../scala/org/apache/spark/sql/Dataset.scala | 45 +++++++++----------- .../spark/sql/KeyValueGroupedDataset.scala | 8 ++-- .../spark/sql/RelationalGroupedDataset.scala | 4 +- .../scala/org/apache/spark/sql/SQLContext.scala | 18 +------- .../spark/sql/execution/SparkSqlParser.scala | 16 +++---- .../spark/sql/execution/command/views.scala | 4 +- .../org/apache/spark/sql/SQLContextSuite.scala | 1 + .../spark/sql/hive/HiveDDLCommandSuite.scala | 8 ++-- 16 files changed, 69 insertions(+), 93 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/2b35ce81/core/src/main/java/org/apache/spark/memory/TaskMemoryManager.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/spark/memory/TaskMemoryManager.java b/core/src/main/java/org/apache/spark/memory/TaskMemoryManager.java index a4a571f..867c4a1 100644 --- a/core/src/main/java/org/apache/spark/memory/TaskMemoryManager.java +++ b/core/src/main/java/org/apache/spark/memory/TaskMemoryManager.java @@ -182,7 +182,7 @@ public class TaskMemoryManager { } consumers.add(consumer); - logger.debug("Task {} acquire {} for {}", taskAttemptId, Utils.bytesToString(got), consumer); + logger.debug("Task {} acquired {} for {}", taskAttemptId, Utils.bytesToString(got), consumer); return got; } } http://git-wip-us.apache.org/repos/asf/spark/blob/2b35ce81/core/src/main/scala/org/apache/spark/scheduler/SchedulableBuilder.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/scheduler/SchedulableBuilder.scala b/core/src/main/scala/org/apache/spark/scheduler/SchedulableBuilder.scala index 100ed76..96325a0 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/SchedulableBuilder.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/SchedulableBuilder.scala 
@@ -112,7 +112,8 @@ private[spark] class FairSchedulableBuilder(val rootPool: Pool, conf: SparkConf) schedulingMode = SchedulingMode.withName(xmlSchedulingMode) } catch { case e: NoSuchElementException => - logWarning("Error xml schedulingMode, using default schedulingMode") + logWarning(s"Unsupported schedulingMode: $xmlSchedulingMode, " + + s"using the default schedulingMode: $schedulingMode") } } http://git-wip-us.apache.org/repos/asf/spark/blob/2b35ce81/core/src/main/scala/org/apache/spark/storage/BlockManager.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index c56e451..2f9473a 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -92,9 +92,9 @@ private[spark] class BlockManager( private[spark] val diskStore = new DiskStore(conf, diskBlockManager) memoryManager.setMemoryStore(memoryStore) - // Note: depending on the memory manager, `maxStorageMemory` may actually vary over time. + // Note: depending on the memory manager, `maxMemory` may actually vary over time. // However, since we use this only for reporting and logging, what we actually want here is - // the absolute maximum value that `maxStorageMemory` can ever possibly reach. We may need + // the absolute maximum value that `maxMemory` can ever possibly reach. We may need // to revisit whether reporting this value as the "max" is intuitive to the user. private val maxMemory = memoryManager.maxOnHeapStorageMemory @@ -231,7 +231,7 @@ private[spark] class BlockManager( */ def reregister(): Unit = { // TODO: We might need to rate limit re-registering. - logInfo("BlockManager re-registering with master") + logInfo(s"BlockManager $blockManagerId re-registering with master") master.registerBlockManager(blockManagerId, maxMemory, slaveEndpoint) reportAllBlocks() } http://git-wip-us.apache.org/repos/asf/spark/blob/2b35ce81/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala index c22d2e0f..52db45b 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala @@ -45,9 +45,9 @@ class BlockManagerMaster( /** Register the BlockManager's id with the driver. 
*/ def registerBlockManager( blockManagerId: BlockManagerId, maxMemSize: Long, slaveEndpoint: RpcEndpointRef): Unit = { - logInfo("Trying to register BlockManager") + logInfo(s"Registering BlockManager $blockManagerId") tell(RegisterBlockManager(blockManagerId, maxMemSize, slaveEndpoint)) - logInfo("Registered BlockManager") + logInfo(s"Registered BlockManager $blockManagerId") } def updateBlockInfo( http://git-wip-us.apache.org/repos/asf/spark/blob/2b35ce81/core/src/main/scala/org/apache/spark/util/ListenerBus.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/util/ListenerBus.scala b/core/src/main/scala/org/apache/spark/util/ListenerBus.scala index 436c195..79fc2e9 100644 --- a/core/src/main/scala/org/apache/spark/util/ListenerBus.scala +++ b/core/src/main/scala/org/apache/spark/util/ListenerBus.scala @@ -54,7 +54,7 @@ private[spark] trait ListenerBus[L <: AnyRef, E] extends Logging { */ final def postToAll(event: E): Unit = { // JavaConverters can create a JIterableWrapper if we use asScala. - // However, this method will be called frequently. To avoid the wrapper cost, here ewe use + // However, this method will be called frequently. To avoid the wrapper cost, here we use // Java Iterator directly. val iter = listeners.iterator while (iter.hasNext) { @@ -70,7 +70,7 @@ private[spark] trait ListenerBus[L <: AnyRef, E] extends Logging { /** * Post an event to the specified listener. `onPostEvent` is guaranteed to be called in the same - * thread. + * thread for all listeners. */ protected def doPostEvent(listener: L, event: E): Unit http://git-wip-us.apache.org/repos/asf/spark/blob/2b35ce81/core/src/main/scala/org/apache/spark/util/Utils.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index a8bb000..7e204fa 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -1169,7 +1169,7 @@ private[spark] object Utils extends Logging { } /** - * Execute a block of code that evaluates to Unit, stop SparkContext is there is any uncaught + * Execute a block of code that evaluates to Unit, stop SparkContext if there is any uncaught * exception * * NOTE: This method is to be called by the driver-side components to avoid stopping the http://git-wip-us.apache.org/repos/asf/spark/blob/2b35ce81/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index 77731b1..b591957 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -52,7 +52,7 @@ case class CatalogStorageFormat( object CatalogStorageFormat { /** Empty storage format for default values and copies. 
*/ - val EmptyStorageFormat = CatalogStorageFormat(locationUri = None, inputFormat = None, + val empty = CatalogStorageFormat(locationUri = None, inputFormat = None, outputFormat = None, serde = None, compressed = false, serdeProperties = Map.empty) } http://git-wip-us.apache.org/repos/asf/spark/blob/2b35ce81/sql/core/src/main/scala/org/apache/spark/sql/Column.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index 204af71..713f794 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -92,7 +92,6 @@ class TypedColumn[-T, U]( } /** - * :: Experimental :: * A column that will be computed based on the data in a [[DataFrame]]. * * A new column is constructed based on the input columns present in a dataframe: @@ -121,7 +120,6 @@ class TypedColumn[-T, U]( * * @since 1.3.0 */ -@Experimental class Column(protected[sql] val expr: Expression) extends Logging { def this(name: String) = this(name match { @@ -132,6 +130,15 @@ class Column(protected[sql] val expr: Expression) extends Logging { case _ => UnresolvedAttribute.quotedString(name) }) + override def toString: String = usePrettyExpression(expr).sql + + override def equals(that: Any): Boolean = that match { + case that: Column => that.expr.equals(this.expr) + case _ => false + } + + override def hashCode: Int = this.expr.hashCode() + /** Creates a column based on the given expression. */ private def withExpr(newExpr: Expression): Column = new Column(newExpr) @@ -158,12 +165,13 @@ class Column(protected[sql] val expr: Expression) extends Logging { // If we have a top level Cast, there is a chance to give it a better alias, if there is a // NamedExpression under this Cast. - case c: Cast => c.transformUp { - case Cast(ne: NamedExpression, to) => UnresolvedAlias(Cast(ne, to)) - } match { - case ne: NamedExpression => ne - case other => Alias(expr, usePrettyExpression(expr).sql)() - } + case c: Cast => + c.transformUp { + case Cast(ne: NamedExpression, to) => UnresolvedAlias(Cast(ne, to)) + } match { + case ne: NamedExpression => ne + case other => Alias(expr, usePrettyExpression(expr).sql)() + } case a: AggregateExpression if a.aggregateFunction.isInstanceOf[TypedAggregateExpression] => UnresolvedAlias(a, Some(Column.generateAlias)) @@ -171,17 +179,6 @@ class Column(protected[sql] val expr: Expression) extends Logging { case expr: Expression => Alias(expr, usePrettyExpression(expr).sql)() } - - - override def toString: String = usePrettyExpression(expr).sql - - override def equals(that: Any): Boolean = that match { - case that: Column => that.expr.equals(this.expr) - case _ => false - } - - override def hashCode: Int = this.expr.hashCode - /** * Provides a type hint about the expected return value of this column. 
This information can * be used by operations such as `select` on a [[Dataset]] to automatically convert the http://git-wip-us.apache.org/repos/asf/spark/blob/2b35ce81/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 7aeec20..8b6662a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -146,11 +146,8 @@ private[sql] object Dataset { * * @groupname basic Basic Dataset functions * @groupname action Actions - * @groupname untypedrel Untyped Language Integrated Relational Queries - * @groupname typedrel Typed Language Integrated Relational Queries - * @groupname func Functional Transformations - * @groupname rdd RDD Operations - * @groupname output Output Operations + * @groupname untypedrel Untyped transformations + * @groupname typedrel Typed transformations * * @since 1.6.0 */ @@ -1892,7 +1889,7 @@ class Dataset[T] private[sql]( * .transform(...) * }}} * - * @group func + * @group typedrel * @since 1.6.0 */ def transform[U](t: Dataset[T] => Dataset[U]): Dataset[U] = t(this) @@ -1902,7 +1899,7 @@ class Dataset[T] private[sql]( * (Scala-specific) * Returns a new [[Dataset]] that only contains elements where `func` returns `true`. * - * @group func + * @group typedrel * @since 1.6.0 */ @Experimental @@ -1919,7 +1916,7 @@ class Dataset[T] private[sql]( * (Java-specific) * Returns a new [[Dataset]] that only contains elements where `func` returns `true`. * - * @group func + * @group typedrel * @since 1.6.0 */ @Experimental @@ -1936,7 +1933,7 @@ class Dataset[T] private[sql]( * (Scala-specific) * Returns a new [[Dataset]] that contains the result of applying `func` to each element. * - * @group func + * @group typedrel * @since 1.6.0 */ @Experimental @@ -1949,7 +1946,7 @@ class Dataset[T] private[sql]( * (Java-specific) * Returns a new [[Dataset]] that contains the result of applying `func` to each element. * - * @group func + * @group typedrel * @since 1.6.0 */ @Experimental @@ -1963,7 +1960,7 @@ class Dataset[T] private[sql]( * (Scala-specific) * Returns a new [[Dataset]] that contains the result of applying `func` to each partition. * - * @group func + * @group typedrel * @since 1.6.0 */ @Experimental @@ -1979,7 +1976,7 @@ class Dataset[T] private[sql]( * (Java-specific) * Returns a new [[Dataset]] that contains the result of applying `f` to each partition. * - * @group func + * @group typedrel * @since 1.6.0 */ @Experimental @@ -1991,8 +1988,6 @@ class Dataset[T] private[sql]( /** * Returns a new [[DataFrame]] that contains the result of applying a serialized R function * `func` to each partition. - * - * @group func */ private[sql] def mapPartitionsInR( func: Array[Byte], @@ -2011,7 +2006,7 @@ class Dataset[T] private[sql]( * Returns a new [[Dataset]] by first applying a function to all elements of this [[Dataset]], * and then flattening the results. * - * @group func + * @group typedrel * @since 1.6.0 */ @Experimental @@ -2024,7 +2019,7 @@ class Dataset[T] private[sql]( * Returns a new [[Dataset]] by first applying a function to all elements of this [[Dataset]], * and then flattening the results. 
* - * @group func + * @group typedrel * @since 1.6.0 */ @Experimental @@ -2207,7 +2202,7 @@ class Dataset[T] private[sql]( * if you go from 1000 partitions to 100 partitions, there will not be a shuffle, instead each of * the 100 new partitions will claim 10 of the current partitions. * - * @group rdd + * @group typedrel * @since 1.6.0 */ def coalesce(numPartitions: Int): Dataset[T] = withTypedPlan { @@ -2283,7 +2278,7 @@ class Dataset[T] private[sql]( /** * Represents the content of the [[Dataset]] as an [[RDD]] of [[T]]. * - * @group rdd + * @group basic * @since 1.6.0 */ lazy val rdd: RDD[T] = { @@ -2296,14 +2291,14 @@ class Dataset[T] private[sql]( /** * Returns the content of the [[Dataset]] as a [[JavaRDD]] of [[Row]]s. - * @group rdd + * @group basic * @since 1.6.0 */ def toJavaRDD: JavaRDD[T] = rdd.toJavaRDD() /** * Returns the content of the [[Dataset]] as a [[JavaRDD]] of [[Row]]s. - * @group rdd + * @group basic * @since 1.6.0 */ def javaRDD: JavaRDD[T] = toJavaRDD @@ -2335,9 +2330,9 @@ class Dataset[T] private[sql]( identifier = sparkSession.sessionState.sqlParser.parseTableIdentifier(viewName), tableType = CatalogTableType.VIEW, schema = Seq.empty[CatalogColumn], - storage = CatalogStorageFormat.EmptyStorageFormat) + storage = CatalogStorageFormat.empty) CreateViewCommand(tableDesc, logicalPlan, allowExisting = false, replace = false, - isTemporary = true, sql = "") + isTemporary = true) } /** @@ -2352,16 +2347,16 @@ class Dataset[T] private[sql]( identifier = sparkSession.sessionState.sqlParser.parseTableIdentifier(viewName), tableType = CatalogTableType.VIEW, schema = Seq.empty[CatalogColumn], - storage = CatalogStorageFormat.EmptyStorageFormat) + storage = CatalogStorageFormat.empty) CreateViewCommand(tableDesc, logicalPlan, allowExisting = false, replace = true, - isTemporary = true, sql = "") + isTemporary = true) } /** * :: Experimental :: * Interface for saving the content of the [[Dataset]] out into external storage or streams. * - * @group output + * @group basic * @since 1.6.0 */ @Experimental http://git-wip-us.apache.org/repos/asf/spark/blob/2b35ce81/sql/core/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala index 3a5ea19..53f4ea6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala @@ -29,8 +29,8 @@ import org.apache.spark.sql.execution.QueryExecution /** * :: Experimental :: * A [[Dataset]] has been logically grouped by a user specified grouping key. Users should not - * construct a [[KeyValueGroupedDataset]] directly, but should instead call `groupBy` on an existing - * [[Dataset]]. + * construct a [[KeyValueGroupedDataset]] directly, but should instead call `groupByKey` on + * an existing [[Dataset]]. * * @since 2.0.0 */ @@ -73,7 +73,8 @@ class KeyValueGroupedDataset[K, V] private[sql]( groupingAttributes) /** - * Returns a [[Dataset]] that contains each unique key. + * Returns a [[Dataset]] that contains each unique key. This is equivalent to doing mapping + * over the Dataset to extract the keys and then running a distinct operation on those. 
* * @since 1.6.0 */ @@ -204,7 +205,6 @@ class KeyValueGroupedDataset[K, V] private[sql]( * Internal helper function for building typed aggregations that return tuples. For simplicity * and code reuse, we do this without the help of the type system and then use helper functions * that cast appropriately for the user facing interface. - * TODO: does not handle aggregations that return nonflat results, */ protected def aggUntyped(columns: TypedColumn[_, _]*): Dataset[_] = { val encoders = columns.map(_.encoder) http://git-wip-us.apache.org/repos/asf/spark/blob/2b35ce81/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala index b0e48a6..58850a7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala @@ -35,6 +35,8 @@ import org.apache.spark.sql.types.NumericType * The main method is the agg function, which has multiple variants. This class also contains * convenience some first order statistics such as mean, sum for convenience. * + * This class was named `GroupedData` in Spark 1.x. + * * @since 2.0.0 */ class RelationalGroupedDataset protected[sql]( @@ -74,7 +76,7 @@ class RelationalGroupedDataset protected[sql]( private[this] def alias(expr: Expression): NamedExpression = expr match { case u: UnresolvedAttribute => UnresolvedAlias(u) case expr: NamedExpression => expr - case a: AggregateExpression if (a.aggregateFunction.isInstanceOf[TypedAggregateExpression]) => + case a: AggregateExpression if a.aggregateFunction.isInstanceOf[TypedAggregateExpression] => UnresolvedAlias(a, Some(Column.generateAlias)) case expr: Expression => Alias(expr, usePrettyExpression(expr).sql)() } http://git-wip-us.apache.org/repos/asf/spark/blob/2b35ce81/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index af419fc..0dc70c0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -41,7 +41,7 @@ import org.apache.spark.sql.types._ import org.apache.spark.sql.util.ExecutionListenerManager /** - * The entry point for working with structured data (rows and columns) in Spark, in Spark 1.x. + * The entry point for working with structured data (rows and columns) in Spark 1.x. * * As of Spark 2.0, this is replaced by [[SparkSession]]. However, we are keeping the class * here for backward compatibility. @@ -171,13 +171,11 @@ class SQLContext private[sql](val sparkSession: SparkSession) def experimental: ExperimentalMethods = sparkSession.experimental /** - * :: Experimental :: * Returns a [[DataFrame]] with no rows or columns. * * @group basic * @since 1.3.0 */ - @Experimental def emptyDataFrame: DataFrame = sparkSession.emptyDataFrame /** @@ -437,7 +435,6 @@ class SQLContext private[sql](val sparkSession: SparkSession) } /** - * :: Experimental :: * Returns a [[DataFrameReader]] that can be used to read data and streams in as a [[DataFrame]]. 
* {{{ * sqlContext.read.parquet("/path/to/file.parquet") @@ -447,31 +444,26 @@ class SQLContext private[sql](val sparkSession: SparkSession) * @group genericdata * @since 1.4.0 */ - @Experimental def read: DataFrameReader = sparkSession.read /** - * :: Experimental :: * Creates an external table from the given path and returns the corresponding DataFrame. * It will use the default data source configured by spark.sql.sources.default. * * @group ddl_ops * @since 1.3.0 */ - @Experimental def createExternalTable(tableName: String, path: String): DataFrame = { sparkSession.catalog.createExternalTable(tableName, path) } /** - * :: Experimental :: * Creates an external table from the given path based on a data source * and returns the corresponding DataFrame. * * @group ddl_ops * @since 1.3.0 */ - @Experimental def createExternalTable( tableName: String, path: String, @@ -480,14 +472,12 @@ class SQLContext private[sql](val sparkSession: SparkSession) } /** - * :: Experimental :: * Creates an external table from the given path based on a data source and a set of options. * Then, returns the corresponding DataFrame. * * @group ddl_ops * @since 1.3.0 */ - @Experimental def createExternalTable( tableName: String, source: String, @@ -496,7 +486,6 @@ class SQLContext private[sql](val sparkSession: SparkSession) } /** - * :: Experimental :: * (Scala-specific) * Creates an external table from the given path based on a data source and a set of options. * Then, returns the corresponding DataFrame. @@ -504,7 +493,6 @@ class SQLContext private[sql](val sparkSession: SparkSession) * @group ddl_ops * @since 1.3.0 */ - @Experimental def createExternalTable( tableName: String, source: String, @@ -513,14 +501,12 @@ class SQLContext private[sql](val sparkSession: SparkSession) } /** - * :: Experimental :: * Create an external table from the given path based on a data source, a schema and * a set of options. Then, returns the corresponding DataFrame. * * @group ddl_ops * @since 1.3.0 */ - @Experimental def createExternalTable( tableName: String, source: String, @@ -530,7 +516,6 @@ class SQLContext private[sql](val sparkSession: SparkSession) } /** - * :: Experimental :: * (Scala-specific) * Create an external table from the given path based on a data source, a schema and * a set of options. Then, returns the corresponding DataFrame. 
@@ -538,7 +523,6 @@ class SQLContext private[sql](val sparkSession: SparkSession) * @group ddl_ops * @since 1.3.0 */ - @Experimental def createExternalTable( tableName: String, source: String, http://git-wip-us.apache.org/repos/asf/spark/blob/2b35ce81/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 48fb95b..6c19bf0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -902,9 +902,9 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { } validateRowFormatFileFormat(ctx.rowFormat, ctx.createFileFormat, ctx) val fileStorage = Option(ctx.createFileFormat).map(visitCreateFileFormat) - .getOrElse(CatalogStorageFormat.EmptyStorageFormat) + .getOrElse(CatalogStorageFormat.empty) val rowStorage = Option(ctx.rowFormat).map(visitRowFormat) - .getOrElse(CatalogStorageFormat.EmptyStorageFormat) + .getOrElse(CatalogStorageFormat.empty) val location = Option(ctx.locationSpec).map(visitLocationSpec) // If we are creating an EXTERNAL table, then the LOCATION field is required if (external && location.isEmpty) { @@ -982,7 +982,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { */ override def visitTableFileFormat( ctx: TableFileFormatContext): CatalogStorageFormat = withOrigin(ctx) { - CatalogStorageFormat.EmptyStorageFormat.copy( + CatalogStorageFormat.empty.copy( inputFormat = Option(string(ctx.inFmt)), outputFormat = Option(string(ctx.outFmt))) } @@ -995,7 +995,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { val source = ctx.identifier.getText HiveSerDe.sourceToSerDe(source, conf) match { case Some(s) => - CatalogStorageFormat.EmptyStorageFormat.copy( + CatalogStorageFormat.empty.copy( inputFormat = s.inputFormat, outputFormat = s.outputFormat, serde = s.serde) @@ -1035,7 +1035,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { override def visitRowFormatSerde( ctx: RowFormatSerdeContext): CatalogStorageFormat = withOrigin(ctx) { import ctx._ - CatalogStorageFormat.EmptyStorageFormat.copy( + CatalogStorageFormat.empty.copy( serde = Option(string(name)), serdeProperties = Option(tablePropertyList).map(visitPropertyKeyValues).getOrElse(Map.empty)) } @@ -1065,7 +1065,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { ctx) "line.delim" -> value } - CatalogStorageFormat.EmptyStorageFormat.copy(serdeProperties = entries.toMap) + CatalogStorageFormat.empty.copy(serdeProperties = entries.toMap) } /** @@ -1179,12 +1179,12 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { identifier = visitTableIdentifier(name), tableType = CatalogTableType.VIEW, schema = schema, - storage = CatalogStorageFormat.EmptyStorageFormat, + storage = CatalogStorageFormat.empty, properties = properties, viewOriginalText = sql, viewText = sql, comment = comment) - CreateViewCommand(tableDesc, plan(query), allowExist, replace, isTemporary, command(ctx)) + CreateViewCommand(tableDesc, plan(query), allowExist, replace, isTemporary) } /** http://git-wip-us.apache.org/repos/asf/spark/blob/2b35ce81/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala ---------------------------------------------------------------------- diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala index 6468916..20c0278 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala @@ -41,15 +41,13 @@ import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} * at the end of current Spark session. Existing permanent relations with the same * name are not visible to the current session while the temporary view exists, * unless they are specified with full qualified table name with database prefix. - * @param sql the original sql */ case class CreateViewCommand( tableDesc: CatalogTable, child: LogicalPlan, allowExisting: Boolean, replace: Boolean, - isTemporary: Boolean, - sql: String) + isTemporary: Boolean) extends RunnableCommand { // TODO: Note that this class can NOT canonicalize the view SQL string entirely, which is http://git-wip-us.apache.org/repos/asf/spark/blob/2b35ce81/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala index 417d09e..e57c171 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala @@ -24,6 +24,7 @@ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{BooleanType, StringType, StructField, StructType} +@deprecated("This suite is deprecated to silent compiler deprecation warnings", "2.0.0") class SQLContextSuite extends SparkFunSuite with SharedSparkContext { object DummyRule extends Rule[LogicalPlan] { http://git-wip-us.apache.org/repos/asf/spark/blob/2b35ce81/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala index 96c8fa6..3297a6f 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala @@ -17,8 +17,6 @@ package org.apache.spark.sql.hive -import org.apache.hadoop.hive.serde.serdeConstants - import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute import org.apache.spark.sql.catalyst.catalog.{CatalogColumn, CatalogTable, CatalogTableType} @@ -37,9 +35,9 @@ class HiveDDLCommandSuite extends PlanTest { private def extractTableDesc(sql: String): (CatalogTable, Boolean) = { parser.parsePlan(sql).collect { - case CreateTableCommand(desc, allowExisting) => (desc, allowExisting) - case CreateTableAsSelectLogicalPlan(desc, _, allowExisting) => (desc, allowExisting) - case CreateViewCommand(desc, _, allowExisting, _, _, _) => (desc, allowExisting) + case c: CreateTableCommand => (c.table, c.ifNotExists) + case c: CreateTableAsSelectLogicalPlan => (c.tableDesc, c.allowExisting) + case c: CreateViewCommand => (c.tableDesc, c.allowExisting) }.head }
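----------------------------------------------------------------------

One user-facing documentation change above is worth illustrating: the KeyValueGroupedDataset scaladoc now states that `keys` is equivalent to mapping out the grouping key and running a distinct over the result. The sketch below is illustrative only and is not part of the patch; the object name, local master setting, and sample data are assumptions.

import org.apache.spark.sql.SparkSession

// Illustrative sketch only: object name, master setting, and sample data are assumptions.
object KeysEquivalenceSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("keys-sketch")
      .getOrCreate()
    import spark.implicits._

    val ds = Seq(("a", 1), ("b", 2), ("a", 3)).toDS()

    // `keys` on a KeyValueGroupedDataset returns a Dataset of the unique grouping keys ...
    val viaKeys = ds.groupByKey(_._1).keys

    // ... which the updated scaladoc describes as equivalent to map-then-distinct.
    val viaMapDistinct = ds.map(_._1).distinct()

    viaKeys.show()         // rows: a, b (order not guaranteed)
    viaMapDistinct.show()  // rows: a, b (order not guaranteed)

    spark.stop()
  }
}

Both Datasets end up with the same two rows ("a" and "b", in no particular order); the patch only clarifies the documentation of `keys`, its behavior is unchanged.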