Repository: spark
Updated Branches:
  refs/heads/master f00df40cf -> 72adfbf94
[SPARK-17900][SQL] Graduate a list of Spark SQL APIs to stable

## What changes were proposed in this pull request?

This patch graduates a list of Spark SQL APIs and marks them stable.

The following are marked stable:

Dataset/DataFrame:
- functions, since 1.3
- ColumnName, since 1.3
- DataFrameNaFunctions, since 1.3.1
- DataFrameStatFunctions, since 1.4
- UserDefinedFunction, since 1.3
- UserDefinedAggregateFunction, since 1.5
- Window and WindowSpec, since 1.4

Data sources:
- DataSourceRegister, since 1.5
- RelationProvider, since 1.3
- SchemaRelationProvider, since 1.3
- CreatableRelationProvider, since 1.3
- BaseRelation, since 1.3
- TableScan, since 1.3
- PrunedScan, since 1.3
- PrunedFilteredScan, since 1.3
- InsertableRelation, since 1.3

The following are kept experimental / evolving:

Data sources:
- CatalystScan (tied to internal logical plans, so it is not stable by definition)

Structured streaming:
- all classes (new in 2.0 and likely to change)

Dataset typed operations (introduced in 1.6 and 2.0; these might change, although the probability is low):
- all typed methods on Dataset
- KeyValueGroupedDataset
- o.a.s.sql.expressions.javalang.typed
- o.a.s.sql.expressions.scalalang.typed
- methods that return typed Dataset in SparkSession

We should discuss further whether we want to mark Dataset typed operations stable in 2.1.

## How was this patch tested?

N/A - just annotation changes.

Author: Reynold Xin <r...@databricks.com>

Closes #15469 from rxin/SPARK-17900.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/72adfbf9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/72adfbf9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/72adfbf9

Branch: refs/heads/master
Commit: 72adfbf94ab6a6ce2a5f3111140274476150f201
Parents: f00df40
Author: Reynold Xin <r...@databricks.com>
Authored: Fri Oct 14 16:13:42 2016 -0700
Committer: Michael Armbrust <mich...@databricks.com>
Committed: Fri Oct 14 16:13:42 2016 -0700

----------------------------------------------------------------------
 .../scala/org/apache/spark/sql/Column.scala     |  6 ++--
 .../apache/spark/sql/DataFrameNaFunctions.scala |  6 ++--
 .../spark/sql/DataFrameStatFunctions.scala      |  6 ++--
 .../sql/expressions/UserDefinedFunction.scala   | 10 ++++--
 .../apache/spark/sql/expressions/Window.scala   | 10 ++----
 .../spark/sql/expressions/WindowSpec.scala      |  6 ++--
 .../org/apache/spark/sql/expressions/udaf.scala | 30 +++++++++++++----
 .../scala/org/apache/spark/sql/functions.scala  |  4 +--
 .../apache/spark/sql/sources/interfaces.scala   | 35 +++++---------------
 9 files changed, 51 insertions(+), 62 deletions(-)
----------------------------------------------------------------------
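For context, this is roughly what user code against these now-stable APIs looks like. A minimal sketch, assuming a running SparkSession named `spark` (as in spark-shell); the data and column names are illustrative:

```scala
import org.apache.spark.sql.functions._
import spark.implicits._  // assumes an active SparkSession named `spark`

// `functions` and ColumnName (the $"..." syntax) are among the APIs marked stable here.
val df = Seq(("a", 1), ("b", 2)).toDF("key", "value")
df.select(upper($"key"), $"value" + lit(1)).show()
```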
http://git-wip-us.apache.org/repos/asf/spark/blob/72adfbf9/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
index d22bb17..05e867b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql
 
 import scala.language.implicitConversions
 
-import org.apache.spark.annotation.{Experimental, InterfaceStability}
+import org.apache.spark.annotation.InterfaceStability
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.catalyst.analysis._
 import org.apache.spark.sql.catalyst.encoders.{encoderFor, ExpressionEncoder}
@@ -1181,13 +1181,11 @@ class Column(protected[sql] val expr: Expression) extends Logging {
 
 
 /**
- * :: Experimental ::
  * A convenient class used for constructing schema.
  *
  * @since 1.3.0
  */
-@Experimental
-@InterfaceStability.Evolving
+@InterfaceStability.Stable
 class ColumnName(name: String) extends Column(name) {
 
   /**


http://git-wip-us.apache.org/repos/asf/spark/blob/72adfbf9/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala
index 65a9c00..0d43f09 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala
@@ -21,20 +21,18 @@ import java.{lang => jl}
 
 import scala.collection.JavaConverters._
 
-import org.apache.spark.annotation.{Experimental, InterfaceStability}
+import org.apache.spark.annotation.InterfaceStability
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.functions._
 import org.apache.spark.sql.types._
 
 /**
- * :: Experimental ::
  * Functionality for working with missing data in [[DataFrame]]s.
  *
  * @since 1.3.1
  */
-@Experimental
-@InterfaceStability.Evolving
+@InterfaceStability.Stable
 final class DataFrameNaFunctions private[sql](df: DataFrame) {
 
   /**
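In user code, the now-stable `df.na` entry point looks like the following minimal sketch (sample data invented; assumes an active SparkSession `spark` with `import spark.implicits._`):

```scala
// DataFrameNaFunctions is reached through DataFrame.na.
val people = Seq(("alice", Some(30)), ("bob", None)).toDF("name", "age")

people.na.drop().show()                  // drop rows containing any null
people.na.fill(Map("age" -> -1)).show()  // replace nulls in "age" with -1
```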
http://git-wip-us.apache.org/repos/asf/spark/blob/72adfbf9/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala
index a212bb6..b5bbcee 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala
@@ -21,20 +21,18 @@ import java.{lang => jl, util => ju}
 
 import scala.collection.JavaConverters._
 
-import org.apache.spark.annotation.{Experimental, InterfaceStability}
+import org.apache.spark.annotation.InterfaceStability
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.execution.stat._
 import org.apache.spark.sql.types._
 import org.apache.spark.util.sketch.{BloomFilter, CountMinSketch}
 
 /**
- * :: Experimental ::
  * Statistic functions for [[DataFrame]]s.
  *
  * @since 1.4.0
  */
-@Experimental
-@InterfaceStability.Evolving
+@InterfaceStability.Stable
 final class DataFrameStatFunctions private[sql](df: DataFrame) {
 
   /**


http://git-wip-us.apache.org/repos/asf/spark/blob/72adfbf9/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala
index 2e0e937..28598af 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.sql.expressions
 
-import org.apache.spark.annotation.{Experimental, InterfaceStability}
+import org.apache.spark.annotation.InterfaceStability
 import org.apache.spark.sql.catalyst.expressions.ScalaUDF
 import org.apache.spark.sql.Column
 import org.apache.spark.sql.functions
@@ -39,13 +39,17 @@ import org.apache.spark.sql.types.DataType
  *
  * @since 1.3.0
  */
-@Experimental
-@InterfaceStability.Evolving
+@InterfaceStability.Stable
 case class UserDefinedFunction protected[sql] (
     f: AnyRef,
     dataType: DataType,
     inputTypes: Option[Seq[DataType]]) {
 
+  /**
+   * Returns an expression that invokes the UDF, using the given arguments.
+   *
+   * @since 1.3.0
+   */
   def apply(exprs: Column*): Column = {
     Column(ScalaUDF(f, dataType, exprs.map(_.expr), inputTypes.getOrElse(Nil)))
   }


http://git-wip-us.apache.org/repos/asf/spark/blob/72adfbf9/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala
index 07ef601..0b26d86 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala
@@ -17,12 +17,11 @@
 
 package org.apache.spark.sql.expressions
 
-import org.apache.spark.annotation.{Experimental, InterfaceStability}
+import org.apache.spark.annotation.InterfaceStability
 import org.apache.spark.sql.Column
 import org.apache.spark.sql.catalyst.expressions._
 
 /**
- * :: Experimental ::
  * Utility functions for defining window in DataFrames.
  *
  * {{{
@@ -36,8 +35,7 @@ import org.apache.spark.sql.catalyst.expressions._
 *
 * @since 1.4.0
 */
-@Experimental
-@InterfaceStability.Evolving
+@InterfaceStability.Stable
 object Window {
 
   /**
@@ -164,7 +162,6 @@ object Window {
 }
 
 /**
- * :: Experimental ::
  * Utility functions for defining window in DataFrames.
  *
  * {{{
@@ -177,6 +174,5 @@ object Window {
 *
 * @since 1.4.0
 */
-@Experimental
-@InterfaceStability.Evolving
+@InterfaceStability.Stable
 class Window private()  // So we can see Window in JavaDoc.
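UserDefinedFunction, Window, and WindowSpec, all graduated above, are typically combined as in this sketch (illustrative data; assumes an active SparkSession `spark` with `import spark.implicits._`):

```scala
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions._

// udf(...) returns a UserDefinedFunction; applying it yields a Column.
val squared = udf((x: Int) => x * x)

// Window.partitionBy(...).orderBy(...) returns a WindowSpec.
val byCategory = Window.partitionBy($"category").orderBy($"revenue".desc)

val sales = Seq(("a", 10), ("a", 20), ("b", 5)).toDF("category", "revenue")
sales
  .withColumn("squared", squared($"revenue"))
  .withColumn("rank", rank().over(byCategory))
  .show()
```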
http://git-wip-us.apache.org/repos/asf/spark/blob/72adfbf9/sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala
index 18778c8..1e85b6e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala
@@ -17,20 +17,18 @@
 
 package org.apache.spark.sql.expressions
 
-import org.apache.spark.annotation.{Experimental, InterfaceStability}
+import org.apache.spark.annotation.InterfaceStability
 import org.apache.spark.sql.Column
 import org.apache.spark.sql.catalyst.expressions._
 
 /**
- * :: Experimental ::
  * A window specification that defines the partitioning, ordering, and frame boundaries.
 *
 * Use the static methods in [[Window]] to create a [[WindowSpec]].
 *
 * @since 1.4.0
 */
-@Experimental
-@InterfaceStability.Evolving
+@InterfaceStability.Stable
 class WindowSpec private[sql](
     partitionSpec: Seq[Expression],
     orderSpec: Seq[SortOrder],


http://git-wip-us.apache.org/repos/asf/spark/blob/72adfbf9/sql/core/src/main/scala/org/apache/spark/sql/expressions/udaf.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/udaf.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/udaf.scala
index ef7c09c..bc9788d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/udaf.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/udaf.scala
@@ -17,20 +17,18 @@
 
 package org.apache.spark.sql.expressions
 
-import org.apache.spark.annotation.{Experimental, InterfaceStability}
+import org.apache.spark.annotation.InterfaceStability
 import org.apache.spark.sql.{Column, Row}
 import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Complete}
 import org.apache.spark.sql.execution.aggregate.ScalaUDAF
 import org.apache.spark.sql.types._
 
 /**
- * :: Experimental ::
  * The base class for implementing user-defined aggregate functions (UDAF).
 *
 * @since 1.5.0
 */
-@Experimental
-@InterfaceStability.Evolving
+@InterfaceStability.Stable
 abstract class UserDefinedAggregateFunction extends Serializable {
 
   /**
@@ -46,6 +44,8 @@
    *
    * The name of a field of this [[StructType]] is only used to identify the corresponding
    * input argument. Users can choose names to identify the input arguments.
+   *
+   * @since 1.5.0
    */
   def inputSchema: StructType
 
@@ -63,17 +63,23 @@ abstract class UserDefinedAggregateFunction extends Serializable {
    *
    * The name of a field of this [[StructType]] is only used to identify the corresponding
    * buffer value. Users can choose names to identify the input arguments.
+   *
+   * @since 1.5.0
    */
   def bufferSchema: StructType
 
   /**
    * The [[DataType]] of the returned value of this [[UserDefinedAggregateFunction]].
+   *
+   * @since 1.5.0
    */
   def dataType: DataType
 
   /**
    * Returns true iff this function is deterministic, i.e. given the same input,
    * always return the same output.
+   *
+   * @since 1.5.0
    */
   def deterministic: Boolean
 
@@ -83,6 +89,8 @@ abstract class UserDefinedAggregateFunction extends Serializable {
    * The contract should be that applying the merge function on two initial buffers should just
    * return the initial buffer itself, i.e.
   * `merge(initialBuffer, initialBuffer)` should equal `initialBuffer`.
+   *
+   * @since 1.5.0
    */
   def initialize(buffer: MutableAggregationBuffer): Unit
 
@@ -90,6 +98,8 @@
    * Updates the given aggregation buffer `buffer` with new input data from `input`.
    *
    * This is called once per input row.
+   *
+   * @since 1.5.0
    */
   def update(buffer: MutableAggregationBuffer, input: Row): Unit
 
@@ -97,17 +107,23 @@
    * Merges two aggregation buffers and stores the updated buffer values back to `buffer1`.
    *
    * This is called when we merge two partially aggregated data together.
+   *
+   * @since 1.5.0
    */
   def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit
 
   /**
    * Calculates the final result of this [[UserDefinedAggregateFunction]] based on the given
    * aggregation buffer.
+   *
+   * @since 1.5.0
    */
   def evaluate(buffer: Row): Any
 
   /**
    * Creates a [[Column]] for this UDAF using given [[Column]]s as input arguments.
+   *
+   * @since 1.5.0
    */
   @scala.annotation.varargs
   def apply(exprs: Column*): Column = {
@@ -122,6 +138,8 @@ abstract class UserDefinedAggregateFunction extends Serializable {
   /**
    * Creates a [[Column]] for this UDAF using the distinct values of the given
    * [[Column]]s as input arguments.
+   *
+   * @since 1.5.0
    */
   @scala.annotation.varargs
   def distinct(exprs: Column*): Column = {
@@ -135,15 +153,13 @@
 }
 
 /**
- * :: Experimental ::
 * A [[Row]] representing a mutable aggregation buffer.
 *
 * This is not meant to be extended outside of Spark.
 *
 * @since 1.5.0
 */
-@Experimental
-@InterfaceStability.Evolving
+@InterfaceStability.Stable
 abstract class MutableAggregationBuffer extends Row {
 
   /** Update the ith value of this buffer. */
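The contract documented above (the three schema members plus initialize/update/merge/evaluate) looks like this in a minimal sketch; the sum-of-longs aggregate and its class name are invented for illustration, and a real job would just use the built-in sum():

```scala
import org.apache.spark.sql.Row
import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction}
import org.apache.spark.sql.types._

class LongSum extends UserDefinedAggregateFunction {
  def inputSchema: StructType = StructType(StructField("value", LongType) :: Nil)
  def bufferSchema: StructType = StructType(StructField("sum", LongType) :: Nil)
  def dataType: DataType = LongType
  def deterministic: Boolean = true

  // merge(initialBuffer, initialBuffer) == initialBuffer, per the contract above.
  def initialize(buffer: MutableAggregationBuffer): Unit = buffer(0) = 0L

  def update(buffer: MutableAggregationBuffer, input: Row): Unit = {
    if (!input.isNullAt(0)) buffer(0) = buffer.getLong(0) + input.getLong(0)
  }

  def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit = {
    buffer1(0) = buffer1.getLong(0) + buffer2.getLong(0)
  }

  def evaluate(buffer: Row): Any = buffer.getLong(0)
}
```

Invocation then goes through the stable apply method, e.g. df.agg(new LongSum()($"value")).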


http://git-wip-us.apache.org/repos/asf/spark/blob/72adfbf9/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index de49431..5f1efd2 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -37,7 +37,6 @@ import org.apache.spark.util.Utils
 
 /**
- * :: Experimental ::
  * Functions available for DataFrame operations.
 *
 * @groupname udf_funcs UDF functions
@@ -53,8 +52,7 @@ import org.apache.spark.util.Utils
 * @groupname Ungrouped Support functions for DataFrames
 * @since 1.3.0
 */
-@Experimental
-@InterfaceStability.Evolving
+@InterfaceStability.Stable
 // scalastyle:off
 object functions {
 // scalastyle:on


http://git-wip-us.apache.org/repos/asf/spark/blob/72adfbf9/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala
index 3172d5d..15a4807 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala
@@ -27,7 +27,6 @@ import org.apache.spark.sql.streaming.OutputMode
 import org.apache.spark.sql.types.StructType
 
 /**
- * ::DeveloperApi::
  * Data sources should implement this trait so that they can register an alias to their data source.
 * This allows users to give the data source alias as the format type over the fully qualified
 * class name.
@@ -36,8 +35,7 @@
 *
 * @since 1.5.0
 */
-@DeveloperApi
-@InterfaceStability.Evolving
+@InterfaceStability.Stable
 trait DataSourceRegister {
 
   /**
@@ -54,7 +52,6 @@
 }
 
 /**
- * ::DeveloperApi::
  * Implemented by objects that produce relations for a specific kind of data source. When
 * Spark SQL is given a DDL operation with a USING clause specified (to specify the implemented
 * RelationProvider), this interface is used to pass in the parameters specified by a user.
@@ -68,8 +65,7 @@
 *
 * @since 1.3.0
 */
-@DeveloperApi
-@InterfaceStability.Evolving
+@InterfaceStability.Stable
 trait RelationProvider {
   /**
    * Returns a new base relation with the given parameters.
@@ -80,7 +76,6 @@
 }
 
 /**
- * ::DeveloperApi::
  * Implemented by objects that produce relations for a specific kind of data source
 * with a given schema. When Spark SQL is given a DDL operation with a USING clause specified (
 * to specify the implemented SchemaRelationProvider) and a user defined schema, this interface
@@ -100,8 +95,7 @@
 *
 * @since 1.3.0
 */
-@DeveloperApi
-@InterfaceStability.Evolving
+@InterfaceStability.Stable
 trait SchemaRelationProvider {
   /**
    * Returns a new base relation with the given parameters and user defined schema.
@@ -164,8 +158,7 @@ trait StreamSinkProvider {
 /**
  * @since 1.3.0
  */
-@DeveloperApi
-@InterfaceStability.Evolving
+@InterfaceStability.Stable
 trait CreatableRelationProvider {
   /**
    * Save the DataFrame to the destination and return a relation with the given parameters based on
@@ -189,7 +182,6 @@
 }
 
 /**
- * ::DeveloperApi::
  * Represents a collection of tuples with a known schema. Classes that extend BaseRelation must
 * be able to produce the schema of their data in the form of a [[StructType]]. Concrete
 * implementation should inherit from one of the descendant `Scan` classes, which define various
@@ -201,8 +193,7 @@
 *
 * @since 1.3.0
 */
-@DeveloperApi
-@InterfaceStability.Evolving
+@InterfaceStability.Stable
 abstract class BaseRelation {
   def sqlContext: SQLContext
   def schema: StructType
@@ -248,32 +239,27 @@
 }
 
 /**
- * ::DeveloperApi::
  * A BaseRelation that can produce all of its tuples as an RDD of Row objects.
 *
 * @since 1.3.0
 */
-@DeveloperApi
-@InterfaceStability.Evolving
+@InterfaceStability.Stable
 trait TableScan {
   def buildScan(): RDD[Row]
 }
 
 /**
- * ::DeveloperApi::
  * A BaseRelation that can eliminate unneeded columns before producing an RDD
 * containing all of its tuples as Row objects.
 *
 * @since 1.3.0
 */
-@DeveloperApi
-@InterfaceStability.Evolving
+@InterfaceStability.Stable
 trait PrunedScan {
   def buildScan(requiredColumns: Array[String]): RDD[Row]
 }
 
 /**
- * ::DeveloperApi::
  * A BaseRelation that can eliminate unneeded columns and filter using selected
 * predicates before producing an RDD containing all matching tuples as Row objects.
 *
@@ -286,14 +272,12 @@
 *
 * @since 1.3.0
 */
-@DeveloperApi
-@InterfaceStability.Evolving
+@InterfaceStability.Stable
 trait PrunedFilteredScan {
   def buildScan(requiredColumns: Array[String], filters: Array[Filter]): RDD[Row]
 }
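A minimal sketch of a read-only source built on these now-stable traits; every name here (the RangeRelation class, the "range-example" alias, the `n` option) is invented for illustration:

```scala
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{Row, SQLContext}
import org.apache.spark.sql.sources.{BaseRelation, DataSourceRegister, RelationProvider, TableScan}
import org.apache.spark.sql.types.{LongType, StructField, StructType}

// A relation that produces the numbers 0 until n as a single-column table.
class RangeRelation(val sqlContext: SQLContext, n: Long) extends BaseRelation with TableScan {
  override def schema: StructType =
    StructType(StructField("id", LongType, nullable = false) :: Nil)

  override def buildScan(): RDD[Row] =
    sqlContext.sparkContext.range(0, n).map(Row(_))
}

// RelationProvider creates the relation; DataSourceRegister gives it a short alias.
class DefaultSource extends RelationProvider with DataSourceRegister {
  override def shortName(): String = "range-example"

  override def createRelation(
      sqlContext: SQLContext,
      parameters: Map[String, String]): BaseRelation =
    new RangeRelation(sqlContext, parameters.getOrElse("n", "10").toLong)
}
```

Such a source would then be loaded with spark.read.format("range-example").option("n", "5").load().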
 
 /**
- * ::DeveloperApi::
  * A BaseRelation that can be used to insert data into it through the insert method.
 * If overwrite in insert method is true, the old data in the relation should be overwritten with
 * the new data. If overwrite in insert method is false, the new data should be appended.
@@ -310,8 +294,7 @@
 *
 * @since 1.3.0
 */
-@DeveloperApi
-@InterfaceStability.Evolving
+@InterfaceStability.Stable
 trait InsertableRelation {
   def insert(data: DataFrame, overwrite: Boolean): Unit
 }

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org