Repository: spark
Updated Branches:
  refs/heads/master f00df40cf -> 72adfbf94
[SPARK-17900][SQL] Graduate a list of Spark SQL APIs to stable

## What changes were proposed in this pull request?

This patch graduates a list of Spark SQL APIs and marks them stable.

The following are marked stable:

Dataset/DataFrame:
- functions, since 1.3
- ColumnName, since 1.3
- DataFrameNaFunctions, since 1.3.1
- DataFrameStatFunctions, since 1.4
- UserDefinedFunction, since 1.3
- UserDefinedAggregateFunction, since 1.5
- Window and WindowSpec, since 1.4

Data sources:
- DataSourceRegister, since 1.5
- RelationProvider, since 1.3
- SchemaRelationProvider, since 1.3
- CreatableRelationProvider, since 1.3
- BaseRelation, since 1.3
- TableScan, since 1.3
- PrunedScan, since 1.3
- PrunedFilteredScan, since 1.3
- InsertableRelation, since 1.3

The following are kept experimental / evolving:

Data sources:
- CatalystScan (tied to internal logical plans, so it is not stable by definition)

Structured streaming:
- all classes (new in 2.0 and likely to change)

Dataset typed operations (introduced in 1.6 and 2.0; these might change, although the probability is low):
- all typed methods on Dataset
- KeyValueGroupedDataset
- o.a.s.sql.expressions.javalang.typed
- o.a.s.sql.expressions.scalalang.typed
- methods that return typed Dataset in SparkSession

We should discuss further whether we want to mark Dataset typed operations stable in 2.1.

## How was this patch tested?

N/A - just annotation changes.

Author: Reynold Xin <r...@databricks.com>

Closes #15469 from rxin/SPARK-17900.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/72adfbf9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/72adfbf9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/72adfbf9

Branch: refs/heads/master
Commit: 72adfbf94ab6a6ce2a5f3111140274476150f201
Parents: f00df40
Author: Reynold Xin <r...@databricks.com>
Authored: Fri Oct 14 16:13:42 2016 -0700
Committer: Michael Armbrust <mich...@databricks.com>
Committed: Fri Oct 14 16:13:42 2016 -0700

----------------------------------------------------------------------
 .../scala/org/apache/spark/sql/Column.scala     |  6 ++--
 .../apache/spark/sql/DataFrameNaFunctions.scala |  6 ++--
 .../spark/sql/DataFrameStatFunctions.scala      |  6 ++--
 .../sql/expressions/UserDefinedFunction.scala   | 10 ++++--
 .../apache/spark/sql/expressions/Window.scala   | 10 ++----
 .../spark/sql/expressions/WindowSpec.scala      |  6 ++--
 .../org/apache/spark/sql/expressions/udaf.scala | 30 +++++++++++++----
 .../scala/org/apache/spark/sql/functions.scala  |  4 +--
 .../apache/spark/sql/sources/interfaces.scala   | 35 +++++---------------
 9 files changed, 51 insertions(+), 62 deletions(-)
----------------------------------------------------------------------
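For context, this is roughly what user code against these now-stable APIs looks like. A minimal sketch, assuming a running SparkSession named `spark` (as in spark-shell); the data and column names are illustrative:

```scala
import org.apache.spark.sql.functions._
import spark.implicits._  // assumes an active SparkSession named `spark`

// `functions` and ColumnName (the $"..." syntax) are among the APIs marked stable here.
val df = Seq(("a", 1), ("b", 2)).toDF("key", "value")
df.select(upper($"key"), $"value" + lit(1)).show()
```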
http://git-wip-us.apache.org/repos/asf/spark/blob/72adfbf9/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
index d22bb17..05e867b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql
 
 import scala.language.implicitConversions
 
-import org.apache.spark.annotation.{Experimental, InterfaceStability}
+import org.apache.spark.annotation.InterfaceStability
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.catalyst.analysis._
 import org.apache.spark.sql.catalyst.encoders.{encoderFor, ExpressionEncoder}
@@ -1181,13 +1181,11 @@ class Column(protected[sql] val expr: Expression) extends Logging {
 
 
 /**
- * :: Experimental ::
  * A convenient class used for constructing schema.
  *
  * @since 1.3.0
  */
-@Experimental
-@InterfaceStability.Evolving
+@InterfaceStability.Stable
 class ColumnName(name: String) extends Column(name) {
 
   /**


http://git-wip-us.apache.org/repos/asf/spark/blob/72adfbf9/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala
index 65a9c00..0d43f09 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala
@@ -21,20 +21,18 @@ import java.{lang => jl}
 
 import scala.collection.JavaConverters._
 
-import org.apache.spark.annotation.{Experimental, InterfaceStability}
+import org.apache.spark.annotation.InterfaceStability
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.functions._
 import org.apache.spark.sql.types._
 
 /**
- * :: Experimental ::
  * Functionality for working with missing data in [[DataFrame]]s.
  *
  * @since 1.3.1
  */
-@Experimental
-@InterfaceStability.Evolving
+@InterfaceStability.Stable
 final class DataFrameNaFunctions private[sql](df: DataFrame) {
 
   /**
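In user code, the now-stable `df.na` entry point looks like the following minimal sketch (sample data invented; assumes an active SparkSession `spark` with `import spark.implicits._`):

```scala
// DataFrameNaFunctions is reached through DataFrame.na.
val people = Seq(("alice", Some(30)), ("bob", None)).toDF("name", "age")

people.na.drop().show()                  // drop rows containing any null
people.na.fill(Map("age" -> -1)).show()  // replace nulls in "age" with -1
```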
http://git-wip-us.apache.org/repos/asf/spark/blob/72adfbf9/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala
index a212bb6..b5bbcee 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala
@@ -21,20 +21,18 @@ import java.{lang => jl, util => ju}
 
 import scala.collection.JavaConverters._
 
-import org.apache.spark.annotation.{Experimental, InterfaceStability}
+import org.apache.spark.annotation.InterfaceStability
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.execution.stat._
 import org.apache.spark.sql.types._
 import org.apache.spark.util.sketch.{BloomFilter, CountMinSketch}
 
 /**
- * :: Experimental ::
  * Statistic functions for [[DataFrame]]s.
  *
  * @since 1.4.0
  */
-@Experimental
-@InterfaceStability.Evolving
+@InterfaceStability.Stable
 final class DataFrameStatFunctions private[sql](df: DataFrame) {
 
   /**


http://git-wip-us.apache.org/repos/asf/spark/blob/72adfbf9/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala
index 2e0e937..28598af 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.sql.expressions
 
-import org.apache.spark.annotation.{Experimental, InterfaceStability}
+import org.apache.spark.annotation.InterfaceStability
 import org.apache.spark.sql.catalyst.expressions.ScalaUDF
 import org.apache.spark.sql.Column
 import org.apache.spark.sql.functions
@@ -39,13 +39,17 @@ import org.apache.spark.sql.types.DataType
  *
  * @since 1.3.0
  */
-@Experimental
-@InterfaceStability.Evolving
+@InterfaceStability.Stable
 case class UserDefinedFunction protected[sql] (
     f: AnyRef,
     dataType: DataType,
     inputTypes: Option[Seq[DataType]]) {
 
+  /**
+   * Returns an expression that invokes the UDF, using the given arguments.
+   *
+   * @since 1.3.0
+   */
   def apply(exprs: Column*): Column = {
     Column(ScalaUDF(f, dataType, exprs.map(_.expr), inputTypes.getOrElse(Nil)))
   }


http://git-wip-us.apache.org/repos/asf/spark/blob/72adfbf9/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala
index 07ef601..0b26d86 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala
@@ -17,12 +17,11 @@
 
 package org.apache.spark.sql.expressions
 
-import org.apache.spark.annotation.{Experimental, InterfaceStability}
+import org.apache.spark.annotation.InterfaceStability
 import org.apache.spark.sql.Column
 import org.apache.spark.sql.catalyst.expressions._
 
 /**
- * :: Experimental ::
  * Utility functions for defining window in DataFrames.
  *
  * {{{
@@ -36,8 +35,7 @@ import org.apache.spark.sql.catalyst.expressions._
 *
 * @since 1.4.0
 */
-@Experimental
-@InterfaceStability.Evolving
+@InterfaceStability.Stable
 object Window {
 
   /**
@@ -164,7 +162,6 @@ object Window {
 }
 
 /**
- * :: Experimental ::
  * Utility functions for defining window in DataFrames.
  *
  * {{{
@@ -177,6 +174,5 @@ object Window {
 *
 * @since 1.4.0
 */
-@Experimental
-@InterfaceStability.Evolving
+@InterfaceStability.Stable
 class Window private()  // So we can see Window in JavaDoc.
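UserDefinedFunction, Window, and WindowSpec, all graduated above, are typically combined as in this sketch (illustrative data; assumes an active SparkSession `spark` with `import spark.implicits._`):

```scala
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions._

// udf(...) returns a UserDefinedFunction; applying it yields a Column.
val squared = udf((x: Int) => x * x)

// Window.partitionBy(...).orderBy(...) returns a WindowSpec.
val byCategory = Window.partitionBy($"category").orderBy($"revenue".desc)

val sales = Seq(("a", 10), ("a", 20), ("b", 5)).toDF("category", "revenue")
sales
  .withColumn("squared", squared($"revenue"))
  .withColumn("rank", rank().over(byCategory))
  .show()
```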
http://git-wip-us.apache.org/repos/asf/spark/blob/72adfbf9/sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala
index 18778c8..1e85b6e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala
@@ -17,20 +17,18 @@
 
 package org.apache.spark.sql.expressions
 
-import org.apache.spark.annotation.{Experimental, InterfaceStability}
+import org.apache.spark.annotation.InterfaceStability
 import org.apache.spark.sql.Column
 import org.apache.spark.sql.catalyst.expressions._
 
 /**
- * :: Experimental ::
  * A window specification that defines the partitioning, ordering, and frame boundaries.
 *
 * Use the static methods in [[Window]] to create a [[WindowSpec]].
 *
 * @since 1.4.0
 */
-@Experimental
-@InterfaceStability.Evolving
+@InterfaceStability.Stable
 class WindowSpec private[sql](
     partitionSpec: Seq[Expression],
     orderSpec: Seq[SortOrder],


http://git-wip-us.apache.org/repos/asf/spark/blob/72adfbf9/sql/core/src/main/scala/org/apache/spark/sql/expressions/udaf.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/udaf.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/udaf.scala
index ef7c09c..bc9788d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/udaf.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/udaf.scala
@@ -17,20 +17,18 @@
 
 package org.apache.spark.sql.expressions
 
-import org.apache.spark.annotation.{Experimental, InterfaceStability}
+import org.apache.spark.annotation.InterfaceStability
 import org.apache.spark.sql.{Column, Row}
 import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Complete}
 import org.apache.spark.sql.execution.aggregate.ScalaUDAF
 import org.apache.spark.sql.types._
 
 /**
- * :: Experimental ::
  * The base class for implementing user-defined aggregate functions (UDAF).
 *
 * @since 1.5.0
 */
-@Experimental
-@InterfaceStability.Evolving
+@InterfaceStability.Stable
 abstract class UserDefinedAggregateFunction extends Serializable {
 
   /**
@@ -46,6 +44,8 @@
    *
    * The name of a field of this [[StructType]] is only used to identify the corresponding
    * input argument. Users can choose names to identify the input arguments.
+   *
+   * @since 1.5.0
    */
   def inputSchema: StructType
 
@@ -63,17 +63,23 @@ abstract class UserDefinedAggregateFunction extends Serializable {
    *
    * The name of a field of this [[StructType]] is only used to identify the corresponding
    * buffer value. Users can choose names to identify the input arguments.
+   *
+   * @since 1.5.0
    */
   def bufferSchema: StructType
 
   /**
    * The [[DataType]] of the returned value of this [[UserDefinedAggregateFunction]].
+   *
+   * @since 1.5.0
    */
   def dataType: DataType
 
   /**
    * Returns true iff this function is deterministic, i.e. given the same input,
    * always return the same output.
+   *
+   * @since 1.5.0
    */
   def deterministic: Boolean
 
@@ -83,6 +89,8 @@ abstract class UserDefinedAggregateFunction extends Serializable {
    * The contract should be that applying the merge function on two initial buffers should just
    * return the initial buffer itself, i.e.
   * `merge(initialBuffer, initialBuffer)` should equal `initialBuffer`.
+   *
+   * @since 1.5.0
    */
   def initialize(buffer: MutableAggregationBuffer): Unit
 
@@ -90,6 +98,8 @@
    * Updates the given aggregation buffer `buffer` with new input data from `input`.
    *
    * This is called once per input row.
+   *
+   * @since 1.5.0
    */
   def update(buffer: MutableAggregationBuffer, input: Row): Unit
 
@@ -97,17 +107,23 @@
    * Merges two aggregation buffers and stores the updated buffer values back to `buffer1`.
    *
    * This is called when we merge two partially aggregated data together.
+   *
+   * @since 1.5.0
    */
   def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit
 
   /**
    * Calculates the final result of this [[UserDefinedAggregateFunction]] based on the given
    * aggregation buffer.
+   *
+   * @since 1.5.0
    */
   def evaluate(buffer: Row): Any
 
   /**
    * Creates a [[Column]] for this UDAF using given [[Column]]s as input arguments.
+   *
+   * @since 1.5.0
    */
   @scala.annotation.varargs
   def apply(exprs: Column*): Column = {
@@ -122,6 +138,8 @@ abstract class UserDefinedAggregateFunction extends Serializable {
   /**
    * Creates a [[Column]] for this UDAF using the distinct values of the given
    * [[Column]]s as input arguments.
+   *
+   * @since 1.5.0
    */
   @scala.annotation.varargs
   def distinct(exprs: Column*): Column = {
@@ -135,15 +153,13 @@
 }
 
 /**
- * :: Experimental ::
 * A [[Row]] representing a mutable aggregation buffer.
 *
 * This is not meant to be extended outside of Spark.
 *
 * @since 1.5.0
 */
-@Experimental
-@InterfaceStability.Evolving
+@InterfaceStability.Stable
 abstract class MutableAggregationBuffer extends Row {
 
   /** Update the ith value of this buffer. */
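The contract documented above (the three schema members plus initialize/update/merge/evaluate) looks like this in a minimal sketch; the sum-of-longs aggregate and its class name are invented for illustration, and a real job would just use the built-in sum():

```scala
import org.apache.spark.sql.Row
import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction}
import org.apache.spark.sql.types._

class LongSum extends UserDefinedAggregateFunction {
  def inputSchema: StructType = StructType(StructField("value", LongType) :: Nil)
  def bufferSchema: StructType = StructType(StructField("sum", LongType) :: Nil)
  def dataType: DataType = LongType
  def deterministic: Boolean = true

  // merge(initialBuffer, initialBuffer) == initialBuffer, per the contract above.
  def initialize(buffer: MutableAggregationBuffer): Unit = buffer(0) = 0L

  def update(buffer: MutableAggregationBuffer, input: Row): Unit = {
    if (!input.isNullAt(0)) buffer(0) = buffer.getLong(0) + input.getLong(0)
  }

  def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit = {
    buffer1(0) = buffer1.getLong(0) + buffer2.getLong(0)
  }

  def evaluate(buffer: Row): Any = buffer.getLong(0)
}
```

Invocation then goes through the stable apply method, e.g. df.agg(new LongSum()($"value")).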


http://git-wip-us.apache.org/repos/asf/spark/blob/72adfbf9/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index de49431..5f1efd2 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -37,7 +37,6 @@ import org.apache.spark.util.Utils
 
 /**
- * :: Experimental ::
  * Functions available for DataFrame operations.
 *
 * @groupname udf_funcs UDF functions
@@ -53,8 +52,7 @@ import org.apache.spark.util.Utils
 * @groupname Ungrouped Support functions for DataFrames
 * @since 1.3.0
 */
-@Experimental
-@InterfaceStability.Evolving
+@InterfaceStability.Stable
 // scalastyle:off
 object functions {
 // scalastyle:on


http://git-wip-us.apache.org/repos/asf/spark/blob/72adfbf9/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala
index 3172d5d..15a4807 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala
@@ -27,7 +27,6 @@ import org.apache.spark.sql.streaming.OutputMode
 import org.apache.spark.sql.types.StructType
 
 /**
- * ::DeveloperApi::
  * Data sources should implement this trait so that they can register an alias to their data source.
 * This allows users to give the data source alias as the format type over the fully qualified
 * class name.
@@ -36,8 +35,7 @@
 *
 * @since 1.5.0
 */
-@DeveloperApi
-@InterfaceStability.Evolving
+@InterfaceStability.Stable
 trait DataSourceRegister {
 
   /**
@@ -54,7 +52,6 @@
 }
 
 /**
- * ::DeveloperApi::
  * Implemented by objects that produce relations for a specific kind of data source. When
 * Spark SQL is given a DDL operation with a USING clause specified (to specify the implemented
 * RelationProvider), this interface is used to pass in the parameters specified by a user.
@@ -68,8 +65,7 @@
 *
 * @since 1.3.0
 */
-@DeveloperApi
-@InterfaceStability.Evolving
+@InterfaceStability.Stable
 trait RelationProvider {
   /**
    * Returns a new base relation with the given parameters.
@@ -80,7 +76,6 @@
 }
 
 /**
- * ::DeveloperApi::
  * Implemented by objects that produce relations for a specific kind of data source
 * with a given schema. When Spark SQL is given a DDL operation with a USING clause specified (
 * to specify the implemented SchemaRelationProvider) and a user defined schema, this interface
@@ -100,8 +95,7 @@
 *
 * @since 1.3.0
 */
-@DeveloperApi
-@InterfaceStability.Evolving
+@InterfaceStability.Stable
 trait SchemaRelationProvider {
   /**
    * Returns a new base relation with the given parameters and user defined schema.
@@ -164,8 +158,7 @@ trait StreamSinkProvider {
 /**
  * @since 1.3.0
  */
-@DeveloperApi
-@InterfaceStability.Evolving
+@InterfaceStability.Stable
 trait CreatableRelationProvider {
   /**
    * Save the DataFrame to the destination and return a relation with the given parameters based on
@@ -189,7 +182,6 @@
 }
 
 /**
- * ::DeveloperApi::
  * Represents a collection of tuples with a known schema. Classes that extend BaseRelation must
 * be able to produce the schema of their data in the form of a [[StructType]]. Concrete
 * implementation should inherit from one of the descendant `Scan` classes, which define various
@@ -201,8 +193,7 @@
 *
 * @since 1.3.0
 */
-@DeveloperApi
-@InterfaceStability.Evolving
+@InterfaceStability.Stable
 abstract class BaseRelation {
   def sqlContext: SQLContext
   def schema: StructType
@@ -248,32 +239,27 @@
 }
 
 /**
- * ::DeveloperApi::
  * A BaseRelation that can produce all of its tuples as an RDD of Row objects.
 *
 * @since 1.3.0
 */
-@DeveloperApi
-@InterfaceStability.Evolving
+@InterfaceStability.Stable
 trait TableScan {
   def buildScan(): RDD[Row]
 }
 
 /**
- * ::DeveloperApi::
  * A BaseRelation that can eliminate unneeded columns before producing an RDD
 * containing all of its tuples as Row objects.
 *
 * @since 1.3.0
 */
-@DeveloperApi
-@InterfaceStability.Evolving
+@InterfaceStability.Stable
 trait PrunedScan {
   def buildScan(requiredColumns: Array[String]): RDD[Row]
 }
 
 /**
- * ::DeveloperApi::
  * A BaseRelation that can eliminate unneeded columns and filter using selected
 * predicates before producing an RDD containing all matching tuples as Row objects.
 *
@@ -286,14 +272,12 @@
 *
 * @since 1.3.0
 */
-@DeveloperApi
-@InterfaceStability.Evolving
+@InterfaceStability.Stable
 trait PrunedFilteredScan {
   def buildScan(requiredColumns: Array[String], filters: Array[Filter]): RDD[Row]
 }
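A minimal sketch of a read-only source built on these now-stable traits; every name here (the RangeRelation class, the "range-example" alias, the `n` option) is invented for illustration:

```scala
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{Row, SQLContext}
import org.apache.spark.sql.sources.{BaseRelation, DataSourceRegister, RelationProvider, TableScan}
import org.apache.spark.sql.types.{LongType, StructField, StructType}

// A relation that produces the numbers 0 until n as a single-column table.
class RangeRelation(val sqlContext: SQLContext, n: Long) extends BaseRelation with TableScan {
  override def schema: StructType =
    StructType(StructField("id", LongType, nullable = false) :: Nil)

  override def buildScan(): RDD[Row] =
    sqlContext.sparkContext.range(0, n).map(Row(_))
}

// RelationProvider creates the relation; DataSourceRegister gives it a short alias.
class DefaultSource extends RelationProvider with DataSourceRegister {
  override def shortName(): String = "range-example"

  override def createRelation(
      sqlContext: SQLContext,
      parameters: Map[String, String]): BaseRelation =
    new RangeRelation(sqlContext, parameters.getOrElse("n", "10").toLong)
}
```

Such a source would then be loaded with spark.read.format("range-example").option("n", "5").load().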
 
 /**
- * ::DeveloperApi::
  * A BaseRelation that can be used to insert data into it through the insert method.
 * If overwrite in insert method is true, the old data in the relation should be overwritten with
 * the new data. If overwrite in insert method is false, the new data should be appended.
@@ -310,8 +294,7 @@
 *
 * @since 1.3.0
 */
-@DeveloperApi
-@InterfaceStability.Evolving
+@InterfaceStability.Stable
 trait InsertableRelation {
   def insert(data: DataFrame, overwrite: Boolean): Unit
 }

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org