This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new f78466d [SPARK-7768][CORE][SQL] Open UserDefinedType as a Developer API f78466d is described below commit f78466dca6f0ddb1c979842f5a22e1a1e3b535bf Author: Sean Owen <sro...@gmail.com> AuthorDate: Sat Feb 20 07:32:06 2021 -0600 [SPARK-7768][CORE][SQL] Open UserDefinedType as a Developer API ### What changes were proposed in this pull request? UserDefinedType and UDTRegistration become public Developer APIs, not package-private to Spark. ### Why are the changes needed? This proposes to simply open up the UserDefinedType class as a developer API. It was public in 1.x, but closed in 2.x for some possible redesign that does not seem to have happened. Other libraries have managed to define UDTs anyway by inserting shims into the Spark namespace, and this evidently has worked OK. But package isolation in Java 9+ breaks this. The logic here is mostly: this is de facto a stable API, so it can at least be open to developers with the usual caveats about developer APIs. Open questions: - Is there in fact some important redesign that's needed before opening it? The comment to this effect is from 2016. - Is this all that needs to be opened up? Like PythonUserDefinedType? - Should any of this be kept package-private? This was first proposed in https://github.com/apache/spark/pull/16478, though that was a larger change; the other API issues it was fixing seem to have been addressed already (e.g. no need to return internal Spark types). It was never really reviewed. My hunch is that there isn't much downside, and some upside, to just opening this as-is now. ### Does this PR introduce _any_ user-facing change? UserDefinedType becomes visible to developers to subclass. ### How was this patch tested? Existing tests; there is no change to the existing logic. Closes #31461 from srowen/SPARK-7768. 
Authored-by: Sean Owen <sro...@gmail.com> Signed-off-by: Sean Owen <sro...@gmail.com> --- mllib/src/main/scala/org/apache/spark/ml/linalg/MatrixUDT.scala | 2 +- mllib/src/main/scala/org/apache/spark/ml/linalg/VectorUDT.scala | 2 +- .../main/scala/org/apache/spark/sql/types/UDTRegistration.scala | 3 ++- .../main/scala/org/apache/spark/sql/types/UserDefinedType.scala | 7 +++---- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/linalg/MatrixUDT.scala b/mllib/src/main/scala/org/apache/spark/ml/linalg/MatrixUDT.scala index f4a8556..838b51a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/linalg/MatrixUDT.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/linalg/MatrixUDT.scala @@ -22,7 +22,7 @@ import org.apache.spark.sql.catalyst.expressions.{GenericInternalRow, UnsafeArra import org.apache.spark.sql.types._ /** - * User-defined type for [[Matrix]] in [[mllib-local]] which allows easy interaction with SQL + * User-defined type for [[Matrix]] in mllib-local which allows easy interaction with SQL * via [[org.apache.spark.sql.Dataset]]. */ private[spark] class MatrixUDT extends UserDefinedType[Matrix] { diff --git a/mllib/src/main/scala/org/apache/spark/ml/linalg/VectorUDT.scala b/mllib/src/main/scala/org/apache/spark/ml/linalg/VectorUDT.scala index 35bbaf5..8c81aca 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/linalg/VectorUDT.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/linalg/VectorUDT.scala @@ -22,7 +22,7 @@ import org.apache.spark.sql.catalyst.expressions.{GenericInternalRow, UnsafeArra import org.apache.spark.sql.types._ /** - * User-defined type for [[Vector]] in [[mllib-local]] which allows easy interaction with SQL + * User-defined type for [[Vector]] in mllib-local which allows easy interaction with SQL * via [[org.apache.spark.sql.Dataset]]. 
*/ private[spark] class VectorUDT extends UserDefinedType[Vector] { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UDTRegistration.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UDTRegistration.scala index 20ec75c..f13651f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UDTRegistration.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UDTRegistration.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.types import scala.collection.mutable import org.apache.spark.SparkException +import org.apache.spark.annotation.DeveloperApi import org.apache.spark.internal.Logging import org.apache.spark.util.Utils @@ -29,7 +30,7 @@ import org.apache.spark.util.Utils * However, by doing this, we add SparkSQL dependency on user classes. This object provides * alternative approach to register UDTs for user classes. */ -private[spark] +@DeveloperApi object UDTRegistration extends Serializable with Logging { /** The mapping between the Class between UserDefinedType and user classes. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UserDefinedType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UserDefinedType.scala index 689c30f..7a26809 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UserDefinedType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UserDefinedType.scala @@ -22,6 +22,8 @@ import java.util.Objects import org.json4s.JsonAST.JValue import org.json4s.JsonDSL._ +import org.apache.spark.annotation.DeveloperApi + /** * The data type for User Defined Types (UDTs). * @@ -34,11 +36,8 @@ import org.json4s.JsonDSL._ * * The conversion via `serialize` occurs when instantiating a `DataFrame` from another RDD. * The conversion via `deserialize` occurs when reading from a `DataFrame`. - * - * Note: This was previously a developer API in Spark 1.x. 
We are making this private in Spark 2.0 - * because we will very likely create a new version of this that works better with Datasets. */ -private[spark] +@DeveloperApi abstract class UserDefinedType[UserType >: Null] extends DataType with Serializable { /** Underlying storage type for this UDT */ --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org