This is an automated email from the ASF dual-hosted git repository.

srowen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new f78466d  [SPARK-7768][CORE][SQL] Open UserDefinedType as a Developer 
API
f78466d is described below

commit f78466dca6f0ddb1c979842f5a22e1a1e3b535bf
Author: Sean Owen <sro...@gmail.com>
AuthorDate: Sat Feb 20 07:32:06 2021 -0600

    [SPARK-7768][CORE][SQL] Open UserDefinedType as a Developer API
    
    ### What changes were proposed in this pull request?
    
    UserDefinedType and UDTRegistration become public Developer APIs, not 
package-private to Spark.
    
    ### Why are the changes needed?
    
    This proposes to simply open up the UserDefinedType class as a developer 
API. It was public in 1.x, but closed in 2.x for some possible redesign that 
does not seem to have happened.
    
    Other libraries have managed to define UDTs anyway by inserting shims into 
the Spark namespace, and this evidently has worked OK. But package isolation in 
Java 9+ breaks this.
    
    The logic here is mostly: this is de facto a stable API, so it can at least 
be opened to developers with the usual caveats about developer APIs.
    
    Open questions:
    
    - Is there in fact some important redesign that's needed before opening it? 
The comment to this effect is from 2016.
    - Is this all that needs to be opened up? Like PythonUserDefinedType?
    - Should any of this be kept package-private?
    
    This was first proposed in https://github.com/apache/spark/pull/16478 
though it was a larger change, but, the other API issues it was fixing seem to 
have been addressed already (e.g. no need to return internal Spark types). It 
was never really reviewed.
    
    My hunch is that there isn't much downside, and some upside, to just 
opening this as-is now.
    
    ### Does this PR introduce _any_ user-facing change?
    
    UserDefinedType becomes visible to developers to subclass.
    
    ### How was this patch tested?
    
    Existing tests; there is no change to the existing logic.
    
    Closes #31461 from srowen/SPARK-7768.
    
    Authored-by: Sean Owen <sro...@gmail.com>
    Signed-off-by: Sean Owen <sro...@gmail.com>
---
 mllib/src/main/scala/org/apache/spark/ml/linalg/MatrixUDT.scala    | 2 +-
 mllib/src/main/scala/org/apache/spark/ml/linalg/VectorUDT.scala    | 2 +-
 .../main/scala/org/apache/spark/sql/types/UDTRegistration.scala    | 3 ++-
 .../main/scala/org/apache/spark/sql/types/UserDefinedType.scala    | 7 +++----
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/linalg/MatrixUDT.scala 
b/mllib/src/main/scala/org/apache/spark/ml/linalg/MatrixUDT.scala
index f4a8556..838b51a 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/linalg/MatrixUDT.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/linalg/MatrixUDT.scala
@@ -22,7 +22,7 @@ import 
org.apache.spark.sql.catalyst.expressions.{GenericInternalRow, UnsafeArra
 import org.apache.spark.sql.types._
 
 /**
- * User-defined type for [[Matrix]] in [[mllib-local]] which allows easy 
interaction with SQL
+ * User-defined type for [[Matrix]] in mllib-local which allows easy 
interaction with SQL
  * via [[org.apache.spark.sql.Dataset]].
  */
 private[spark] class MatrixUDT extends UserDefinedType[Matrix] {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/linalg/VectorUDT.scala 
b/mllib/src/main/scala/org/apache/spark/ml/linalg/VectorUDT.scala
index 35bbaf5..8c81aca 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/linalg/VectorUDT.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/linalg/VectorUDT.scala
@@ -22,7 +22,7 @@ import 
org.apache.spark.sql.catalyst.expressions.{GenericInternalRow, UnsafeArra
 import org.apache.spark.sql.types._
 
 /**
- * User-defined type for [[Vector]] in [[mllib-local]] which allows easy 
interaction with SQL
+ * User-defined type for [[Vector]] in mllib-local which allows easy 
interaction with SQL
  * via [[org.apache.spark.sql.Dataset]].
  */
 private[spark] class VectorUDT extends UserDefinedType[Vector] {
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UDTRegistration.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UDTRegistration.scala
index 20ec75c..f13651f 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UDTRegistration.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UDTRegistration.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql.types
 import scala.collection.mutable
 
 import org.apache.spark.SparkException
+import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.internal.Logging
 import org.apache.spark.util.Utils
 
@@ -29,7 +30,7 @@ import org.apache.spark.util.Utils
  * However, by doing this, we add SparkSQL dependency on user classes. This 
object provides
  * alternative approach to register UDTs for user classes.
  */
-private[spark]
+@DeveloperApi
 object UDTRegistration extends Serializable with Logging {
 
   /** The mapping between the Class between UserDefinedType and user classes. 
*/
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UserDefinedType.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UserDefinedType.scala
index 689c30f..7a26809 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UserDefinedType.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UserDefinedType.scala
@@ -22,6 +22,8 @@ import java.util.Objects
 import org.json4s.JsonAST.JValue
 import org.json4s.JsonDSL._
 
+import org.apache.spark.annotation.DeveloperApi
+
 /**
  * The data type for User Defined Types (UDTs).
  *
@@ -34,11 +36,8 @@ import org.json4s.JsonDSL._
  *
  * The conversion via `serialize` occurs when instantiating a `DataFrame` from 
another RDD.
  * The conversion via `deserialize` occurs when reading from a `DataFrame`.
- *
- * Note: This was previously a developer API in Spark 1.x. We are making this 
private in Spark 2.0
- * because we will very likely create a new version of this that works better 
with Datasets.
  */
-private[spark]
+@DeveloperApi
 abstract class UserDefinedType[UserType >: Null] extends DataType with 
Serializable {
 
   /** Underlying storage type for this UDT */


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to