Repository: spark Updated Branches: refs/heads/master 427c20dd6 -> a5f9fdbba
[SPARK-15268][SQL] Make JavaTypeInference work with UDTRegistration ## What changes were proposed in this pull request? We have a private `UDTRegistration` API to register user defined type. Currently `JavaTypeInference` can't work with it. So `SparkSession.createDataFrame` from a bean class will not correctly infer the schema of the bean class. ## How was this patch tested? `VectorUDTSuite`. Author: Liang-Chi Hsieh <sim...@tw.ibm.com> Closes #13046 from viirya/fix-udt-registry-javatypeinference. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a5f9fdbb Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a5f9fdbb Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a5f9fdbb Branch: refs/heads/master Commit: a5f9fdbba3bbefb56ca9ab33271301a2ff0834b5 Parents: 427c20d Author: Liang-Chi Hsieh <sim...@tw.ibm.com> Authored: Wed May 11 09:31:22 2016 -0700 Committer: Xiangrui Meng <m...@databricks.com> Committed: Wed May 11 09:31:22 2016 -0700 ---------------------------------------------------------------------- .../org/apache/spark/ml/linalg/VectorUDTSuite.scala | 16 ++++++++++++++++ .../spark/sql/catalyst/JavaTypeInference.scala | 5 +++++ 2 files changed, 21 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/a5f9fdbb/mllib/src/test/scala/org/apache/spark/ml/linalg/VectorUDTSuite.scala ---------------------------------------------------------------------- diff --git a/mllib/src/test/scala/org/apache/spark/ml/linalg/VectorUDTSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/linalg/VectorUDTSuite.scala index 6d01d8f..7b50876 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/linalg/VectorUDTSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/linalg/VectorUDTSuite.scala @@ -17,9 +17,19 @@ package org.apache.spark.ml.linalg +import scala.beans.BeanInfo + import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.JavaTypeInference import org.apache.spark.sql.types._ +@BeanInfo +case class LabeledPoint(label: Double, features: Vector) { + override def toString: String = { + s"($label,$features)" + } +} + class VectorUDTSuite extends SparkFunSuite { test("preloaded VectorUDT") { @@ -36,4 +46,10 @@ class VectorUDTSuite extends SparkFunSuite { assert(udt.simpleString == "vector") } } + + test("JavaTypeInference with VectorUDT") { + val (dataType, _) = JavaTypeInference.inferDataType(classOf[LabeledPoint]) + assert(dataType.asInstanceOf[StructType].fields.map(_.dataType) + === Seq(new VectorUDT, DoubleType)) + } } http://git-wip-us.apache.org/repos/asf/spark/blob/a5f9fdbb/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala index 6f9fbbb..92caf8f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala @@ -63,6 +63,11 @@ object JavaTypeInference { case c: Class[_] if c.isAnnotationPresent(classOf[SQLUserDefinedType]) => (c.getAnnotation(classOf[SQLUserDefinedType]).udt().newInstance(), true) + case c: Class[_] if UDTRegistration.exists(c.getName) => + val udt = UDTRegistration.getUDTFor(c.getName).get.newInstance() + .asInstanceOf[UserDefinedType[_ >: Null]] + (udt, true) + case c: Class[_] if c == classOf[java.lang.String] => (StringType, true) case c: Class[_] if c == classOf[Array[Byte]] => (BinaryType, true) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org