[ https://issues.apache.org/jira/browse/SPARK-25654?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16640239#comment-16640239 ]
Michal Šenkýř commented on SPARK-25654:
---------------------------------------

Nested JavaBean support

> createDataFrame does not support nested JavaBeans inside arrays and collections
> --------------------------------------------------------------------------------
>
>                 Key: SPARK-25654
>                 URL: https://issues.apache.org/jira/browse/SPARK-25654
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 2.3.0
>            Reporter: Michal Šenkýř
>            Priority: Minor
>
> SPARK-17952 added support for nested JavaBeans to the Java DataFrame API. This issue tracks support for nested beans inside array and collection fields of JavaBeans.
> Current behavior:
> {noformat}
> scala> import scala.beans.BeanProperty
> import scala.beans.BeanProperty
>
> scala> class Nested(@BeanProperty var i: Int) extends Serializable
> defined class Nested
>
> scala> class Test(@BeanProperty var array: Array[Nested], @BeanProperty var list: java.util.List[Nested], @BeanProperty var map: java.util.Map[Integer, Nested]) extends Serializable
> defined class Test
>
> scala> import scala.collection.JavaConverters._
> import scala.collection.JavaConverters._
>
> scala> val array = Array(new Nested(1))
> array: Array[Nested] = Array(Nested@3dedc8b8)
>
> scala> val list = Seq(new Nested(2), new Nested(3)).asJava
> list: java.util.List[Nested] = [Nested@56adb75e, Nested@2cc7b63d]
>
> scala> val map = Map(Int.box(1) -> new Nested(4), Int.box(2) -> new Nested(5)).asJava
> map: java.util.Map[Integer,Nested] = {1=Nested@40bac624, 2=Nested@1bc80978}
>
> scala> val df = spark.createDataFrame(Seq(new Test(array, list, map)).asJava, classOf[Test])
> java.lang.IllegalArgumentException: The value (Nested@3dedc8b8) of the type (Nested) cannot be converted to struct<i:int>
>   at org.apache.spark.sql.catalyst.CatalystTypeConverters$StructConverter.toCatalystImpl(CatalystTypeConverters.scala:262)
>   at org.apache.spark.sql.catalyst.CatalystTypeConverters$StructConverter.toCatalystImpl(CatalystTypeConverters.scala:238)
>   at org.apache.spark.sql.catalyst.CatalystTypeConverters$CatalystTypeConverter.toCatalyst(CatalystTypeConverters.scala:103)
>   at org.apache.spark.sql.catalyst.CatalystTypeConverters$ArrayConverter$$anonfun$toCatalystImpl$1.apply(CatalystTypeConverters.scala:162)
>   at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
>   at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
>   at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
>   at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186)
>   at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
>   at scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:186)
>   at org.apache.spark.sql.catalyst.CatalystTypeConverters$ArrayConverter.toCatalystImpl(CatalystTypeConverters.scala:162)
>   at org.apache.spark.sql.catalyst.CatalystTypeConverters$ArrayConverter.toCatalystImpl(CatalystTypeConverters.scala:154)
>   at org.apache.spark.sql.catalyst.CatalystTypeConverters$CatalystTypeConverter.toCatalyst(CatalystTypeConverters.scala:103)
>   at org.apache.spark.sql.catalyst.CatalystTypeConverters$$anonfun$createToCatalystConverter$2.apply(CatalystTypeConverters.scala:396)
>   at org.apache.spark.sql.SQLContext$$anonfun$createStructConverter$1$1$$anonfun$apply$1.apply(SQLContext.scala:1114)
>   at org.apache.spark.sql.SQLContext$$anonfun$createStructConverter$1$1$$anonfun$apply$1.apply(SQLContext.scala:1113)
>   at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
>   at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
>   at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
>   at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186)
>   at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
>   at scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:186)
>   at org.apache.spark.sql.SQLContext$$anonfun$createStructConverter$1$1.apply(SQLContext.scala:1113)
>   at org.apache.spark.sql.SQLContext$$anonfun$createStructConverter$1$1.apply(SQLContext.scala:1108)
>   at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
>   at scala.collection.Iterator$class.toStream(Iterator.scala:1320)
>   at scala.collection.AbstractIterator.toStream(Iterator.scala:1334)
>   at scala.collection.TraversableOnce$class.toSeq(TraversableOnce.scala:298)
>   at scala.collection.AbstractIterator.toSeq(Iterator.scala:1334)
>   at org.apache.spark.sql.SparkSession.createDataFrame(SparkSession.scala:423)
>   ... 51 elided
> {noformat}
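
For contrast with the failure above, a sketch of the case that already works: SPARK-17952 (referenced in the description) covers a nested bean in a plain struct-typed field, and only the array, list, and map fields trip the converter. This is illustrative only; the Wrapper bean is hypothetical, and it assumes the Nested class, imports, and spark session from the reproduction.

{noformat}
// Illustrative sketch, not from the report: the directly nested case that
// SPARK-17952 already handles, contrasted with the failing collection
// fields above. Assumes the same spark-shell session (Nested class,
// BeanProperty and JavaConverters imports, spark SparkSession).
class Wrapper(@BeanProperty var nested: Nested) extends Serializable

// A bean nested in a plain field converts without error.
val okDf = spark.createDataFrame(Seq(new Wrapper(new Nested(1))).asJava, classOf[Wrapper])
okDf.printSchema() // expected: one column "nested" of struct type with an int field "i"

// Note: the exception text "cannot be converted to struct<i:int>" shows
// that schema inference for collection elements already yields the right
// struct type; only the CatalystTypeConverters value conversion is missing.
{noformat}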