[ https://issues.apache.org/jira/browse/SPARK-12478?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Sean Owen updated SPARK-12478: ------------------------------ Target Version/s: 1.6.1, 2.0.0 (was: 2.0.0) > Dataset fields of product types can't be null > --------------------------------------------- > > Key: SPARK-12478 > URL: https://issues.apache.org/jira/browse/SPARK-12478 > Project: Spark > Issue Type: Bug > Components: SQL > Affects Versions: 1.6.0, 2.0.0 > Reporter: Cheng Lian > Assignee: Apache Spark > Labels: backport-needed > > Spark shell snippet for reproduction: > {code} > import sqlContext.implicits._ > case class Inner(f: Int) > case class Outer(i: Inner) > Seq(Outer(null)).toDS().toDF().show() > Seq(Outer(null)).toDS().show() > {code} > Expected output should be: > {noformat} > +----+ > | i| > +----+ > |null| > +----+ > +----+ > | i| > +----+ > |null| > +----+ > {noformat} > Actual output: > {noformat} > +------+ > | i| > +------+ > |[null]| > +------+ > java.lang.RuntimeException: Error while decoding: java.lang.RuntimeException: > Null value appeared in non-nullable field Inner.f of type scala.Int. If the > schema is inferred from a Scala tuple/case class, or a Java bean, please try > to use scala.Option[_] or other nullable types (e.g. java.lang.Integer > instead of int/scala.Int). 
> newinstance(class $iwC$$iwC$Outer,if (isnull(input[0, > StructType(StructField(f,IntegerType,false))])) null else newinstance(class > $iwC$$iwC$Inner,assertnotnull(input[0, > StructType(StructField(f,IntegerType,false))].f,Inner,f,scala.Int),false,ObjectType(class > $iwC$$iwC$Inner),Some($iwC$$iwC@6616b9e0)),false,ObjectType(class > $iwC$$iwC$Outer),Some($iwC$$iwC@6ab35ce3)) > +- if (isnull(input[0, StructType(StructField(f,IntegerType,false))])) null > else newinstance(class $iwC$$iwC$Inner,assertnotnull(input[0, > StructType(StructField(f,IntegerType,false))].f,Inner,f,scala.Int),false,ObjectType(class > $iwC$$iwC$Inner),Some($iwC$$iwC@6616b9e0)) > :- isnull(input[0, StructType(StructField(f,IntegerType,false))]) > : +- input[0, StructType(StructField(f,IntegerType,false))] > :- null > +- newinstance(class $iwC$$iwC$Inner,assertnotnull(input[0, > StructType(StructField(f,IntegerType,false))].f,Inner,f,scala.Int),false,ObjectType(class > $iwC$$iwC$Inner),Some($iwC$$iwC@6616b9e0)) > +- assertnotnull(input[0, > StructType(StructField(f,IntegerType,false))].f,Inner,f,scala.Int) > +- input[0, StructType(StructField(f,IntegerType,false))].f > +- input[0, StructType(StructField(f,IntegerType,false))] > at > org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.fromRow(ExpressionEncoder.scala:224) > at > org.apache.spark.sql.Dataset$$anonfun$collect$2.apply(Dataset.scala:704) > at > org.apache.spark.sql.Dataset$$anonfun$collect$2.apply(Dataset.scala:704) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244) > at > scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33) > at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:108) > at > scala.collection.TraversableLike$class.map(TraversableLike.scala:244) > at scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:108) > at 
org.apache.spark.sql.Dataset.collect(Dataset.scala:704) > at org.apache.spark.sql.Dataset.take(Dataset.scala:725) > at org.apache.spark.sql.Dataset.showString(Dataset.scala:240) > at org.apache.spark.sql.Dataset.show(Dataset.scala:230) > at org.apache.spark.sql.Dataset.show(Dataset.scala:193) > at org.apache.spark.sql.Dataset.show(Dataset.scala:201) > at > $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:33) > at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:38) > at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:40) > at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:42) > at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:44) > at $iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:46) > at $iwC$$iwC$$iwC$$iwC.<init>(<console>:48) > at $iwC$$iwC$$iwC.<init>(<console>:50) > at $iwC$$iwC.<init>(<console>:52) > at $iwC.<init>(<console>:54) > at <init>(<console>:56) > at .<init>(<console>:60) > at .<clinit>(<console>) > at .<init>(<console>:7) > at .<clinit>(<console>) > at $print(<console>) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:483) > at > org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1045) > at > org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1326) > at > org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:821) > at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:852) > at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:800) > at > org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:857) > at > org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:902) > at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:814) > at > 
org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:657) > at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:665) > at > org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$loop(SparkILoop.scala:670) > at > org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:997) > at > org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945) > at > org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945) > at > scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135) > at > org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:945) > at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1064) > at org.apache.spark.repl.Main$.main(Main.scala:31) > at org.apache.spark.repl.Main.main(Main.scala) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:483) > at > org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:731) > at > org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181) > at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206) > at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121) > at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) > Caused by: java.lang.RuntimeException: Null value appeared in non-nullable > field Inner.f of type scala.Int. If the schema is inferred from a Scala > tuple/case class, or a Java bean, please try to use scala.Option[_] or other > nullable types (e.g. java.lang.Integer instead of int/scala.Int). 
> at > org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.apply(Unknown > Source) > at > org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.fromRow(ExpressionEncoder.scala:221) > ... 62 more > {noformat} > We can see that there's an unexpected extra nested row in the first output > ([null] where null was expected), which causes the exception shown above. -- This message was sent by Atlassian JIRA (v6.3.4#6332) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org