[ https://issues.apache.org/jira/browse/SPARK-21364?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16080963#comment-16080963 ]
Kazuaki Ishizaki commented on SPARK-21364: ------------------------------------------ When I ran the following test case that is derived from the repro, I succeeded in getting the result without any exception on the master or 2.1.1. Did I make any mistakes? {code} test("SPARK-21364") { val data = Seq( "{\"menu\":{\"id\":\"file\",\"value\":\"File\",\"popup\":{\"menuitem\":[" + "{\"value\":\"New\",\"onclick\":\"CreateNewDoc()\"}," + "{\"value\":\"Open\",\"onclick\":\"OpenDoc()\"}, " + "{\"value\":\"Close\",\"onclick\":\"CloseDoc()\"}" + "]}}}") val df = sqlContext.read.json(sparkContext.parallelize(data)) df.select($"menu.popup.menuitem"(lit(0)). === ($"menu.popup.menuitem"(lit(1)))).show } {code} {code} +-------------------------------------------------+ |(menu.popup.menuitem[0] = menu.popup.menuitem[1])| +-------------------------------------------------+ | false| +-------------------------------------------------+ {code} > IndexOutOfBoundsException on equality check of two complex array elements > ------------------------------------------------------------------------- > > Key: SPARK-21364 > URL: https://issues.apache.org/jira/browse/SPARK-21364 > Project: Spark > Issue Type: Bug > Components: SQL > Affects Versions: 2.1.0 > Reporter: Vivek Patangiwar > Priority: Minor > > Getting an IndexOutOfBoundsException with the following code: > import org.apache.spark.sql.functions._ > import org.apache.spark.sql.SparkSession > object ArrayEqualityTest { > def main(s:Array[String]) { > val sparkSession = > SparkSession.builder().master("local[*]").appName("app").getOrCreate() > val sqlContext = sparkSession.sqlContext > val sc = sparkSession.sqlContext.sparkContext > import sparkSession.implicits._ > val df = > sqlContext.read.json(sc.parallelize(Seq("{\"menu\":{\"id\":\"file\",\"value\":\"File\",\"popup\":{\"menuitem\":[{\"value\":\"New\",\"onclick\":\"CreateNewDoc()\"},{\"value\":\"Open\",\"onclick\":\"OpenDoc()\"},{\"value\":\"Close\",\"onclick\":\"CloseDoc()\"}]}}}"))) > 
> df.select($"menu.popup.menuitem"(lit(0)).===($"menu.popup.menuitem"(lit(1)))).show > } > } > Here's the complete stack-trace: > Exception in thread "main" java.lang.IndexOutOfBoundsException: 1 > at > scala.collection.LinearSeqOptimized$class.apply(LinearSeqOptimized.scala:65) > at scala.collection.immutable.List.apply(List.scala:84) > at > org.apache.spark.sql.catalyst.expressions.BoundReference.doGenCode(BoundAttribute.scala:64) > at > org.apache.spark.sql.catalyst.expressions.Expression$$anonfun$genCode$2.apply(Expression.scala:104) > at > org.apache.spark.sql.catalyst.expressions.Expression$$anonfun$genCode$2.apply(Expression.scala:101) > at scala.Option.getOrElse(Option.scala:121) > at > org.apache.spark.sql.catalyst.expressions.Expression.genCode(Expression.scala:101) > at > org.apache.spark.sql.catalyst.expressions.codegen.GenerateOrdering$$anonfun$3.apply(GenerateOrdering.scala:76) > at > org.apache.spark.sql.catalyst.expressions.codegen.GenerateOrdering$$anonfun$3.apply(GenerateOrdering.scala:75) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) > at > scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33) > at scala.collection.mutable.WrappedArray.foreach(WrappedArray.scala:35) > at scala.collection.TraversableLike$class.map(TraversableLike.scala:234) > at scala.collection.AbstractTraversable.map(Traversable.scala:104) > at > org.apache.spark.sql.catalyst.expressions.codegen.GenerateOrdering$.genComparisons(GenerateOrdering.scala:75) > at > org.apache.spark.sql.catalyst.expressions.codegen.GenerateOrdering$.genComparisons(GenerateOrdering.scala:68) > at > org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext.genComp(CodeGenerator.scala:559) > at > org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext.genEqual(CodeGenerator.scala:486) > at > 
org.apache.spark.sql.catalyst.expressions.EqualTo$$anonfun$doGenCode$4.apply(predicates.scala:437) > at > org.apache.spark.sql.catalyst.expressions.EqualTo$$anonfun$doGenCode$4.apply(predicates.scala:437) > at > org.apache.spark.sql.catalyst.expressions.BinaryExpression$$anonfun$defineCodeGen$2.apply(Expression.scala:442) > at > org.apache.spark.sql.catalyst.expressions.BinaryExpression$$anonfun$defineCodeGen$2.apply(Expression.scala:441) > at > org.apache.spark.sql.catalyst.expressions.BinaryExpression.nullSafeCodeGen(Expression.scala:460) > at > org.apache.spark.sql.catalyst.expressions.BinaryExpression.defineCodeGen(Expression.scala:441) > at > org.apache.spark.sql.catalyst.expressions.EqualTo.doGenCode(predicates.scala:437) > at > org.apache.spark.sql.catalyst.expressions.Expression$$anonfun$genCode$2.apply(Expression.scala:104) > at > org.apache.spark.sql.catalyst.expressions.Expression$$anonfun$genCode$2.apply(Expression.scala:101) > at scala.Option.getOrElse(Option.scala:121) > at > org.apache.spark.sql.catalyst.expressions.Expression.genCode(Expression.scala:101) > at > org.apache.spark.sql.execution.ProjectExec$$anonfun$5.apply(basicPhysicalOperators.scala:63) > at > org.apache.spark.sql.execution.ProjectExec$$anonfun$5.apply(basicPhysicalOperators.scala:63) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) > at > scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) > at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) > at scala.collection.TraversableLike$class.map(TraversableLike.scala:234) > at scala.collection.AbstractTraversable.map(Traversable.scala:104) > at > org.apache.spark.sql.execution.ProjectExec.doConsume(basicPhysicalOperators.scala:63) > at > org.apache.spark.sql.execution.CodegenSupport$class.consume(WholeStageCodegenExec.scala:153) > at > 
org.apache.spark.sql.execution.InputAdapter.consume(WholeStageCodegenExec.scala:218) > at > org.apache.spark.sql.execution.InputAdapter.doProduce(WholeStageCodegenExec.scala:246) > at > org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:83) > at > org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:78) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:135) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) > at > org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:132) > at > org.apache.spark.sql.execution.CodegenSupport$class.produce(WholeStageCodegenExec.scala:78) > at > org.apache.spark.sql.execution.InputAdapter.produce(WholeStageCodegenExec.scala:218) > at > org.apache.spark.sql.execution.ProjectExec.doProduce(basicPhysicalOperators.scala:46) > at > org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:83) > at > org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:78) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:135) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) > at > org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:132) > at > org.apache.spark.sql.execution.CodegenSupport$class.produce(WholeStageCodegenExec.scala:78) > at > org.apache.spark.sql.execution.ProjectExec.produce(basicPhysicalOperators.scala:36) > at > org.apache.spark.sql.execution.WholeStageCodegenExec.doCodeGen(WholeStageCodegenExec.scala:313) > at > org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:354) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114) > at > 
org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:135) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) > at > org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:132) > at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:113) > at > org.apache.spark.sql.execution.SparkPlan.getByteArrayRdd(SparkPlan.scala:225) > at > org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:308) > at > org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:38) > at > org.apache.spark.sql.Dataset$$anonfun$org$apache$spark$sql$Dataset$$execute$1$1.apply(Dataset.scala:2371) > at > org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:57) > at org.apache.spark.sql.Dataset.withNewExecutionId(Dataset.scala:2765) > at > org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$execute$1(Dataset.scala:2370) > at > org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collect(Dataset.scala:2377) > at > org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2113) > at > org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2112) > at org.apache.spark.sql.Dataset.withTypedCallback(Dataset.scala:2795) > at org.apache.spark.sql.Dataset.head(Dataset.scala:2112) > at org.apache.spark.sql.Dataset.take(Dataset.scala:2327) > at org.apache.spark.sql.Dataset.showString(Dataset.scala:248) > at org.apache.spark.sql.Dataset.show(Dataset.scala:636) > at org.apache.spark.sql.Dataset.show(Dataset.scala:595) > at org.apache.spark.sql.Dataset.show(Dataset.scala:604) > at ArrayEqualityTest$.main(ArrayEqualityTest.scala:13) > at ArrayEqualityTest.main(ArrayEqualityTest.scala) -- This message was sent by Atlassian JIRA (v6.4.14#64029) --------------------------------------------------------------------- To unsubscribe, e-mail: 
issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org