[ https://issues.apache.org/jira/browse/SPARK-12066?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15036683#comment-15036683 ]
Michael Armbrust commented on SPARK-12066: ------------------------------------------ Can you reproduce this on 1.6-rc1? > spark sql throw java.lang.ArrayIndexOutOfBoundsException when use table.* > with join > ------------------------------------------------------------------------------------- > > Key: SPARK-12066 > URL: https://issues.apache.org/jira/browse/SPARK-12066 > Project: Spark > Issue Type: Bug > Components: SQL > Affects Versions: 1.4.0, 1.5.2 > Environment: linux > Reporter: Ricky Yang > Priority: Blocker > > throw java.lang.ArrayIndexOutOfBoundsException when I use following spark > sql on spark standlone or yarn. > the sql: > select ta.* > from bi_td.dm_price_seg_td tb > join bi_sor.sor_ord_detail_tf ta > on 1 = 1 > where ta.sale_dt = '20140514' > and ta.sale_price >= tb.pri_from > and ta.sale_price < tb.pri_to limit 10 ; > But ,the result is correct when using no * as following: > select ta.sale_dt > from bi_td.dm_price_seg_td tb > join bi_sor.sor_ord_detail_tf ta > on 1 = 1 > where ta.sale_dt = '20140514' > and ta.sale_price >= tb.pri_from > and ta.sale_price < tb.pri_to limit 10 ; > standlone version is 1.4.0 and version spark on yarn is 1.5.2 > error log : > > 15/11/30 14:19:59 ERROR SparkSQLDriver: Failed in [select ta.* > from bi_td.dm_price_seg_td tb > join bi_sor.sor_ord_detail_tf ta > on 1 = 1 > where ta.sale_dt = '20140514' > and ta.sale_price >= tb.pri_from > and ta.sale_price < tb.pri_to limit 10 ] > org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in > stage 0.0 failed 4 times, most recent failure: Lost task 0.3 in stage 0.0 > (TID 3, namenode2-sit.cnsuning.com): java.lang.ArrayIndexOutOfBoundsException > Driver stacktrace: > at > org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1283) > > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1271) > > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1270) > > at > scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) > > at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) > at > org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1270) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:697) > > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:697) > > at scala.Option.foreach(Option.scala:236) > at > org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:697) > > at > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1496) > > at > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1458) > > at > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1447) > > at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48) > at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:567) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:1824) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:1837) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:1850) > at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:215) > at > org.apache.spark.sql.execution.Limit.executeCollect(basicOperators.scala:207) > at > org.apache.spark.sql.hive.HiveContext$QueryExecution.stringResult(HiveContext.scala:587) > > at > org.apache.spark.sql.hive.thriftserver.SparkSQLDriver.run(SparkSQLDriver.scala:63) > > at > org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.processCmd(SparkSQLCLIDriver.scala:308) > > at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:376) > at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:311) > at org.apache.hadoop.hive.cli.CliDriver.processReader(CliDriver.java:409) > at org.apache.hadoop.hive.cli.CliDriver.processFile(CliDriver.java:425) > at > org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver$.main(SparkSQLCLIDriver.scala:166) > > at > org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.main(SparkSQLCLIDriver.scala) > > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > > at java.lang.reflect.Method.invoke(Method.java:606) > at > org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:674) > > at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:180) > at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:205) > at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:120) > at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) > Caused by: java.lang.ArrayIndexOutOfBoundsException > org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in > stage 0.0 failed 4 times, most recent failure: Lost task 0.3 in stage 0.0 > (TID 3, namenode2-sit.cnsuning.com): java.lang.ArrayIndexOutOfBoundsException > Driver stacktrace: > at > org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1283) > > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1271) > > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1270) > > at > scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) > > at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) > at > org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1270) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:697) > > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:697) > > at scala.Option.foreach(Option.scala:236) > at > org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:697) > > at > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1496) > > at > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1458) > > at > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1447) > > at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48) > at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:567) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:1824) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:1837) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:1850) > at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:215) > at > org.apache.spark.sql.execution.Limit.executeCollect(basicOperators.scala:207) > at > org.apache.spark.sql.hive.HiveContext$QueryExecution.stringResult(HiveContext.scala:587) > > at > org.apache.spark.sql.hive.thriftserver.SparkSQLDriver.run(SparkSQLDriver.scala:63) > > at > org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.processCmd(SparkSQLCLIDriver.scala:308) > > at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:376) > at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:311) > at org.apache.hadoop.hive.cli.CliDriver.processReader(CliDriver.java:409) > at org.apache.hadoop.hive.cli.CliDriver.processFile(CliDriver.java:425) > at > org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver$.main(SparkSQLCLIDriver.scala:166) > > at > org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.main(SparkSQLCLIDriver.scala) > > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > > at java.lang.reflect.Method.invoke(Method.java:606) > at > org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:674) > > at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:180) > at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:205) > at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:120) > at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) > Caused by: java.lang.ArrayIndexOutOfBoundsException > -- This message was sent by Atlassian JIRA (v6.3.4#6332) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org