So either you have an empty line at the end, or when you use String.split you don't specify -1 as the second parameter...
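For example, a minimal sketch of the difference in spark-shell (assuming the rows are split on "|" as in the files quoted below; the variable names are illustrative, not from the original code):

    // Scenario 3 input from the thread below: a row with a trailing empty field
    val line = "A1| B1|"

    // split(regex) behaves like split(regex, 0): trailing empty strings are dropped
    line.split("\\|")        // Array(A1, " B1")      -> length 2
    // split(regex, -1) keeps trailing empty strings
    line.split("\\|", -1)    // Array(A1, " B1", "")  -> length 3

    // Regardless of the limit, guard the index before reading a column
    // (this is the "handle rows with fewer columns" check suggested further down):
    val cols = line.split("\\|", -1)
    val c3   = if (cols.length > 2) cols(2) else ""

With the default limit, indexing the third column of a Scenario 3 row throws exactly the ArrayIndexOutOfBoundsException: 2 reported below.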
On Aug 29, 2015 1:18 PM, "Akhil Das" <ak...@sigmoidanalytics.com> wrote:

> I suspect that in the last scenario you have an empty new line as the last line. If you put a try..catch you'd know for sure.
>
> Thanks
> Best Regards
>
> On Tue, Aug 25, 2015 at 2:53 AM, Michael Armbrust <mich...@databricks.com> wrote:
>
>> This top line here indicates that the exception is being thrown from your code (i.e. code written in the console):
>>
>>> at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$2.apply(<console>:40)
>>
>> Check to make sure that you are properly handling data that has fewer columns than you would expect.
>>
>> On Mon, Aug 24, 2015 at 12:41 PM, SAHA, DEBOBROTA <ds3...@att.com> wrote:
>>
>>> Hi,
>>>
>>> I am using Spark 1.4 and I am getting an ArrayIndexOutOfBoundsException when I am trying to read from a registered table in Spark.
>>>
>>> For example, if I have 3 different text files with the content below:
>>>
>>> *Scenario 1*:
>>> A1|B1|C1
>>> A2|B2|C2
>>>
>>> *Scenario 2*:
>>> A1| |C1
>>> A2| |C2
>>>
>>> *Scenario 3*:
>>> A1| B1|
>>> A2| B2|
>>>
>>> Scenarios 1 and 2 work fine, but for Scenario 3 I am getting the following error:
>>>
>>> org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 3.0 failed 1 times, most recent failure: Lost task 0.0 in stage 3.0 (TID 4, localhost): java.lang.ArrayIndexOutOfBoundsException: 2
>>>     at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$2.apply(<console>:40)
>>>     at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$2.apply(<console>:38)
>>>     at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
>>>     at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
>>>     at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
>>>     at scala.collection.Iterator$$anon$10.next(Iterator.scala:312)
>>>     at scala.collection.Iterator$class.foreach(Iterator.scala:727)
>>>     at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
>>>     at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:48)
>>>     at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:103)
>>>     at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:47)
>>>     at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:273)
>>>     at scala.collection.AbstractIterator.to(Iterator.scala:1157)
>>>     at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:265)
>>>     at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1157)
>>>     at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:252)
>>>     at scala.collection.AbstractIterator.toArray(Iterator.scala:1157)
>>>     at org.apache.spark.sql.execution.SparkPlan$$anonfun$3.apply(SparkPlan.scala:143)
>>>     at org.apache.spark.sql.execution.SparkPlan$$anonfun$3.apply(SparkPlan.scala:143)
>>>     at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1767)
>>>     at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1767)
>>>     at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:63)
>>>     at org.apache.spark.scheduler.Task.run(Task.scala:70)
>>>     at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
>>>     at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
>>>     at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
>>>     at java.lang.Thread.run(Thread.java:745)
>>>
>>> Driver stacktrace:
>>>     at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1273)
>>>     at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1264)
>>>     at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1263)
>>>     at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
>>>     at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
>>>     at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1263)
>>>     at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:730)
>>>     at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:730)
>>>     at scala.Option.foreach(Option.scala:236)
>>>     at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:730)
>>>     at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1457)
>>>     at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1418)
>>>     at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
>>>
>>> Please help.
>>>
>>> Thanks,
>>> Debobrota