JavaRDD<String> lineswithoutStopWords = nonEmptylines
        .map(new Function<String, String>() {

            private static final long serialVersionUID = 1L;

            @Override
            public String call(String line) throws Exception {
                // Strip the configured stop words from each input line.
                return removeStopWords(line, stopwords);
            }
        });

lineswithoutStopWords.saveAsTextFile("output/testop.txt");
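
For reference, removeStopWords is a helper that is not shown above; a minimal sketch of such a method (assuming stopwords is a java.util.Set<String> of lower-cased words) could look roughly like this:

// Hypothetical sketch only -- the actual removeStopWords implementation is not shown in this post.
// Assumes `stopwords` is a java.util.Set<String> holding lower-cased stop words.
private static String removeStopWords(String line, java.util.Set<String> stopwords) {
    StringBuilder cleaned = new StringBuilder();
    for (String token : line.split("\\s+")) {
        // Keep only tokens that are not in the stop-word set.
        if (!stopwords.contains(token.toLowerCase())) {
            if (cleaned.length() > 0) {
                cleaned.append(' ');
            }
            cleaned.append(token);
        }
    }
    return cleaned.toString();
}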



Exception in task 0.0 in stage 1.0 (TID 1)
java.lang.NullPointerException
       at java.lang.ProcessBuilder.start(ProcessBuilder.java:1012)
       at org.apache.hadoop.util.Shell.runCommand(Shell.java:404)
       at org.apache.hadoop.util.Shell.run(Shell.java:379)
       at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:589)
       at org.apache.hadoop.util.Shell.execCommand(Shell.java:678)
       at org.apache.hadoop.util.Shell.execCommand(Shell.java:661)
       at org.apache.hadoop.fs.RawLocalFileSystem.setPermission(RawLocalFileSystem.java:639)
       at org.apache.hadoop.fs.FilterFileSystem.setPermission(FilterFileSystem.java:468)
       at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:456)
       at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:424)
       at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:905)
       at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:798)
       at org.apache.hadoop.mapred.TextOutputFormat.getRecordWriter(TextOutputFormat.java:123)
       at org.apache.spark.SparkHadoopWriter.open(SparkHadoopWriter.scala:91)
       at org.apache.spark.rdd.PairRDDFunctions$$anonfun$13.apply(PairRDDFunctions.scala:1068)
       at org.apache.spark.rdd.PairRDDFunctions$$anonfun$13.apply(PairRDDFunctions.scala:1059)
       at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61)
       at org.apache.spark.scheduler.Task.run(Task.scala:64)
       at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:203)
       at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
       at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
       at java.lang.Thread.run(Thread.java:745)
15/04/09 18:44:36 WARN TaskSetManager: Lost task 0.0 in stage 1.0 (TID 1, localhost): java.lang.NullPointerException
       at java.lang.ProcessBuilder.start(ProcessBuilder.java:1012)
       at org.apache.hadoop.util.Shell.runCommand(Shell.java:404)
       at org.apache.hadoop.util.Shell.run(Shell.java:379)
       at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:589)
       at org.apache.hadoop.util.Shell.execCommand(Shell.java:678)
       at org.apache.hadoop.util.Shell.execCommand(Shell.java:661)
       at org.apache.hadoop.fs.RawLocalFileSystem.setPermission(RawLocalFileSystem.java:639)
       at org.apache.hadoop.fs.FilterFileSystem.setPermission(FilterFileSystem.java:468)
       at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:456)
       at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:424)
       at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:905)
       at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:798)
       at org.apache.hadoop.mapred.TextOutputFormat.getRecordWriter(TextOutputFormat.java:123)
       at org.apache.spark.SparkHadoopWriter.open(SparkHadoopWriter.scala:91)
       at org.apache.spark.rdd.PairRDDFunctions$$anonfun$13.apply(PairRDDFunctions.scala:1068)
       at org.apache.spark.rdd.PairRDDFunctions$$anonfun$13.apply(PairRDDFunctions.scala:1059)
       at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61)
       at org.apache.spark.scheduler.Task.run(Task.scala:64)
       at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:203)
       at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
       at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
       at java.lang.Thread.run(Thread.java:745)

15/04/09 18:44:36 ERROR TaskSetManager: Task 0 in stage 1.0 failed 1 times; aborting job
15/04/09 18:44:36 INFO TaskSchedulerImpl: Removed TaskSet 1.0, whose tasks have all completed, from pool
15/04/09 18:44:36 INFO TaskSchedulerImpl: Cancelling stage 1
15/04/09 18:44:36 INFO DAGScheduler: Job 1 failed: saveAsTextFile at TextPreProcessing.java:49, took 0.172959 s
Exception in thread "main" org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 1.0 failed 1 times, most recent failure: Lost task 0.0 in stage 1.0 (TID 1, localhost): java.lang.NullPointerException
       at java.lang.ProcessBuilder.start(ProcessBuilder.java:1012)
       at org.apache.hadoop.util.Shell.runCommand(Shell.java:404)
       at org.apache.hadoop.util.Shell.run(Shell.java:379)
       at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:589)
       at org.apache.hadoop.util.Shell.execCommand(Shell.java:678)
       at org.apache.hadoop.util.Shell.execCommand(Shell.java:661)
       at org.apache.hadoop.fs.RawLocalFileSystem.setPermission(RawLocalFileSystem.java:639)
       at org.apache.hadoop.fs.FilterFileSystem.setPermission(FilterFileSystem.java:468)
       at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:456)
       at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:424)
       at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:905)
       at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:798)
       at org.apache.hadoop.mapred.TextOutputFormat.getRecordWriter(TextOutputFormat.java:123)
       at org.apache.spark.SparkHadoopWriter.open(SparkHadoopWriter.scala:91)
       at org.apache.spark.rdd.PairRDDFunctions$$anonfun$13.apply(PairRDDFunctions.scala:1068)
       at org.apache.spark.rdd.PairRDDFunctions$$anonfun$13.apply(PairRDDFunctions.scala:1059)
       at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61)
       at org.apache.spark.scheduler.Task.run(Task.scala:64)
       at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:203)
       at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
       at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
       at java.lang.Thread.run(Thread.java:745)

Driver stacktrace:
       at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1203)
       at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1192)
       at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1191)
       at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
       at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
       at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1191)
       at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:693)
       at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:693)
       at scala.Option.foreach(Option.scala:236)
       at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:693)
       at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1393)
       at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1354)
       at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)

