// NOTE(review): code quoted verbatim from the mailing-list post below; the mail client
// collapsed the original newlines, so `val conf = new SparkConf val sc = ...` will not
// parse as printed — read each `val`/`def` as starting a new line in the real source.
// SparkConf and SparkContext are declared as object-level vals. The thread reports that
// exactly this shape throws "java.io.IOException: ... Failed to get broadcast_5_piece0
// of broadcast_5" when run in YARN cluster mode on Spark 1.3.0. Presumably the object
// initializer (and hence a second SparkContext) is re-run inside executor JVMs when task
// closures reference the object's members — TODO confirm; the usual fix is to construct
// the SparkConf/SparkContext inside main() so they exist only on the driver.
object StreamJob { val conf = new SparkConf val sc = new SparkContext(conf)
// main: word-count over a small hard-coded array — parallelize, flatMap on ",",
// map to (word, 1) pairs, reduceByKey to sum the counts.
// NOTE(review): `reducedWords.print` — `print` is a Spark Streaming DStream method, not
// an RDD method; for an RDD this would need something like
// `reducedWords.collect().foreach(println)`. Verify which API the author actually ran.
def main(args:Array[String]) { val baseRDD = sc.parallelize(Array("hi","hai","hi","bye","bye","hi","hai","hi","bye","bye")) val words = baseRDD.flatMap(line => line.split(",")) val wordPairs = words.map(word => (word,1)) val reducedWords = wordPairs.reduceByKey((a,b) => a+b) reducedWords.print } } Please try to run this code in cluster mode, possibly YARN mode and the spark version is 1.3.0 This has thrown java.io.IOException: org.apache.spark.SparkException: Failed to get broadcast_5_piece0 of broadcast_5 Regards, Padma Ch On Thu, Sep 24, 2015 at 11:25 AM, Akhil Das <ak...@sigmoidanalytics.com> wrote: > It should, i don't see any reason for it to not run in cluster mode. > > Thanks > Best Regards > > On Wed, Sep 23, 2015 at 8:56 PM, Priya Ch <learnings.chitt...@gmail.com> > wrote: > >> does it run in cluster mode ??? >> >> On Wed, Sep 23, 2015 at 7:11 PM, Akhil Das <ak...@sigmoidanalytics.com> >> wrote: >> >>> Yes of course it works. >>> >>> [image: Inline image 1] >>> >>> Thanks >>> Best Regards >>> >>> On Tue, Sep 22, 2015 at 4:53 PM, Priya Ch <learnings.chitt...@gmail.com> >>> wrote: >>> >>>> Parallelzing some collection (array of strings). Infact in our product >>>> we are reading data from kafka using KafkaUtils.createStream and applying >>>> some transformations. >>>> >>>> Is creating sparContext at object level instead of creating in main >>>> doesn't work ???? >>>> >>>> On Tue, Sep 22, 2015 at 2:59 PM, Akhil Das <ak...@sigmoidanalytics.com> >>>> wrote: >>>> >>>>> Its a "value" not a variable, and what are you parallelizing here? 
>>>>> >>>>> Thanks >>>>> Best Regards >>>>> >>>>> On Fri, Sep 18, 2015 at 11:21 PM, Priya Ch < >>>>> learnings.chitt...@gmail.com> wrote: >>>>> >>>>>> Hello All, >>>>>> >>>>>> Instead of declaring sparkContext in main, declared as object >>>>>> variable as - >>>>>> >>>>>> object sparkDemo >>>>>> { >>>>>> >>>>>> val conf = new SparkConf >>>>>> val sc = new SparkContext(conf) >>>>>> >>>>>> def main(args:Array[String]) >>>>>> { >>>>>> >>>>>> val baseRdd = sc.parallelize() >>>>>> . >>>>>> . >>>>>> . >>>>>> } >>>>>> >>>>>> } >>>>>> >>>>>> But this piece of code is giving : >>>>>> java.io.IOException: org.apache.spark.SparkException: Failed to get >>>>>> broadcast_5_piece0 of broadcast_5 >>>>>> at >>>>>> org.apache.spark.util.Utils$.tryOrIOException(Utils.scala:1155) >>>>>> at >>>>>> org.apache.spark.broadcast.TorrentBroadcast.readBroadcastBlock(TorrentBroadcast.scala:164) >>>>>> at >>>>>> org.apache.spark.broadcast.TorrentBroadcast._value$lzycompute(TorrentBroadcast.scala:64) >>>>>> at >>>>>> org.apache.spark.broadcast.TorrentBroadcast._value(TorrentBroadcast.scala:64) >>>>>> at >>>>>> org.apache.spark.broadcast.TorrentBroadcast.getValue(TorrentBroadcast.scala:87) >>>>>> at >>>>>> org.apache.spark.broadcast.Broadcast.value(Broadcast.scala:70) >>>>>> at >>>>>> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:58) >>>>>> at org.apache.spark.scheduler.Task.run(Task.scala:64) >>>>>> at >>>>>> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:203) >>>>>> at >>>>>> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) >>>>>> at >>>>>> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) >>>>>> at java.lang.Thread.run(Thread.java:745) >>>>>> Caused by: org.apache.spark.SparkException: Failed to get >>>>>> broadcast_5_piece0 of broadcast_5 >>>>>> at >>>>>> 
org.apache.spark.broadcast.TorrentBroadcast$$anonfun$org$apache$spark$broadcast$TorrentBroadcast$$readBlocks$1$$anonfun$2.apply(TorrentBroadcast.scala:137) >>>>>> at >>>>>> org.apache.spark.broadcast.TorrentBroadcast$$anonfun$org$apache$spark$broadcast$TorrentBroadcast$$readBlocks$1$$anonfun$2.apply(TorrentBroadcast.scala:137) >>>>>> at scala.Option.getOrElse(Option.scala:120) >>>>>> at >>>>>> org.apache.spark.broadcast.TorrentBroadcast$$anonfun$org$apache$spark$broadcast$TorrentBroadcast$$readBlocks$1.apply$mcVI$sp(TorrentBroadcast.scala:136) >>>>>> at >>>>>> org.apache.spark.broadcast.TorrentBroadcast$$anonfun$org$apache$spark$broadcast$TorrentBroadcast$$readBlocks$1.apply(TorrentBroadcast.scala:119) >>>>>> at >>>>>> org.apache.spark.broadcast.TorrentBroadcast$$anonfun$org$apache$spark$broadcast$TorrentBroadcast$$readBlocks$1.apply(TorrentBroadcast.scala:119) >>>>>> at scala.collection.immutable.List.foreach(List.scala:318) >>>>>> at org.apache.spark.broadcast.TorrentBroadcast.org >>>>>> <http://org.apache.spark.broadcast.torrentbroadcast.org/> >>>>>> $apache$spark$broadcast$TorrentBroadcast$$readBlocks(TorrentBroadcast.scala:119) >>>>>> at >>>>>> org.apache.spark.broadcast.TorrentBroadcast$$anonfun$readBroadcastBlock$1.apply(TorrentBroadcast.scala:174) >>>>>> at >>>>>> org.apache.spark.util.Utils$.tryOrIOException(Utils.scala:1152) >>>>>> >>>>>> Why should't we declare sc as object variable ??? >>>>>> >>>>>> Regards, >>>>>> Padma Ch >>>>>> >>>>> >>>>> >>>> >>> >> >