Re: spark streaming - saving kafka DStream into hadoop throws exception

salemi Fri, 15 Aug 2014 16:19:28 -0700

Look, this is the whole program. I am not trying to serialize the JobConf.
 
/**
 * Entry point of the Kafka-to-HDFS streaming job.
 *
 * Loads `settings.properties` through `getProperties`, creates a Kafka input
 * stream on a `StreamingContext` with a `Seconds(1)` batch duration, writes the
 * raw stream out with `saveAsNewAPIHadoopFiles`, and prints a sliding
 * 15-minute count of records whose `state` is `"COMPLETE"`.
 *
 * Any non-fatal failure during setup or execution is reported on stdout
 * (with a stack trace on stderr) rather than propagated to the caller.
 *
 * @param args command-line arguments; not read by this method
 */
def main(args: Array[String]): Unit = {
  import scala.util.control.NonFatal

  try {
    val properties = getProperties("settings.properties")

    // Report the missing key by name instead of failing with a bare
    // NullPointerException on `.toString()`.
    // NOTE(review): assumes `properties.get` returns null for an absent key
    // (java.util.Properties-like) — confirm against getProperties.
    def setting(key: String): String =
      Option(properties.get(key))
        .map(_.toString)
        .getOrElse(throw new IllegalArgumentException("missing required setting: " + key))

    StreamingExamples.setStreamingLogLevels()

    val zkQuorum = setting("zookeeper.list")
    val topic = setting("topic.name")
    val group = setting("group.name")
    val threads = setting("consumer.threads")
    val topicMap = Map(topic -> threads.toInt)

    val hdfsNameNodeUrl = setting("hdfs.namenode.url")
    val hdfsCheckPointUrl = hdfsNameNodeUrl + setting("hdfs.checkpoint.path")
    val hdfsDataUrl = hdfsNameNodeUrl + setting("hdfs.data.path")
    // The configured value is passed to Seconds(...) below, i.e. treated as seconds.
    val checkPointInterval = setting("spark.streaming.checkpoint.interval").toInt

    val sparkConf = new SparkConf().setAppName("KafkaMessageReceiver")

    val banner = "======================================================="
    println(banner)
    println(s"kafka configuration: zk: $zkQuorum ; topic: $topic ; group: $group ; threads:$threads")
    println(banner)

    val ssc = new StreamingContext(sparkConf, Seconds(1))
    ssc.checkpoint(hdfsCheckPointUrl)

    val dStream = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap)
    dStream.checkpoint(Seconds(checkPointInterval))

    // NOTE(review): the Hadoop configuration is handed to an output operation
    // on a checkpointed stream; this may be what triggers the JobConf
    // serialization error discussed in this thread — confirm against the
    // Spark version in use.
    dStream.saveAsNewAPIHadoopFiles(
      hdfsDataUrl,
      "csv",
      classOf[String],
      classOf[String],
      classOf[TextOutputFormat[String, String]],
      ssc.sparkContext.hadoopConfiguration)

    // Each message value is a comma-separated record; fields 8 and 12 are parsed as Long.
    val eventData = dStream
      .map(_._2)
      .map(_.split(","))
      .map { data =>
        DataObject(
          data(0), data(1), data(2), data(3), data(4), data(5), data(6), data(7),
          data(8).toLong, data(9), data(10), data(11), data(12).toLong,
          data(13), data(14))
      }

    val count = eventData
      .filter(_.state == "COMPLETE")
      .countByWindow(Minutes(15), Seconds(1))

    count
      .map(cnt => s"the Total count of calls in complete state  in the last 15 minutes is: $cnt")
      .print()

    ssc.start()
    ssc.awaitTermination()
  } catch {
    // NonFatal lets JVM-fatal errors and interrupts propagate instead of being swallowed.
    case NonFatal(e) =>
      println("exception caught: " + e)
      // Keep the stack trace; the one-line message alone hides where the failure occurred.
      e.printStackTrace()
  }
}



--
View this message in context: 
http://apache-spark-user-list.1001560.n3.nabble.com/spark-streaming-saving-kafka-DStream-into-hadoop-throws-exception-tp12202p12207.html
Sent from the Apache Spark User List mailing list archive at Nabble.com.

---------------------------------------------------------------------
To unsubscribe, e-mail: user-unsubscr...@spark.apache.org
For additional commands, e-mail: user-h...@spark.apache.org

Re: spark streaming - saving kafka DStream into hadoop throws exception

Reply via email to