Look this is the whole program. I am not trying to serialize the JobConf.
def main(args: Array[String]) {
try {
val properties = getProperties("settings.properties")
StreamingExamples.setStreamingLogLevels()
val zkQuorum = properties.get("zookeeper.list").toString()
val topic = properties.get("topic.name").toString()
val group = properties.get("group.name").toString()
val threads = properties.get("consumer.threads").toString()
val topicpMap = Map(topic -> threads.toInt)
val hdfsNameNodeUrl = properties.get("hdfs.namenode.url").toString()
val hdfsCheckPointUrl = hdfsNameNodeUrl +
properties.get("hdfs.checkpoint.path").toString()
val hdfsDataUrl = hdfsNameNodeUrl +
properties.get("hdfs.data.path").toString()
val checkPointInterval =
properties.get("spark.streaming.checkpoint.interval").toString().toInt
val sparkConf = new SparkConf().setAppName("KafkaMessageReceiver")
println("=======================================================")
println("kafka configuration: zk: "+ zkQuorum +" ; topic: "+ topic +"
; group: "+ group + " ; threads:" + threads)
println("=======================================================")
val ssc = new StreamingContext(sparkConf, Seconds(1))
ssc.checkpoint(hdfsCheckPointUrl)
val dStream = KafkaUtils.createStream(ssc, zkQuorum, group, topicpMap)
dStream.checkpoint(Seconds(checkPointInterval))
dStream.saveAsNewAPIHadoopFiles(hdfsDataUrl, "csv", classOf[String],
classOf[String], classOf[TextOutputFormat[String,String]],
ssc.sparkContext.hadoopConfiguration)
val eventData = dStream.map(_._2).map(_.split(",")).map(data =>
DataObject(data(0), data(1), data(2), data(3), data(4), data(5), data(6),
data(7), data(8).toLong, data(9), data(10), data(11), data(12).toLong,
data(13), data(14)))
val count = eventData.filter(_.state ==
"COMPLETE").countByWindow(Minutes(15), Seconds(1))
count.map(cnt => "the Total count of calls in complete state in the
last 15 minutes is: " + cnt).print()
ssc.start()
ssc.awaitTermination()
} catch {
case e: Exception => println("exception caught: " + e);
}
}
--
View this message in context:
http://apache-spark-user-list.1001560.n3.nabble.com/spark-streaming-saving-kafka-DStream-into-hadoop-throws-exception-tp12202p12207.html
Sent from the Apache Spark User List mailing list archive at Nabble.com.
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]