spark git commit: [SPARK-4964][Streaming][Kafka] More updates to Exactly-once Kafka stream
Repository: spark Updated Branches: refs/heads/branch-1.3 01905c41e -> 281614d7c [SPARK-4964][Streaming][Kafka] More updates to Exactly-once Kafka stream Changes - Added example - Added a critical unit test that verifies that offset ranges can be recovered through checkpoints Might add more changes. Author: Tathagata Das Closes #4384 from tdas/new-kafka-fixes and squashes the following commits: 7c931c3 [Tathagata Das] Small update 3ed9284 [Tathagata Das] updated scala doc 83d0402 [Tathagata Das] Added JavaDirectKafkaWordCount example. 26df23c [Tathagata Das] Updates based on PR comments from Cody e4abf69 [Tathagata Das] Scala doc improvements and stuff. bb65232 [Tathagata Das] Fixed test bug and refactored KafkaStreamSuite 50f2b56 [Tathagata Das] Added Java API and added more Scala and Java unit tests. Also updated docs. e73589c [Tathagata Das] Minor changes. 4986784 [Tathagata Das] Added unit test to kafka offset recovery 6a91cab [Tathagata Das] Added example (cherry picked from commit c15134632e74e3dee05eda20c6ef79915e15d02e) Signed-off-by: Tathagata Das Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/281614d7 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/281614d7 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/281614d7 Branch: refs/heads/branch-1.3 Commit: 281614d7cd7870dc8c140c08f15902109658360d Parents: 01905c4 Author: Tathagata Das Authored: Mon Feb 9 22:45:48 2015 -0800 Committer: Tathagata Das Committed: Mon Feb 9 22:47:09 2015 -0800 -- .../streaming/JavaDirectKafkaWordCount.java | 113 ++ .../streaming/DirectKafkaWordCount.scala| 71 .../kafka/DirectKafkaInputDStream.scala | 5 +- .../spark/streaming/kafka/KafkaCluster.scala| 3 + .../apache/spark/streaming/kafka/KafkaRDD.scala | 12 +- .../streaming/kafka/KafkaRDDPartition.scala | 23 +- .../spark/streaming/kafka/KafkaUtils.scala | 353 ++- .../apache/spark/streaming/kafka/Leader.scala | 21 +- .../spark/streaming/kafka/OffsetRange.scala | 53 ++- .../kafka/JavaDirectKafkaStreamSuite.java | 159 + .../streaming/kafka/JavaKafkaStreamSuite.java | 5 +- .../kafka/DirectKafkaStreamSuite.scala | 302 .../streaming/kafka/KafkaClusterSuite.scala | 24 +- .../kafka/KafkaDirectStreamSuite.scala | 92 - .../spark/streaming/kafka/KafkaRDDSuite.scala | 8 +- .../streaming/kafka/KafkaStreamSuite.scala | 62 ++-- .../kafka/ReliableKafkaStreamSuite.scala| 4 +- 17 files changed, 1048 insertions(+), 262 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/281614d7/examples/scala-2.10/src/main/java/org/apache/spark/examples/streaming/JavaDirectKafkaWordCount.java -- diff --git a/examples/scala-2.10/src/main/java/org/apache/spark/examples/streaming/JavaDirectKafkaWordCount.java b/examples/scala-2.10/src/main/java/org/apache/spark/examples/streaming/JavaDirectKafkaWordCount.java new file mode 100644 index 000..bab9f24 --- /dev/null +++ b/examples/scala-2.10/src/main/java/org/apache/spark/examples/streaming/JavaDirectKafkaWordCount.java @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.examples.streaming; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Arrays; +import java.util.regex.Pattern; + +import scala.Tuple2; + +import com.google.common.collect.Lists; +import kafka.serializer.StringDecoder; + +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.*; +import org.apache.spark.streaming.api.java.*; +import org.apache.spark.streaming.kafka.KafkaUtils; +import org.apache.spark.streaming.Durations; + +/** + * Consumes messages from one or more topics in Kafka and does wordcount. + * Usage: DirectKafkaWordCount + *is a list of one or more Kafka brokers + *is a list of one or more kafka topics to consume from + * + * Example: +
spark git commit: [SPARK-4964][Streaming][Kafka] More updates to Exactly-once Kafka stream
Repository: spark Updated Branches: refs/heads/master ef2f55b97 -> c15134632 [SPARK-4964][Streaming][Kafka] More updates to Exactly-once Kafka stream Changes - Added example - Added a critical unit test that verifies that offset ranges can be recovered through checkpoints Might add more changes. Author: Tathagata Das Closes #4384 from tdas/new-kafka-fixes and squashes the following commits: 7c931c3 [Tathagata Das] Small update 3ed9284 [Tathagata Das] updated scala doc 83d0402 [Tathagata Das] Added JavaDirectKafkaWordCount example. 26df23c [Tathagata Das] Updates based on PR comments from Cody e4abf69 [Tathagata Das] Scala doc improvements and stuff. bb65232 [Tathagata Das] Fixed test bug and refactored KafkaStreamSuite 50f2b56 [Tathagata Das] Added Java API and added more Scala and Java unit tests. Also updated docs. e73589c [Tathagata Das] Minor changes. 4986784 [Tathagata Das] Added unit test to kafka offset recovery 6a91cab [Tathagata Das] Added example Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c1513463 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c1513463 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c1513463 Branch: refs/heads/master Commit: c15134632e74e3dee05eda20c6ef79915e15d02e Parents: ef2f55b Author: Tathagata Das Authored: Mon Feb 9 22:45:48 2015 -0800 Committer: Tathagata Das Committed: Mon Feb 9 22:45:48 2015 -0800 -- .../streaming/JavaDirectKafkaWordCount.java | 113 ++ .../streaming/DirectKafkaWordCount.scala| 71 .../kafka/DirectKafkaInputDStream.scala | 5 +- .../spark/streaming/kafka/KafkaCluster.scala| 3 + .../apache/spark/streaming/kafka/KafkaRDD.scala | 12 +- .../streaming/kafka/KafkaRDDPartition.scala | 23 +- .../spark/streaming/kafka/KafkaUtils.scala | 353 ++- .../apache/spark/streaming/kafka/Leader.scala | 21 +- .../spark/streaming/kafka/OffsetRange.scala | 53 ++- .../kafka/JavaDirectKafkaStreamSuite.java | 159 + .../streaming/kafka/JavaKafkaStreamSuite.java | 5 +- .../kafka/DirectKafkaStreamSuite.scala | 302 .../streaming/kafka/KafkaClusterSuite.scala | 24 +- .../kafka/KafkaDirectStreamSuite.scala | 92 - .../spark/streaming/kafka/KafkaRDDSuite.scala | 8 +- .../streaming/kafka/KafkaStreamSuite.scala | 62 ++-- .../kafka/ReliableKafkaStreamSuite.scala| 4 +- 17 files changed, 1048 insertions(+), 262 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c1513463/examples/scala-2.10/src/main/java/org/apache/spark/examples/streaming/JavaDirectKafkaWordCount.java -- diff --git a/examples/scala-2.10/src/main/java/org/apache/spark/examples/streaming/JavaDirectKafkaWordCount.java b/examples/scala-2.10/src/main/java/org/apache/spark/examples/streaming/JavaDirectKafkaWordCount.java new file mode 100644 index 000..bab9f24 --- /dev/null +++ b/examples/scala-2.10/src/main/java/org/apache/spark/examples/streaming/JavaDirectKafkaWordCount.java @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.examples.streaming; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Arrays; +import java.util.regex.Pattern; + +import scala.Tuple2; + +import com.google.common.collect.Lists; +import kafka.serializer.StringDecoder; + +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.*; +import org.apache.spark.streaming.api.java.*; +import org.apache.spark.streaming.kafka.KafkaUtils; +import org.apache.spark.streaming.Durations; + +/** + * Consumes messages from one or more topics in Kafka and does wordcount. + * Usage: DirectKafkaWordCount + *is a list of one or more Kafka brokers + *is a list of one or more kafka topics to consume from + * + * Example: + *$ bin/run-example streaming.KafkaWordCount broker1-host:port,broker2-host:port topic1,topic2 + */ + +