Repository: incubator-samza Updated Branches: refs/heads/master 9dc5aa2d4 -> 2e7b91b20
SAMZA-185: Upgrade Samza to Kafka 0.8.1. Reviewed by Chris Riccomini. Project: http://git-wip-us.apache.org/repos/asf/incubator-samza/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-samza/commit/2e7b91b2 Tree: http://git-wip-us.apache.org/repos/asf/incubator-samza/tree/2e7b91b2 Diff: http://git-wip-us.apache.org/repos/asf/incubator-samza/diff/2e7b91b2 Branch: refs/heads/master Commit: 2e7b91b20d0660fe127778d3a1bdca44e16c1245 Parents: 9dc5aa2 Author: Martin Kleppmann <[email protected]> Authored: Mon Mar 17 21:19:32 2014 +0000 Committer: Martin Kleppmann <[email protected]> Committed: Tue Mar 18 00:28:16 2014 +0000 ---------------------------------------------------------------------- build.gradle | 34 ++++++++++++------- docs/index.md | 1 - .../0.7.0/container/state-management.md | 2 +- gradle/dependency-versions.gradle | 2 +- samza-kafka/lib/kafka_2.10-0.8.1-SNAPSHOT.jar | Bin 2856071 -> 0 bytes samza-kafka/lib/kafka_2.9.2-0.8.1-SNAPSHOT.jar | Bin 2747348 -> 0 bytes 6 files changed, 24 insertions(+), 15 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-samza/blob/2e7b91b2/build.gradle ---------------------------------------------------------------------- diff --git a/build.gradle b/build.gradle index 31c54be..fc59626 100644 --- a/build.gradle +++ b/build.gradle @@ -8,8 +8,7 @@ buildscript { allprojects { repositories { - // Required for Kafka. Kafka's 0.8.0-beta1 Maven Central - // POM is broken. Should go away in future releases. + // Required for Kafka. maven { url 'https://repository.apache.org/content/groups/public' } @@ -62,6 +61,13 @@ project(":samza-core_$scalaVersion") { project(":samza-kafka_$scalaVersion") { apply plugin: 'scala' + configurations { + // Remove transitive dependencies from Zookeeper that we don't want. + compile.exclude group: 'javax.jms', module: 'jms' + compile.exclude group: 'com.sun.jdmk', module: 'jmxtools' + compile.exclude group: 'com.sun.jmx', module: 'jmxri' + } + dependencies { compile project(':samza-api') compile project(":samza-core_$scalaVersion") @@ -69,17 +75,13 @@ project(":samza-kafka_$scalaVersion") { compile "org.scala-lang:scala-library:$scalaLibVersion" compile "org.clapper:grizzled-slf4j_$scalaVersion:$grizzledVersion" compile "com.101tec:zkclient:$zkClientVersion" + compile "org.apache.zookeeper:zookeeper:$zookeeperVersion" compile "org.codehaus.jackson:jackson-jaxrs:$jacksonVersion" - // these can all go away when kafka is in maven - compile files("lib/kafka_$scalaVersion-" + kafkaVersion + ".jar") - compile "com.yammer.metrics:metrics-core:$metricsVersion" - compile "com.yammer.metrics:metrics-annotation:$metricsVersion" - // end these can all go away when kafka is in maven + compile "org.apache.kafka:kafka_$scalaVersion:$kafkaVersion" testCompile "junit:junit:$junitVersion" testCompile "org.mockito:mockito-all:$mockitoVersion" - // these can all go away when kafka is in maven - testCompile files("lib/kafka_$scalaVersion-$kafkaVersion-test.jar") - // end these can all go away when kafka is in maven + // TODO replace this with an official release, if possible + testCompile files("lib/kafka_$scalaVersion-0.8.1-SNAPSHOT-test.jar") // Logging in tests is good. testRuntime "org.slf4j:slf4j-simple:1.6.2" @@ -177,6 +179,13 @@ project(":samza-kv_$scalaVersion") { project(":samza-test_$scalaVersion") { apply plugin: 'scala' + configurations { + // Remove transitive dependencies from Zookeeper that we don't want. + compile.exclude group: 'javax.jms', module: 'jms' + compile.exclude group: 'com.sun.jdmk', module: 'jmxtools' + compile.exclude group: 'com.sun.jmx', module: 'jmxri' + } + dependencies { compile project(':samza-api') compile project(":samza-kv_$scalaVersion") @@ -186,8 +195,9 @@ project(":samza-test_$scalaVersion") { compile "net.sf.jopt-simple:jopt-simple:$joptSimpleVersion" compile "javax.mail:mail:1.4" compile "junit:junit:$junitVersion" - compile files("../samza-kafka/lib/kafka_$scalaVersion-" + kafkaVersion + ".jar") - testCompile files("../samza-kafka/lib/kafka_$scalaVersion-" + kafkaVersion + "-test.jar") + compile "org.apache.kafka:kafka_$scalaVersion:$kafkaVersion" + // TODO replace this with an official release, if possible + testCompile files("../samza-kafka/lib/kafka_$scalaVersion-0.8.1-SNAPSHOT-test.jar") testCompile "com.101tec:zkclient:$zkClientVersion" testCompile project(":samza-kafka_$scalaVersion") testRuntime "org.slf4j:slf4j-simple:1.6.2" http://git-wip-us.apache.org/repos/asf/incubator-samza/blob/2e7b91b2/docs/index.md ---------------------------------------------------------------------- diff --git a/docs/index.md b/docs/index.md index 5b27e37..383f546 100644 --- a/docs/index.md +++ b/docs/index.md @@ -20,7 +20,6 @@ Check out [Hello Samza](/startup/hello-samza/0.7.0) to try Samza. Read the [Back We are just moving our code to open source. This newly open sourced version has a few limitations: - * It depends on a snapshot version of Kafka that will not officially be released for a few months. * This branch represents our trunk, not the production version at LinkedIn. This rollout is pending. * We have not yet fully implemented our plans around fault-tolerance semantics. http://git-wip-us.apache.org/repos/asf/incubator-samza/blob/2e7b91b2/docs/learn/documentation/0.7.0/container/state-management.md ---------------------------------------------------------------------- diff --git a/docs/learn/documentation/0.7.0/container/state-management.md b/docs/learn/documentation/0.7.0/container/state-management.md index c7b25b0..aa418ff 100644 --- a/docs/learn/documentation/0.7.0/container/state-management.md +++ b/docs/learn/documentation/0.7.0/container/state-management.md @@ -133,7 +133,7 @@ However often the state that is stored is much smaller than the input stream (be The changelogs are just normal streams—other downstream tasks can subscribe to this state and use it. And it turns out that very often the most natural way to represent the output of a job is as the changelog of its task (we'll show some examples in a bit). -Of course a log of changes only grows over time so this would soon become impractical. Kafka has [log-compaction](https://cwiki.apache.org/confluence/display/KAFKA/Log+Compaction) which provides special support for this kind of use case, though. This feature allows Kafka to compact duplicate entries (i.e. multiple updates with the same key) in the log rather than just deleting old log segments. This feature is new, it is in trunk and will be released soon as part of the 0.8.1 release. +Of course a log of changes only grows over time so this would soon become impractical. Kafka has [log compaction](http://kafka.apache.org/documentation#compaction) which provides special support for this kind of use case, though. This feature allows Kafka to compact duplicate entries (i.e. multiple updates with the same key) in the log rather than just deleting old log segments. This feature is available since Kafka 0.8.1. The Kafka brokers scale well up to terabytes of data per machine for changelogs as for other topics. Log compaction proceeds at about 50MB/sec/core or whatever the I/O limits of the broker are. http://git-wip-us.apache.org/repos/asf/incubator-samza/blob/2e7b91b2/gradle/dependency-versions.gradle ---------------------------------------------------------------------- diff --git a/gradle/dependency-versions.gradle b/gradle/dependency-versions.gradle index 5740798..612670d 100644 --- a/gradle/dependency-versions.gradle +++ b/gradle/dependency-versions.gradle @@ -6,7 +6,7 @@ ext { zkClientVersion = "0.3" zookeeperVersion = "3.3.4" metricsVersion = "2.2.0" - kafkaVersion = "0.8.1-SNAPSHOT" + kafkaVersion = "0.8.1" commonsHttpClientVersion = "3.1" leveldbVersion = "1.8" yarnVersion = "2.2.0" http://git-wip-us.apache.org/repos/asf/incubator-samza/blob/2e7b91b2/samza-kafka/lib/kafka_2.10-0.8.1-SNAPSHOT.jar ---------------------------------------------------------------------- diff --git a/samza-kafka/lib/kafka_2.10-0.8.1-SNAPSHOT.jar b/samza-kafka/lib/kafka_2.10-0.8.1-SNAPSHOT.jar deleted file mode 100644 index 88d81b2..0000000 Binary files a/samza-kafka/lib/kafka_2.10-0.8.1-SNAPSHOT.jar and /dev/null differ http://git-wip-us.apache.org/repos/asf/incubator-samza/blob/2e7b91b2/samza-kafka/lib/kafka_2.9.2-0.8.1-SNAPSHOT.jar ---------------------------------------------------------------------- diff --git a/samza-kafka/lib/kafka_2.9.2-0.8.1-SNAPSHOT.jar b/samza-kafka/lib/kafka_2.9.2-0.8.1-SNAPSHOT.jar deleted file mode 100644 index d7c9bd6..0000000 Binary files a/samza-kafka/lib/kafka_2.9.2-0.8.1-SNAPSHOT.jar and /dev/null differ
