This is an automated email from the ASF dual-hosted git repository. mergebot-role pushed a commit to branch mergebot in repository https://gitbox.apache.org/repos/asf/beam-site.git
commit 2cf192c9cd2c8642a3367f0150d860740ede5258 Author: basti.lj <basti...@alibaba-inc.com> AuthorDate: Tue Aug 29 12:34:29 2017 +0800 [BEAM-2666] Add JStorm runner to Beam asf-site. --- src/_data/capability-matrix.yml | 114 ++++++++++++++++++++++++++++++++++++ src/contribute/work-in-progress.md | 2 +- src/documentation/runners/jstorm.md | 101 ++++++++++++++++++++++++++++++++ src/images/logos/runners/jstorm.png | Bin 0 -> 6375 bytes 4 files changed, 216 insertions(+), 1 deletion(-) diff --git a/src/_data/capability-matrix.yml b/src/_data/capability-matrix.yml index b0ea35a..95aaaa9 100644 --- a/src/_data/capability-matrix.yml +++ b/src/_data/capability-matrix.yml @@ -13,6 +13,8 @@ columns: name: Apache Gearpump - class: mapreduce name: Apache Hadoop MapReduce + - class: jstorm + name: JStorm categories: - description: What is being computed? @@ -52,6 +54,10 @@ categories: l1: 'Yes' l2: fully supported l3: '' + - class: jstorm + l1: 'Yes' + l2: fully supported + l3: '' - name: GroupByKey values: - class: model @@ -82,6 +88,10 @@ categories: l1: 'Yes' l2: fully supported l3: '' + - class: jstorm + l1: 'Yes' + l2: fully supported + l3: '' - name: Flatten values: - class: model @@ -112,6 +122,10 @@ categories: l1: 'Yes' l2: fully supported l3: '' + - class: jstorm + l1: 'Yes' + l2: fully supported + l3: '' - name: Combine values: - class: model @@ -142,6 +156,10 @@ categories: l1: 'Yes' l2: fully supported l3: '' + - class: jstorm + l1: 'Yes' + l2: fully supported + l3: '' - name: Composite Transforms values: - class: model @@ -172,6 +190,10 @@ categories: l1: 'Yes' l2: fully supported l3: '' + - class: jstorm + l1: 'Yes' + l2: fully supported + l3: '' - name: Side Inputs values: - class: model @@ -202,6 +224,10 @@ categories: l1: 'Yes' l2: fully supported l3: '' + - class: jstorm + l1: 'Yes' + l2: some size restrictions + l3: '' - name: Source API values: - class: model @@ -232,6 +258,10 @@ categories: l1: 'Partially' l2: bounded source only l3: '' + - class: jstorm + 
l1: 'Yes' + l2: fully supported + l3: '' - name: Splittable DoFn values: - class: model @@ -262,6 +292,10 @@ categories: l1: 'No' l2: not implemented l3: + - class: jstorm + l1: 'No' + l2: not implemented + l3: - name: Metrics values: - class: model @@ -292,6 +326,10 @@ categories: l1: 'Partially' l2: Only attempted counters are supported l3: '' + - class: jstorm + l1: 'Partially' + l2: Metrics are only supported in local mode. + l3: '' - name: Stateful Processing values: - class: model @@ -322,6 +360,10 @@ categories: l1: 'Partially' l2: non-merging windows l3: '' + - class: jstorm + l1: 'Partially' + l2: non-merging windows + l3: '' - description: Where in event time? anchor: where color-b: '37d' @@ -359,6 +401,10 @@ categories: l1: 'Yes' l2: supported l3: '' + - class: jstorm + l1: 'Yes' + l2: supported + l3: '' - name: Fixed windows values: - class: model @@ -389,6 +435,10 @@ categories: l1: 'Yes' l2: supported l3: '' + - class: jstorm + l1: 'Yes' + l2: supported + l3: '' - name: Sliding windows values: - class: model @@ -419,6 +469,10 @@ categories: l1: 'Yes' l2: supported l3: '' + - class: jstorm + l1: 'Yes' + l2: supported + l3: '' - name: Session windows values: - class: model @@ -449,6 +503,10 @@ categories: l1: 'Yes' l2: supported l3: '' + - class: jstorm + l1: 'Yes' + l2: supported + l3: '' - name: Custom windows values: - class: model @@ -479,6 +537,10 @@ categories: l1: 'Yes' l2: supported l3: '' + - class: jstorm + l1: 'Yes' + l2: supported + l3: '' - name: Custom merging windows values: - class: model @@ -509,6 +571,10 @@ categories: l1: 'Yes' l2: supported l3: '' + - class: jstorm + l1: 'Yes' + l2: supported + l3: '' - name: Timestamp control values: - class: model @@ -539,6 +605,10 @@ categories: l1: 'Yes' l2: supported l3: '' + - class: jstorm + l1: 'Yes' + l2: supported + l3: '' - description: When in processing time? 
@@ -579,6 +649,10 @@ categories: l1: 'No' l2: batch-only runner l3: '' + - class: jstorm + l1: 'Yes' + l2: fully supported + l3: '' - name: Event-time triggers values: @@ -610,6 +684,10 @@ categories: l1: 'No' l2: '' l3: '' + - class: jstorm + l1: 'Yes' + l2: fully supported + l3: '' - name: Processing-time triggers values: @@ -641,6 +719,10 @@ categories: l1: 'No' l2: '' l3: '' + - class: jstorm + l1: 'Yes' + l2: fully supported + l3: '' - name: Count triggers values: @@ -672,6 +754,10 @@ categories: l1: 'No' l2: '' l3: '' + - class: jstorm + l1: 'Yes' + l2: fully supported + l3: '' - name: '[Meta]data driven triggers' values: @@ -704,6 +790,10 @@ categories: l1: 'No' l2: '' l3: + - class: jstorm + l1: 'No' + l2: pending model support + l3: - name: Composite triggers values: @@ -735,6 +825,10 @@ categories: l1: 'No' l2: '' l3: '' + - class: jstorm + l1: 'Yes' + l2: fully supported + l3: '' - name: Allowed lateness values: @@ -766,6 +860,10 @@ categories: l1: 'No' l2: '' l3: '' + - class: jstorm + l1: 'Yes' + l2: fully supported + l3: '' - name: Timers values: @@ -797,6 +895,10 @@ categories: l1: 'No' l2: '' l3: '' + - class: jstorm + l1: 'Partially' + l2: non-merging windows + l3: '' - description: How do refinements relate? 
anchor: how @@ -836,6 +938,10 @@ categories: l1: 'No' l2: batch-only runner l3: '' + - class: jstorm + l1: 'Yes' + l2: fully supported + l3: '' - name: Accumulating values: @@ -867,6 +973,10 @@ categories: l1: 'No' l2: '' l3: '' + - class: jstorm + l1: 'Yes' + l2: fully supported + l3: '' - name: 'Accumulating & Retracting' values: @@ -899,3 +1009,7 @@ categories: l1: 'No' l2: '' l3: '' + - class: jstorm + l1: 'No' + l2: pending model support + l3: '' diff --git a/src/contribute/work-in-progress.md b/src/contribute/work-in-progress.md index e35965b..a2572af 100644 --- a/src/contribute/work-in-progress.md +++ b/src/contribute/work-in-progress.md @@ -25,7 +25,7 @@ Current branches include: | Feature | Branch | JIRA Component | More Info | | ---- | ---- | ---- | ---- | | Apache Spark 2.0 Runner | [runners-spark2](https://github.com/apache/beam/tree/runners-spark2) | - | [thread](https://lists.apache.org/thread.html/e38ac4e4914a6cb1b865b1f32a6ca06c2be28ea4aa0f6b18393de66f@%3Cdev.beam.apache.org%3E) | -| JStorm Runner | [jstorm-runner](https://github.com/apache/beam/tree/jstorm-runner) | [runner-jstorm](https://issues.apache.org/jira/browse/BEAM/component/12332477) | [BEAM-1899](https://issues.apache.org/jira/browse/BEAM-1899) | +| [JStorm Runner]({{ site.baseurl }}/documentation/runners/jstorm) | [jstorm-runner](https://github.com/apache/beam/tree/jstorm-runner) | [runner-jstorm](https://issues.apache.org/jira/browse/BEAM/component/12332477) | [BEAM-1899](https://issues.apache.org/jira/browse/BEAM-1899) | | MapReduce Runner | [mr-runner](https://github.com/apache/beam/tree/mr-runner) | [runner-mapreduce](https://issues.apache.org/jira/browse/BEAM/component/12333013) | [BEAM-165](https://issues.apache.org/jira/browse/BEAM-165) | | Tez Runner | [tez-runner](https://github.com/apache/beam/tree/tez-runner) | [runner-tez](https://issues.apache.org/jira/browse/BEAM/component/12333014) | [BEAM-2709](https://issues.apache.org/jira/browse/BEAM-2709) | | Beam SQL DSL | 
[DSL_SQL](https://github.com/apache/beam/tree/DSL_SQL) | [dsl-sql](https://issues.apache.org/jira/browse/BEAM/component/12332480) | [BEAM-301](https://issues.apache.org/jira/browse/BEAM-301) | diff --git a/src/documentation/runners/jstorm.md b/src/documentation/runners/jstorm.md new file mode 100644 index 0000000..20e9322 --- /dev/null +++ b/src/documentation/runners/jstorm.md @@ -0,0 +1,101 @@ +--- +layout: default +title: "JStorm Runner" +permalink: /documentation/runners/jstorm/ +--- +# Using the JStorm Runner + +The JStorm Runner can be used to execute Beam pipelines using [JStorm](http://jstorm.io/), while providing: + +* High throughput and low latency. +* At-least-once and exactly-once fault tolerance. + +Like a native JStorm topology, users can execute Beam topology with local mode, standalone cluster or jstorm-on-yarn cluster. + +The [Beam Capability Matrix]({{ site.baseurl }}/documentation/runners/capability-matrix/) documents the currently supported capabilities of the JStorm Runner. + +## JStorm Runner prerequisites and setup + +The JStorm runner currently supports JStorm version 2.5.0-SNAPSHOT. + +You can add a dependency on the latest version of the JStorm runner by adding the following to your pom.xml: +```java +<dependency> + <groupId>org.apache.beam</groupId> + <artifactId>beam-runners-jstorm</artifactId> + <version>{{ site.release_latest }}</version> +</dependency> +``` + +### Deploying JStorm with your application + +To run against a Standalone cluster, you can package your program with all Beam dependencies into a fat jar, and then submit the topology with the following command. +``` +jstorm jar WordCount.jar org.apache.beam.examples.WordCount --runner=org.apache.beam.runners.jstorm.JStormRunner +``` + +If you don't want to package a fat jar, you can upload the Beam dependencies onto all cluster nodes(`$JSTORM_HOME/lib/ext/beam`) first. 
+When you submit a topology with argument `"--external-libs beam"`, JStorm will load the Beam dependencies automatically. +``` +jstorm jar WordCount.jar org.apache.beam.examples.WordCount --external-libs beam --runner=org.apache.beam.runners.jstorm.JStormRunner +``` + +To learn about deploying a JStorm cluster, please refer to [JStorm cluster deploy](http://jstorm.io/QuickStart/Deploy/index.html) + +## Pipeline options for the JStorm Runner + +When executing your pipeline with the JStorm Runner, you should consider the following pipeline options. + +<table class="table table-bordered"> +<tr> + <th>Field</th> + <th>Description</th> + <th>Default Value</th> +</tr> +<tr> + <td><code>runner</code></td> + <td>The pipeline runner to use. This option allows you to determine the pipeline runner at runtime.</td> + <td>Set to <code>JStormRunner</code> to run using JStorm.</td> +</tr> +<tr> + <td><code>topologyConfig</code></td> + <td>System topology config of JStorm</td> + <td>DefaultMapValueFactory.class</td> +</tr> +<tr> + <td><code>workerNumber</code></td> + <td>Worker number of topology</td> + <td>1</td> +</tr> +<tr> + <td><code>parallelism</code></td> + <td>Global parallelism number of a component</td> + <td>1</td> +</tr> +<tr> + <td><code>parallelismMap</code></td> + <td>Parallelism number of a specified composite PTransform</td> + <td>DefaultMapValueFactory.class</td> +</tr> +<tr> + <td><code>exactlyOnceTopology</code></td> + <td>Indicate if it is an exactly once topology</td> + <td>false</td> +</tr> +<tr> + <td><code>localMode</code></td> + <td>Indicate if the topology is running on local machine or distributed cluster</td> + <td>false</td> +</tr> +<tr> + <td><code>localModeExecuteTimeSec</code></td> + <td>Executing time(sec) of topology on local mode.</td> + <td>60</td> +</tr> +</table> + +## Additional notes + +### Monitoring your job +You can monitor your job with the JStorm UI, which displays all JStorm system metrics and Beam metrics. 
+For testing on local mode, you can retrieve the Beam metrics with the metrics method of PipelineResult. diff --git a/src/images/logos/runners/jstorm.png b/src/images/logos/runners/jstorm.png new file mode 100644 index 0000000..c0435db Binary files /dev/null and b/src/images/logos/runners/jstorm.png differ -- To stop receiving notification emails like this one, please contact "commits@beam.apache.org" <commits@beam.apache.org>.