svn commit: r1618097 [45/48] - in /incubator/samza/site: ./ archive/ community/ contribute/ css/ fonts/ img/latest/ img/latest/learn/ img/latest/learn/documentation/ img/latest/learn/documentation/comparisons/ img/latest/learn/documentation/container/ ...

yanfang Thu, 14 Aug 2014 22:30:49 -0700

Added: 
incubator/samza/site/learn/documentation/latest/jobs/configuration-table.html
URL: 
http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/latest/jobs/configuration-table.html?rev=1618097&view=auto
==============================================================================
--- 
incubator/samza/site/learn/documentation/latest/jobs/configuration-table.html 
(added)
+++ 
incubator/samza/site/learn/documentation/latest/jobs/configuration-table.html 
Fri Aug 15 05:28:03 2014
@@ -0,0 +1,1157 @@
+<!DOCTYPE html>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<html>
+    <head>
+        <meta charset="utf-8">
+        <title>Samza Configuration Reference</title>
+        <style type="text/css">
+            body {
+                font-family: "Helvetica Neue",Helvetica,Arial,sans-serif;
+                font-size: 14px;
+                line-height: 22px;
+                color: #333;
+                background-color: #fff;
+            }
+
+            table {
+                border-collapse: collapse;
+                margin: 1em 0;
+            }
+
+            table th, table td {
+                text-align: left;
+                vertical-align: top;
+                padding: 12px;
+                border-bottom: 1px solid #ccc;
+                border-top: 1px solid #ccc;
+                border-left: 0;
+                border-right: 0;
+            }
+
+            table td.property, table td.default {
+                white-space: nowrap;
+            }
+
+            table th.section {
+                background-color: #eee;
+            }
+
+            table th.section .subtitle {
+                font-weight: normal;
+            }
+
+            code, a.property {
+                font-family: monospace;
+            }
+
+            span.system, span.stream, span.store, span.serde, span.rewriter, 
span.listener, span.reporter {
+                padding: 1px;
+                margin: 1px;
+                border-width: 1px;
+                border-style: solid;
+                border-radius: 4px;
+            }
+
+            span.system {
+                background-color: #ddf;
+                border-color: #bbd;
+            }
+
+            span.stream {
+                background-color: #dfd;
+                border-color: #bdb;
+            }
+
+            span.store {
+                background-color: #fdf;
+                border-color: #dbd;
+            }
+
+            span.serde {
+                background-color: #fdd;
+                border-color: #dbb;
+            }
+
+            span.rewriter {
+                background-color: #eee;
+                border-color: #ccc;
+            }
+
+            span.listener {
+                background-color: #ffd;
+                border-color: #ddb;
+            }
+
+            span.reporter {
+                background-color: #dff;
+                border-color: #bdd;
+            }
+        </style>
+    </head>
+
+    <body>
+        <h1>Samza Configuration Reference</h1>
+        <p>The following table lists all the standard properties that can be 
included in a Samza job configuration file.</p>
+        <p>Words highlighted like <span class="system">this</span> are 
placeholders for your own variable names.</p>
+        <table>
+            <tbody>
+                <tr><th>Name</th><th>Default</th><th>Description</th></tr>
+                <tr>
+                    <th colspan="3" class="section" id="job"><a 
href="configuration.html">Samza job configuration</a></th>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="job-factory-class">job.factory.class</td>
+                    <td class="default"></td>
+                    <td class="description">
+                        <strong>Required:</strong> The <a 
href="job-runner.html">job factory</a> to use for running this job.
+                        The value is a fully-qualified Java classname, which 
must implement
+                        <a 
href="../api/javadocs/org/apache/samza/job/StreamJobFactory.html">StreamJobFactory</a>.
+                        Samza ships with three implementations:
+                        <dl>
+                            
<dt><code>org.apache.samza.job.local.ThreadJobFactory</code></dt>
+                            <dd>Runs your job on your local machine using 
threads. This is intended only for
+                                development, not for production 
deployments.</dd>
+                            
<dt><code>org.apache.samza.job.local.ProcessJobFactory</code></dt>
+                            <dd>Runs your job on your local machine as a 
subprocess. An optional command builder
+                                property can also be specified (see <a 
href="#task-command-class" class="property">
+                                    task.command.class</a> for details). This 
is intended only for development,
+                                not for production deployments.</dd>
+                            
<dt><code>org.apache.samza.job.yarn.YarnJobFactory</code></dt>
+                            <dd>Runs your job on a YARN grid. See <a 
href="#yarn">below</a> for YARN-specific configuration.</dd>
+                        </dl>
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" id="job-name">job.name</td>
+                    <td class="default"></td>
+                    <td class="description">
+                        <strong>Required:</strong> The name of your job. This 
name appears on the Samza dashboard, and it
+                        is used to tell apart this job's checkpoints from 
other jobs' checkpoints.
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" id="job-id">job.id</td>
+                    <td class="default">1</td>
+                    <td class="description">
+                        If you run several instances of your job at the same 
time, you need to give each execution a
+                        different <code>job.id</code>. This is important, 
since otherwise the jobs will overwrite each
+                        other's checkpoints, and perhaps interfere with each 
other in other ways.
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="job-config-rewriter-class">job.config.rewriter.<br><span 
class="rewriter">rewriter-name</span>.class</td>
+                    <td class="default"></td>
+                    <td class="description">
+                        You can optionally define configuration rewriters, 
which have the opportunity to dynamically
+                        modify the job configuration before the job is 
started. For example, this can be useful for
+                        pulling configuration from an external configuration 
management system, or for determining
+                        the set of input streams dynamically at runtime. The 
value of this property is a
+                        fully-qualified Java classname which must implement
+                        <a 
href="../api/javadocs/org/apache/samza/config/ConfigRewriter.html">ConfigRewriter</a>.
+                        Samza ships with one rewriter by default:
+                        <dl>
+                            
<dt><code>org.apache.samza.config.RegExTopicGenerator</code></dt>
+                            <dd>When consuming from Kafka, this allows you to 
consume all Kafka topics that match
+                                some regular expression (rather than having to 
list each topic explicitly).
+                                This rewriter has <a 
href="#regex-rewriter">additional configuration</a>.</dd>
+                        </dl>
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="job-config-rewriters">job.config.rewriters</td>
+                    <td class="default"></td>
+                    <td class="description">
+                        If you have defined configuration rewriters, you need 
to list them here, in the order in
+                        which they should be applied. The value of this 
property is a comma-separated list of
+                        <span class="rewriter">rewriter-name</span> tokens.
+                    </td>
+                </tr>
+
+                <tr>
+                    <th colspan="3" class="section" id="task"><a 
href="../api/overview.html">Task configuration</a></th>
+                </tr>
+
+                <tr>
+                    <td class="property" id="task-class">task.class</td>
+                    <td class="default"></td>
+                    <td class="description">
+                        <strong>Required:</strong> The fully-qualified name of 
the Java class which processes
+                        incoming messages from input streams. The class must 
implement
+                        <a 
href="../api/javadocs/org/apache/samza/task/StreamTask.html">StreamTask</a>, 
and may optionally implement
+                        <a 
href="../api/javadocs/org/apache/samza/task/InitableTask.html">InitableTask</a>,
+                        <a 
href="../api/javadocs/org/apache/samza/task/ClosableTask.html">ClosableTask</a> 
and/or
+                        <a 
href="../api/javadocs/org/apache/samza/task/WindowableTask.html">WindowableTask</a>.
+                        The class will be instantiated several times, once for 
every
+                        <a 
href="../container/samza-container.html#tasks-and-partitions">input stream 
partition</a>.
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" id="task-inputs">task.inputs</td>
+                    <td class="default"></td>
+                    <td class="description">
+                        <strong>Required:</strong> A comma-separated list of 
streams that are consumed by this job.
+                        Each stream is given in the format
+                        <span class="system">system-name</span>.<span 
class="stream">stream-name</span>.
+                        For example, if you have one input system called 
<code>my-kafka</code>, and want to consume two
+                        Kafka topics called <code>PageViewEvent</code> and 
<code>UserActivityEvent</code>, then you would set
+                        <code>task.inputs=my-kafka.PageViewEvent, 
my-kafka.UserActivityEvent</code>.
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="task-window-ms">task.window.ms</td>
+                    <td class="default">-1</td>
+                    <td class="description">
+                        If <a href="#task-class" 
class="property">task.class</a> implements
+                        <a 
href="../api/javadocs/org/apache/samza/task/WindowableTask.html">WindowableTask</a>,
 it can
+                        receive a <a 
href="../container/windowing.html">windowing callback</a> in regular intervals.
+                        This property specifies the time between window() 
calls, in milliseconds. If the number is
+                        negative (the default), window() is never called. Note 
that Samza is
+                        <a 
href="../container/event-loop.html">single-threaded</a>, so a window() call 
will never
+                        occur concurrently with the processing of a message. 
If a message is being processed at the
+                        time when a window() call is due, the window() call 
occurs after the processing of the current
+                        message has completed.
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="task-checkpoint-factory">task.checkpoint.factory</td>
+                    <td class="default"></td>
+                    <td class="description">
+                        To enable <a 
href="../container/checkpointing.html">checkpointing</a>, you must set
+                        this property to the fully-qualified name of a Java 
class that implements
+                        <a 
href="../api/javadocs/org/apache/samza/checkpoint/CheckpointManagerFactory.html">CheckpointManagerFactory</a>.
+                        This is not required, but recommended for most jobs. 
If you don't configure checkpointing,
+                        and a job or container restarts, it does not remember 
which messages it has already processed.
+                        Without checkpointing, consumer behavior is determined 
by the
+                        <a href="#systems-samza-offset-default" 
class="property">...samza.offset.default</a>
+                        setting, which by default skips any messages that were 
published while the container was
+                        restarting. Checkpointing allows a job to start up 
where it previously left off.
+                        Samza ships with two checkpoint managers by default:
+                        <dl>
+                            
<dt><code>org.apache.samza.checkpoint.file.FileSystemCheckpointManagerFactory</code></dt>
+                            <dd>Writes checkpoints to files on the local 
filesystem. You can configure the file path
+                                with the <a href="#task-checkpoint-path" 
class="property">task.checkpoint.path</a>
+                                property. This is a simple option if your job 
always runs on the same machine.
+                                On a multi-machine cluster, this would require 
a network filesystem mount.</dd>
+                            
<dt><code>org.apache.samza.checkpoint.kafka.KafkaCheckpointManagerFactory</code></dt>
+                            <dd>Writes checkpoints to a dedicated topic on a 
Kafka cluster. This is the recommended
+                                option if you are already using Kafka for 
input or output streams. Use the
+                                <a href="#task-checkpoint-system" 
class="property">task.checkpoint.system</a>
+                                property to configure which Kafka cluster to 
use for checkpoints.</dd>
+                        </dl>
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="task-commit-ms">task.commit.ms</td>
+                    <td class="default">60000</td>
+                    <td class="description">
+                        If <a href="#task-checkpoint-factory" 
class="property">task.checkpoint.factory</a> is
+                        configured, this property determines how often a 
checkpoint is written. The value is
+                        the time between checkpoints, in milliseconds. The 
frequency of checkpointing affects
+                        failure recovery: if a container fails unexpectedly 
(e.g. due to crash or machine failure)
+                        and is restarted, it resumes processing at the last 
checkpoint. Any messages processed
+                        since the last checkpoint on the failed container are 
processed again. Checkpointing
+                        more frequently reduces the number of messages that 
may be processed twice, but also
+                        uses more resources.
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="task-command-class">task.command.class</td>
+                    <td 
class="default">org.apache.samza.job.<br>ShellCommandBuilder</td>
+                    <td class="description">
+                        The fully-qualified name of the Java class which 
determines the command line and environment
+                        variables for a <a 
href="../container/samza-container.html">container</a>. It must be a subclass of
+                        <a 
href="../api/javadocs/org/apache/samza/job/CommandBuilder.html">CommandBuilder</a>.
+                        This defaults to 
<code>task.command.class=org.apache.samza.job.ShellCommandBuilder</code>.
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" id="task-opts">task.opts</td>
+                    <td class="default"></td>
+                    <td class="description">
+                        Any JVM options to include in the command line when 
executing Samza containers. For example,
+                        this can be used to set the JVM heap size, to tune the 
garbage collector, or to enable
+                        <a 
href="/learn/tutorials/{{site.version}}/remote-debugging-samza.html">remote 
debugging</a>. Note
+                        there are some issues with the current implementation 
of <code>task.opts</code>:
+                        <ul>
+                            <li>If you set this property, the log 
configuration is disrupted. Please see
+                            <a 
href="https://issues.apache.org/jira/browse/SAMZA-109">SAMZA-109</a> for a 
workaround.</li>
+                            <li>This cannot be used when running with 
<code>ThreadJobFactory</code></li>
+                        </ul>
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" id="task-execute">task.execute</td>
+                    <td class="default">bin/run-container.sh</td>
+                    <td class="description">
+                        The command that starts a Samza container. The script 
must be included in the
+                        <a href="packaging.html">job package</a>. There is 
usually no need to customize this.
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="task-chooser-class">task.chooser.class</td>
+                    <td 
class="default">org.apache.samza.<br>system.chooser.<br>RoundRobinChooserFactory</td>
+                    <td class="description">
+                        This property can be optionally set to override the 
default
+                        <a 
href="../container/streams.html#messagechooser">message chooser</a>, which 
determines the
+                        order in which messages from multiple input streams 
are processed. The value of this
+                        property is the fully-qualified name of a Java class 
that implements
+                        <a 
href="../api/javadocs/org/apache/samza/system/chooser/MessageChooserFactory.html">MessageChooserFactory</a>.
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="task-lifecycle-listener-class">task.lifecycle.listener.<br><span 
class="listener">listener-name</span>.class</td>
+                    <td class="default"></td>
+                    <td class="description">
+                        Use this property to register a
+                        <a 
href="../container/event-loop.html#lifecycle-listeners">lifecycle listener</a>, 
which can receive
+                        a notification when a container starts up or shuts 
down, or when a message is processed.
+                        The value is the fully-qualified name of a Java class 
that implements
+                        <a 
href="../api/javadocs/org/apache/samza/task/TaskLifecycleListenerFactory.html">TaskLifecycleListenerFactory</a>.
+                        You can define multiple lifecycle listeners, each with 
a different <span class="listener">listener-name</span>,
+                        and reference them in <a 
href="#task-lifecycle-listeners" class="property">task.lifecycle.listeners</a>.
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="task-lifecycle-listeners">task.lifecycle.listeners</td>
+                    <td class="default"></td>
+                    <td class="description">
+                        If you have defined <a 
href="../container/event-loop.html#lifecycle-listeners">lifecycle listeners</a> 
with
+                        <a href="#task-lifecycle-listener-class" 
class="property">task.lifecycle.listener.*.class</a>,
+                        you need to list them here in order to enable them. 
The value of this property is a
+                        comma-separated list of <span 
class="listener">listener-name</span> tokens.
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="task-drop-deserialization-errors">task.drop.deserialization.errors</td>
+                    <td class="default"></td>
+                    <td class="description">
+                        This property defines how the system handles 
deserialization failures. If set to true, the system will
+                        skip the messages that fail to deserialize and keep running. If set to 
false, the system will throw an exception and fail the container. Default 
+                        is false.
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="task-drop-serialization-errors">task.drop.serialization.errors</td>
+                    <td class="default"></td>
+                    <td class="description">
+                        This property defines how the system handles 
serialization failures. If set to true, the system will
+                        drop the messages that fail to serialize and keep running. If set to 
false, the system will throw an exception and fail the container. Default 
+                        is false.
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="task-poll-interval-ms">task.poll.interval.ms</td>
+                    <td class="default"></td>
+                    <td class="description">
+                      Samza's container polls for more messages under two 
conditions. The first condition arises when there are simply no remaining 
+                      buffered messages to process for any input 
SystemStreamPartition. The second condition arises when some input 
+                      SystemStreamPartitions have empty buffers, but some do 
not. In the latter case, a polling interval is defined to determine how
+                      often to refresh the empty SystemStreamPartition 
buffers. By default, this interval is 50ms, which means that any empty 
+                      SystemStreamPartition buffer will be refreshed at least 
every 50ms. A higher value here means that empty SystemStreamPartitions 
+                      will be refreshed less often, which means more latency 
is introduced, but less CPU and network will be used. Decreasing this 
+                      value means that empty SystemStreamPartitions are 
refreshed more frequently, thereby introducing less latency, but increasing 
+                      CPU and network utilization.
+                    </td>
+                </tr>
+
+                <tr>
+                    <th colspan="3" class="section" id="streams"><a 
href="../container/streams.html">Systems (input and output streams)</a></th>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="systems-samza-factory">systems.<span 
class="system">system-name</span>.<br>samza.factory</td>
+                    <td class="default"></td>
+                    <td class="description">
+                        <strong>Required:</strong> The fully-qualified name of 
a Java class which provides a
+                        <em>system</em>. A system can provide input streams 
which you can consume in your Samza job,
+                        or output streams to which you can write, or both. The 
requirements on a system are very
+                        flexible &mdash; it may connect to a message broker, 
or read and write files, or use a database,
+                        or anything else. The class must implement
+                        <a 
href="../api/javadocs/org/apache/samza/system/SystemFactory.html">SystemFactory</a>.
+                        Samza ships with the following implementations:
+                        <dl>
+                            
<dt><code>org.apache.samza.system.kafka.KafkaSystemFactory</code></dt>
+                            <dd>Connects to a cluster of <a 
href="http://kafka.apache.org/">Kafka</a> brokers, allows
+                                Kafka topics to be consumed as streams in 
Samza, allows messages to be published to
+                                Kafka topics, and allows Kafka to be used for 
checkpointing (see
+                                <a href="#task-checkpoint-factory" 
class="property">task.checkpoint.factory</a>).
+                                See also <a href="#kafka">configuration of a 
Kafka system</a>.</dd>
+                            
<dt><code>org.apache.samza.system.filereader.FileReaderSystemFactory</code></dt>
+                            <dd>Reads data from a file on the local filesystem 
(the stream name is the path of the
+                                file to read). The file is read as ASCII, and 
treated as a stream of messages separated
+                                by newline (<code>\n</code>) characters. A 
task can consume each line of the file as
+                                a <code>java.lang.String</code> object. This 
system does not provide output streams.</dd>
+                        </dl>
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="systems-samza-key-serde">systems.<span 
class="system">system-name</span>.<br>samza.key.serde</td>
+                    <td class="default" rowspan="2"></td>
+                    <td class="description" rowspan="2">
+                        The <a 
href="../container/serialization.html">serde</a> which will be used to 
deserialize the
+                        <em>key</em> of messages on input streams, and to 
serialize the <em>key</em> of messages on
+                        output streams. This property can be defined either 
for an individual stream, or for all
+                        streams within a system (if both are defined, the 
stream-level definition takes precedence).
+                        The value of this property must be a <span 
class="serde">serde-name</span> that is registered
+                        with <a href="#serializers-registry-class" 
class="property">serializers.registry.*.class</a>.
+                        If this property is not set, messages are passed 
unmodified between the input stream consumer,
+                        the task and the output stream producer.
+                    </td>
+                </tr>
+                <tr>
+                    <td class="property">systems.<span 
class="system">system-name</span>.<br>streams.<span 
class="stream">stream-name</span>.<br>samza.key.serde</td>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="systems-samza-msg-serde">systems.<span 
class="system">system-name</span>.<br>samza.msg.serde</td>
+                    <td class="default" rowspan="2"></td>
+                    <td class="description" rowspan="2">
+                        The <a 
href="../container/serialization.html">serde</a> which will be used to 
deserialize the
+                        <em>value</em> of messages on input streams, and to 
serialize the <em>value</em> of messages on
+                        output streams. This property can be defined either 
for an individual stream, or for all
+                        streams within a system (if both are defined, the 
stream-level definition takes precedence).
+                        The value of this property must be a <span 
class="serde">serde-name</span> that is registered
+                        with <a href="#serializers-registry-class" 
class="property">serializers.registry.*.class</a>.
+                        If this property is not set, messages are passed 
unmodified between the input stream consumer,
+                        the task and the output stream producer.
+                    </td>
+                </tr>
+                <tr>
+                    <td class="property">systems.<span 
class="system">system-name</span>.<br>streams.<span 
class="stream">stream-name</span>.<br>samza.msg.serde</td>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="systems-samza-offset-default">systems.<span 
class="system">system-name</span>.<br>samza.offset.default</td>
+                    <td class="default" rowspan="2">upcoming</td>
+                    <td class="description" rowspan="2">
+                        If a container starts up without a <a 
href="../container/checkpointing.html">checkpoint</a>,
+                        this property determines where in the input stream we 
should start consuming. The value must be an
+                        <a 
href="../api/javadocs/org/apache/samza/system/SystemStreamMetadata.OffsetType.html">OffsetType</a>,
+                        one of the following:
+                        <dl>
+                            <dt><code>upcoming</code></dt>
+                            <dd>Start processing messages that are published 
after the job starts. Any messages published while 
+                                the job was not running are not processed.</dd>
+                            <dt><code>oldest</code></dt>
+                            <dd>Start processing at the oldest available 
message in the system, and
+                                <a href="reprocessing.html">reprocess</a> the 
entire available message history.</dd>
+                        </dl>
+                        This property can be defined either for an individual 
stream, or for all streams within a system
+                        (if both are defined, the stream-level definition 
takes precedence).
+                    </td>
+                </tr>
+                <tr>
+                    <td class="property">systems.<span 
class="system">system-name</span>.<br>streams.<span 
class="stream">stream-name</span>.<br>samza.offset.default</td>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="systems-streams-samza-reset-offset">systems.<span 
class="system">system-name</span>.<br>streams.<span 
class="stream">stream-name</span>.<br>samza.reset.offset</td>
+                    <td>false</td>
+                    <td>
+                        If set to <code>true</code>, when a Samza container 
starts up, it ignores any
+                        <a href="../container/checkpointing.html">checkpointed 
offset</a> for this particular input
+                        stream. Its behavior is thus determined by the 
<code>samza.offset.default</code> setting.
+                        Note that the reset takes effect <em>every time a 
container is started</em>, which may be
+                        every time you restart your job, or more frequently if 
a container fails and is restarted
+                        by the framework.
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="systems-streams-samza-priority">systems.<span 
class="system">system-name</span>.<br>streams.<span 
class="stream">stream-name</span>.<br>samza.priority</td>
+                    <td class="default">-1</td>
+                    <td class="description">
+                        If one or more streams have a priority set (any 
positive integer), they will be processed
+                        with <a 
href="../container/streams.html#prioritizing-input-streams">higher priority</a> 
than the other streams.
+                        You can set several streams to the same priority, or 
define multiple priority levels by
+                        assigning a higher number to the higher-priority 
streams. If a higher-priority stream has
+                        any messages available, they will always be processed 
first; messages from lower-priority
+                        streams are only processed when there are no new 
messages on higher-priority inputs.
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="systems-streams-samza-bootstrap">systems.<span 
class="system">system-name</span>.<br>streams.<span 
class="stream">stream-name</span>.<br>samza.bootstrap</td>
+                    <td class="default">false</td>
+                    <td class="description">
+                        If set to <code>true</code>, this stream will be 
processed as a
+                        <a 
href="../container/streams.html#bootstrapping">bootstrap stream</a>. This means 
that every time
+                        a Samza container starts up, this stream will be fully 
consumed before messages from any
+                        other stream are processed.
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="task-consumer-batch-size">task.consumer.batch.size</td>
+                    <td class="default">1</td>
+                    <td class="description">
+                        If set to a positive integer, the task will try to 
consume
+                        <a 
href="../container/streams.html#batching">batches</a> with the given number of 
messages
+                        from each input stream, rather than consuming 
round-robin from all the input streams on
+                        each individual message. Setting this property can 
improve performance in some cases.
+                    </td>
+                </tr>
+
+                <tr>
+                    <th colspan="3" class="section" id="serdes"><a 
href="../container/serialization.html">Serializers/Deserializers 
(Serdes)</a></th>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="serializers-registry-class">serializers.registry.<br><span 
class="serde">serde-name</span>.class</td>
+                    <td class="default"></td>
+                    <td class="description">
+                        Use this property to register a <a 
href="../container/serialization.html">serializer/deserializer</a>,
+                        which defines a way of encoding application objects as 
an array of bytes (used for messages
+                        in streams, and for data in persistent storage). You 
can give a serde any
+                        <span class="serde">serde-name</span> you want, and 
reference that name in properties like
+                        <a href="#systems-samza-key-serde" 
class="property">systems.*.samza.key.serde</a>,
+                        <a href="#systems-samza-msg-serde" 
class="property">systems.*.samza.msg.serde</a>,
+                        <a href="#stores-key-serde" 
class="property">stores.*.key.serde</a> and
+                        <a href="#stores-msg-serde" 
class="property">stores.*.msg.serde</a>.
+                        The value of this property is the fully-qualified name 
of a Java class that implements
+                        <a 
href="../api/javadocs/org/apache/samza/serializers/SerdeFactory.html">SerdeFactory</a>.
+                        Samza ships with several serdes:
+                        <dl>
+                            
<dt><code>org.apache.samza.serializers.ByteSerdeFactory</code></dt>
+                            <dd>A no-op serde which passes through the 
undecoded byte array.</dd>
+                            
<dt><code>org.apache.samza.serializers.IntegerSerdeFactory</code></dt>
+                            <dd>Encodes <code>java.lang.Integer</code> objects 
as binary (4 bytes fixed-length big-endian encoding).</dd>
+                            
<dt><code>org.apache.samza.serializers.StringSerdeFactory</code></dt>
+                            <dd>Encodes <code>java.lang.String</code> objects 
as UTF-8.</dd>
+                            
<dt><code>org.apache.samza.serializers.JsonSerdeFactory</code></dt>
+                            <dd>Encodes nested structures of 
<code>java.util.Map</code>, <code>java.util.List</code> etc. as JSON.</dd>
+                            
<dt><code>org.apache.samza.serializers.MetricsSnapshotSerdeFactory</code></dt>
+                            <dd>Encodes 
<code>org.apache.samza.metrics.reporter.MetricsSnapshot</code> objects (which 
are
+                                used for <a 
href="../container/metrics.html">reporting metrics</a>) as JSON.</dd>
+                            
<dt><code>org.apache.samza.serializers.KafkaSerdeFactory</code></dt>
+                            <dd>Adapter which allows existing 
<code>kafka.serializer.Encoder</code> and
+                                <code>kafka.serializer.Decoder</code> 
implementations to be used as Samza serdes.
+                                Set serializers.registry.<span 
class="serde">serde-name</span>.encoder and
+                                serializers.registry.<span 
class="serde">serde-name</span>.decoder to the appropriate
+                                class names.</dd>
+                        </dl>
+                    </td>
+                </tr>
+
+                <tr>
+                    <th colspan="3" class="section" 
id="filesystem-checkpoints">
+                        Using the filesystem for checkpoints<br>
+                        <span class="subtitle">
+                            (This section applies if you have set
+                            <a href="#task-checkpoint-factory" 
class="property">task.checkpoint.factory</a>
+                            <code>= 
org.apache.samza.checkpoint.file.FileSystemCheckpointManagerFactory</code>)
+                        </span>
+                    </th>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="task-checkpoint-path">task.checkpoint.path</td>
+                    <td class="default"></td>
+                    <td class="description">
+                        Required if you are using the filesystem for 
checkpoints. Set this to the path on your local filesystem
+                        where checkpoint files should be stored.
+                    </td>
+                </tr>
+
+                <tr>
+                    <th colspan="3" class="section" id="kafka">
+                        Using <a href="http://kafka.apache.org/">Kafka</a> for 
input streams, output streams and checkpoints<br>
+                        <span class="subtitle">
+                            (This section applies if you have set
+                            <a href="#systems-samza-factory" 
class="property">systems.*.samza.factory</a>
+                            <code>= 
org.apache.samza.system.kafka.KafkaSystemFactory</code>)
+                        </span>
+                    </th>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="systems-samza-consumer-zookeeper-connect">systems.<span 
class="system">system-name</span>.<br>consumer.zookeeper.connect</td>
+                    <td class="default"></td>
+                    <td class="description">
+                        The hostname and port of one or more Zookeeper nodes 
where information about the
+                        Kafka cluster can be found. This is given as a 
comma-separated list of
+                        <code>hostname:port</code> pairs, such as
+                        
<code>zk1.example.com:2181,zk2.example.com:2181,zk3.example.com:2181</code>.
+                        If the cluster information is at some sub-path of the 
Zookeeper namespace, you need to
+                        include the path at the end of the list of hostnames, 
for example:
+                        
<code>zk1.example.com:2181,zk2.example.com:2181,zk3.example.com:2181/clusters/my-kafka</code>
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="systems-samza-consumer-auto-offset-reset">systems.<span 
class="system">system-name</span>.<br>consumer.auto.offset.reset</td>
+                    <td class="default">largest</td>
+                    <td class="description">
+                        This setting determines what happens if a consumer 
attempts to read an offset that is
+                        outside of the current valid range. This could happen 
if the topic does not exist, or
+                        if a checkpoint is older than the maximum message 
history retained by the brokers.
+                        This property is not to be confused with
+                        <a 
href="#systems-samza-offset-default">systems.*.samza.offset.default</a>,
+                        which determines what happens if there is no 
checkpoint. The following are valid
+                        values for <code>auto.offset.reset</code>:
+                        <dl>
+                            <dt><code>smallest</code></dt>
+                            <dd>Start consuming at the smallest (oldest) 
offset available on the broker
+                                (process as much message history as 
available).</dd>
+                            <dt><code>largest</code></dt>
+                            <dd>Start consuming at the largest (newest) offset 
available on the broker
+                                (skip any messages published while the job was 
not running).</dd>
+                            <dt>anything else</dt>
+                            <dd>Throw an exception and refuse to start up the 
job.</dd>
+                        </dl>
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="systems-samza-consumer">systems.<span 
class="system">system-name</span>.<br>consumer.*</td>
+                    <td class="default"></td>
+                    <td class="description">
+                        Any <a 
href="http://kafka.apache.org/documentation.html#consumerconfigs">Kafka 
consumer configuration</a>
+                        can be included here. For example, to change the 
socket timeout, you can set
+                        systems.<span 
class="system">system-name</span>.consumer.socket.timeout.ms.
+                        (There is no need to configure <code>group.id</code> 
or <code>client.id</code>,
+                        as they are automatically configured by Samza. Also, 
there is no need to set
+                        <code>auto.commit.enable</code> because Samza has its 
own checkpointing mechanism.)
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="systems-samza-producer-metadata-broker-list">systems.<span 
class="system">system-name</span>.<br>producer.metadata.broker.list</td>
+                    <td class="default"></td>
+                    <td class="description">
+                        A list of network endpoints where the Kafka brokers 
are running. This is given as
+                        a comma-separated list of <code>hostname:port</code> 
pairs, for example
+                        
<code>kafka1.example.com:9092,kafka2.example.com:9092,kafka3.example.com:9092</code>.
+                        It's not necessary to list every single Kafka node in 
the cluster: Samza uses this
+                        property in order to discover which topics and 
partitions are hosted on which broker.
+                        This property is needed even if you are only consuming 
from Kafka, and not writing
+                        to it, because Samza uses it to discover metadata 
about streams being consumed.
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="systems-samza-producer-producer-type">systems.<span 
class="system">system-name</span>.<br>producer.producer.type</td>
+                    <td class="default">sync</td>
+                    <td class="description">
+                        Controls whether messages emitted from a stream 
processor should be buffered before
+                        they are sent to Kafka. The options are:
+                        <dl>
+                            <dt><code>sync</code></dt>
+                            <dd>Any messages sent to output streams are 
synchronously flushed to the Kafka brokers
+                                before the next message from an input stream 
is processed.</dd>
+                            <dt><code>async</code></dt>
+                            <dd>Messages sent to output streams are buffered 
within the Samza container, and published
+                                to the Kafka brokers as a batch. This setting 
can increase throughput, but
+                                risks buffered messages being lost if a 
container abruptly fails. The maximum
+                                number of messages to buffer is controlled with
+                                systems.<span 
class="system">system-name</span>.producer.batch.num.messages
+                                and the maximum time (in milliseconds) to wait 
before flushing the buffer is set with
+                                systems.<span 
class="system">system-name</span>.producer.queue.buffering.max.ms.</dd>
+                        </dl>
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="systems-samza-producer">systems.<span 
class="system">system-name</span>.<br>producer.*</td>
+                    <td class="default"></td>
+                    <td class="description">
+                        Any <a 
href="http://kafka.apache.org/documentation.html#producerconfigs">Kafka 
producer configuration</a>
+                        can be included here. For example, to change the 
request timeout, you can set
+                        systems.<span 
class="system">system-name</span>.producer.request.timeout.ms.
+                        (There is no need to configure <code>client.id</code> 
as it is automatically
+                        configured by Samza.)
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="systems-samza-fetch-threshold">systems.<span 
class="system">system-name</span>.<br>samza.fetch.threshold</td>
+                    <td class="default">50000</td>
+                    <td class="description">
+                        When consuming streams from Kafka, a Samza container 
maintains an in-memory buffer
+                        for incoming messages in order to increase throughput 
(the stream task can continue
+                        processing buffered messages while new messages are 
fetched from Kafka). This
+                        parameter determines the number of messages we aim to 
buffer across all stream
+                        partitions consumed by a container. For example, if a 
container consumes 50 partitions,
+                        it will try to buffer 1000 messages per partition by 
default. When the number of
+                        buffered messages falls below that threshold, Samza 
fetches more messages from the
+                        Kafka broker to replenish the buffer. Increasing this 
parameter can increase a job's
+                        processing throughput, but also increases the amount 
of memory used.
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="task-checkpoint-system">task.checkpoint.system</td>
+                    <td class="default"></td>
+                    <td class="description">
+                        This property is required if you are using Kafka for 
checkpoints
+                        (<a href="#task-checkpoint-factory" 
class="property">task.checkpoint.factory</a>
+                        <code>= 
org.apache.samza.checkpoint.kafka.KafkaCheckpointManagerFactory</code>).
+                        You must set it to the <span 
class="system">system-name</span> of a Kafka system. The stream
+                        name (topic name) within that system is automatically 
determined from the job name and ID:
+                        <code>__samza_checkpoint_${<a href="#job-name" 
class="property">job.name</a>}_${<a href="#job-id" 
class="property">job.id</a>}</code>
+                        (with underscores in the job name and ID replaced by 
hyphens).
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="task-checkpoint-replication-factor">task.checkpoint.<br>replication.factor</td>
+                    <td class="default">3</td>
+                    <td class="description">
+                        If you are using Kafka for checkpoints, this is the 
number of Kafka nodes to which you want the
+                        checkpoint topic replicated for durability.
+                    </td>
+                </tr>
+
+                <tr>
+                    <th colspan="3" class="section" id="regex-rewriter">
+                        Consuming all Kafka topics matching a regular 
expression<br>
+                        <span class="subtitle">
+                            (This section applies if you have set
+                            <a href="#job-config-rewriter-class" 
class="property">job.config.rewriter.*.class</a>
+                            <code>= 
org.apache.samza.config.RegExTopicGenerator</code>)
+                        </span>
+                    </th>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="job-config-rewriter-system">job.config.rewriter.<br><span 
class="rewriter">rewriter-name</span>.system</td>
+                    <td class="default"></td>
+                    <td class="description">
+                        Set this property to the <span 
class="system">system-name</span> of the Kafka system
+                        from which you want to consume all matching topics.
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="job-config-rewriter-regex">job.config.rewriter.<br><span 
class="rewriter">rewriter-name</span>.regex</td>
+                    <td class="default"></td>
+                    <td class="description">
+                        A regular expression specifying which topics you want 
to consume within the Kafka system
+                        <a href="#job-config-rewriter-system" 
class="property">job.config.rewriter.*.system</a>.
+                        Any topics matched by this regular expression will be 
consumed <em>in addition to</em> any
+                        topics you specify with <a href="#task-inputs" 
class="property">task.inputs</a>.
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="job-config-rewriter-config">job.config.rewriter.<br><span 
class="rewriter">rewriter-name</span>.config.*</td>
+                    <td class="default"></td>
+                    <td class="description">
+                        Any properties specified within this namespace are 
applied to the configuration of streams
+                        that match the regex in
+                        <a href="#job-config-rewriter-regex" 
class="property">job.config.rewriter.*.regex</a>.
+                        For example, you can set 
<code>job.config.rewriter.*.config.samza.msg.serde</code> to configure
+                        the deserializer for messages in the matching streams, 
which is equivalent to setting
+                        <a href="#systems-samza-msg-serde" 
class="property">systems.*.streams.*.samza.msg.serde</a>
+                        for each topic that matches the regex.
+                    </td>
+                </tr>
+
+                <tr>
+                    <th colspan="3" class="section" id="state"><a 
href="../container/state-management.html">Storage and State Management</a></th>
+                </tr>
+
+                <tr>
+                    <td class="property" id="stores-factory">stores.<span 
class="store">store-name</span>.factory</td>
+                    <td class="default"></td>
+                    <td class="description">
+                        This property defines a store, Samza's mechanism for 
efficient
+                        <a href="../container/state-management.html">stateful 
stream processing</a>. You can give a
+                        store any <span class="store">store-name</span>, and 
use that name to get a reference to the
+                        store in your stream task (call
+                        <a 
href="../api/javadocs/org/apache/samza/task/TaskContext.html#getStore(java.lang.String)">TaskContext.getStore()</a>
+                        in your task's
+                        <a 
href="../api/javadocs/org/apache/samza/task/InitableTask.html#init(org.apache.samza.config.Config,
 org.apache.samza.task.TaskContext)">init()</a>
+                        method). The value of this property is the 
fully-qualified name of a Java class that implements
+                        <a 
href="../api/javadocs/org/apache/samza/storage/StorageEngineFactory.html">StorageEngineFactory</a>.
+                        Samza currently ships with one storage engine 
implementation:
+                        <dl>
+                            
<dt><code>org.apache.samza.storage.kv.KeyValueStorageEngineFactory</code></dt>
+                            <dd>An on-disk storage engine with a key-value 
interface, implemented using
+                                <a 
href="https://code.google.com/p/leveldb/">LevelDB</a>. It supports fast 
random-access
+                                reads and writes, as well as range queries on 
keys. LevelDB can be configured with
+                                various <a href="#keyvalue">additional tuning 
parameters</a>.</dd>
+                        </dl>
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" id="stores-key-serde">stores.<span 
class="store">store-name</span>.key.serde</td>
+                    <td class="default"></td>
+                    <td class="description">
+                        If the storage engine expects keys in the store to be 
simple byte arrays, this
+                        <a href="../container/serialization.html">serde</a> 
allows the stream task to access the
+                        store using another object type as key. The value of 
this property must be a
+                        <span class="serde">serde-name</span> that is 
registered with
+                        <a href="#serializers-registry-class" 
class="property">serializers.registry.*.class</a>.
+                        If this property is not set, keys are passed 
unmodified to the storage engine
+                        (and the <a href="#stores-changelog">changelog 
stream</a>, if appropriate).
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" id="stores-msg-serde">stores.<span 
class="store">store-name</span>.msg.serde</td>
+                    <td class="default"></td>
+                    <td class="description">
+                        If the storage engine expects values in the store to 
be simple byte arrays, this
+                        <a href="../container/serialization.html">serde</a> 
allows the stream task to access the
+                        store using another object type as value. The value of 
this property must be a
+                        <span class="serde">serde-name</span> that is 
registered with
+                        <a href="#serializers-registry-class" 
class="property">serializers.registry.*.class</a>.
+                        If this property is not set, values are passed 
unmodified to the storage engine
+                        (and the <a href="#stores-changelog">changelog 
stream</a>, if appropriate).
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" id="stores-changelog">stores.<span 
class="store">store-name</span>.changelog</td>
+                    <td class="default"></td>
+                    <td class="description">
+                        Samza stores are local to a container. If the 
container fails, the contents of the
+                        store are lost. To prevent loss of data, you need to 
set this property to configure
+                        a changelog stream: Samza then ensures that writes to 
the store are replicated to
+                        this stream, and the store is restored from this 
stream after a failure. The value
+                        of this property is given in the form
+                        <span class="system">system-name</span>.<span 
class="stream">stream-name</span>.
+                        Any output stream can be used as changelog, but you 
must ensure that only one job
+                        ever writes to a given changelog stream (each instance 
of a job and each store
+                        needs its own changelog stream).
+                    </td>
+                </tr>
+
+                <tr>
+                    <th colspan="3" class="section" id="keyvalue">
+                        Using LevelDB for key-value storage<br>
+                        <span class="subtitle">
+                            (This section applies if you have set
+                            <a href="#stores-factory" 
class="property">stores.*.factory</a>
+                            <code>= 
org.apache.samza.storage.kv.KeyValueStorageEngineFactory</code>)
+                        </span>
+                    </th>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="stores-write-batch-size">stores.<span 
class="store">store-name</span>.<br>write.batch.size</td>
+                    <td class="default">500</td>
+                    <td class="description">
+                        For better write performance, the storage engine 
buffers writes and applies them
+                        to the underlying store in a batch. If the same key is 
written multiple times
+                        in quick succession, this buffer also deduplicates 
writes to the same key. This
+                        property is set to the number of key/value pairs that 
should be kept in this
+                        in-memory buffer, per task instance. The number cannot 
be greater than
+                        <a href="#stores-object-cache-size" 
class="property">stores.*.object.cache.size</a>.
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="stores-object-cache-size">stores.<span 
class="store">store-name</span>.<br>object.cache.size</td>
+                    <td class="default">1000</td>
+                    <td class="description">
+                        Samza maintains an additional cache in front of 
LevelDB for frequently-accessed
+                        objects. This cache contains deserialized objects 
(avoiding the deserialization
+                        overhead on cache hits), in contrast to the LevelDB 
block cache
+                        (<a href="#stores-container-cache-size-bytes" 
class="property">stores.*.container.cache.size.bytes</a>),
+                        which caches serialized objects. This property 
determines the number of objects
+                        to keep in Samza's cache, per task instance. This same 
cache is also used for
+                        write buffering (see <a 
href="#stores-write-batch-size" class="property">stores.*.write.batch.size</a>).
+                        A value of 0 disables all caching and batching.
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="stores-container-cache-size-bytes">stores.<span 
class="store">store-name</span>.container.<br>cache.size.bytes</td>
+                    <td class="default">104857600</td>
+                    <td class="description">
+                        The size of LevelDB's block cache in bytes, per 
container. If there are several
+                        task instances within one container, each is given a 
proportional share of this cache.
+                        Note that this is an off-heap memory allocation, so 
the container's total memory use
+                        is the maximum JVM heap size <em>plus</em> the size of 
this cache.
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="stores-container-write-buffer-size-bytes">stores.<span 
class="store">store-name</span>.container.<br>write.buffer.size.bytes</td>
+                    <td class="default">33554432</td>
+                    <td class="description">
+                        The amount of memory (in bytes) that LevelDB uses for 
buffering writes before they are
+                        written to disk, per container. If there are several 
task instances within one
+                        container, each is given a proportional share of this 
buffer. This setting also
+                        determines the size of LevelDB's segment files.
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="stores-compaction-delete-threshold">stores.<span 
class="store">store-name</span>.<br>compaction.delete.threshold</td>
+                    <td class="default">-1</td>
+                    <td class="description">
+                        Setting this property forces a LevelDB compaction to 
be performed after a certain
+                        number of keys have been deleted from the store. This 
is used to work around
+                        <a 
href="https://issues.apache.org/jira/browse/SAMZA-254">performance issues</a>
+                        in certain workloads.
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="stores-leveldb-compression">stores.<span 
class="store">store-name</span>.<br>leveldb.compression</td>
+                    <td class="default">snappy</td>
+                    <td class="description">
+                        This property controls whether LevelDB should compress 
data on disk and in the
+                        block cache. The following values are valid:
+                        <dl>
+                            <dt><code>snappy</code></dt>
+                            <dd>Compress data using the <a 
href="https://code.google.com/p/snappy/">Snappy</a> codec.</dd>
+                            <dt><code>none</code></dt>
+                            <dd>Do not compress data.</dd>
+                        </dl>
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="stores-leveldb-block-size-bytes">stores.<span 
class="store">store-name</span>.<br>leveldb.block.size.bytes</td>
+                    <td class="default">4096</td>
+                    <td class="description">
+                        If compression is enabled, LevelDB groups 
approximately this many uncompressed bytes
+                        into one compressed block. You probably don't need to 
change this property.
+                    </td>
+                </tr>
+
+                <tr>
+                    <th colspan="3" class="section" id="yarn">
+                        Running your job on a <a 
href="../jobs/yarn-jobs.html">YARN</a> cluster<br>
+                        <span class="subtitle">
+                            (This section applies if you have set
+                            <a href="#job-factory-class" 
class="property">job.factory.class</a>
+                            <code>= 
org.apache.samza.job.yarn.YarnJobFactory</code>)
+                        </span>
+                    </th>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="yarn-package-path">yarn.package.path</td>
+                    <td class="default"></td>
+                    <td class="description">
+                        <strong>Required for YARN jobs:</strong> The URL from 
which the job package can
+                        be downloaded, for example a <code>http://</code> or 
<code>hdfs://</code> URL.
+                        The job package is a .tar.gz file with a
+                        <a href="../jobs/packaging.html">specific directory 
structure</a>.
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="yarn-container-count">yarn.container.count</td>
+                    <td class="default">1</td>
+                    <td class="description">
+                        The number of YARN containers to request for running 
your job. This is the main parameter
+                        for controlling the scale (allocated computing 
resources) of your job: to increase the
+                        parallelism of processing, you need to increase the 
number of containers. The minimum is one
+                        container, and the maximum number of containers is the 
number of task instances (usually the
+                        <a 
href="../container/samza-container.html#tasks-and-partitions">number of input 
stream partitions</a>).
+                        Task instances are evenly distributed across the 
number of containers that you specify.
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="yarn-container-memory-mb">yarn.container.memory.mb</td>
+                    <td class="default">1024</td>
+                    <td class="description">
+                        How much memory, in megabytes, to request from YARN 
per container of your job. Along with
+                        <a href="#yarn-container-cpu-cores" 
class="property">yarn.container.cpu.cores</a>, this
+                        property determines how many containers YARN will run 
on one machine. If the container
+                        exceeds this limit, YARN will kill it, so it is 
important that the container's actual
+                        memory use remains below the limit. The amount of 
memory used is normally the JVM heap
+                        size (configured with <a href="#task-opts" 
class="property">task.opts</a>), plus the
+                        size of any off-heap memory allocation (for example
+                        <a href="#stores-container-cache-size-bytes" 
class="property">stores.*.container.cache.size.bytes</a>),
+                        plus a safety margin to allow for JVM overheads.
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="yarn-container-cpu-cores">yarn.container.cpu.cores</td>
+                    <td class="default">1</td>
+                    <td class="description">
+                        The number of CPU cores to request from YARN per 
container of your job. Each node in the
+                        YARN cluster has a certain number of CPU cores 
available, so this number (along with
+                        <a href="#yarn-container-memory-mb" 
class="property">yarn.container.memory.mb</a>)
+                        determines how many containers can be run on one 
machine. Samza is
+                        <a 
href="../container/event-loop.html">single-threaded</a> and designed to run on 
one
+                        CPU core, so you shouldn't normally need to change 
this property.
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="yarn-container-retry-count">yarn.container.<br>retry.count</td>
+                    <td class="default">8</td>
+                    <td class="description">
+                        If a container fails, it is automatically restarted by 
YARN. However, if a container keeps
+                        failing shortly after startup, that indicates a deeper 
problem, so we should kill the job
+                        rather than retrying indefinitely. This property 
determines the maximum number of times we are
+                        willing to restart a failed container in quick 
succession (the time period is configured with
+                        <a href="#yarn-container-retry-window-ms" 
class="property">yarn.container.retry.window.ms</a>).
+                        Each container in the job is counted separately. If 
this property is set to 0, any failed
+                        container immediately causes the whole job to fail. If 
it is set to a negative number, there
+                        is no limit on the number of retries.
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="yarn-container-retry-window-ms">yarn.container.<br>retry.window.ms</td>
+                    <td class="default">300000</td>
+                    <td class="description">
+                        This property determines how frequently a container is 
allowed to fail before we give up and
+                        fail the job. If the same container has failed more 
than
+                        <a href="#yarn-container-retry-count" 
class="property">yarn.container.retry.count</a>
+                        times, and the time between failures was less than 
this property
+                        <code>yarn.container.retry.window.ms</code> (in 
milliseconds), then we fail the job.
+                        There is no limit to the number of times we will 
restart a container if the time between
+                        failures is greater than 
<code>yarn.container.retry.window.ms</code>.
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="yarn-am-container-memory-mb">yarn.am.container.<br>memory.mb</td>
+                    <td class="default">1024</td>
+                    <td class="description">
+                        Each Samza job has one special container, the
+                        <a 
href="../yarn/application-master.html">ApplicationMaster</a> (AM), which 
manages the
+                        execution of the job. This property determines how 
much memory, in megabytes, to request
+                        from YARN for running the ApplicationMaster.
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" id="yarn-am-opts">yarn.am.opts</td>
+                    <td class="default"></td>
+                    <td class="description">
+                        Any JVM options to include in the command line when 
executing the Samza
+                        <a 
href="../yarn/application-master.html">ApplicationMaster</a>. For example, this 
can be
+                        used to set the JVM heap size, to tune the garbage 
collector, or to enable remote debugging.
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="yarn-am-poll-interval-ms">yarn.am.poll.interval.ms</td>
+                    <td class="default">1000</td>
+                    <td class="description">
+                        The Samza ApplicationMaster sends regular heartbeats 
to the YARN ResourceManager
+                        to confirm that it is alive. This property determines 
the time (in milliseconds)
+                        between heartbeats.
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="yarn-am-jmx-enabled">yarn.am.jmx.enabled</td>
+                    <td class="default">true</td>
+                    <td class="description">
+                        Determines whether a JMX server should be started on 
this job's YARN ApplicationMaster
+                        (<code>true</code> or <code>false</code>).
+                    </td>
+                </tr>
+
+                <tr>
+                    <th colspan="3" class="section" id="metrics"><a 
href="../container/metrics.html">Metrics</a></th>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="metrics-reporter-class">metrics.reporter.<br><span 
class="reporter">reporter-name</span>.class</td>
+                    <td class="default"></td>
+                    <td class="description">
+                        Samza automatically tracks various metrics which are 
useful for monitoring the health
+                        of a job, and you can also track <a 
href="../container/metrics.html">your own metrics</a>.
+                        With this property, you can define any number of 
<em>metrics reporters</em> which send
+                        the metrics to a system of your choice (for graphing, 
alerting etc). You give each reporter
+                        an arbitrary <span 
class="reporter">reporter-name</span>. To enable the reporter, you need
+                        to reference the <span 
class="reporter">reporter-name</span> in
+                        <a href="#metrics-reporters" 
class="property">metrics.reporters</a>.
+                        The value of this property is the fully-qualified name 
of a Java class that implements
+                        <a 
href="../api/javadocs/org/apache/samza/metrics/MetricsReporterFactory.html">MetricsReporterFactory</a>.
+                        Samza ships with these implementations by default:
+                        <dl>
+                            
<dt><code>org.apache.samza.metrics.reporter.JmxReporterFactory</code></dt>
+                            <dd>With this reporter, every container exposes 
its own metrics as JMX MBeans. The JMX
+                                server is started on a <a 
href="../container/jmx.html">random port</a> to avoid
+                                collisions between containers running on the 
same machine.</dd>
+                            
<dt><code>org.apache.samza.metrics.reporter.MetricsSnapshotReporterFactory</code></dt>
+                            <dd>This reporter sends the latest values of all 
metrics as messages to an output
+                                stream once per minute. The output stream is 
configured with
+                                <a href="#metrics-reporter-stream" 
class="property">metrics.reporter.*.stream</a>
+                                and it can use any system supported by 
Samza.</dd>
+                        </dl>
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="metrics-reporters">metrics.reporters</td>
+                    <td class="default"></td>
+                    <td class="description">
+                        If you have defined any metrics reporters with
+                        <a href="#metrics-reporter-class" 
class="property">metrics.reporter.*.class</a>, you
+                        need to list them here in order to enable them. The 
value of this property is a
+                        comma-separated list of <span 
class="reporter">reporter-name</span> tokens.
+                    </td>
+                </tr>
+
+                <tr>
+                    <td class="property" 
id="metrics-reporter-stream">metrics.reporter.<br><span 
class="reporter">reporter-name</span>.stream</td>
+                    <td class="default"></td>
+                    <td class="description">
+                        If you have registered the metrics reporter
+                        <a href="#metrics-reporter-class" 
class="property">metrics.reporter.*.class</a>
+                        <code>= 
org.apache.samza.metrics.reporter.MetricsSnapshotReporterFactory</code>,
+                        you need to set this property to configure the output 
stream to which the metrics data
+                        should be sent. The stream is given in the form
+                        <span class="system">system-name</span>.<span 
class="stream">stream-name</span>,
+                        and the system must be defined in the job 
configuration. It's fine for many different jobs
+                        to publish their metrics to the same metrics stream. 
Samza defines a simple
+                        <a href="../container/metrics.html">JSON encoding</a> 
for metrics; in order to use this
+                        encoding, you also need to configure a serde for the 
metrics stream:
+                        <ul>
+                            <li><a href="#systems-samza-msg-serde" 
class="property">systems.*.streams.*.samza.msg.serde</a>
+                                <code>= metrics-serde</code> (replacing the 
asterisks with the
+                                <span class="system">system-name</span> and 
<span class="stream">stream-name</span>
+                                of the metrics stream)</li>
+                            <li><a href="#serializers-registry-class" 
class="property">serializers.registry.metrics-serde.class</a>
+                                <code>= 
org.apache.samza.serializers.MetricsSnapshotSerdeFactory</code>
+                                (registering the serde under a <span 
class="serde">serde-name</span> of
+                                <code>metrics-serde</code>)</li>
+                        </ul>
+                    </td>
+                </tr>
+            </tbody>
+        </table>
+    </body>
+</html>


Added: incubator/samza/site/learn/documentation/latest/jobs/configuration.html
URL: 
http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/latest/jobs/configuration.html?rev=1618097&view=auto
==============================================================================
--- incubator/samza/site/learn/documentation/latest/jobs/configuration.html 
(added)
+++ incubator/samza/site/learn/documentation/latest/jobs/configuration.html Fri 
Aug 15 05:28:03 2014
@@ -0,0 +1,204 @@
+<!DOCTYPE html>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <title>Samza - Configuration</title>
+    <link href='/css/ropa-sans.css' rel='stylesheet' type='text/css'/>
+    <link href="/css/bootstrap.min.css" rel="stylesheet"/>
+    <link href="/css/font-awesome.min.css" rel="stylesheet"/>
+    <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
+    <link rel="icon" type="image/png" href="/img/samza-icon.png">
+  </head>
+  <body>
+    <div class="wrapper">
+      <div class="wrapper-content">
+
+        <div class="masthead">
+          <div class="container">
+            <div class="masthead-logo">
+              <a href="/" class="logo">samza</a>
+            </div>
+            <div class="masthead-icons">
+              <div class="pull-right">
+                <a href="/startup/download"><i class="fa 
fa-arrow-circle-o-down masthead-icon"></i></a>
+                <a 
href="https://git-wip-us.apache.org/repos/asf?p=incubator-samza.git;a=tree" 
target="_blank"><i class="fa fa-code masthead-icon" style="font-weight: 
bold;"></i></a>
+                <a href="https://twitter.com/samzastream" target="_blank"><i 
class="fa fa-twitter masthead-icon"></i></a>
+                
+                  <a 
href="http://samza.incubator.apache.org/learn/documentation/0.7.0/jobs/configuration.html"><i
 class="fa fa-history masthead-icon"></i></a>
+                
+              </div>
+            </div>
+          </div><!-- /.container -->
+        </div>
+
+        <div class="container">
+          <div class="menu">
+            <h1><i class="fa fa-rocket"></i> Getting Started</h1>
+            <ul>
+              <li><a href="/startup/hello-samza/latest">Hello Samza</a></li>
+              <li><a href="/startup/download">Download</a></li>
+            </ul>
+
+            <h1><i class="fa fa-book"></i> Learn</h1>
+            <ul>
+              <li><a href="/learn/documentation/latest">Documentation</a></li>
+              <li><a href="/learn/tutorials/latest">Tutorials</a></li>
+              <li><a href="http://wiki.apache.org/samza/FAQ">FAQ</a></li>
+              <li><a href="http://wiki.apache.org/samza">Wiki</a></li>
+              <li><a href="http://wiki.apache.org/samza/PapersAndTalks">Papers 
&amp; Talks</a></li>
+              <li><a href="http://blogs.apache.org/samza">Blog</a></li>
+            </ul>
+
+            <h1><i class="fa fa-comments"></i> Community</h1>
+            <ul>
+              <li><a href="/community/mailing-lists.html">Mailing 
Lists</a></li>
+              <li><a href="/community/irc.html">IRC</a></li>
+              <li><a 
href="https://issues.apache.org/jira/browse/SAMZA">Bugs</a></li>
+              <li><a href="http://wiki.apache.org/samza/PoweredBy">Powered 
by</a></li>
+              <li><a 
href="http://wiki.apache.org/samza/Ecosystem">Ecosystem</a></li>
+              <li><a href="/community/committers.html">Committers</a></li>
+            </ul>
+
+            <h1><i class="fa fa-code"></i> Contribute</h1>
+            <ul>
+              <li><a href="/contribute/rules.html">Rules</a></li>
+              <li><a href="/contribute/coding-guide.html">Coding Guide</a></li>
+              <li><a href="/contribute/projects.html">Projects</a></li>
+              <li><a href="/contribute/seps.html">SEPs</a></li>
+              <li><a href="/contribute/code.html">Code</a></li>
+              <li><a href="https://reviews.apache.org/groups/samza">Review 
Board</a></li>
+              <li><a href="https://builds.apache.org/">Unit Tests</a></li>
+              <li><a href="/contribute/disclaimer.html">Disclaimer</a></li>
+            </ul>
+
+            <h1><i class="fa fa-history"></i> Archive</h1>
+            <ul>
+              <li><a href="/archive/index.html">0.7.0</a></li>
+            </ul>
+          </div>
+
+          <div class="content">
+            <!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<h2>Configuration</h2>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<p>All Samza jobs have a configuration file that defines the job. A very basic 
configuration file looks like this:</p>
+
+<div class="highlight"><pre><code class="jproperties"><span class="c"># 
Job</span>
+<span class="na">job.factory.class</span><span class="o">=</span><span 
class="s">samza.job.local.ThreadJobFactory</span>
+<span class="na">job.name</span><span class="o">=</span><span 
class="s">hello-world</span>
+
+<span class="c"># Task</span>
+<span class="na">task.class</span><span class="o">=</span><span 
class="s">samza.task.example.MyJavaStreamerTask</span>
+<span class="na">task.inputs</span><span class="o">=</span><span 
class="s">example-system.example-stream</span>
+
+<span class="c"># Serializers</span>
+<span class="na">serializers.registry.json.class</span><span 
class="o">=</span><span 
class="s">org.apache.samza.serializers.JsonSerdeFactory</span>
+<span class="na">serializers.registry.string.class</span><span 
class="o">=</span><span 
class="s">org.apache.samza.serializers.StringSerdeFactory</span>
+
+<span class="c"># Systems</span>
+<span class="na">systems.example-system.samza.factory</span><span 
class="o">=</span><span 
class="s">samza.stream.example.ExampleConsumerFactory</span>
+<span class="na">systems.example-system.samza.key.serde</span><span 
class="o">=</span><span class="s">string</span>
+<span class="na">systems.example-system.samza.msg.serde</span><span 
class="o">=</span><span class="s">json</span></code></pre></div>
+
+<p>There are four major sections to a configuration file:</p>
+
+<ol>
+<li>The job section defines things like the name of the job, and whether to 
use the YarnJobFactory or ProcessJobFactory/ThreadJobFactory.</li>
+<li>The task section is where you specify the class name for your <a 
href="../api/overview.html">StreamTask</a>. It&rsquo;s also where you define 
what the <a href="../container/streams.html">input streams</a> are for your 
task.</li>
+<li>The serializers section defines the classes of the <a 
href="../container/serialization.html">serdes</a> used for serialization and 
deserialization of specific objects that are received and sent along different 
streams.</li>
+<li>The system section defines systems that your StreamTask can read from 
along with the types of serdes used for sending keys and messages from that 
system. Usually, you&rsquo;ll define a Kafka system, if you&rsquo;re reading 
from Kafka, although you can also specify your own self-implemented 
Samza-compatible systems. See the <a 
href="/startup/hello-samza/latest">hello-samza example project</a>&rsquo;s 
Wikipedia system for a good example of a self-implemented system.</li>
+</ol>
+
+<h3 id="required-configuration">Required Configuration</h3>
+
+<p>Configuration keys that absolutely must be defined for a Samza job are:</p>
+
+<ul>
+<li><code>job.factory.class</code></li>
+<li><code>job.name</code></li>
+<li><code>task.class</code></li>
+<li><code>task.inputs</code></li>
+</ul>
+
+<h3 id="configuration-keys">Configuration Keys</h3>
+
+<p>A complete list of configuration keys can be found on the <a 
href="configuration-table.html">Configuration Table</a> page.</p>
+
+<h2 id="packaging-&raquo;"><a href="packaging.html">Packaging &raquo;</a></h2>
+
+
+          </div>
+        </div>
+
+      </div><!-- /.wrapper-content -->
+    </div><!-- /.wrapper -->
+
+    <div class="footer">
+      <div class="container">
+        <!-- nothing for now. -->
+      </div>
+    </div>
+
+    <!-- Google Analytics -->
+    <script>
+      
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+      (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
+      
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+      
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+
+      ga('create', 'UA-43122768-1', 'apache.org');
+      ga('send', 'pageview');
+
+    </script>
+  </body>
+</html>

svn commit: r1618097 [45/48] - in /incubator/samza/site: ./ archive/ community/ contribute/ css/ fonts/ img/latest/ img/latest/learn/ img/latest/learn/documentation/ img/latest/learn/documentation/comparisons/ img/latest/learn/documentation/container/ ...

Reply via email to