Added: incubator/samza/site/learn/documentation/latest/jobs/job-runner.html
URL: 
http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/latest/jobs/job-runner.html?rev=1618097&view=auto
==============================================================================
--- incubator/samza/site/learn/documentation/latest/jobs/job-runner.html (added)
+++ incubator/samza/site/learn/documentation/latest/jobs/job-runner.html Fri 
Aug 15 05:28:03 2014
@@ -0,0 +1,193 @@
+<!DOCTYPE html>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <title>Samza - JobRunner</title>
+    <link href='/css/ropa-sans.css' rel='stylesheet' type='text/css'/>
+    <link href="/css/bootstrap.min.css" rel="stylesheet"/>
+    <link href="/css/font-awesome.min.css" rel="stylesheet"/>
+    <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
+    <link rel="icon" type="image/png" href="/img/samza-icon.png">
+  </head>
+  <body>
+    <div class="wrapper">
+      <div class="wrapper-content">
+
+        <div class="masthead">
+          <div class="container">
+            <div class="masthead-logo">
+              <a href="/" class="logo">samza</a>
+            </div>
+            <div class="masthead-icons">
+              <div class="pull-right">
+                <a href="/startup/download"><i class="fa 
fa-arrow-circle-o-down masthead-icon"></i></a>
+                <a 
href="https://git-wip-us.apache.org/repos/asf?p=incubator-samza.git;a=tree" 
target="_blank"><i class="fa fa-code masthead-icon" style="font-weight: 
bold;"></i></a>
+                <a href="https://twitter.com/samzastream" target="_blank"><i 
class="fa fa-twitter masthead-icon"></i></a>
+                
+                  <a 
href="http://samza.incubator.apache.org/learn/documentation/0.7.0/jobs/job-runner.html"><i
 class="fa fa-history masthead-icon"></i></a>
+                
+              </div>
+            </div>
+          </div><!-- /.container -->
+        </div>
+
+        <div class="container">
+          <div class="menu">
+            <h1><i class="fa fa-rocket"></i> Getting Started</h1>
+            <ul>
+              <li><a href="/startup/hello-samza/latest">Hello Samza</a></li>
+              <li><a href="/startup/download">Download</a></li>
+            </ul>
+
+            <h1><i class="fa fa-book"></i> Learn</h1>
+            <ul>
+              <li><a href="/learn/documentation/latest">Documentation</a></li>
+              <li><a href="/learn/tutorials/latest">Tutorials</a></li>
+              <li><a href="http://wiki.apache.org/samza/FAQ">FAQ</a></li>
+              <li><a href="http://wiki.apache.org/samza">Wiki</a></li>
+              <li><a href="http://wiki.apache.org/samza/PapersAndTalks">Papers 
&amp; Talks</a></li>
+              <li><a href="http://blogs.apache.org/samza">Blog</a></li>
+            </ul>
+
+            <h1><i class="fa fa-comments"></i> Community</h1>
+            <ul>
+              <li><a href="/community/mailing-lists.html">Mailing 
Lists</a></li>
+              <li><a href="/community/irc.html">IRC</a></li>
+              <li><a 
href="https://issues.apache.org/jira/browse/SAMZA">Bugs</a></li>
+              <li><a href="http://wiki.apache.org/samza/PoweredBy">Powered 
by</a></li>
+              <li><a 
href="http://wiki.apache.org/samza/Ecosystem">Ecosystem</a></li>
+              <li><a href="/community/committers.html">Committers</a></li>
+            </ul>
+
+            <h1><i class="fa fa-code"></i> Contribute</h1>
+            <ul>
+              <li><a href="/contribute/rules.html">Rules</a></li>
+              <li><a href="/contribute/coding-guide.html">Coding Guide</a></li>
+              <li><a href="/contribute/projects.html">Projects</a></li>
+              <li><a href="/contribute/seps.html">SEPs</a></li>
+              <li><a href="/contribute/code.html">Code</a></li>
+              <li><a href="https://reviews.apache.org/groups/samza">Review 
Board</a></li>
+              <li><a href="https://builds.apache.org/">Unit Tests</a></li>
+              <li><a href="/contribute/disclaimer.html">Disclaimer</a></li>
+            </ul>
+
+            <h1><i class="fa fa-history"></i> Archive</h1>
+            <ul>
+              <li><a href="/archive/index.html">0.7.0</a></li>
+            </ul>
+          </div>
+
+          <div class="content">
+            <!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<h2>JobRunner</h2>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<p>Samza jobs are started using a script called run-job.sh.</p>
+
+<div class="highlight"><pre><code 
class="bash">samza-example/target/bin/run-job.sh <span class="se">\</span>
+  --config-factory<span 
class="o">=</span>samza.config.factories.PropertiesConfigFactory <span 
class="se">\</span>
+  --config-path<span class="o">=</span>file://<span 
class="nv">$PWD</span>/config/hello-world.properties</code></pre></div>
+
+<p>You provide two parameters to the run-job.sh script. One is the config 
location, and the other is a factory class that is used to read your 
configuration file. The run-job.sh script is actually executing a Samza class 
called JobRunner. The JobRunner uses your ConfigFactory to get a Config object 
from the config path.</p>
+
+<div class="highlight"><pre><code class="java"><span class="kd">public</span> 
<span class="kd">interface</span> <span class="nc">ConfigFactory</span> <span 
class="o">{</span>
+  <span class="n">Config</span> <span class="nf">getConfig</span><span 
class="o">(</span><span class="n">URI</span> <span 
class="n">configUri</span><span class="o">);</span>
+<span class="o">}</span></code></pre></div>
+
+<p>The Config object is just a wrapper around Map&lt;String, String&gt;, with some 
nice helper methods. Out of the box, Samza ships with the 
PropertiesConfigFactory, but developers can implement any kind of ConfigFactory 
they wish.</p>
+
+<p>Once the JobRunner gets your configuration, it gives your configuration to 
the StreamJobFactory class defined by the &ldquo;job.factory&rdquo; property. 
Samza ships with three job factory implementations: ThreadJobFactory, 
ProcessJobFactory and YarnJobFactory. The StreamJobFactory&rsquo;s 
responsibility is to give the JobRunner a job that it can run.</p>
+
+<div class="highlight"><pre><code class="java"><span class="kd">public</span> 
<span class="kd">interface</span> <span class="nc">StreamJob</span> <span 
class="o">{</span>
+  <span class="n">StreamJob</span> <span class="nf">submit</span><span 
class="o">();</span>
+
+  <span class="n">StreamJob</span> <span class="nf">kill</span><span 
class="o">();</span>
+
+  <span class="n">ApplicationStatus</span> <span 
class="nf">waitForFinish</span><span class="o">(</span><span 
class="kt">long</span> <span class="n">timeoutMs</span><span class="o">);</span>
+
+  <span class="n">ApplicationStatus</span> <span 
class="nf">waitForStatus</span><span class="o">(</span><span 
class="n">ApplicationStatus</span> <span class="n">status</span><span 
class="o">,</span> <span class="kt">long</span> <span 
class="n">timeoutMs</span><span class="o">);</span>
+
+  <span class="n">ApplicationStatus</span> <span 
class="nf">getStatus</span><span class="o">();</span>
+<span class="o">}</span></code></pre></div>
+
+<p>Once the JobRunner gets a job, it calls submit() on the job. This method is 
what tells the StreamJob implementation to start the SamzaContainer. In the 
case of LocalJobRunner, it uses a run-container.sh script to execute the 
SamzaContainer in a separate process, which will start one SamzaContainer 
locally on the machine that you ran run-job.sh on.</p>
+
+<p>This flow differs slightly when you use YARN, but we&rsquo;ll get to that 
later.</p>
+
+<h2 id="configuration-&raquo;"><a href="configuration.html">Configuration 
&raquo;</a></h2>
+
+
+          </div>
+        </div>
+
+      </div><!-- /.wrapper-content -->
+    </div><!-- /.wrapper -->
+
+    <div class="footer">
+      <div class="container">
+        <!-- nothing for now. -->
+      </div>
+    </div>
+
+    <!-- Google Analytics -->
+    <script>
+      
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+      (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
+      
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+      
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+
+      ga('create', 'UA-43122768-1', 'apache.org');
+      ga('send', 'pageview');
+
+    </script>
+  </body>
+</html>

Added: incubator/samza/site/learn/documentation/latest/jobs/logging.html
URL: 
http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/latest/jobs/logging.html?rev=1618097&view=auto
==============================================================================
--- incubator/samza/site/learn/documentation/latest/jobs/logging.html (added)
+++ incubator/samza/site/learn/documentation/latest/jobs/logging.html Fri Aug 
15 05:28:03 2014
@@ -0,0 +1,222 @@
+<!DOCTYPE html>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <title>Samza - Logging</title>
+    <link href='/css/ropa-sans.css' rel='stylesheet' type='text/css'/>
+    <link href="/css/bootstrap.min.css" rel="stylesheet"/>
+    <link href="/css/font-awesome.min.css" rel="stylesheet"/>
+    <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
+    <link rel="icon" type="image/png" href="/img/samza-icon.png">
+  </head>
+  <body>
+    <div class="wrapper">
+      <div class="wrapper-content">
+
+        <div class="masthead">
+          <div class="container">
+            <div class="masthead-logo">
+              <a href="/" class="logo">samza</a>
+            </div>
+            <div class="masthead-icons">
+              <div class="pull-right">
+                <a href="/startup/download"><i class="fa 
fa-arrow-circle-o-down masthead-icon"></i></a>
+                <a 
href="https://git-wip-us.apache.org/repos/asf?p=incubator-samza.git;a=tree" 
target="_blank"><i class="fa fa-code masthead-icon" style="font-weight: 
bold;"></i></a>
+                <a href="https://twitter.com/samzastream" target="_blank"><i 
class="fa fa-twitter masthead-icon"></i></a>
+                
+                  <a 
href="http://samza.incubator.apache.org/learn/documentation/0.7.0/jobs/logging.html"><i
 class="fa fa-history masthead-icon"></i></a>
+                
+              </div>
+            </div>
+          </div><!-- /.container -->
+        </div>
+
+        <div class="container">
+          <div class="menu">
+            <h1><i class="fa fa-rocket"></i> Getting Started</h1>
+            <ul>
+              <li><a href="/startup/hello-samza/latest">Hello Samza</a></li>
+              <li><a href="/startup/download">Download</a></li>
+            </ul>
+
+            <h1><i class="fa fa-book"></i> Learn</h1>
+            <ul>
+              <li><a href="/learn/documentation/latest">Documentation</a></li>
+              <li><a href="/learn/tutorials/latest">Tutorials</a></li>
+              <li><a href="http://wiki.apache.org/samza/FAQ">FAQ</a></li>
+              <li><a href="http://wiki.apache.org/samza">Wiki</a></li>
+              <li><a href="http://wiki.apache.org/samza/PapersAndTalks">Papers 
&amp; Talks</a></li>
+              <li><a href="http://blogs.apache.org/samza">Blog</a></li>
+            </ul>
+
+            <h1><i class="fa fa-comments"></i> Community</h1>
+            <ul>
+              <li><a href="/community/mailing-lists.html">Mailing 
Lists</a></li>
+              <li><a href="/community/irc.html">IRC</a></li>
+              <li><a 
href="https://issues.apache.org/jira/browse/SAMZA">Bugs</a></li>
+              <li><a href="http://wiki.apache.org/samza/PoweredBy">Powered 
by</a></li>
+              <li><a 
href="http://wiki.apache.org/samza/Ecosystem">Ecosystem</a></li>
+              <li><a href="/community/committers.html">Committers</a></li>
+            </ul>
+
+            <h1><i class="fa fa-code"></i> Contribute</h1>
+            <ul>
+              <li><a href="/contribute/rules.html">Rules</a></li>
+              <li><a href="/contribute/coding-guide.html">Coding Guide</a></li>
+              <li><a href="/contribute/projects.html">Projects</a></li>
+              <li><a href="/contribute/seps.html">SEPs</a></li>
+              <li><a href="/contribute/code.html">Code</a></li>
+              <li><a href="https://reviews.apache.org/groups/samza">Review 
Board</a></li>
+              <li><a href="https://builds.apache.org/">Unit Tests</a></li>
+              <li><a href="/contribute/disclaimer.html">Disclaimer</a></li>
+            </ul>
+
+            <h1><i class="fa fa-history"></i> Archive</h1>
+            <ul>
+              <li><a href="/archive/index.html">0.7.0</a></li>
+            </ul>
+          </div>
+
+          <div class="content">
+            <!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<h2>Logging</h2>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<p>Samza uses <a href="http://www.slf4j.org/">SLF4J</a> for all of its 
logging. By default, Samza only depends on slf4j-api, so you must add an SLF4J 
runtime dependency to your Samza packages for whichever underlying logging 
platform you wish to use.</p>
+
+<h3 id="log4j">Log4j</h3>
+
+<p>The <a href="/startup/hello-samza/latest">hello-samza</a> project shows how 
to use <a href="http://logging.apache.org/log4j/1.2/">log4j</a> with Samza. To 
turn on log4j logging, you just need to make sure slf4j-log4j12 is in your 
SamzaContainer&rsquo;s classpath. In Maven, this can be done by adding the 
following dependency to your Samza package project.</p>
+
+<div class="highlight"><pre><code class="xml"><span 
class="nt">&lt;dependency&gt;</span>
+  <span class="nt">&lt;groupId&gt;</span>org.slf4j<span 
class="nt">&lt;/groupId&gt;</span>
+  <span class="nt">&lt;artifactId&gt;</span>slf4j-log4j12<span 
class="nt">&lt;/artifactId&gt;</span>
+  <span class="nt">&lt;scope&gt;</span>runtime<span 
class="nt">&lt;/scope&gt;</span>
+  <span class="nt">&lt;version&gt;</span>1.6.2<span 
class="nt">&lt;/version&gt;</span>
+<span class="nt">&lt;/dependency&gt;</span></code></pre></div>
+
+<p>If you&rsquo;re not using Maven, just make sure that slf4j-log4j12 ends up 
in your Samza package&rsquo;s lib directory.</p>
+
+<h4 id="log4j-configuration">Log4j configuration</h4>
+
+<p>Samza&rsquo;s <a href="packaging.html">run-class.sh</a> script will 
automatically set the following setting if log4j.xml exists in your <a 
href="packaging.html">Samza package&rsquo;s</a> lib directory.</p>
+
+<div class="highlight"><pre><code class="bash">-Dlog4j.configuration<span 
class="o">=</span>file:<span 
class="nv">$base_dir</span>/lib/log4j.xml</code></pre></div>
+
+<p>The <a href="packaging.html">run-class.sh</a> script will also set the 
following Java system properties:</p>
+
+<div class="highlight"><pre><code class="bash">-Dsamza.log.dir<span 
class="o">=</span><span class="nv">$SAMZA_LOG_DIR</span> 
-Dsamza.container.name<span class="o">=</span><span 
class="nv">$SAMZA_CONTAINER_NAME</span></code></pre></div>
+
+<p>These settings are very useful if you&rsquo;re using a file-based appender. 
For example, you can use a daily rolling appender by configuring log4j.xml like 
this:</p>
+
+<div class="highlight"><pre><code class="xml"><span 
class="nt">&lt;appender</span> <span class="na">name=</span><span 
class="s">&quot;RollingAppender&quot;</span> <span 
class="na">class=</span><span 
class="s">&quot;org.apache.log4j.DailyRollingFileAppender&quot;</span><span 
class="nt">&gt;</span>
+   <span class="nt">&lt;param</span> <span class="na">name=</span><span 
class="s">&quot;File&quot;</span> <span class="na">value=</span><span 
class="s">&quot;${samza.log.dir}/${samza.container.name}.log&quot;</span> <span 
class="nt">/&gt;</span>
+   <span class="nt">&lt;param</span> <span class="na">name=</span><span 
class="s">&quot;DatePattern&quot;</span> <span class="na">value=</span><span 
class="s">&quot;&#39;.&#39;yyyy-MM-dd&quot;</span> <span class="nt">/&gt;</span>
+   <span class="nt">&lt;layout</span> <span class="na">class=</span><span 
class="s">&quot;org.apache.log4j.PatternLayout&quot;</span><span 
class="nt">&gt;</span>
+    <span class="nt">&lt;param</span> <span class="na">name=</span><span 
class="s">&quot;ConversionPattern&quot;</span> <span 
class="na">value=</span><span class="s">&quot;%d{yyyy-MM-dd HH:mm:ss} %c{1} 
[%p] %m%n&quot;</span> <span class="nt">/&gt;</span>
+   <span class="nt">&lt;/layout&gt;</span>
+<span class="nt">&lt;/appender&gt;</span></code></pre></div>
+
+<p>Setting up a file-based appender is recommended as a better alternative to 
using standard out. Standard out log files (see below) don&rsquo;t roll, and 
can get quite large if used for logging.</p>
+
+<p><strong>NOTE:</strong> If you use the <code>task.opts</code> configuration 
property, the log configuration is disrupted. This is a known bug; please see 
<a href="https://issues.apache.org/jira/browse/SAMZA-109">SAMZA-109</a> for a 
workaround.</p>
+
+<h3 id="log-directory">Log Directory</h3>
+
+<p>Samza will look for the <code>SAMZA_LOG_DIR</code> environment variable 
when it executes. If this variable is defined, all logs will be written to this 
directory. If the environment variable is empty, or not defined, then Samza 
will use /tmp. This environment variable can also be referenced inside 
log4j.xml files (see above).</p>
+
+<h3 id="garbage-collection-logging">Garbage Collection Logging</h3>
+
+<p>Samza will automatically set the following garbage collection 
logging setting, and will output it to <code>$SAMZA_LOG_DIR/gc.log</code>.</p>
+
+<div class="highlight"><pre><code class="bash">-XX:+PrintGCDateStamps 
-Xloggc:<span class="nv">$SAMZA_LOG_DIR</span>/gc.log</code></pre></div>
+
+<h4 id="rotation">Rotation</h4>
+
+<p>In older versions of Java, it is impossible to have GC logs roll over based 
on time or size without the use of a secondary tool. This means that your GC 
logs will never be deleted until a Samza job ceases to run. As of <a 
href="http://www.oracle.com/technetwork/java/javase/2col/6u34-bugfixes-1733379.html">Java
 6 Update 34</a>, and <a 
href="http://www.oracle.com/technetwork/java/javase/7u2-relnotes-1394228.html">Java
 7 Update 2</a>, <a 
href="http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6941923">new GC 
command line switches</a> have been added to support this functionality. If you 
are using a version of Java that supports GC log rotation, it&rsquo;s highly 
recommended that you turn it on.</p>
+
+<h3 id="yarn">YARN</h3>
+
+<p>When a Samza job executes on a YARN grid, the <code>$SAMZA_LOG_DIR</code> 
environment variable will point to a directory that is secured such that only 
the user executing the Samza job can read and write to it, if YARN is <a 
href="http://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/ClusterSetup.html">securely
 configured</a>.</p>
+
+<h4 id="stdout">STDOUT</h4>
+
+<p>Samza&rsquo;s <a 
href="../yarn/application-master.html">ApplicationMaster</a> pipes all STDOUT 
and STDERR output to logs/stdout and logs/stderr, respectively. These files are 
never rotated.</p>
+
+<h2 id="reprocessing-&raquo;"><a href="reprocessing.html">Reprocessing 
&raquo;</a></h2>
+
+
+          </div>
+        </div>
+
+      </div><!-- /.wrapper-content -->
+    </div><!-- /.wrapper -->
+
+    <div class="footer">
+      <div class="container">
+        <!-- nothing for now. -->
+      </div>
+    </div>
+
+    <!-- Google Analytics -->
+    <script>
+      
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+      (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
+      
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+      
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+
+      ga('create', 'UA-43122768-1', 'apache.org');
+      ga('send', 'pageview');
+
+    </script>
+  </body>
+</html>

Added: incubator/samza/site/learn/documentation/latest/jobs/packaging.html
URL: 
http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/latest/jobs/packaging.html?rev=1618097&view=auto
==============================================================================
--- incubator/samza/site/learn/documentation/latest/jobs/packaging.html (added)
+++ incubator/samza/site/learn/documentation/latest/jobs/packaging.html Fri Aug 
15 05:28:03 2014
@@ -0,0 +1,180 @@
+<!DOCTYPE html>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <title>Samza - Packaging</title>
+    <link href='/css/ropa-sans.css' rel='stylesheet' type='text/css'/>
+    <link href="/css/bootstrap.min.css" rel="stylesheet"/>
+    <link href="/css/font-awesome.min.css" rel="stylesheet"/>
+    <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
+    <link rel="icon" type="image/png" href="/img/samza-icon.png">
+  </head>
+  <body>
+    <div class="wrapper">
+      <div class="wrapper-content">
+
+        <div class="masthead">
+          <div class="container">
+            <div class="masthead-logo">
+              <a href="/" class="logo">samza</a>
+            </div>
+            <div class="masthead-icons">
+              <div class="pull-right">
+                <a href="/startup/download"><i class="fa 
fa-arrow-circle-o-down masthead-icon"></i></a>
+                <a 
href="https://git-wip-us.apache.org/repos/asf?p=incubator-samza.git;a=tree" 
target="_blank"><i class="fa fa-code masthead-icon" style="font-weight: 
bold;"></i></a>
+                <a href="https://twitter.com/samzastream" target="_blank"><i 
class="fa fa-twitter masthead-icon"></i></a>
+                
+                  <a 
href="http://samza.incubator.apache.org/learn/documentation/0.7.0/jobs/packaging.html"><i
 class="fa fa-history masthead-icon"></i></a>
+                
+              </div>
+            </div>
+          </div><!-- /.container -->
+        </div>
+
+        <div class="container">
+          <div class="menu">
+            <h1><i class="fa fa-rocket"></i> Getting Started</h1>
+            <ul>
+              <li><a href="/startup/hello-samza/latest">Hello Samza</a></li>
+              <li><a href="/startup/download">Download</a></li>
+            </ul>
+
+            <h1><i class="fa fa-book"></i> Learn</h1>
+            <ul>
+              <li><a href="/learn/documentation/latest">Documentation</a></li>
+              <li><a href="/learn/tutorials/latest">Tutorials</a></li>
+              <li><a href="http://wiki.apache.org/samza/FAQ">FAQ</a></li>
+              <li><a href="http://wiki.apache.org/samza">Wiki</a></li>
+              <li><a href="http://wiki.apache.org/samza/PapersAndTalks">Papers 
&amp; Talks</a></li>
+              <li><a href="http://blogs.apache.org/samza">Blog</a></li>
+            </ul>
+
+            <h1><i class="fa fa-comments"></i> Community</h1>
+            <ul>
+              <li><a href="/community/mailing-lists.html">Mailing 
Lists</a></li>
+              <li><a href="/community/irc.html">IRC</a></li>
+              <li><a 
href="https://issues.apache.org/jira/browse/SAMZA">Bugs</a></li>
+              <li><a href="http://wiki.apache.org/samza/PoweredBy">Powered 
by</a></li>
+              <li><a 
href="http://wiki.apache.org/samza/Ecosystem">Ecosystem</a></li>
+              <li><a href="/community/committers.html">Committers</a></li>
+            </ul>
+
+            <h1><i class="fa fa-code"></i> Contribute</h1>
+            <ul>
+              <li><a href="/contribute/rules.html">Rules</a></li>
+              <li><a href="/contribute/coding-guide.html">Coding Guide</a></li>
+              <li><a href="/contribute/projects.html">Projects</a></li>
+              <li><a href="/contribute/seps.html">SEPs</a></li>
+              <li><a href="/contribute/code.html">Code</a></li>
+              <li><a href="https://reviews.apache.org/groups/samza">Review 
Board</a></li>
+              <li><a href="https://builds.apache.org/">Unit Tests</a></li>
+              <li><a href="/contribute/disclaimer.html">Disclaimer</a></li>
+            </ul>
+
+            <h1><i class="fa fa-history"></i> Archive</h1>
+            <ul>
+              <li><a href="/archive/index.html">0.7.0</a></li>
+            </ul>
+          </div>
+
+          <div class="content">
+            <!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<h2>Packaging</h2>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<p>The <a href="job-runner.html">JobRunner</a> page talks about run-job.sh, 
and how it&rsquo;s used to start a job either locally 
(ProcessJobFactory/ThreadJobFactory) or with YARN (YarnJobFactory). In the 
diagram that shows the execution flow, it also shows a run-container.sh script. 
This script, along with a run-am.sh script, are what Samza actually calls to 
execute its code.</p>
+<div class="highlight"><pre><code class="language-text" 
data-lang="text">bin/run-am.sh
+bin/run-container.sh
+</code></pre></div>
+<p>The run-container.sh script is responsible for starting the <a 
href="../container/samza-container.html">SamzaContainer</a>. The run-am.sh 
script is responsible for starting Samza&rsquo;s application master for YARN. 
Thus, the run-am.sh script is only used by the YarnJob, but both YarnJob and 
ProcessJob use run-container.sh.</p>
+
+<p>Typically, these two scripts are bundled into a tar.gz file that has a 
structure like this:</p>
+<div class="highlight"><pre><code class="language-text" 
data-lang="text">bin/run-am.sh
+bin/run-class.sh
+bin/run-job.sh
+bin/run-container.sh
+lib/*.jar
+</code></pre></div>
+<p>To run a Samza job, you un-zip its tar.gz file, and execute the run-job.sh 
script, as defined in the JobRunner section. There are a number of interesting 
implications from this packaging scheme. First, you&rsquo;ll notice that there 
is no configuration in the package. Second, you&rsquo;ll notice that the lib 
directory contains all JARs that you&rsquo;ll need to run your Samza job.</p>
+
+<p>The reason that configuration is decoupled from your Samza job packaging is 
that it allows configuration to be updated without having to re-build the 
entire Samza package. This makes life easier for everyone when you just need to 
tweak one parameter, and don&rsquo;t want to have to worry about which branch 
your package was built from, or whether trunk is in a stable state. It also has 
the added benefit of forcing configuration to be fully resolved at runtime. 
This means that the configuration for a job is resolved at the time 
run-job.sh is called (using --config-path and --config-factory 
parameters), and from that point on, the configuration is immutable, and passed 
where it needs to be by Samza (and YARN, if you&rsquo;re using it).</p>
+
+<p>The second statement, that your Samza package contains all JARs that it 
needs to run, means that a Samza package is entirely self contained. This 
allows Samza jobs to run on independent Samza versions without conflicting with 
each other. This is in contrast to Hadoop, where JARs are pulled in from the 
local machine that the job is running on (using environment variables). With 
Samza, you might run your job on version 0.7.0, and someone else might run 
their job on version 0.8.0. There is no problem with this.</p>
+
+<h2 id="yarn-jobs-&raquo;"><a href="yarn-jobs.html">YARN Jobs &raquo;</a></h2>
+
+
+          </div>
+        </div>
+
+      </div><!-- /.wrapper-content -->
+    </div><!-- /.wrapper -->
+
+    <div class="footer">
+      <div class="container">
+        <!-- nothing for now. -->
+      </div>
+    </div>
+
+    <!-- Google Analytics -->
+    <script>
+      
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+      (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
+      
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+      
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+
+      ga('create', 'UA-43122768-1', 'apache.org');
+      ga('send', 'pageview');
+
+    </script>
+  </body>
+</html>

Added: incubator/samza/site/learn/documentation/latest/jobs/reprocessing.html
URL: 
http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/latest/jobs/reprocessing.html?rev=1618097&view=auto
==============================================================================
--- incubator/samza/site/learn/documentation/latest/jobs/reprocessing.html 
(added)
+++ incubator/samza/site/learn/documentation/latest/jobs/reprocessing.html Fri 
Aug 15 05:28:03 2014
@@ -0,0 +1,231 @@
+<!DOCTYPE html>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <title>Samza - Reprocessing previously processed data</title>
+    <link href='/css/ropa-sans.css' rel='stylesheet' type='text/css'/>
+    <link href="/css/bootstrap.min.css" rel="stylesheet"/>
+    <link href="/css/font-awesome.min.css" rel="stylesheet"/>
+    <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
+    <link rel="icon" type="image/png" href="/img/samza-icon.png">
+  </head>
+  <body>
+    <div class="wrapper">
+      <div class="wrapper-content">
+
+        <div class="masthead">
+          <div class="container">
+            <div class="masthead-logo">
+              <a href="/" class="logo">samza</a>
+            </div>
+            <div class="masthead-icons">
+              <div class="pull-right">
+                <a href="/startup/download"><i class="fa 
fa-arrow-circle-o-down masthead-icon"></i></a>
+                <a 
href="https://git-wip-us.apache.org/repos/asf?p=incubator-samza.git;a=tree" 
target="_blank"><i class="fa fa-code masthead-icon" style="font-weight: 
bold;"></i></a>
+                <a href="https://twitter.com/samzastream" target="_blank"><i 
class="fa fa-twitter masthead-icon"></i></a>
+                
+                  <a 
href="http://samza.incubator.apache.org/learn/documentation/0.7.0/jobs/reprocessing.html"><i
 class="fa fa-history masthead-icon"></i></a>
+                
+              </div>
+            </div>
+          </div><!-- /.container -->
+        </div>
+
+        <div class="container">
+          <div class="menu">
+            <h1><i class="fa fa-rocket"></i> Getting Started</h1>
+            <ul>
+              <li><a href="/startup/hello-samza/latest">Hello Samza</a></li>
+              <li><a href="/startup/download">Download</a></li>
+            </ul>
+
+            <h1><i class="fa fa-book"></i> Learn</h1>
+            <ul>
+              <li><a href="/learn/documentation/latest">Documentation</a></li>
+              <li><a href="/learn/tutorials/latest">Tutorials</a></li>
+              <li><a href="http://wiki.apache.org/samza/FAQ">FAQ</a></li>
+              <li><a href="http://wiki.apache.org/samza">Wiki</a></li>
+              <li><a href="http://wiki.apache.org/samza/PapersAndTalks">Papers 
&amp; Talks</a></li>
+              <li><a href="http://blogs.apache.org/samza">Blog</a></li>
+            </ul>
+
+            <h1><i class="fa fa-comments"></i> Community</h1>
+            <ul>
+              <li><a href="/community/mailing-lists.html">Mailing 
Lists</a></li>
+              <li><a href="/community/irc.html">IRC</a></li>
+              <li><a 
href="https://issues.apache.org/jira/browse/SAMZA">Bugs</a></li>
+              <li><a href="http://wiki.apache.org/samza/PoweredBy">Powered 
by</a></li>
+              <li><a 
href="http://wiki.apache.org/samza/Ecosystem">Ecosystem</a></li>
+              <li><a href="/community/committers.html">Committers</a></li>
+            </ul>
+
+            <h1><i class="fa fa-code"></i> Contribute</h1>
+            <ul>
+              <li><a href="/contribute/rules.html">Rules</a></li>
+              <li><a href="/contribute/coding-guide.html">Coding Guide</a></li>
+              <li><a href="/contribute/projects.html">Projects</a></li>
+              <li><a href="/contribute/seps.html">SEPs</a></li>
+              <li><a href="/contribute/code.html">Code</a></li>
+              <li><a href="https://reviews.apache.org/groups/samza">Review 
Board</a></li>
+              <li><a href="https://builds.apache.org/">Unit Tests</a></li>
+              <li><a href="/contribute/disclaimer.html">Disclaimer</a></li>
+            </ul>
+
+            <h1><i class="fa fa-history"></i> Archive</h1>
+            <ul>
+              <li><a href="/archive/index.html">0.7.0</a></li>
+            </ul>
+          </div>
+
+          <div class="content">
+            <!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<h2>Reprocessing previously processed data</h2>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<p>From time to time you may want to deploy a new version of your Samza job 
that computes results differently. Perhaps you fixed a bug or introduced a new 
feature. For example, say you have a Samza job that classifies messages as spam 
or not-spam, using a machine learning model that you train offline. 
Periodically you want to deploy an updated version of your Samza job which 
includes the latest classification model.</p>
+
+<p>When you start up a new version of your job, a question arises: what do you 
want to do with messages that were previously processed with the old version of 
your job? The answer depends on the behavior you want:</p>
+
+<ol>
+<li><p><strong>No reprocessing:</strong> By default, Samza assumes that 
messages processed by the old version don&rsquo;t need to be processed again. 
When the new version starts up, it will resume processing at the point where 
the old version left off (assuming you have <a 
href="../container/checkpointing.html">checkpointing</a> enabled). If this is 
the behavior you want, there&rsquo;s nothing special you need to do.</p></li>
+<li><p><strong>Simple rewind:</strong> Perhaps you want to go back and 
re-process old messages using the new version of your job. For example, maybe 
the old version of your classifier marked things as spam too aggressively, so 
you now want to revisit its previous spam/not-spam decisions using an improved 
classifier. You can do this by restarting the job at an older point in time in 
the stream, and running through all the messages since that time. Thus your job 
starts off reprocessing messages that it has already seen, but it then 
seamlessly continues with new messages when the reprocessing is done.</p></li>
+</ol>
+
+<p>This approach requires an input system such as Kafka, which allows you to 
jump back in time to a previous point in the stream. We discuss below how this 
works in practice.</p>
+
+<ol start="3">
+<li><strong>Parallel rewind:</strong> This approach avoids a downside of the 
<em>simple rewind</em> approach. With simple rewind, any new messages that 
appear while the job is reprocessing old data are queued up, and are processed 
when the reprocessing is done. The queueing delay needn&rsquo;t be long, 
because Samza can stream through historical data very quickly, but some 
latency-sensitive applications need to process messages faster.</li>
+</ol>
+
+<p>In the <em>parallel rewind</em> approach, you run two jobs in parallel: one 
job continues to handle live updates with low latency (the <em>real-time 
job</em>), while the other is started at an older point in the stream and 
reprocesses historical data (the <em>reprocessing job</em>). The two jobs 
consume the same input stream at different points in time, and eventually the 
reprocessing job catches up with the real-time job.</p>
+
+<p>There are a few details that you need to think through before deploying 
parallel rewind, which we discuss below.</p>
+
+<h3 id="jumping-back-in-time">Jumping Back in Time</h3>
+
+<p>A common aspect of the <em>simple rewind</em> and <em>parallel rewind</em> 
approaches is: you have a job which jumps back to an old point in time in the 
input streams, and consumes all messages since that time. You achieve this by 
working with Samza&rsquo;s checkpoints.</p>
+
+<p>Normally, when a Samza job starts up, it reads the latest checkpoint to 
determine at which offset in the input streams it needs to resume processing. 
If you need to rewind to an earlier time, you do that in one of two ways:</p>
+
+<ol>
+<li>You can stop the job, manipulate its last checkpoint to point to an older 
offset, and start the job up again. Samza includes a command-line tool called 
<a href="../container/checkpointing.html#toc_0">CheckpointTool</a> which you 
can use to manipulate checkpoints.</li>
+<li>You can start a new job with a different <em>job.name</em> or 
<em>job.id</em> (e.g. increment <em>job.id</em> every time you need to jump 
back in time). This gives the job a new checkpoint stream, with none of the old 
checkpoint information. You also need to set <a 
href="../container/checkpointing.html">samza.offset.default=oldest</a>, so that 
when the job starts up without a checkpoint, it starts consuming at the oldest 
offset available.</li>
+</ol>
+
+<p>With either of these approaches you can get Samza to reprocess the entire 
history of messages in the input system. Input systems such as Kafka can retain 
a large amount of history &mdash; see discussion below. In order to speed up 
the reprocessing of historical data, you can increase the container count 
(<em>yarn.container.count</em> if you&rsquo;re running Samza on YARN) to boost 
your job&rsquo;s computational resources.</p>
+
+<p>If your job maintains any <a 
href="../container/state-management.html">persistent state</a>, you need to be 
careful when jumping back in time: resetting a checkpoint does not 
automatically change persistent state, so you could end up reprocessing old 
messages while using state from a later point in time. In most cases, a job 
that jumps back in time should start with an empty state. You can reset the 
state by deleting the changelog topic, or by changing the name of the changelog 
topic in your job configuration.</p>
+
+<p>When you&rsquo;re jumping back in time, you&rsquo;re using Samza somewhat 
like a batch processing framework (e.g. MapReduce) &mdash; with the difference 
that your job doesn&rsquo;t stop when it has processed all the historical data, 
but instead continues running, incrementally processing the stream of new 
messages as they come in. This has the advantage that you don&rsquo;t need to 
write and maintain separate batch and streaming versions of your job: you can 
just use the same Samza API for processing both real-time and historical 
data.</p>
+
+<h3 id="retention-of-history">Retention of History</h3>
+
+<p>Samza doesn&rsquo;t maintain history itself &mdash; that is the 
responsibility of the input system, such as Kafka. How far back in time you can 
jump depends on the amount of history that is retained in that system.</p>
+
+<p>Kafka is designed to keep a fairly large amount of history: it is common 
for Kafka brokers to keep one or two weeks of message history accessible, even 
for high volume topics. The retention period is mostly determined by how much 
disk space you have available. Kafka&rsquo;s performance <a 
href="http://engineering.linkedin.com/kafka/benchmarking-apache-kafka-2-million-writes-second-three-cheap-machines";>remains
 high</a> even if you have terabytes of history.</p>
+
+<p>There are two different kinds of history which require different 
configuration:</p>
+
+<ul>
+<li><strong>Activity events</strong> are things like user tracking events, web 
server log events and the like. This kind of stream is typically configured 
with a time-based retention, e.g. a few weeks. Events older than the retention 
period are deleted (or archived in an offline system such as HDFS).</li>
+<li><strong>Database changes</strong> are events that show inserts, updates 
and deletes in a database. In this kind of stream, each event typically has a 
primary key, and a newer event for a key overwrites any older events for the 
same key. If the same key is updated many times, you&rsquo;re only really 
interested in the most recent value. (The <a 
href="../container/state-management.html">changelog streams</a> used by 
Samza&rsquo;s persistent state fall in this category.)</li>
+</ul>
+
+<p>In a database change stream, when you&rsquo;re reprocessing data, you 
typically want to reprocess the entire database. You don&rsquo;t want to miss a 
value just because it was last updated more than a few weeks ago. In other 
words, you don&rsquo;t want change events to be deleted just because they are 
older than some threshold. In this case, when you&rsquo;re jumping back in 
time, you need to rewind to the <em>beginning of time</em>, to the first change 
ever made to the database (known in Kafka as &ldquo;offset 0&rdquo;).</p>
+
+<p>Fortunately this can be done efficiently, using a Kafka feature called <a 
href="http://kafka.apache.org/documentation.html#compaction";>log 
compaction</a>. </p>
+
+<p>For example, imagine your database contains counters: every time something 
happens, you increment the appropriate counters and update the database with 
the new counter values. Every update is sent to the changelog, and because 
there are many updates, the changelog stream will take up a lot of space. With 
log compaction turned on, Kafka deduplicates the stream in the background, 
keeping only the most recent counter value for each key, and deleting any old 
values for the same counter. This reduces the size of the stream so much that 
you can keep the most recent update for every key, even if it was last updated 
long ago.</p>
+
+<p>With log compaction enabled, the stream of database changes becomes a full 
copy of the entire database. By jumping back to offset 0, your Samza job can 
scan over the entire database and reprocess it. This is a very powerful way of 
building scalable applications.</p>
+
+<h3 id="details-of-parallel-rewind">Details of Parallel Rewind</h3>
+
+<p>If you are taking the <em>parallel rewind</em> approach described above, 
running two jobs in parallel, you need to configure them carefully to avoid 
problems. In particular, some things to look out for:</p>
+
+<ul>
+<li>Make sure that the two jobs don&rsquo;t interfere with each other. They 
need different <em>job.name</em> or <em>job.id</em> configuration properties, 
so that each job gets its own checkpoint stream. If the jobs maintain <a 
href="../container/state-management.html">persistent state</a>, each job needs 
its own changelog (two different jobs writing to the same changelog produces 
undefined results).</li>
+<li>What happens to job output? If the job sends its results to an output 
stream, or writes to a database, then the easiest solution is for each job to 
have a separate output stream or database table. If they write to the same 
output, you need to take care to ensure that newer data isn&rsquo;t overwritten 
with older data (due to race conditions between the two jobs).</li>
+<li>Do you need to support A/B testing between the old and the new version of 
your job, e.g. to test whether the new version improves your metrics? Parallel 
rewind is ideal for this: each job writes to a separate output, and clients or 
consumers of the output can read from either the old or the new version&rsquo;s 
output, depending on whether a user is in test group A or B.</li>
+<li>Reclaiming resources: you might want to keep the old version of your job 
running for a while, even when the new version has finished reprocessing 
historical data (especially if the old version&rsquo;s output is being used in 
an A/B test). However, eventually you&rsquo;ll want to shut it down, and delete 
the checkpoint and changelog streams belonging to the old version.</li>
+</ul>
+
+<p>Samza gives you a lot of flexibility for reprocessing historical data, and 
you don&rsquo;t need to program against a separate batch processing API to take 
advantage of it. If you&rsquo;re mindful of these issues, you can build a data 
system that is very robust, but still gives you lots of freedom to change your 
processing logic in future.</p>
+
+<h2 id="application-master-&raquo;"><a 
href="../yarn/application-master.html">Application Master &raquo;</a></h2>
+
+
+          </div>
+        </div>
+
+      </div><!-- /.wrapper-content -->
+    </div><!-- /.wrapper -->
+
+    <div class="footer">
+      <div class="container">
+        <!-- nothing for now. -->
+      </div>
+    </div>
+
+    <!-- Google Analytics -->
+    <script>
+      
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+      (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
+      
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+      
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+
+      ga('create', 'UA-43122768-1', 'apache.org');
+      ga('send', 'pageview');
+
+    </script>
+  </body>
+</html>

Added: incubator/samza/site/learn/documentation/latest/jobs/yarn-jobs.html
URL: 
http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/latest/jobs/yarn-jobs.html?rev=1618097&view=auto
==============================================================================
--- incubator/samza/site/learn/documentation/latest/jobs/yarn-jobs.html (added)
+++ incubator/samza/site/learn/documentation/latest/jobs/yarn-jobs.html Fri Aug 
15 05:28:03 2014
@@ -0,0 +1,171 @@
+<!DOCTYPE html>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <title>Samza - YARN Jobs</title>
+    <link href='/css/ropa-sans.css' rel='stylesheet' type='text/css'/>
+    <link href="/css/bootstrap.min.css" rel="stylesheet"/>
+    <link href="/css/font-awesome.min.css" rel="stylesheet"/>
+    <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
+    <link rel="icon" type="image/png" href="/img/samza-icon.png">
+  </head>
+  <body>
+    <div class="wrapper">
+      <div class="wrapper-content">
+
+        <div class="masthead">
+          <div class="container">
+            <div class="masthead-logo">
+              <a href="/" class="logo">samza</a>
+            </div>
+            <div class="masthead-icons">
+              <div class="pull-right">
+                <a href="/startup/download"><i class="fa 
fa-arrow-circle-o-down masthead-icon"></i></a>
+                <a 
href="https://git-wip-us.apache.org/repos/asf?p=incubator-samza.git;a=tree" 
target="_blank"><i class="fa fa-code masthead-icon" style="font-weight: 
bold;"></i></a>
+                <a href="https://twitter.com/samzastream" target="_blank"><i 
class="fa fa-twitter masthead-icon"></i></a>
+                
+                  <a 
href="http://samza.incubator.apache.org/learn/documentation/0.7.0/jobs/yarn-jobs.html"><i
 class="fa fa-history masthead-icon"></i></a>
+                
+              </div>
+            </div>
+          </div><!-- /.container -->
+        </div>
+
+        <div class="container">
+          <div class="menu">
+            <h1><i class="fa fa-rocket"></i> Getting Started</h1>
+            <ul>
+              <li><a href="/startup/hello-samza/latest">Hello Samza</a></li>
+              <li><a href="/startup/download">Download</a></li>
+            </ul>
+
+            <h1><i class="fa fa-book"></i> Learn</h1>
+            <ul>
+              <li><a href="/learn/documentation/latest">Documentation</a></li>
+              <li><a href="/learn/tutorials/latest">Tutorials</a></li>
+              <li><a href="http://wiki.apache.org/samza/FAQ">FAQ</a></li>
+              <li><a href="http://wiki.apache.org/samza">Wiki</a></li>
+              <li><a href="http://wiki.apache.org/samza/PapersAndTalks">Papers 
&amp; Talks</a></li>
+              <li><a href="http://blogs.apache.org/samza">Blog</a></li>
+            </ul>
+
+            <h1><i class="fa fa-comments"></i> Community</h1>
+            <ul>
+              <li><a href="/community/mailing-lists.html">Mailing 
Lists</a></li>
+              <li><a href="/community/irc.html">IRC</a></li>
+              <li><a 
href="https://issues.apache.org/jira/browse/SAMZA">Bugs</a></li>
+              <li><a href="http://wiki.apache.org/samza/PoweredBy">Powered 
by</a></li>
+              <li><a 
href="http://wiki.apache.org/samza/Ecosystem">Ecosystem</a></li>
+              <li><a href="/community/committers.html">Committers</a></li>
+            </ul>
+
+            <h1><i class="fa fa-code"></i> Contribute</h1>
+            <ul>
+              <li><a href="/contribute/rules.html">Rules</a></li>
+              <li><a href="/contribute/coding-guide.html">Coding Guide</a></li>
+              <li><a href="/contribute/projects.html">Projects</a></li>
+              <li><a href="/contribute/seps.html">SEPs</a></li>
+              <li><a href="/contribute/code.html">Code</a></li>
+              <li><a href="https://reviews.apache.org/groups/samza">Review 
Board</a></li>
+              <li><a href="https://builds.apache.org/">Unit Tests</a></li>
+              <li><a href="/contribute/disclaimer.html">Disclaimer</a></li>
+            </ul>
+
+            <h1><i class="fa fa-history"></i> Archive</h1>
+            <ul>
+              <li><a href="/archive/index.html">0.7.0</a></li>
+            </ul>
+          </div>
+
+          <div class="content">
+            <!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<h2>YARN Jobs</h2>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<p>When you define 
<code>job.factory.class=org.apache.samza.job.yarn.YarnJobFactory</code> in your 
job&rsquo;s configuration, Samza will use YARN to execute your job. The 
YarnJobFactory will use the YARN_HOME environment variable on the machine that 
run-job.sh is executed on to get the appropriate YARN configuration, which will 
define where the YARN resource manager is. The YarnJob will work with the 
resource manager to get your job started on the YARN cluster.</p>
+
+<p>If you want to use YARN to run your Samza job, you&rsquo;ll also need to 
define the location of your Samza job&rsquo;s package. For example, you might 
say:</p>
+
+<div class="highlight"><pre><code class="jproperties"><span 
class="na">yarn.package.path</span><span class="o">=</span><span 
class="s">http://my.http.server/jobs/ingraphs-package-0.0.55.tgz</span></code></pre></div>
+
+<p>This .tgz file follows the conventions outlined on the <a 
href="packaging.html">Packaging</a> page (it has bin/run-am.sh and 
bin/run-container.sh). YARN NodeManagers will take responsibility for 
downloading this .tgz file on the appropriate machines, and untar&#39;ing it. 
From there, YARN will execute run-am.sh or run-container.sh for the Samza 
Application Master, and SamzaContainer, respectively.</p>
+
+<!-- TODO document yarn.container.count and other key configs -->
+
+<h2 id="logging-&raquo;"><a href="logging.html">Logging &raquo;</a></h2>
+
+
+          </div>
+        </div>
+
+      </div><!-- /.wrapper-content -->
+    </div><!-- /.wrapper -->
+
+    <div class="footer">
+      <div class="container">
+        <!-- nothing for now. -->
+      </div>
+    </div>
+
+    <!-- Google Analytics -->
+    <script>
+      
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+      (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
+      
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+      
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+
+      ga('create', 'UA-43122768-1', 'apache.org');
+      ga('send', 'pageview');
+
+    </script>
+  </body>
+</html>

Added: incubator/samza/site/learn/documentation/latest/operations/kafka.html
URL: 
http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/latest/operations/kafka.html?rev=1618097&view=auto
==============================================================================
--- incubator/samza/site/learn/documentation/latest/operations/kafka.html 
(added)
+++ incubator/samza/site/learn/documentation/latest/operations/kafka.html Fri 
Aug 15 05:28:03 2014
@@ -0,0 +1,171 @@
+<!DOCTYPE html>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <title>Samza - Kafka</title>
+    <link href='/css/ropa-sans.css' rel='stylesheet' type='text/css'/>
+    <link href="/css/bootstrap.min.css" rel="stylesheet"/>
+    <link href="/css/font-awesome.min.css" rel="stylesheet"/>
+    <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
+    <link rel="icon" type="image/png" href="/img/samza-icon.png">
+  </head>
+  <body>
+    <div class="wrapper">
+      <div class="wrapper-content">
+
+        <div class="masthead">
+          <div class="container">
+            <div class="masthead-logo">
+              <a href="/" class="logo">samza</a>
+            </div>
+            <div class="masthead-icons">
+              <div class="pull-right">
+                <a href="/startup/download"><i class="fa 
fa-arrow-circle-o-down masthead-icon"></i></a>
+                <a 
href="https://git-wip-us.apache.org/repos/asf?p=incubator-samza.git;a=tree" 
target="_blank"><i class="fa fa-code masthead-icon" style="font-weight: 
bold;"></i></a>
+                <a href="https://twitter.com/samzastream" target="_blank"><i 
class="fa fa-twitter masthead-icon"></i></a>
+                
+                  <a 
href="http://samza.incubator.apache.org/learn/documentation/0.7.0/operations/kafka.html"><i
 class="fa fa-history masthead-icon"></i></a>
+                
+              </div>
+            </div>
+          </div><!-- /.container -->
+        </div>
+
+        <div class="container">
+          <div class="menu">
+            <h1><i class="fa fa-rocket"></i> Getting Started</h1>
+            <ul>
+              <li><a href="/startup/hello-samza/latest">Hello Samza</a></li>
+              <li><a href="/startup/download">Download</a></li>
+            </ul>
+
+            <h1><i class="fa fa-book"></i> Learn</h1>
+            <ul>
+              <li><a href="/learn/documentation/latest">Documentation</a></li>
+              <li><a href="/learn/tutorials/latest">Tutorials</a></li>
+              <li><a href="http://wiki.apache.org/samza/FAQ">FAQ</a></li>
+              <li><a href="http://wiki.apache.org/samza">Wiki</a></li>
+              <li><a href="http://wiki.apache.org/samza/PapersAndTalks">Papers 
&amp; Talks</a></li>
+              <li><a href="http://blogs.apache.org/samza">Blog</a></li>
+            </ul>
+
+            <h1><i class="fa fa-comments"></i> Community</h1>
+            <ul>
+              <li><a href="/community/mailing-lists.html">Mailing 
Lists</a></li>
+              <li><a href="/community/irc.html">IRC</a></li>
+              <li><a 
href="https://issues.apache.org/jira/browse/SAMZA">Bugs</a></li>
+              <li><a href="http://wiki.apache.org/samza/PoweredBy">Powered 
by</a></li>
+              <li><a 
href="http://wiki.apache.org/samza/Ecosystem">Ecosystem</a></li>
+              <li><a href="/community/committers.html">Committers</a></li>
+            </ul>
+
+            <h1><i class="fa fa-code"></i> Contribute</h1>
+            <ul>
+              <li><a href="/contribute/rules.html">Rules</a></li>
+              <li><a href="/contribute/coding-guide.html">Coding Guide</a></li>
+              <li><a href="/contribute/projects.html">Projects</a></li>
+              <li><a href="/contribute/seps.html">SEPs</a></li>
+              <li><a href="/contribute/code.html">Code</a></li>
+              <li><a href="https://reviews.apache.org/groups/samza">Review 
Board</a></li>
+              <li><a href="https://builds.apache.org/">Unit Tests</a></li>
+              <li><a href="/contribute/disclaimer.html">Disclaimer</a></li>
+            </ul>
+
+            <h1><i class="fa fa-history"></i> Archive</h1>
+            <ul>
+              <li><a href="/archive/index.html">0.7.0</a></li>
+            </ul>
+          </div>
+
+          <div class="content">
+            <!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<h2>Kafka</h2>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<!-- TODO kafka page should be fleshed out a bit -->
+
+<!-- TODO when 0.8.1 is released, update with state management config 
information -->
+
+<p>Kafka has a great <a href="http://kafka.apache.org/08/ops.html">operations 
wiki</a>, which provides some detail on how to operate Kafka at scale.</p>
+
+<h3 id="auto-create-topics">Auto-Create Topics</h3>
+
+<p>Kafka brokers should be configured to automatically create topics. Without 
this, it&rsquo;s going to be very cumbersome to run Samza jobs, since jobs will 
write to arbitrary (and sometimes new) topics.</p>
+
+<div class="highlight"><pre><code class="jproperties"><span 
class="na">auto.create.topics.enable</span><span class="o">=</span><span 
class="s">true</span></code></pre></div>
+
+
+          </div>
+        </div>
+
+      </div><!-- /.wrapper-content -->
+    </div><!-- /.wrapper -->
+
+    <div class="footer">
+      <div class="container">
+        <!-- nothing for now. -->
+      </div>
+    </div>
+
+    <!-- Google Analytics -->
+    <script>
+      
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+      (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
+      
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+      
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+
+      ga('create', 'UA-43122768-1', 'apache.org');
+      ga('send', 'pageview');
+
+    </script>
+  </body>
+</html>

Added: incubator/samza/site/learn/documentation/latest/operations/security.html
URL: 
http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/latest/operations/security.html?rev=1618097&view=auto
==============================================================================
--- incubator/samza/site/learn/documentation/latest/operations/security.html 
(added)
+++ incubator/samza/site/learn/documentation/latest/operations/security.html 
Fri Aug 15 05:28:03 2014
@@ -0,0 +1,213 @@
+<!DOCTYPE html>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <title>Samza - Security</title>
+    <link href='/css/ropa-sans.css' rel='stylesheet' type='text/css'/>
+    <link href="/css/bootstrap.min.css" rel="stylesheet"/>
+    <link href="/css/font-awesome.min.css" rel="stylesheet"/>
+    <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
+    <link rel="icon" type="image/png" href="/img/samza-icon.png">
+  </head>
+  <body>
+    <div class="wrapper">
+      <div class="wrapper-content">
+
+        <div class="masthead">
+          <div class="container">
+            <div class="masthead-logo">
+              <a href="/" class="logo">samza</a>
+            </div>
+            <div class="masthead-icons">
+              <div class="pull-right">
+                <a href="/startup/download"><i class="fa 
fa-arrow-circle-o-down masthead-icon"></i></a>
+                <a 
href="https://git-wip-us.apache.org/repos/asf?p=incubator-samza.git;a=tree" 
target="_blank"><i class="fa fa-code masthead-icon" style="font-weight: 
bold;"></i></a>
+                <a href="https://twitter.com/samzastream" target="_blank"><i 
class="fa fa-twitter masthead-icon"></i></a>
+                
+                  <a 
href="http://samza.incubator.apache.org/learn/documentation/0.7.0/operations/security.html"><i
 class="fa fa-history masthead-icon"></i></a>
+                
+              </div>
+            </div>
+          </div><!-- /.container -->
+        </div>
+
+        <div class="container">
+          <div class="menu">
+            <h1><i class="fa fa-rocket"></i> Getting Started</h1>
+            <ul>
+              <li><a href="/startup/hello-samza/latest">Hello Samza</a></li>
+              <li><a href="/startup/download">Download</a></li>
+            </ul>
+
+            <h1><i class="fa fa-book"></i> Learn</h1>
+            <ul>
+              <li><a href="/learn/documentation/latest">Documentation</a></li>
+              <li><a href="/learn/tutorials/latest">Tutorials</a></li>
+              <li><a href="http://wiki.apache.org/samza/FAQ">FAQ</a></li>
+              <li><a href="http://wiki.apache.org/samza">Wiki</a></li>
+              <li><a href="http://wiki.apache.org/samza/PapersAndTalks">Papers 
&amp; Talks</a></li>
+              <li><a href="http://blogs.apache.org/samza">Blog</a></li>
+            </ul>
+
+            <h1><i class="fa fa-comments"></i> Community</h1>
+            <ul>
+              <li><a href="/community/mailing-lists.html">Mailing 
Lists</a></li>
+              <li><a href="/community/irc.html">IRC</a></li>
+              <li><a 
href="https://issues.apache.org/jira/browse/SAMZA">Bugs</a></li>
+              <li><a href="http://wiki.apache.org/samza/PoweredBy">Powered 
by</a></li>
+              <li><a 
href="http://wiki.apache.org/samza/Ecosystem">Ecosystem</a></li>
+              <li><a href="/community/committers.html">Committers</a></li>
+            </ul>
+
+            <h1><i class="fa fa-code"></i> Contribute</h1>
+            <ul>
+              <li><a href="/contribute/rules.html">Rules</a></li>
+              <li><a href="/contribute/coding-guide.html">Coding Guide</a></li>
+              <li><a href="/contribute/projects.html">Projects</a></li>
+              <li><a href="/contribute/seps.html">SEPs</a></li>
+              <li><a href="/contribute/code.html">Code</a></li>
+              <li><a href="https://reviews.apache.org/groups/samza">Review 
Board</a></li>
+              <li><a href="https://builds.apache.org/">Unit Tests</a></li>
+              <li><a href="/contribute/disclaimer.html">Disclaimer</a></li>
+            </ul>
+
+            <h1><i class="fa fa-history"></i> Archive</h1>
+            <ul>
+              <li><a href="/archive/index.html">0.7.0</a></li>
+            </ul>
+          </div>
+
+          <div class="content">
+            <!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<h2>Security</h2>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<p>Samza provides no security. All security is implemented in the stream 
system, or in the environment in which Samza containers run.</p>
+
+<h3 id="securing-streaming-systems">Securing Streaming Systems</h3>
+
+<p>Samza does not provide any security at the stream system level. It is up to 
individual streaming systems to enforce their own security. If a stream system 
requires usernames and passwords in order to consume from specific streams, 
these values must be supplied via configuration, and used at the 
StreamConsumer/StreamConsumerFactory implementation. The same holds true if the 
streaming system uses SSL certificates or Kerberos. The environment in which 
Samza runs must provide the appropriate certificate or Kerberos ticket, and the 
StreamConsumer must be implemented to use these certificates or tickets.</p>
+
+<h4 id="securing-kafka">Securing Kafka</h4>
+
+<p>Kafka provides no security for its topics, and therefore Samza 
doesn&rsquo;t provide any security when using Kafka topics.</p>
+
+<h3 id="securing-samza&#39;s-environment">Securing Samza&rsquo;s 
Environment</h3>
+
+<p>The most important thing to keep in mind when securing an environment that 
Samza containers run in is that <strong>Samza containers execute arbitrary user 
code</strong>. They must be considered an adversarial application, and the 
environment must be locked down accordingly.</p>
+
+<h4 id="configuration">Configuration</h4>
+
+<p>Samza reads all configuration at the time a Samza job is started using the 
run-job.sh script. If configuration contains sensitive information, then care 
must be taken to provide the JobRunner with the configuration. This means 
implementing a ConfigFactory that understands the configuration security model, 
and resolves configuration to Samza&rsquo;s Config object in a secure way.</p>
+
+<p>For the duration of a Samza job&rsquo;s execution, the configuration is 
kept in memory. The only times configuration is visible are:</p>
+
+<ol>
+<li>When configuration is resolved using a ConfigFactory.</li>
+<li>The configuration is printed to STDOUT when run-job.sh is run.</li>
+<li>The configuration is written to the logs when a Samza container 
starts.</li>
+</ol>
+
+<p>If configuration contains sensitive data, then these three points must be 
secured.</p>
+
+<h4 id="ports">Ports</h4>
+
+<p>The only port that a Samza container opens by default is an un-secured JMX 
port that is randomly selected at start time. If this is not desired, JMX can 
be disabled through configuration. See the <a 
href="configuration.html">Configuration</a> page for details.</p>
+
+<p>Users might open ports from inside a Samza container. If this is not 
desired, then the user that executes the Samza container must have the 
appropriate permissions revoked, usually using iptables.</p>
+
+<h4 id="logs">Logs</h4>
+
+<p>Samza container logs contain configuration, and might contain arbitrary 
sensitive data logged by the user. A secure log directory must be provided to 
the Samza container.</p>
+
+<h4 id="starting-a-samza-job">Starting a Samza Job</h4>
+
+<p>If operators do not wish to allow Samza containers to be executed by 
arbitrary users, then the mechanism by which Samza containers are deployed must 
be secured. Usually, this means controlling execution of the run-job.sh script. 
The recommended pattern is to lock down the machines that Samza containers run 
on, and execute run-job.sh from either a blessed web service or special 
machine, and only allow access to the service or machine by specific users.</p>
+
+<h4 id="shell-scripts">Shell Scripts</h4>
+
+<p>Please see the <a href="packaging.html">Packaging</a> section for details 
on the shell scripts that Samza uses. Samza containers allow users to 
execute arbitrary shell commands, so user permissions must be locked down to 
prevent users from damaging the environment or reading sensitive data.</p>
+
+<h4 id="yarn">YARN</h4>
+
+<!-- TODO make the security page link to the actual YARN security document, 
when we write it. -->
+
+<p>Samza provides out-of-the-box YARN integration. Take a look at 
Samza&rsquo;s YARN Security page for details.</p>
+
+<h2 id="kafka-&raquo;"><a href="kafka.html">Kafka &raquo;</a></h2>
+
+
+          </div>
+        </div>
+
+      </div><!-- /.wrapper-content -->
+    </div><!-- /.wrapper -->
+
+    <div class="footer">
+      <div class="container">
+        <!-- nothing for now. -->
+      </div>
+    </div>
+
+    <!-- Google Analytics -->
+    <script>
+      
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+      (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
+      
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+      
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+
+      ga('create', 'UA-43122768-1', 'apache.org');
+      ga('send', 'pageview');
+
+    </script>
+  </body>
+</html>


Reply via email to