Added: incubator/samza/site/learn/documentation/latest/container/event-loop.html
URL: 
http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/latest/container/event-loop.html?rev=1618097&view=auto
==============================================================================
--- incubator/samza/site/learn/documentation/latest/container/event-loop.html 
(added)
+++ incubator/samza/site/learn/documentation/latest/container/event-loop.html 
Fri Aug 15 05:28:03 2014
@@ -0,0 +1,199 @@
+<!DOCTYPE html>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <title>Samza - Event Loop</title>
+    <link href='/css/ropa-sans.css' rel='stylesheet' type='text/css'/>
+    <link href="/css/bootstrap.min.css" rel="stylesheet"/>
+    <link href="/css/font-awesome.min.css" rel="stylesheet"/>
+    <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
+    <link rel="icon" type="image/png" href="/img/samza-icon.png">
+  </head>
+  <body>
+    <div class="wrapper">
+      <div class="wrapper-content">
+
+        <div class="masthead">
+          <div class="container">
+            <div class="masthead-logo">
+              <a href="/" class="logo">samza</a>
+            </div>
+            <div class="masthead-icons">
+              <div class="pull-right">
+                <a href="/startup/download"><i class="fa 
fa-arrow-circle-o-down masthead-icon"></i></a>
+                <a 
href="https://git-wip-us.apache.org/repos/asf?p=incubator-samza.git;a=tree" 
target="_blank"><i class="fa fa-code masthead-icon" style="font-weight: 
bold;"></i></a>
+                <a href="https://twitter.com/samzastream" target="_blank"><i 
class="fa fa-twitter masthead-icon"></i></a>
+                
+                  <a 
href="http://samza.incubator.apache.org/learn/documentation/0.7.0/container/event-loop.html"><i
 class="fa fa-history masthead-icon"></i></a>
+                
+              </div>
+            </div>
+          </div><!-- /.container -->
+        </div>
+
+        <div class="container">
+          <div class="menu">
+            <h1><i class="fa fa-rocket"></i> Getting Started</h1>
+            <ul>
+              <li><a href="/startup/hello-samza/latest">Hello Samza</a></li>
+              <li><a href="/startup/download">Download</a></li>
+            </ul>
+
+            <h1><i class="fa fa-book"></i> Learn</h1>
+            <ul>
+              <li><a href="/learn/documentation/latest">Documentation</a></li>
+              <li><a href="/learn/tutorials/latest">Tutorials</a></li>
+              <li><a href="http://wiki.apache.org/samza/FAQ">FAQ</a></li>
+              <li><a href="http://wiki.apache.org/samza">Wiki</a></li>
+              <li><a href="http://wiki.apache.org/samza/PapersAndTalks">Papers 
&amp; Talks</a></li>
+              <li><a href="http://blogs.apache.org/samza">Blog</a></li>
+            </ul>
+
+            <h1><i class="fa fa-comments"></i> Community</h1>
+            <ul>
+              <li><a href="/community/mailing-lists.html">Mailing 
Lists</a></li>
+              <li><a href="/community/irc.html">IRC</a></li>
+              <li><a 
href="https://issues.apache.org/jira/browse/SAMZA">Bugs</a></li>
+              <li><a href="http://wiki.apache.org/samza/PoweredBy">Powered 
by</a></li>
+              <li><a 
href="http://wiki.apache.org/samza/Ecosystem">Ecosystem</a></li>
+              <li><a href="/community/committers.html">Committers</a></li>
+            </ul>
+
+            <h1><i class="fa fa-code"></i> Contribute</h1>
+            <ul>
+              <li><a href="/contribute/rules.html">Rules</a></li>
+              <li><a href="/contribute/coding-guide.html">Coding Guide</a></li>
+              <li><a href="/contribute/projects.html">Projects</a></li>
+              <li><a href="/contribute/seps.html">SEPs</a></li>
+              <li><a href="/contribute/code.html">Code</a></li>
+              <li><a href="https://reviews.apache.org/groups/samza">Review 
Board</a></li>
+              <li><a href="https://builds.apache.org/">Unit Tests</a></li>
+              <li><a href="/contribute/disclaimer.html">Disclaimer</a></li>
+            </ul>
+
+            <h1><i class="fa fa-history"></i> Archive</h1>
+            <ul>
+              <li><a href="/archive/index.html">0.7.0</a></li>
+            </ul>
+          </div>
+
+          <div class="content">
+            <!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<h2>Event Loop</h2>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<p>The event loop is the <a href="samza-container.html">container</a>&rsquo;s 
single thread that is in charge of <a href="streams.html">reading and writing 
messages</a>, <a href="metrics.html">flushing metrics</a>, <a 
href="checkpointing.html">checkpointing</a>, and <a 
href="windowing.html">windowing</a>.</p>
+
+<p>Samza uses a single thread because every container is designed to use a 
single CPU core; to get more parallelism, simply run more containers. This uses 
a bit more memory than multithreaded parallelism, because each JVM has some 
overhead, but it simplifies resource management and improves isolation between 
jobs. This helps Samza jobs run reliably on a multitenant cluster, where many 
different jobs written by different people are running at the same time.</p>
+
+<p>You are strongly discouraged from using threads in your job&rsquo;s code. 
Samza uses multiple threads internally for communicating with input and output 
streams, but all message processing and user code runs on a single-threaded 
event loop. In general, Samza is not thread-safe.</p>
+
+<h3 id="event-loop-internals">Event Loop Internals</h3>
+
+<p>A container may have multiple <a 
href="../api/javadocs/org/apache/samza/system/SystemConsumer.html">SystemConsumers</a>
 for consuming messages from different input systems. Each SystemConsumer reads 
messages on its own thread, but writes messages into a shared in-process 
message queue. The container uses this queue to funnel all of the messages into 
the event loop.</p>
+
+<p>The event loop works as follows:</p>
+
+<ol>
+<li>Take a message from the incoming message queue;</li>
+<li>Give the message to the appropriate <a href="samza-container.html">task 
instance</a> by calling process() on it;</li>
+<li>Call window() on the task instance if it implements <a 
href="../api/javadocs/org/apache/samza/task/WindowableTask.html">WindowableTask</a>,
 and the window time has expired;</li>
+<li>Send any output from the process() and window() calls to the appropriate 
<a 
href="../api/javadocs/org/apache/samza/system/SystemProducer.html">SystemProducers</a>;</li>
+<li>Write checkpoints for any tasks whose <a href="checkpointing.html">commit 
interval</a> has elapsed.</li>
+</ol>
+
+<p>The container does this, in a loop, until it is shut down. Note that 
although there can be multiple task instances within a container (depending on 
the number of input stream partitions), their process() and window() methods 
are all called on the same thread, never concurrently on different threads.</p>
+
+<h3 id="lifecycle-listeners">Lifecycle Listeners</h3>
+
+<p>Sometimes, you need to run your own code at specific points in a 
task&rsquo;s lifecycle. For example, you might want to set up some context in 
the container whenever a new message arrives, or perform some operations on 
startup or shutdown.</p>
+
+<p>To receive notifications when such events happen, you can implement the <a 
href="../api/javadocs/org/apache/samza/task/TaskLifecycleListenerFactory.html">TaskLifecycleListenerFactory</a>
 interface. It returns a <a 
href="../api/javadocs/org/apache/samza/task/TaskLifecycleListener.html">TaskLifecycleListener</a>,
 whose methods are called by Samza at the appropriate times.</p>
+
+<p>You can then tell Samza to use your lifecycle listener with the following 
properties in your job configuration:</p>
+
+<div class="highlight"><pre><code class="jproperties"><span class="c"># Define 
a listener called &quot;my-listener&quot; by giving the factory class 
name</span>
+<span class="na">task.lifecycle.listener.my-listener.class</span><span 
class="o">=</span><span class="s">com.example.foo.MyListenerFactory</span>
+
+<span class="c"># Enable it in this job (multiple listeners can be separated 
by commas)</span>
+<span class="na">task.lifecycle.listeners</span><span class="o">=</span><span 
class="s">my-listener</span></code></pre></div>
+
+<p>The Samza container creates one instance of your <a 
href="../api/javadocs/org/apache/samza/task/TaskLifecycleListener.html">TaskLifecycleListener</a>.
 If the container has multiple task instances (processing different input 
stream partitions), the beforeInit, afterInit, beforeClose and afterClose 
methods are called for each task instance. The <a 
href="../api/javadocs/org/apache/samza/task/TaskContext.html">TaskContext</a> 
argument of those methods gives you more information about the partitions.</p>
+
+<h2 id="jmx-&raquo;"><a href="jmx.html">JMX &raquo;</a></h2>
+
+
+          </div>
+        </div>
+
+      </div><!-- /.wrapper-content -->
+    </div><!-- /.wrapper -->
+
+    <div class="footer">
+      <div class="container">
+        <!-- nothing for now. -->
+      </div>
+    </div>
+
+    <!-- Google Analytics -->
+    <script>
+      
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+      (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
+      
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+      
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+
+      ga('create', 'UA-43122768-1', 'apache.org');
+      ga('send', 'pageview');
+
+    </script>
+  </body>
+</html>

Added: incubator/samza/site/learn/documentation/latest/container/jmx.html
URL: 
http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/latest/container/jmx.html?rev=1618097&view=auto
==============================================================================
--- incubator/samza/site/learn/documentation/latest/container/jmx.html (added)
+++ incubator/samza/site/learn/documentation/latest/container/jmx.html Fri Aug 
15 05:28:03 2014
@@ -0,0 +1,176 @@
+<!DOCTYPE html>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <title>Samza - JMX</title>
+    <link href='/css/ropa-sans.css' rel='stylesheet' type='text/css'/>
+    <link href="/css/bootstrap.min.css" rel="stylesheet"/>
+    <link href="/css/font-awesome.min.css" rel="stylesheet"/>
+    <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
+    <link rel="icon" type="image/png" href="/img/samza-icon.png">
+  </head>
+  <body>
+    <div class="wrapper">
+      <div class="wrapper-content">
+
+        <div class="masthead">
+          <div class="container">
+            <div class="masthead-logo">
+              <a href="/" class="logo">samza</a>
+            </div>
+            <div class="masthead-icons">
+              <div class="pull-right">
+                <a href="/startup/download"><i class="fa 
fa-arrow-circle-o-down masthead-icon"></i></a>
+                <a 
href="https://git-wip-us.apache.org/repos/asf?p=incubator-samza.git;a=tree" 
target="_blank"><i class="fa fa-code masthead-icon" style="font-weight: 
bold;"></i></a>
+                <a href="https://twitter.com/samzastream" target="_blank"><i 
class="fa fa-twitter masthead-icon"></i></a>
+                
+                  <a 
href="http://samza.incubator.apache.org/learn/documentation/0.7.0/container/jmx.html"><i
 class="fa fa-history masthead-icon"></i></a>
+                
+              </div>
+            </div>
+          </div><!-- /.container -->
+        </div>
+
+        <div class="container">
+          <div class="menu">
+            <h1><i class="fa fa-rocket"></i> Getting Started</h1>
+            <ul>
+              <li><a href="/startup/hello-samza/latest">Hello Samza</a></li>
+              <li><a href="/startup/download">Download</a></li>
+            </ul>
+
+            <h1><i class="fa fa-book"></i> Learn</h1>
+            <ul>
+              <li><a href="/learn/documentation/latest">Documentation</a></li>
+              <li><a href="/learn/tutorials/latest">Tutorials</a></li>
+              <li><a href="http://wiki.apache.org/samza/FAQ">FAQ</a></li>
+              <li><a href="http://wiki.apache.org/samza">Wiki</a></li>
+              <li><a href="http://wiki.apache.org/samza/PapersAndTalks">Papers 
&amp; Talks</a></li>
+              <li><a href="http://blogs.apache.org/samza">Blog</a></li>
+            </ul>
+
+            <h1><i class="fa fa-comments"></i> Community</h1>
+            <ul>
+              <li><a href="/community/mailing-lists.html">Mailing 
Lists</a></li>
+              <li><a href="/community/irc.html">IRC</a></li>
+              <li><a 
href="https://issues.apache.org/jira/browse/SAMZA">Bugs</a></li>
+              <li><a href="http://wiki.apache.org/samza/PoweredBy">Powered 
by</a></li>
+              <li><a 
href="http://wiki.apache.org/samza/Ecosystem">Ecosystem</a></li>
+              <li><a href="/community/committers.html">Committers</a></li>
+            </ul>
+
+            <h1><i class="fa fa-code"></i> Contribute</h1>
+            <ul>
+              <li><a href="/contribute/rules.html">Rules</a></li>
+              <li><a href="/contribute/coding-guide.html">Coding Guide</a></li>
+              <li><a href="/contribute/projects.html">Projects</a></li>
+              <li><a href="/contribute/seps.html">SEPs</a></li>
+              <li><a href="/contribute/code.html">Code</a></li>
+              <li><a href="https://reviews.apache.org/groups/samza">Review 
Board</a></li>
+              <li><a href="https://builds.apache.org/">Unit Tests</a></li>
+              <li><a href="/contribute/disclaimer.html">Disclaimer</a></li>
+            </ul>
+
+            <h1><i class="fa fa-history"></i> Archive</h1>
+            <ul>
+              <li><a href="/archive/index.html">0.7.0</a></li>
+            </ul>
+          </div>
+
+          <div class="content">
+            <!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<h2>JMX</h2>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<p>Samza&rsquo;s containers and YARN ApplicationMaster enable <a 
href="http://docs.oracle.com/javase/tutorial/jmx/">JMX</a> by default. JMX can 
be used for managing the JVM; for example, you can connect to it using <a 
href="http://docs.oracle.com/javase/7/docs/technotes/guides/management/jconsole.html">jconsole</a>,
 which is included in the JDK.</p>
+
+<p>You can tell Samza to publish its internal <a 
href="metrics.html">metrics</a>, and any custom metrics you define, as JMX 
MBeans. To enable this, set the following properties in your job 
configuration:</p>
+
+<div class="highlight"><pre><code class="jproperties"><span class="c"># Define 
a Samza metrics reporter called &quot;jmx&quot;, which publishes to JMX</span>
+<span class="na">metrics.reporter.jmx.class</span><span 
class="o">=</span><span 
class="s">org.apache.samza.metrics.reporter.JmxReporterFactory</span>
+
+<span class="c"># Use it (if you have multiple reporters defined, separate 
them with commas)</span>
+<span class="na">metrics.reporters</span><span class="o">=</span><span 
class="s">jmx</span></code></pre></div>
+
+<p>JMX needs to be configured to use a specific port, but in a distributed 
environment, there is no way of knowing in advance which ports are available on 
the machines running your containers. Therefore Samza chooses the JMX port 
randomly. If you need to connect to it, you can find the port by looking in the 
container&rsquo;s logs, which report the JMX server details as follows:</p>
+<div class="highlight"><pre><code class="language-text" 
data-lang="text">2014-06-02 21:50:17 JmxServer [INFO] According to 
InetAddress.getLocalHost.getHostName we are samza-grid-1234.example.com
+2014-06-02 21:50:17 JmxServer [INFO] Started JmxServer registry port=50214 
server port=50215 
url=service:jmx:rmi://localhost:50215/jndi/rmi://localhost:50214/jmxrmi
+2014-06-02 21:50:17 JmxServer [INFO] If you are tunneling, you might want to 
try JmxServer registry port=50214 server port=50215 
url=service:jmx:rmi://samza-grid-1234.example.com:50215/jndi/rmi://samza-grid-1234.example.com:50214/jmxrmi
+</code></pre></div>
+<h2 id="jobrunner-&raquo;"><a href="../jobs/job-runner.html">JobRunner 
&raquo;</a></h2>
+
+
+          </div>
+        </div>
+
+      </div><!-- /.wrapper-content -->
+    </div><!-- /.wrapper -->
+
+    <div class="footer">
+      <div class="container">
+        <!-- nothing for now. -->
+      </div>
+    </div>
+
+    <!-- Google Analytics -->
+    <script>
+      
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+      (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
+      
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+      
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+
+      ga('create', 'UA-43122768-1', 'apache.org');
+      ga('send', 'pageview');
+
+    </script>
+  </body>
+</html>

Added: incubator/samza/site/learn/documentation/latest/container/metrics.html
URL: 
http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/latest/container/metrics.html?rev=1618097&view=auto
==============================================================================
--- incubator/samza/site/learn/documentation/latest/container/metrics.html 
(added)
+++ incubator/samza/site/learn/documentation/latest/container/metrics.html Fri 
Aug 15 05:28:03 2014
@@ -0,0 +1,235 @@
+<!DOCTYPE html>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <title>Samza - Metrics</title>
+    <link href='/css/ropa-sans.css' rel='stylesheet' type='text/css'/>
+    <link href="/css/bootstrap.min.css" rel="stylesheet"/>
+    <link href="/css/font-awesome.min.css" rel="stylesheet"/>
+    <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
+    <link rel="icon" type="image/png" href="/img/samza-icon.png">
+  </head>
+  <body>
+    <div class="wrapper">
+      <div class="wrapper-content">
+
+        <div class="masthead">
+          <div class="container">
+            <div class="masthead-logo">
+              <a href="/" class="logo">samza</a>
+            </div>
+            <div class="masthead-icons">
+              <div class="pull-right">
+                <a href="/startup/download"><i class="fa 
fa-arrow-circle-o-down masthead-icon"></i></a>
+                <a 
href="https://git-wip-us.apache.org/repos/asf?p=incubator-samza.git;a=tree" 
target="_blank"><i class="fa fa-code masthead-icon" style="font-weight: 
bold;"></i></a>
+                <a href="https://twitter.com/samzastream" target="_blank"><i 
class="fa fa-twitter masthead-icon"></i></a>
+                
+                  <a 
href="http://samza.incubator.apache.org/learn/documentation/0.7.0/container/metrics.html"><i
 class="fa fa-history masthead-icon"></i></a>
+                
+              </div>
+            </div>
+          </div><!-- /.container -->
+        </div>
+
+        <div class="container">
+          <div class="menu">
+            <h1><i class="fa fa-rocket"></i> Getting Started</h1>
+            <ul>
+              <li><a href="/startup/hello-samza/latest">Hello Samza</a></li>
+              <li><a href="/startup/download">Download</a></li>
+            </ul>
+
+            <h1><i class="fa fa-book"></i> Learn</h1>
+            <ul>
+              <li><a href="/learn/documentation/latest">Documentation</a></li>
+              <li><a href="/learn/tutorials/latest">Tutorials</a></li>
+              <li><a href="http://wiki.apache.org/samza/FAQ">FAQ</a></li>
+              <li><a href="http://wiki.apache.org/samza">Wiki</a></li>
+              <li><a href="http://wiki.apache.org/samza/PapersAndTalks">Papers 
&amp; Talks</a></li>
+              <li><a href="http://blogs.apache.org/samza">Blog</a></li>
+            </ul>
+
+            <h1><i class="fa fa-comments"></i> Community</h1>
+            <ul>
+              <li><a href="/community/mailing-lists.html">Mailing 
Lists</a></li>
+              <li><a href="/community/irc.html">IRC</a></li>
+              <li><a 
href="https://issues.apache.org/jira/browse/SAMZA">Bugs</a></li>
+              <li><a href="http://wiki.apache.org/samza/PoweredBy">Powered 
by</a></li>
+              <li><a 
href="http://wiki.apache.org/samza/Ecosystem">Ecosystem</a></li>
+              <li><a href="/community/committers.html">Committers</a></li>
+            </ul>
+
+            <h1><i class="fa fa-code"></i> Contribute</h1>
+            <ul>
+              <li><a href="/contribute/rules.html">Rules</a></li>
+              <li><a href="/contribute/coding-guide.html">Coding Guide</a></li>
+              <li><a href="/contribute/projects.html">Projects</a></li>
+              <li><a href="/contribute/seps.html">SEPs</a></li>
+              <li><a href="/contribute/code.html">Code</a></li>
+              <li><a href="https://reviews.apache.org/groups/samza">Review 
Board</a></li>
+              <li><a href="https://builds.apache.org/">Unit Tests</a></li>
+              <li><a href="/contribute/disclaimer.html">Disclaimer</a></li>
+            </ul>
+
+            <h1><i class="fa fa-history"></i> Archive</h1>
+            <ul>
+              <li><a href="/archive/index.html">0.7.0</a></li>
+            </ul>
+          </div>
+
+          <div class="content">
+            <!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<h2>Metrics</h2>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<p>When you&rsquo;re running a stream process in production, it&rsquo;s 
important that you have good metrics to track the health of your job. In order 
to make this easy, Samza includes a metrics library. It is used by Samza itself 
to generate some standard metrics such as message throughput, but you can also 
use it in your task code to emit custom metrics.</p>
+
+<p>Metrics can be reported in various ways. You can expose them via <a 
href="jmx.html">JMX</a>, which is useful in development. In production, a 
common setup is for each Samza container to periodically publish its metrics to 
a &ldquo;metrics&rdquo; Kafka topic, in which the metrics from all Samza jobs 
are aggregated. You can then consume this stream in another Samza job, and send 
the metrics to your favorite graphing system such as <a 
href="http://graphite.wikidot.com/">Graphite</a>.</p>
+
+<p>To set up your job to publish metrics to Kafka, you can use the following 
configuration:</p>
+
+<div class="highlight"><pre><code class="jproperties"><span class="c"># Define 
a metrics reporter called &quot;snapshot&quot;, which publishes metrics</span>
+<span class="c"># every 60 seconds.</span>
+<span class="na">metrics.reporters</span><span class="o">=</span><span 
class="s">snapshot</span>
+<span class="na">metrics.reporter.snapshot.class</span><span 
class="o">=</span><span 
class="s">org.apache.samza.metrics.reporter.MetricsSnapshotReporterFactory</span>
+
+<span class="c"># Tell the snapshot reporter to publish to a topic called 
&quot;metrics&quot;</span>
+<span class="c"># in the &quot;kafka&quot; system.</span>
+<span class="na">metrics.reporter.snapshot.stream</span><span 
class="o">=</span><span class="s">kafka.metrics</span>
+
+<span class="c"># Encode metrics data as JSON.</span>
+<span class="na">serializers.registry.metrics.class</span><span 
class="o">=</span><span 
class="s">org.apache.samza.serializers.MetricsSnapshotSerdeFactory</span>
+<span class="na">systems.kafka.streams.metrics.samza.msg.serde</span><span 
class="o">=</span><span class="s">metrics</span></code></pre></div>
+
+<p>With this configuration, the job automatically sends several JSON-encoded 
messages to the &ldquo;metrics&rdquo; topic in Kafka every 60 seconds. The 
messages look something like this:</p>
+
+<div class="highlight"><pre><code class="json"><span class="p">{</span>
+  <span class="nt">&quot;header&quot;</span><span class="p">:</span> <span 
class="p">{</span>
+    <span class="nt">&quot;container-name&quot;</span><span class="p">:</span> 
<span class="s2">&quot;samza-container-0&quot;</span><span class="p">,</span>
+    <span class="nt">&quot;host&quot;</span><span class="p">:</span> <span 
class="s2">&quot;samza-grid-1234.example.com&quot;</span><span 
class="p">,</span>
+    <span class="nt">&quot;job-id&quot;</span><span class="p">:</span> <span 
class="s2">&quot;1&quot;</span><span class="p">,</span>
+    <span class="nt">&quot;job-name&quot;</span><span class="p">:</span> <span 
class="s2">&quot;my-samza-job&quot;</span><span class="p">,</span>
+    <span class="nt">&quot;reset-time&quot;</span><span class="p">:</span> 
<span class="mi">1401729000347</span><span class="p">,</span>
+    <span class="nt">&quot;samza-version&quot;</span><span class="p">:</span> 
<span class="s2">&quot;0.0.1&quot;</span><span class="p">,</span>
+    <span class="nt">&quot;source&quot;</span><span class="p">:</span> <span 
class="s2">&quot;Partition-2&quot;</span><span class="p">,</span>
+    <span class="nt">&quot;time&quot;</span><span class="p">:</span> <span 
class="mi">1401729420566</span><span class="p">,</span>
+    <span class="nt">&quot;version&quot;</span><span class="p">:</span> <span 
class="s2">&quot;0.0.1&quot;</span>
+  <span class="p">},</span>
+  <span class="nt">&quot;metrics&quot;</span><span class="p">:</span> <span 
class="p">{</span>
+    <span 
class="nt">&quot;org.apache.samza.container.TaskInstanceMetrics&quot;</span><span
 class="p">:</span> <span class="p">{</span>
+      <span class="nt">&quot;commit-calls&quot;</span><span class="p">:</span> 
<span class="mi">7</span><span class="p">,</span>
+      <span class="nt">&quot;commit-skipped&quot;</span><span 
class="p">:</span> <span class="mi">77948</span><span class="p">,</span>
+      <span class="nt">&quot;kafka-input-topic-offset&quot;</span><span 
class="p">:</span> <span class="s2">&quot;1606&quot;</span><span 
class="p">,</span>
+      <span class="nt">&quot;messages-sent&quot;</span><span 
class="p">:</span> <span class="mi">985</span><span class="p">,</span>
+      <span class="nt">&quot;process-calls&quot;</span><span 
class="p">:</span> <span class="mi">1093</span><span class="p">,</span>
+      <span class="nt">&quot;send-calls&quot;</span><span class="p">:</span> 
<span class="mi">985</span><span class="p">,</span>
+      <span class="nt">&quot;send-skipped&quot;</span><span class="p">:</span> 
<span class="mi">76970</span><span class="p">,</span>
+      <span class="nt">&quot;window-calls&quot;</span><span class="p">:</span> 
<span class="mi">0</span><span class="p">,</span>
+      <span class="nt">&quot;window-skipped&quot;</span><span 
class="p">:</span> <span class="mi">77955</span>
+    <span class="p">}</span>
+  <span class="p">}</span>
+<span class="p">}</span></code></pre></div>
+
+<p>There is a separate message for each task instance, and the header tells 
you the job name, job ID and partition of the task. The metrics allow you to 
see how many messages have been processed and sent, the current offset in the 
input stream partition, and other details. There are additional messages which 
give you metrics about the JVM (heap size, garbage collection information, 
threads etc.), internal metrics of the Kafka producers and consumers, and 
more.</p>
+
+<p>It&rsquo;s easy to generate custom metrics in your job, if there&rsquo;s 
some value you want to keep an eye on. You can use Samza&rsquo;s built-in 
metrics framework, which is similar in design to Coda Hale&rsquo;s <a 
href="http://metrics.codahale.com/">metrics</a> library.</p>
+
+<p>You can register your custom metrics through a <a 
href="../api/javadocs/org/apache/samza/metrics/MetricsRegistry.html">MetricsRegistry</a>.
 Your stream task needs to implement <a 
href="../api/javadocs/org/apache/samza/task/InitableTask.html">InitableTask</a>,
 so that you can get the metrics registry from the <a 
href="../api/javadocs/org/apache/samza/task/TaskContext.html">TaskContext</a>. 
This simple example shows how to count the number of messages processed by your 
task:</p>
+
+<div class="highlight"><pre><code class="java"><span class="kd">public</span> 
<span class="kd">class</span> <span class="nc">MyJavaStreamTask</span> <span 
class="kd">implements</span> <span class="n">StreamTask</span><span 
class="o">,</span> <span class="n">InitableTask</span> <span class="o">{</span>
+  <span class="kd">private</span> <span class="n">Counter</span> <span 
class="n">messageCount</span><span class="o">;</span>
+
+  <span class="kd">public</span> <span class="kt">void</span> <span 
class="nf">init</span><span class="o">(</span><span class="n">Config</span> 
<span class="n">config</span><span class="o">,</span> <span 
class="n">TaskContext</span> <span class="n">context</span><span 
class="o">)</span> <span class="o">{</span>
+    <span class="k">this</span><span class="o">.</span><span 
class="na">messageCount</span> <span class="o">=</span> <span 
class="n">context</span>
+      <span class="o">.</span><span class="na">getMetricsRegistry</span><span 
class="o">()</span>
+      <span class="o">.</span><span class="na">newCounter</span><span 
class="o">(</span><span class="n">getClass</span><span 
class="o">().</span><span class="na">getName</span><span class="o">(),</span> 
<span class="s">&quot;message-count&quot;</span><span class="o">);</span>
+  <span class="o">}</span>
+
+  <span class="kd">public</span> <span class="kt">void</span> <span 
class="nf">process</span><span class="o">(</span><span 
class="n">IncomingMessageEnvelope</span> <span class="n">envelope</span><span 
class="o">,</span>
+                      <span class="n">MessageCollector</span> <span 
class="n">collector</span><span class="o">,</span>
+                      <span class="n">TaskCoordinator</span> <span 
class="n">coordinator</span><span class="o">)</span> <span class="o">{</span>
+    <span class="n">messageCount</span><span class="o">.</span><span 
class="na">inc</span><span class="o">();</span>
+  <span class="o">}</span>
+<span class="o">}</span></code></pre></div>
+
+<p>Samza currently supports two kinds of metrics: <a 
href="../api/javadocs/org/apache/samza/metrics/Counter.html">counters</a> and 
<a href="../api/javadocs/org/apache/samza/metrics/Gauge.html">gauges</a>. Use a 
counter when you want to track how often something occurs, and a gauge when you 
want to report the level of something, such as the size of a buffer. Each task 
instance (for each input stream partition) gets its own set of metrics.</p>
+
+<p>If you want to report metrics in some other way, e.g. directly to a 
graphing system (without going via Kafka), you can implement a <a 
href="../api/javadocs/org/apache/samza/metrics/MetricsReporterFactory.html">MetricsReporterFactory</a>
 and reference it in your job configuration.</p>
+
+<h2 id="windowing-&raquo;"><a href="windowing.html">Windowing &raquo;</a></h2>
+
+
+          </div>
+        </div>
+
+      </div><!-- /.wrapper-content -->
+    </div><!-- /.wrapper -->
+
+    <div class="footer">
+      <div class="container">
+        <!-- nothing for now. -->
+      </div>
+    </div>
+
+    <!-- Google Analytics -->
+    <script>
+      
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+      (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
+      
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+      
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+
+      ga('create', 'UA-43122768-1', 'apache.org');
+      ga('send', 'pageview');
+
+    </script>
+  </body>
+</html>

Added: 
incubator/samza/site/learn/documentation/latest/container/samza-container.html
URL: 
http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/latest/container/samza-container.html?rev=1618097&view=auto
==============================================================================
--- 
incubator/samza/site/learn/documentation/latest/container/samza-container.html 
(added)
+++ 
incubator/samza/site/learn/documentation/latest/container/samza-container.html 
Fri Aug 15 05:28:03 2014
@@ -0,0 +1,244 @@
+<!DOCTYPE html>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <title>Samza - SamzaContainer</title>
+    <link href='/css/ropa-sans.css' rel='stylesheet' type='text/css'/>
+    <link href="/css/bootstrap.min.css" rel="stylesheet"/>
+    <link href="/css/font-awesome.min.css" rel="stylesheet"/>
+    <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
+    <link rel="icon" type="image/png" href="/img/samza-icon.png">
+  </head>
+  <body>
+    <div class="wrapper">
+      <div class="wrapper-content">
+
+        <div class="masthead">
+          <div class="container">
+            <div class="masthead-logo">
+              <a href="/" class="logo">samza</a>
+            </div>
+            <div class="masthead-icons">
+              <div class="pull-right">
+                <a href="/startup/download"><i class="fa 
fa-arrow-circle-o-down masthead-icon"></i></a>
+                <a 
href="https://git-wip-us.apache.org/repos/asf?p=incubator-samza.git;a=tree" 
target="_blank"><i class="fa fa-code masthead-icon" style="font-weight: 
bold;"></i></a>
+                <a href="https://twitter.com/samzastream" target="_blank"><i 
class="fa fa-twitter masthead-icon"></i></a>
+                
+                  <a 
href="http://samza.incubator.apache.org/learn/documentation/0.7.0/container/samza-container.html"><i
 class="fa fa-history masthead-icon"></i></a>
+                
+              </div>
+            </div>
+          </div><!-- /.container -->
+        </div>
+
+        <div class="container">
+          <div class="menu">
+            <h1><i class="fa fa-rocket"></i> Getting Started</h1>
+            <ul>
+              <li><a href="/startup/hello-samza/latest">Hello Samza</a></li>
+              <li><a href="/startup/download">Download</a></li>
+            </ul>
+
+            <h1><i class="fa fa-book"></i> Learn</h1>
+            <ul>
+              <li><a href="/learn/documentation/latest">Documentation</a></li>
+              <li><a href="/learn/tutorials/latest">Tutorials</a></li>
+              <li><a href="http://wiki.apache.org/samza/FAQ">FAQ</a></li>
+              <li><a href="http://wiki.apache.org/samza">Wiki</a></li>
+              <li><a href="http://wiki.apache.org/samza/PapersAndTalks">Papers 
&amp; Talks</a></li>
+              <li><a href="http://blogs.apache.org/samza">Blog</a></li>
+            </ul>
+
+            <h1><i class="fa fa-comments"></i> Community</h1>
+            <ul>
+              <li><a href="/community/mailing-lists.html">Mailing 
Lists</a></li>
+              <li><a href="/community/irc.html">IRC</a></li>
+              <li><a 
href="https://issues.apache.org/jira/browse/SAMZA">Bugs</a></li>
+              <li><a href="http://wiki.apache.org/samza/PoweredBy">Powered 
by</a></li>
+              <li><a 
href="http://wiki.apache.org/samza/Ecosystem">Ecosystem</a></li>
+              <li><a href="/community/committers.html">Committers</a></li>
+            </ul>
+
+            <h1><i class="fa fa-code"></i> Contribute</h1>
+            <ul>
+              <li><a href="/contribute/rules.html">Rules</a></li>
+              <li><a href="/contribute/coding-guide.html">Coding Guide</a></li>
+              <li><a href="/contribute/projects.html">Projects</a></li>
+              <li><a href="/contribute/seps.html">SEPs</a></li>
+              <li><a href="/contribute/code.html">Code</a></li>
+              <li><a href="https://reviews.apache.org/groups/samza">Review 
Board</a></li>
+              <li><a href="https://builds.apache.org/">Unit Tests</a></li>
+              <li><a href="/contribute/disclaimer.html">Disclaimer</a></li>
+            </ul>
+
+            <h1><i class="fa fa-history"></i> Archive</h1>
+            <ul>
+              <li><a href="/archive/index.html">0.7.0</a></li>
+            </ul>
+          </div>
+
+          <div class="content">
+            <!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<h2>SamzaContainer</h2>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<p>The SamzaContainer is responsible for managing the startup, execution, and 
shutdown of one or more <a href="../api/overview.html">StreamTask</a> 
instances. Each SamzaContainer typically runs as an independent Java virtual 
machine. A Samza job can consist of several SamzaContainers, potentially 
running on different machines.</p>
+
+<p>When a SamzaContainer starts up, it does the following:</p>
+
+<ol>
+<li>Get the last checkpointed offset for each input stream partition that it 
consumes</li>
+<li>Create a &ldquo;reader&rdquo; thread for every input stream partition that 
it consumes</li>
+<li>Start metrics reporters to report metrics</li>
+<li>Start a checkpoint timer to save your task&rsquo;s input stream offsets 
every so often</li>
+<li>Start a window timer to trigger your task&rsquo;s <a 
href="../api/javadocs/org/apache/samza/task/WindowableTask.html">window 
method</a>, if it is defined</li>
+<li>Instantiate and initialize your StreamTask once for each input stream 
partition</li>
+<li>Start an event loop that takes messages from the input stream reader 
threads, and gives them to your StreamTasks</li>
+<li>Notify lifecycle listeners during each one of these steps</li>
+</ol>
+
+<p>Let&rsquo;s start in the middle, with the instantiation of a StreamTask. 
The following sections of the documentation cover the other steps.</p>
+
+<h3 id="tasks-and-partitions">Tasks and Partitions</h3>
+
+<p>When the container starts, it creates instances of the <a 
href="../api/overview.html">task class</a> that you&rsquo;ve written. If the 
task class implements the <a 
href="../api/javadocs/org/apache/samza/task/InitableTask.html">InitableTask</a> 
interface, the SamzaContainer will also call the init() method.</p>
+
+<div class="highlight"><pre><code class="java"><span class="cm">/** Implement 
this if you want a callback when your task starts up. */</span>
+<span class="kd">public</span> <span class="kd">interface</span> <span 
class="nc">InitableTask</span> <span class="o">{</span>
+  <span class="kt">void</span> <span class="nf">init</span><span 
class="o">(</span><span class="n">Config</span> <span 
class="n">config</span><span class="o">,</span> <span 
class="n">TaskContext</span> <span class="n">context</span><span 
class="o">);</span>
+<span class="o">}</span></code></pre></div>
+
+<p>By default, how many instances of your task class are created depends on 
the number of partitions in the job&rsquo;s input streams. If your Samza job 
has ten partitions, there will be ten instantiations of your task class: one 
for each partition. The first task instance will receive all messages for 
partition one, the second instance will receive all messages for partition two, 
and so on.</p>
+
+<p><img 
src="/img/latest/learn/documentation/container/tasks-and-partitions.svg" 
alt="Illustration of tasks consuming partitions" class="diagram-large"></p>
+
+<p>The number of partitions in the input streams is determined by the systems 
from which you are consuming. For example, if your input system is Kafka, you 
can specify the number of partitions when you create a topic from the command 
line or using the num.partitions setting in Kafka&rsquo;s server properties file.</p>
+
+<p>If a Samza job has more than one input stream, the number of task instances 
for the Samza job is the maximum number of partitions across all input streams. 
For example, if a Samza job is reading from PageViewEvent (12 partitions), and 
ServiceMetricEvent (14 partitions), then the Samza job would have 14 task 
instances (numbered 0 through 13). Task instances 12 and 13 only receive events 
from ServiceMetricEvent, because there is no corresponding PageViewEvent 
partition.</p>
+
+<p>With this default approach to assigning input streams to task instances, 
Samza is effectively performing a group-by operation on the input streams with 
their partitions as the key. Other strategies for grouping input stream 
partitions are possible by implementing a new <a 
href="../api/javadocs/org/apache/samza/container/SystemStreamPartitionGrouper.html">SystemStreamPartitionGrouper</a>
 and factory, and configuring the job to use it via the 
job.systemstreampartition.grouper.factory configuration value.</p>
+
+<p>Samza provides the above-discussed per-partition grouper as well as the <a 
href="../api/javadocs/org/apache/samza/container/systemstreampartition/groupers/GroupBySystemStreamPartition">GroupBySystemStreamPartitionGrouper</a>,
 which provides a separate task class instance for every input stream 
partition, effectively grouping by the input stream itself. This provides 
maximum scalability in terms of how many containers can be used to process 
those input streams and is appropriate for very high volume jobs that need no 
grouping of the input streams.</p>
+
+<p>Considering the above example of a PageViewEvent partitioned 12 ways and a 
ServiceMetricEvent partitioned 14 ways, the GroupBySystemStreamPartitionGrouper 
would create 12 + 14 = 26 task instances, which would then be distributed 
across the number of containers configured, as discussed below.</p>
+
+<p>Note that once a job has been started using a particular 
SystemStreamPartitionGrouper and that job is using state or checkpointing, it 
is not possible to change that grouping in subsequent job starts, as the 
previous checkpoints and state information would likely be incorrect under the 
new grouping approach.</p>
+
+<h3 id="containers-and-resource-allocation">Containers and resource 
allocation</h3>
+
+<p>Although the number of task instances is fixed &mdash; determined by the 
number of input partitions &mdash; you can configure how many containers you 
want to use for your job. If you are <a href="../jobs/yarn-jobs.html">using 
YARN</a>, the number of containers determines what CPU and memory resources are 
allocated to your job.</p>
+
+<p>If the data volume on your input streams is small, it might be sufficient 
to use just one SamzaContainer. In that case, Samza still creates one task 
instance per input partition, but all those tasks run within the same 
container. At the other extreme, you can create as many containers as you have 
partitions, and Samza will assign one task instance to each container.</p>
+
+<p>Each SamzaContainer is designed to use one CPU core, so it uses a <a 
href="event-loop.html">single-threaded event loop</a> for execution. It&rsquo;s 
not advisable to create your own threads within a SamzaContainer. If you need 
more parallelism, please configure your job to use more containers.</p>
+
+<p>Any <a href="state-management.html">state</a> in your job belongs to a task 
instance, not to a container. This is a key design decision for Samza&rsquo;s 
scalability: as your job&rsquo;s resource requirements grow and shrink, you can 
simply increase or decrease the number of containers, but the number of task 
instances remains unchanged. As you scale up or down, the same state remains 
attached to each task instance. Task instances may be moved from one container 
to another, and any persistent state managed by Samza will be moved with it. 
This allows the job&rsquo;s processing semantics to remain unchanged, even as 
you change the job&rsquo;s parallelism.</p>
+
+<h3 id="joining-multiple-input-streams">Joining multiple input streams</h3>
+
+<p>If your job has multiple input streams, Samza provides a simple but 
powerful mechanism for joining data from different streams: each task instance 
receives messages from one partition of <em>each</em> of the input streams. For 
example, say you have two input streams, A and B, each with four partitions. 
Samza creates four task instances to process them, and assigns the partitions 
as follows:</p>
+
+<table class="table table-condensed table-bordered table-striped">
+  <thead>
+    <tr>
+      <th>Task instance</th>
+      <th>Consumes stream partitions</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td>0</td><td>stream A partition 0, stream B partition 0</td>
+    </tr>
+    <tr>
+      <td>1</td><td>stream A partition 1, stream B partition 1</td>
+    </tr>
+    <tr>
+      <td>2</td><td>stream A partition 2, stream B partition 2</td>
+    </tr>
+    <tr>
+      <td>3</td><td>stream A partition 3, stream B partition 3</td>
+    </tr>
+  </tbody>
+</table>
+
+<p>Thus, if you want two events in different streams to be processed by the 
same task instance, you need to ensure they are sent to the same partition 
number. You can achieve this by using the same partitioning key when <a 
href="../api/overview.html">sending the messages</a>. Joining streams is 
discussed in detail in the <a href="state-management.html">state management</a> 
section.</p>
+
+<p>There is one caveat in all of this: Samza currently assumes that a 
stream&rsquo;s partition count will never change. Partition splitting or 
repartitioning is not supported. If an input stream has N partitions, it is 
expected that it has always had, and will always have N partitions. If you want 
to re-partition a stream, you can write a job that reads messages from the 
stream, and writes them out to a new stream with the required number of 
partitions. For example, you could read messages from PageViewEvent, and write 
them to PageViewEventRepartition.</p>
+
+<h2 id="streams-&raquo;"><a href="streams.html">Streams &raquo;</a></h2>
+
+
+          </div>
+        </div>
+
+      </div><!-- /.wrapper-content -->
+    </div><!-- /.wrapper -->
+
+    <div class="footer">
+      <div class="container">
+        <!-- nothing for now. -->
+      </div>
+    </div>
+
+    <!-- Google Analytics -->
+    <script>
+      
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+      (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
+      
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+      
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+
+      ga('create', 'UA-43122768-1', 'apache.org');
+      ga('send', 'pageview');
+
+    </script>
+  </body>
+</html>

Added: 
incubator/samza/site/learn/documentation/latest/container/serialization.html
URL: 
http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/latest/container/serialization.html?rev=1618097&view=auto
==============================================================================
--- 
incubator/samza/site/learn/documentation/latest/container/serialization.html 
(added)
+++ 
incubator/samza/site/learn/documentation/latest/container/serialization.html 
Fri Aug 15 05:28:03 2014
@@ -0,0 +1,203 @@
+<!DOCTYPE html>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <title>Samza - Serialization</title>
+    <link href='/css/ropa-sans.css' rel='stylesheet' type='text/css'/>
+    <link href="/css/bootstrap.min.css" rel="stylesheet"/>
+    <link href="/css/font-awesome.min.css" rel="stylesheet"/>
+    <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
+    <link rel="icon" type="image/png" href="/img/samza-icon.png">
+  </head>
+  <body>
+    <div class="wrapper">
+      <div class="wrapper-content">
+
+        <div class="masthead">
+          <div class="container">
+            <div class="masthead-logo">
+              <a href="/" class="logo">samza</a>
+            </div>
+            <div class="masthead-icons">
+              <div class="pull-right">
+                <a href="/startup/download"><i class="fa 
fa-arrow-circle-o-down masthead-icon"></i></a>
+                <a 
href="https://git-wip-us.apache.org/repos/asf?p=incubator-samza.git;a=tree" 
target="_blank"><i class="fa fa-code masthead-icon" style="font-weight: 
bold;"></i></a>
+                <a href="https://twitter.com/samzastream" target="_blank"><i 
class="fa fa-twitter masthead-icon"></i></a>
+                
+                  <a 
href="http://samza.incubator.apache.org/learn/documentation/0.7.0/container/serialization.html"><i
 class="fa fa-history masthead-icon"></i></a>
+                
+              </div>
+            </div>
+          </div><!-- /.container -->
+        </div>
+
+        <div class="container">
+          <div class="menu">
+            <h1><i class="fa fa-rocket"></i> Getting Started</h1>
+            <ul>
+              <li><a href="/startup/hello-samza/latest">Hello Samza</a></li>
+              <li><a href="/startup/download">Download</a></li>
+            </ul>
+
+            <h1><i class="fa fa-book"></i> Learn</h1>
+            <ul>
+              <li><a href="/learn/documentation/latest">Documentation</a></li>
+              <li><a href="/learn/tutorials/latest">Tutorials</a></li>
+              <li><a href="http://wiki.apache.org/samza/FAQ">FAQ</a></li>
+              <li><a href="http://wiki.apache.org/samza">Wiki</a></li>
+              <li><a href="http://wiki.apache.org/samza/PapersAndTalks">Papers 
&amp; Talks</a></li>
+              <li><a href="http://blogs.apache.org/samza">Blog</a></li>
+            </ul>
+
+            <h1><i class="fa fa-comments"></i> Community</h1>
+            <ul>
+              <li><a href="/community/mailing-lists.html">Mailing 
Lists</a></li>
+              <li><a href="/community/irc.html">IRC</a></li>
+              <li><a 
href="https://issues.apache.org/jira/browse/SAMZA">Bugs</a></li>
+              <li><a href="http://wiki.apache.org/samza/PoweredBy">Powered 
by</a></li>
+              <li><a 
href="http://wiki.apache.org/samza/Ecosystem">Ecosystem</a></li>
+              <li><a href="/community/committers.html">Committers</a></li>
+            </ul>
+
+            <h1><i class="fa fa-code"></i> Contribute</h1>
+            <ul>
+              <li><a href="/contribute/rules.html">Rules</a></li>
+              <li><a href="/contribute/coding-guide.html">Coding Guide</a></li>
+              <li><a href="/contribute/projects.html">Projects</a></li>
+              <li><a href="/contribute/seps.html">SEPs</a></li>
+              <li><a href="/contribute/code.html">Code</a></li>
+              <li><a href="https://reviews.apache.org/groups/samza">Review 
Board</a></li>
+              <li><a href="https://builds.apache.org/">Unit Tests</a></li>
+              <li><a href="/contribute/disclaimer.html">Disclaimer</a></li>
+            </ul>
+
+            <h1><i class="fa fa-history"></i> Archive</h1>
+            <ul>
+              <li><a href="/archive/index.html">0.7.0</a></li>
+            </ul>
+          </div>
+
+          <div class="content">
+            <!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<h2>Serialization</h2>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<p>Every message that is read from or written to a <a 
href="streams.html">stream</a> or a <a href="state-management.html">persistent 
state store</a> needs to eventually be serialized to bytes (which are sent over 
the network or written to disk). There are various places where that 
serialization and deserialization can happen:</p>
+
+<ol>
+<li>In the client library: for example, the library for publishing to Kafka 
and consuming from Kafka supports pluggable serialization.</li>
+<li>In the task implementation: your <a href="../api/overview.html">process 
method</a> can use raw byte arrays as inputs and outputs, and do any parsing 
and serialization itself.</li>
+<li>Between the two: Samza provides a layer of serializers and deserializers, 
or <em>serdes</em> for short.</li>
+</ol>
+
+<p>You can use whatever makes sense for your job; Samza doesn&rsquo;t impose 
any particular data model or serialization scheme on you. However, the cleanest 
solution is usually to use Samza&rsquo;s serde layer. The following 
configuration example shows how to use it.</p>
+
+<div class="highlight"><pre><code class="jproperties"><span class="c"># Define 
a system called &quot;kafka&quot;</span>
+<span class="na">systems.kafka.samza.factory</span><span 
class="o">=</span><span 
class="s">org.apache.samza.system.kafka.KafkaSystemFactory</span>
+
+<span class="c"># The job is going to consume a topic called 
&quot;PageViewEvent&quot; from the &quot;kafka&quot; system</span>
+<span class="na">task.inputs</span><span class="o">=</span><span 
class="s">kafka.PageViewEvent</span>
+
+<span class="c"># Define a serde called &quot;json&quot; which 
parses/serializes JSON objects</span>
+<span class="na">serializers.registry.json.class</span><span 
class="o">=</span><span 
class="s">org.apache.samza.serializers.JsonSerdeFactory</span>
+
+<span class="c"># Define a serde called &quot;integer&quot; which encodes an 
integer as 4 binary bytes (big-endian)</span>
+<span class="na">serializers.registry.integer.class</span><span 
class="o">=</span><span 
class="s">org.apache.samza.serializers.IntegerSerdeFactory</span>
+
+<span class="c"># For messages in the &quot;PageViewEvent&quot; topic, the key 
(the ID of the user viewing the page)</span>
+<span class="c"># is encoded as a binary integer, and the message is encoded 
as JSON.</span>
+<span 
class="na">systems.kafka.streams.PageViewEvent.samza.key.serde</span><span 
class="o">=</span><span class="s">integer</span>
+<span 
class="na">systems.kafka.streams.PageViewEvent.samza.msg.serde</span><span 
class="o">=</span><span class="s">json</span>
+
+<span class="c"># Define a key-value store which stores the most recent page 
view for each user ID.</span>
+<span class="c"># Again, the key is an integer user ID, and the value is 
JSON.</span>
+<span class="na">stores.LastPageViewPerUser.factory</span><span 
class="o">=</span><span 
class="s">org.apache.samza.storage.kv.KeyValueStorageEngineFactory</span>
+<span class="na">stores.LastPageViewPerUser.changelog</span><span 
class="o">=</span><span class="s">kafka.last-page-view-per-user</span>
+<span class="na">stores.LastPageViewPerUser.key.serde</span><span 
class="o">=</span><span class="s">integer</span>
+<span class="na">stores.LastPageViewPerUser.msg.serde</span><span 
class="o">=</span><span class="s">json</span></code></pre></div>
+
+<p>Each serde is defined with a factory class. Samza comes with several 
built-in serdes for UTF-8 strings, binary-encoded integers, JSON (requires the 
samza-serializers dependency) and more. You can also create your own serializer 
by implementing the <a 
href="../api/javadocs/org/apache/samza/serializers/SerdeFactory.html">SerdeFactory</a>
 interface.</p>
+
+<p>The name you give to a serde (such as &ldquo;json&rdquo; and 
&ldquo;integer&rdquo; in the example above) is only for convenience in your job 
configuration; you can choose whatever name you like. For each stream and each 
state store, you can use the serde name to declare how messages should be 
serialized and deserialized.</p>
+
+<p>If you don&rsquo;t declare a serde, Samza simply passes objects through 
between your task instance and the system stream. In that case your task needs 
to send and receive whatever type of object the underlying client library 
uses.</p>
+
+<p>All the Samza APIs for sending and receiving messages are typed as 
<em>Object</em>. This means that you have to cast messages to the correct type 
before you can use them. It&rsquo;s a little bit more code, but it has the 
advantage that Samza is not restricted to any particular data model.</p>
+
+<h2 id="checkpointing-&raquo;"><a href="checkpointing.html">Checkpointing 
&raquo;</a></h2>
+
+
+          </div>
+        </div>
+
+      </div><!-- /.wrapper-content -->
+    </div><!-- /.wrapper -->
+
+    <div class="footer">
+      <div class="container">
+        <!-- nothing for now. -->
+      </div>
+    </div>
+
+    <!-- Google Analytics -->
+    <script>
+      
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+      (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
+      
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+      
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+
+      ga('create', 'UA-43122768-1', 'apache.org');
+      ga('send', 'pageview');
+
+    </script>
+  </body>
+</html>


Reply via email to