Modified: incubator/samza/site/learn/documentation/0.7.0/container/streams.html
URL: 
http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/0.7.0/container/streams.html?rev=1609232&r1=1609231&r2=1609232&view=diff
==============================================================================
--- incubator/samza/site/learn/documentation/0.7.0/container/streams.html 
(original)
+++ incubator/samza/site/learn/documentation/0.7.0/container/streams.html Wed 
Jul  9 16:37:01 2014
@@ -23,6 +23,7 @@
     <link href="/css/bootstrap.min.css" rel="stylesheet"/>
     <link href="/css/font-awesome.min.css" rel="stylesheet"/>
     <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
     <link rel="icon" type="image/png" href="/img/samza-icon.png">
   </head>
   <body>
@@ -123,48 +124,49 @@
 -->
 
 <p>The <a href="samza-container.html">samza container</a> reads and writes 
messages using the <a 
href="../api/javadocs/org/apache/samza/system/SystemConsumer.html">SystemConsumer</a>
 and <a 
href="../api/javadocs/org/apache/samza/system/SystemProducer.html">SystemProducer</a>
 interfaces. You can integrate any message broker with Samza by implementing 
these two interfaces.</p>
-<div class="highlight"><pre><code class="language-text" 
data-lang="text">public interface SystemConsumer {
-  void start();
 
-  void stop();
+<div class="highlight"><pre><code class="language-java" data-lang="java"><span 
class="kd">public</span> <span class="kd">interface</span> <span 
class="nc">SystemConsumer</span> <span class="o">{</span>
+  <span class="kt">void</span> <span class="nf">start</span><span 
class="o">();</span>
 
-  void register(
-      SystemStreamPartition systemStreamPartition,
-      String lastReadOffset);
+  <span class="kt">void</span> <span class="nf">stop</span><span 
class="o">();</span>
 
-  List&lt;IncomingMessageEnvelope&gt; poll(
-      Map&lt;SystemStreamPartition, Integer&gt; systemStreamPartitions,
-      long timeout)
-    throws InterruptedException;
-}
+  <span class="kt">void</span> <span class="nf">register</span><span 
class="o">(</span>
+      <span class="n">SystemStreamPartition</span> <span 
class="n">systemStreamPartition</span><span class="o">,</span>
+      <span class="n">String</span> <span class="n">lastReadOffset</span><span 
class="o">);</span>
 
-public class IncomingMessageEnvelope {
-  public Object getMessage() { ... }
+  <span class="n">List</span><span class="o">&lt;</span><span 
class="n">IncomingMessageEnvelope</span><span class="o">&gt;</span> <span 
class="nf">poll</span><span class="o">(</span>
+      <span class="n">Map</span><span class="o">&lt;</span><span 
class="n">SystemStreamPartition</span><span class="o">,</span> <span 
class="n">Integer</span><span class="o">&gt;</span> <span 
class="n">systemStreamPartitions</span><span class="o">,</span>
+      <span class="kt">long</span> <span class="n">timeout</span><span 
class="o">)</span>
+    <span class="kd">throws</span> <span 
class="n">InterruptedException</span><span class="o">;</span>
+<span class="o">}</span>
 
-  public Object getKey() { ... }
+<span class="kd">public</span> <span class="kd">class</span> <span 
class="nc">IncomingMessageEnvelope</span> <span class="o">{</span>
+  <span class="kd">public</span> <span class="n">Object</span> <span 
class="nf">getMessage</span><span class="o">()</span> <span class="o">{</span> 
<span class="o">...</span> <span class="o">}</span>
 
-  public SystemStreamPartition getSystemStreamPartition() { ... }
-}
+  <span class="kd">public</span> <span class="n">Object</span> <span 
class="nf">getKey</span><span class="o">()</span> <span class="o">{</span> 
<span class="o">...</span> <span class="o">}</span>
 
-public interface SystemProducer {
-  void start();
+  <span class="kd">public</span> <span class="n">SystemStreamPartition</span> 
<span class="nf">getSystemStreamPartition</span><span class="o">()</span> <span 
class="o">{</span> <span class="o">...</span> <span class="o">}</span>
+<span class="o">}</span>
 
-  void stop();
+<span class="kd">public</span> <span class="kd">interface</span> <span 
class="nc">SystemProducer</span> <span class="o">{</span>
+  <span class="kt">void</span> <span class="nf">start</span><span 
class="o">();</span>
 
-  void register(String source);
+  <span class="kt">void</span> <span class="nf">stop</span><span 
class="o">();</span>
 
-  void send(String source, OutgoingMessageEnvelope envelope);
+  <span class="kt">void</span> <span class="nf">register</span><span 
class="o">(</span><span class="n">String</span> <span 
class="n">source</span><span class="o">);</span>
 
-  void flush(String source);
-}
+  <span class="kt">void</span> <span class="nf">send</span><span 
class="o">(</span><span class="n">String</span> <span 
class="n">source</span><span class="o">,</span> <span 
class="n">OutgoingMessageEnvelope</span> <span class="n">envelope</span><span 
class="o">);</span>
 
-public class OutgoingMessageEnvelope {
-  ...
-  public Object getKey() { ... }
+  <span class="kt">void</span> <span class="nf">flush</span><span 
class="o">(</span><span class="n">String</span> <span 
class="n">source</span><span class="o">);</span>
+<span class="o">}</span>
+
+<span class="kd">public</span> <span class="kd">class</span> <span 
class="nc">OutgoingMessageEnvelope</span> <span class="o">{</span>
+  <span class="o">...</span>
+  <span class="kd">public</span> <span class="n">Object</span> <span 
class="nf">getKey</span><span class="o">()</span> <span class="o">{</span> 
<span class="o">...</span> <span class="o">}</span>
+
+  <span class="kd">public</span> <span class="n">Object</span> <span 
class="nf">getMessage</span><span class="o">()</span> <span class="o">{</span> 
<span class="o">...</span> <span class="o">}</span>
+<span class="o">}</span></code></pre></div>
 
-  public Object getMessage() { ... }
-}
-</code></pre></div>
 <p>Out of the box, Samza supports Kafka (KafkaSystemConsumer and 
KafkaSystemProducer). However, any message bus system can be plugged in, as 
long as it can provide the semantics required by Samza, as described in the <a 
href="../api/javadocs/org/apache/samza/system/SystemConsumer.html">javadoc</a>.</p>
 
 <p>SystemConsumers and SystemProducers may read and write messages of any data 
type. It&rsquo;s ok if they only support byte arrays &mdash; Samza has a 
separate <a href="serialization.html">serialization layer</a> which converts to 
and from objects that application code can use. Samza does not prescribe any 
particular data model or serialization format.</p>
@@ -182,16 +184,18 @@ public class OutgoingMessageEnvelope {
 <p>When a Samza container has several incoming messages on different stream 
partitions, how does it decide which to process first? The behavior is 
determined by a <a 
href="../api/javadocs/org/apache/samza/system/chooser/MessageChooser.html">MessageChooser</a>.
 The default chooser is RoundRobinChooser, but you can override it by 
implementing a custom chooser.</p>
 
 <p>To plug in your own message chooser, you need to implement the <a 
href="../api/javadocs/org/apache/samza/system/chooser/MessageChooserFactory.html">MessageChooserFactory</a>
 interface, and set the &ldquo;task.chooser.class&rdquo; configuration to the 
fully-qualified class name of your implementation:</p>
-<div class="highlight"><pre><code class="language-text" 
data-lang="text">task.chooser.class=com.example.samza.YourMessageChooserFactory
-</code></pre></div>
+
+<div class="highlight"><pre><code class="language-jproperties" 
data-lang="jproperties"><span class="na">task.chooser.class</span><span 
class="o">=</span><span 
class="s">com.example.samza.YourMessageChooserFactory</span></code></pre></div>
+
 <h4 id="prioritizing-input-streams">Prioritizing input streams</h4>
 
 <p>There are certain times when messages from one stream should be processed 
with higher priority than messages from another stream. For example, some Samza 
jobs consume two streams: one stream is fed by a real-time system and the other 
stream is fed by a batch system. In this case, it&rsquo;s useful to prioritize 
the real-time stream over the batch stream, so that the real-time processing 
doesn&rsquo;t slow down if there is a sudden burst of data on the batch 
stream.</p>
 
 <p>Samza provides a mechanism to prioritize one stream over another by setting 
this configuration parameter: 
systems.&lt;system&gt;.streams.&lt;stream&gt;.samza.priority=&lt;number&gt;. 
For example:</p>
-<div class="highlight"><pre><code class="language-text" 
data-lang="text">systems.kafka.streams.my-real-time-stream.samza.priority=2
-systems.kafka.streams.my-batch-stream.samza.priority=1
-</code></pre></div>
+
+<div class="highlight"><pre><code class="language-jproperties" 
data-lang="jproperties"><span 
class="na">systems.kafka.streams.my-real-time-stream.samza.priority</span><span 
class="o">=</span><span class="s">2</span>
+<span 
class="na">systems.kafka.streams.my-batch-stream.samza.priority</span><span 
class="o">=</span><span class="s">1</span></code></pre></div>
+
 <p>This declares that my-real-time-stream&rsquo;s messages should be processed 
with higher priority than my-batch-stream&rsquo;s messages. If 
my-real-time-stream has any messages available, they are processed first. Only 
if there are no messages currently waiting on my-real-time-stream does the Samza 
job continue processing my-batch-stream.</p>
 
 <p>Each priority level gets its own MessageChooser. It is valid to define two 
streams with the same priority. If messages are available from two streams at 
the same priority level, it&rsquo;s up to the MessageChooser for that priority 
level to decide which message should be processed first.</p>
@@ -207,10 +211,11 @@ systems.kafka.streams.my-batch-stream.sa
 <p>Another difference between a bootstrap stream and a high-priority stream is 
that the bootstrap stream&rsquo;s special treatment is temporary: when it has 
been fully consumed (we say it has &ldquo;caught up&rdquo;), its priority drops 
to be the same as all the other input streams.</p>
 
 <p>To configure a stream called &ldquo;my-bootstrap-stream&rdquo; to be a 
fully-consumed bootstrap stream, use the following settings:</p>
-<div class="highlight"><pre><code class="language-text" 
data-lang="text">systems.kafka.streams.my-bootstrap-stream.samza.bootstrap=true
-systems.kafka.streams.my-bootstrap-stream.samza.reset.offset=true
-systems.kafka.streams.my-bootstrap-stream.samza.offset.default=oldest
-</code></pre></div>
+
+<div class="highlight"><pre><code class="language-jproperties" 
data-lang="jproperties"><span 
class="na">systems.kafka.streams.my-bootstrap-stream.samza.bootstrap</span><span
 class="o">=</span><span class="s">true</span>
+<span 
class="na">systems.kafka.streams.my-bootstrap-stream.samza.reset.offset</span><span
 class="o">=</span><span class="s">true</span>
+<span 
class="na">systems.kafka.streams.my-bootstrap-stream.samza.offset.default</span><span
 class="o">=</span><span class="s">oldest</span></code></pre></div>
+
 <p>The bootstrap=true parameter enables the bootstrap behavior (prioritization 
over other streams). The combination of reset.offset=true and 
offset.default=oldest tells Samza to always start reading the stream from the 
oldest offset, every time a container starts up (rather than starting to read 
from the most recent checkpoint).</p>
 
 <p>It is valid to define multiple bootstrap streams. In this case, the order 
in which they are bootstrapped is determined by the priority.</p>
@@ -220,8 +225,9 @@ systems.kafka.streams.my-bootstrap-strea
 <p>In some cases, you can improve performance by consuming several messages 
from the same stream partition in sequence. Samza supports this mode of 
operation, called <em>batching</em>.</p>
 
 <p>For example, if you want to read 100 messages in a row from each stream 
partition (regardless of the MessageChooser), you can use this configuration 
parameter:</p>
-<div class="highlight"><pre><code class="language-text" 
data-lang="text">task.consumer.batch.size=100
-</code></pre></div>
+
+<div class="highlight"><pre><code class="language-jproperties" 
data-lang="jproperties"><span class="na">task.consumer.batch.size</span><span 
class="o">=</span><span class="s">100</span></code></pre></div>
+
 <p>With this setting, Samza tries to read a message from the most recently 
used <a 
href="../api/javadocs/org/apache/samza/system/SystemStreamPartition.html">SystemStreamPartition</a>.
 This behavior continues either until no more messages are available for that 
SystemStreamPartition, or until the batch size has been reached. When that 
happens, Samza defers to the MessageChooser to determine the next message to 
process. It then tries to continue consuming from the chosen 
message&rsquo;s SystemStreamPartition until the batch size is reached.</p>
 
 <h2 id="serialization-&raquo;"><a href="serialization.html">Serialization 
&raquo;</a></h2>

Modified: 
incubator/samza/site/learn/documentation/0.7.0/container/windowing.html
URL: 
http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/0.7.0/container/windowing.html?rev=1609232&r1=1609231&r2=1609232&view=diff
==============================================================================
--- incubator/samza/site/learn/documentation/0.7.0/container/windowing.html 
(original)
+++ incubator/samza/site/learn/documentation/0.7.0/container/windowing.html Wed 
Jul  9 16:37:01 2014
@@ -23,6 +23,7 @@
     <link href="/css/bootstrap.min.css" rel="stylesheet"/>
     <link href="/css/font-awesome.min.css" rel="stylesheet"/>
     <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
     <link rel="icon" type="image/png" href="/img/samza-icon.png">
   </head>
   <body>
@@ -125,32 +126,34 @@
 <p>Sometimes a stream processing job needs to do something in regular time 
intervals, regardless of how many incoming messages the job is processing. For 
example, say you want to report the number of page views per minute. To do 
this, you increment a counter every time you see a page view event. Once per 
minute, you send the current counter value to an output stream and reset the 
counter to zero.</p>
 
 <p>Samza&rsquo;s <em>windowing</em> feature provides a way for tasks to do 
something in regular time intervals, for example once per minute. To enable 
windowing, you just need to set one property in your job configuration:</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text"># 
Call the window() method every 60 seconds
-task.window.ms=60000
-</code></pre></div>
+
+<div class="highlight"><pre><code class="language-jproperties" 
data-lang="jproperties"><span class="c"># Call the window() method every 60 
seconds</span>
+<span class="na">task.window.ms</span><span class="o">=</span><span 
class="s">60000</span></code></pre></div>
+
 <p>Next, your stream task needs to implement the <a 
href="../api/javadocs/org/apache/samza/task/WindowableTask.html">WindowableTask</a>
 interface. This interface defines a window() method which is called by Samza 
in the regular interval that you configured.</p>
 
 <p>For example, this is how you would implement a basic per-minute event 
counter:</p>
-<div class="highlight"><pre><code class="language-text" 
data-lang="text">public class EventCounterTask implements StreamTask, 
WindowableTask {
 
-  public static final SystemStream OUTPUT_STREAM =
-    new SystemStream(&quot;kafka&quot;, &quot;events-per-minute&quot;);
+<div class="highlight"><pre><code class="language-java" data-lang="java"><span 
class="kd">public</span> <span class="kd">class</span> <span 
class="nc">EventCounterTask</span> <span class="kd">implements</span> <span 
class="n">StreamTask</span><span class="o">,</span> <span 
class="n">WindowableTask</span> <span class="o">{</span>
+
+  <span class="kd">public</span> <span class="kd">static</span> <span 
class="kd">final</span> <span class="n">SystemStream</span> <span 
class="n">OUTPUT_STREAM</span> <span class="o">=</span>
+    <span class="k">new</span> <span class="nf">SystemStream</span><span 
class="o">(</span><span class="s">&quot;kafka&quot;</span><span 
class="o">,</span> <span class="s">&quot;events-per-minute&quot;</span><span 
class="o">);</span>
+
+  <span class="kd">private</span> <span class="kt">int</span> <span 
class="n">eventsSeen</span> <span class="o">=</span> <span 
class="mi">0</span><span class="o">;</span>
 
-  private int eventsSeen = 0;
+  <span class="kd">public</span> <span class="kt">void</span> <span 
class="nf">process</span><span class="o">(</span><span 
class="n">IncomingMessageEnvelope</span> <span class="n">envelope</span><span 
class="o">,</span>
+                      <span class="n">MessageCollector</span> <span 
class="n">collector</span><span class="o">,</span>
+                      <span class="n">TaskCoordinator</span> <span 
class="n">coordinator</span><span class="o">)</span> <span class="o">{</span>
+    <span class="n">eventsSeen</span><span class="o">++;</span>
+  <span class="o">}</span>
+
+  <span class="kd">public</span> <span class="kt">void</span> <span 
class="nf">window</span><span class="o">(</span><span 
class="n">MessageCollector</span> <span class="n">collector</span><span 
class="o">,</span>
+                     <span class="n">TaskCoordinator</span> <span 
class="n">coordinator</span><span class="o">)</span> <span class="o">{</span>
+    <span class="n">collector</span><span class="o">.</span><span 
class="na">send</span><span class="o">(</span><span class="k">new</span> <span 
class="nf">OutgoingMessageEnvelope</span><span class="o">(</span><span 
class="n">OUTPUT_STREAM</span><span class="o">,</span> <span 
class="n">eventsSeen</span><span class="o">));</span>
+    <span class="n">eventsSeen</span> <span class="o">=</span> <span 
class="mi">0</span><span class="o">;</span>
+  <span class="o">}</span>
+<span class="o">}</span></code></pre></div>
 
-  public void process(IncomingMessageEnvelope envelope,
-                      MessageCollector collector,
-                      TaskCoordinator coordinator) {
-    eventsSeen++;
-  }
-
-  public void window(MessageCollector collector,
-                     TaskCoordinator coordinator) {
-    collector.send(new OutgoingMessageEnvelope(OUTPUT_STREAM, eventsSeen));
-    eventsSeen = 0;
-  }
-}
-</code></pre></div>
 <p>If you need to send messages to output streams, you can use the <a 
href="../api/javadocs/org/apache/samza/task/MessageCollector.html">MessageCollector</a>
 object passed to the window() method. Please only use that MessageCollector 
object for sending messages, and don&rsquo;t use it outside of the call to 
window().</p>
 
 <p>Note that Samza uses <a href="event-loop.html">single-threaded 
execution</a>, so the window() call can never happen concurrently with a 
process() call. This has the advantage that you don&rsquo;t need to worry about 
thread safety in your code (no need to synchronize anything), but the downside 
that the window() call may be delayed if your process() method takes a long 
time to return.</p>

Modified: incubator/samza/site/learn/documentation/0.7.0/index.html
URL: 
http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/0.7.0/index.html?rev=1609232&r1=1609231&r2=1609232&view=diff
==============================================================================
--- incubator/samza/site/learn/documentation/0.7.0/index.html (original)
+++ incubator/samza/site/learn/documentation/0.7.0/index.html Wed Jul  9 
16:37:01 2014
@@ -23,6 +23,7 @@
     <link href="/css/bootstrap.min.css" rel="stylesheet"/>
     <link href="/css/font-awesome.min.css" rel="stylesheet"/>
     <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
     <link rel="icon" type="image/png" href="/img/samza-icon.png">
   </head>
   <body>
@@ -172,6 +173,7 @@
   <li><a href="jobs/packaging.html">Packaging</a></li>
   <li><a href="jobs/yarn-jobs.html">YARN Jobs</a></li>
   <li><a href="jobs/logging.html">Logging</a></li>
+  <li><a href="jobs/reprocessing.html">Reprocessing</a></li>
 </ul>
 
 <h4>YARN</h4>

Modified: 
incubator/samza/site/learn/documentation/0.7.0/introduction/architecture.html
URL: 
http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/0.7.0/introduction/architecture.html?rev=1609232&r1=1609231&r2=1609232&view=diff
==============================================================================
--- 
incubator/samza/site/learn/documentation/0.7.0/introduction/architecture.html 
(original)
+++ 
incubator/samza/site/learn/documentation/0.7.0/introduction/architecture.html 
Wed Jul  9 16:37:01 2014
@@ -23,6 +23,7 @@
     <link href="/css/bootstrap.min.css" rel="stylesheet"/>
     <link href="/css/font-awesome.min.css" rel="stylesheet"/>
     <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
     <link rel="icon" type="image/png" href="/img/samza-icon.png">
   </head>
   <body>
@@ -201,8 +202,9 @@
 <h3 id="example">Example</h3>
 
 <p>Let&rsquo;s take a look at a real example: suppose we want to count the 
number of page views. In SQL, you would write something like:</p>
-<div class="highlight"><pre><code class="language-text" 
data-lang="text">SELECT user_id, COUNT(*) FROM PageViewEvent GROUP BY user_id
-</code></pre></div>
+
+<div class="highlight"><pre><code class="language-sql" data-lang="sql"><span 
class="k">SELECT</span> <span class="n">user_id</span><span class="p">,</span> 
<span class="k">COUNT</span><span class="p">(</span><span 
class="o">*</span><span class="p">)</span> <span class="k">FROM</span> <span 
class="n">PageViewEvent</span> <span class="k">GROUP</span> <span 
class="k">BY</span> <span class="n">user_id</span></code></pre></div>
+
 <p>Although Samza doesn&rsquo;t support SQL right now, the idea is the same. 
Two jobs are required to calculate this query: one to group messages by user 
ID, and the other to do the counting.</p>
 
 <p>In the first job, the grouping is done by sending all messages with the 
same user ID to the same partition of an intermediate topic. You can do this by 
using the user ID as key of the messages that are emitted by the first job, and 
this key is mapped to one of the intermediate topic&rsquo;s partitions (usually 
by taking a hash of the key mod the number of partitions). The second job 
consumes the intermediate topic. Each task in the second job consumes one 
partition of the intermediate topic, i.e. all the messages for a subset of user 
IDs. The task has a counter for each user ID in its partition, and the 
appropriate counter is incremented every time the task receives a message with 
a particular user ID.</p>

Modified: 
incubator/samza/site/learn/documentation/0.7.0/introduction/background.html
URL: 
http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/0.7.0/introduction/background.html?rev=1609232&r1=1609231&r2=1609232&view=diff
==============================================================================
--- incubator/samza/site/learn/documentation/0.7.0/introduction/background.html 
(original)
+++ incubator/samza/site/learn/documentation/0.7.0/introduction/background.html 
Wed Jul  9 16:37:01 2014
@@ -23,6 +23,7 @@
     <link href="/css/bootstrap.min.css" rel="stylesheet"/>
     <link href="/css/font-awesome.min.css" rel="stylesheet"/>
     <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
     <link rel="icon" type="image/png" href="/img/samza-icon.png">
   </head>
   <body>

Modified: 
incubator/samza/site/learn/documentation/0.7.0/introduction/concepts.html
URL: 
http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/0.7.0/introduction/concepts.html?rev=1609232&r1=1609231&r2=1609232&view=diff
==============================================================================
--- incubator/samza/site/learn/documentation/0.7.0/introduction/concepts.html 
(original)
+++ incubator/samza/site/learn/documentation/0.7.0/introduction/concepts.html 
Wed Jul  9 16:37:01 2014
@@ -23,6 +23,7 @@
     <link href="/css/bootstrap.min.css" rel="stylesheet"/>
     <link href="/css/font-awesome.min.css" rel="stylesheet"/>
     <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
     <link rel="icon" type="image/png" href="/img/samza-icon.png">
   </head>
   <body>


Reply via email to