Modified: incubator/samza/site/learn/documentation/latest/api/javadocs/stylesheet.css URL: http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/latest/api/javadocs/stylesheet.css?rev=1637130&r1=1637129&r2=1637130&view=diff ============================================================================== --- incubator/samza/site/learn/documentation/latest/api/javadocs/stylesheet.css (original) +++ incubator/samza/site/learn/documentation/latest/api/javadocs/stylesheet.css Thu Nov 6 15:03:11 2014 @@ -2,16 +2,19 @@ /* Overall document style */ + +@import url('resources/fonts/dejavu.css'); + body { background-color:#ffffff; color:#353833; - font-family:Arial, Helvetica, sans-serif; - font-size:76%; + font-family:'DejaVu Sans', Arial, Helvetica, sans-serif; + font-size:14px; margin:0; } a:link, a:visited { text-decoration:none; - color:#4c6b87; + color:#4A6782; } a:hover, a:focus { text-decoration:none; @@ -19,7 +22,7 @@ a:hover, a:focus { } a:active { text-decoration:none; - color:#4c6b87; + color:#4A6782; } a[name] { color:#353833; @@ -29,41 +32,51 @@ a[name]:hover { color:#353833; } pre { - font-size:1.3em; + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; } h1 { - font-size:1.8em; + font-size:20px; } h2 { - font-size:1.5em; + font-size:18px; } h3 { - font-size:1.4em; + font-size:16px; + font-style:italic; } h4 { - font-size:1.3em; + font-size:13px; } h5 { - font-size:1.2em; + font-size:12px; } h6 { - font-size:1.1em; + font-size:11px; } ul { list-style-type:disc; } code, tt { - font-size:1.2em; + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; + padding-top:4px; + margin-top:8px; + line-height:1.4em; } dt code { - font-size:1.2em; + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; + padding-top:4px; } table tr td dt code { - font-size:1.2em; + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; vertical-align:top; + padding-top:4px; } sup { - font-size:.6em; + font-size:8px; } /* Document title and 
Copyright styles @@ -76,9 +89,9 @@ Document title and Copyright styles .aboutLanguage { float:right; padding:0px 21px; - font-size:.8em; + font-size:11px; z-index:200; - margin-top:-7px; + margin-top:-9px; } .legalCopy { margin-left:.5em; @@ -92,9 +105,6 @@ Document title and Copyright styles } .tab { background-color:#0066FF; - background-image:url(resources/titlebar.gif); - background-position:left top; - background-repeat:no-repeat; color:#ffffff; padding:8px; width:5em; @@ -104,17 +114,15 @@ Document title and Copyright styles Navigation bar styles */ .bar { - background-image:url(resources/background.gif); - background-repeat:repeat-x; + background-color:#4D7A97; color:#FFFFFF; padding:.8em .5em .4em .8em; height:auto;/*height:1.8em;*/ - font-size:1em; + font-size:11px; margin:0; } .topNav { - background-image:url(resources/background.gif); - background-repeat:repeat-x; + background-color:#4D7A97; color:#FFFFFF; float:left; padding:0; @@ -123,11 +131,11 @@ Navigation bar styles height:2.8em; padding-top:10px; overflow:hidden; + font-size:12px; } .bottomNav { margin-top:10px; - background-image:url(resources/background.gif); - background-repeat:repeat-x; + background-color:#4D7A97; color:#FFFFFF; float:left; padding:0; @@ -136,18 +144,20 @@ Navigation bar styles height:2.8em; padding-top:10px; overflow:hidden; + font-size:12px; } .subNav { background-color:#dee3e9; - border-bottom:1px solid #9eadc0; float:left; width:100%; overflow:hidden; + font-size:12px; } .subNav div { clear:left; float:left; padding:0 0 5px 6px; + text-transform:uppercase; } ul.navList, ul.subNavList { float:left; @@ -157,27 +167,33 @@ ul.navList, ul.subNavList { ul.navList li{ list-style:none; float:left; - padding:3px 6px; + padding: 5px 6px; + text-transform:uppercase; } ul.subNavList li{ list-style:none; float:left; - font-size:90%; } .topNav a:link, .topNav a:active, .topNav a:visited, .bottomNav a:link, .bottomNav a:active, .bottomNav a:visited { color:#FFFFFF; text-decoration:none; 
+ text-transform:uppercase; } .topNav a:hover, .bottomNav a:hover { text-decoration:none; color:#bb7a2a; + text-transform:uppercase; } .navBarCell1Rev { - background-image:url(resources/tab.gif); - background-color:#a88834; - color:#FFFFFF; + background-color:#F8981D; + color:#253441; margin: auto 5px; - border:1px solid #c9aa44; +} +.skipNav { + position:absolute; + top:auto; + left:-9999px; + overflow:hidden; } /* Page header and footer styles @@ -191,8 +207,11 @@ Page header and footer styles margin:10px; position:relative; } +.indexHeader span{ + margin-right:15px; +} .indexHeader h1 { - font-size:1.3em; + font-size:13px; } .title { color:#2c4557; @@ -202,7 +221,7 @@ Page header and footer styles margin:5px 0 0 0; } .header ul { - margin:0 0 25px 0; + margin:0 0 15px 0; padding:0; } .footer ul { @@ -210,24 +229,22 @@ Page header and footer styles } .header ul li, .footer ul li { list-style:none; - font-size:1.2em; + font-size:13px; } /* Heading styles */ div.details ul.blockList ul.blockList ul.blockList li.blockList h4, div.details ul.blockList ul.blockList ul.blockListLast li.blockList h4 { background-color:#dee3e9; - border-top:1px solid #9eadc0; - border-bottom:1px solid #9eadc0; + border:1px solid #d0d9e0; margin:0 0 6px -8px; - padding:2px 5px; + padding:7px 5px; } ul.blockList ul.blockList ul.blockList li.blockList h3 { background-color:#dee3e9; - border-top:1px solid #9eadc0; - border-bottom:1px solid #9eadc0; + border:1px solid #d0d9e0; margin:0 0 6px -8px; - padding:2px 5px; + padding:7px 5px; } ul.blockList ul.blockList li.blockList h3 { padding:0; @@ -247,10 +264,10 @@ Page layout container styles .indexContainer { margin:10px; position:relative; - font-size:1.0em; + font-size:12px; } .indexContainer h2 { - font-size:1.1em; + font-size:13px; padding:0 0 3px 0; } .indexContainer ul { @@ -259,15 +276,18 @@ Page layout container styles } .indexContainer ul li { list-style:none; + padding-top:2px; } .contentContainer .description dl dt, 
.contentContainer .details dl dt, .serializedFormContainer dl dt { - font-size:1.1em; + font-size:12px; font-weight:bold; margin:10px 0 0 0; color:#4E4E4E; } .contentContainer .description dl dd, .contentContainer .details dl dd, .serializedFormContainer dl dd { - margin:10px 0 10px 20px; + margin:5px 0 10px 0px; + font-size:14px; + font-family:'DejaVu Sans Mono',monospace; } .serializedFormContainer dl.nameValue dt { margin-left:1px; @@ -306,25 +326,24 @@ ul.blockList, ul.blockListLast { } ul.blockList li.blockList, ul.blockListLast li.blockList { list-style:none; - margin-bottom:25px; + margin-bottom:15px; + line-height:1.4; } ul.blockList ul.blockList li.blockList, ul.blockList ul.blockListLast li.blockList { padding:0px 20px 5px 10px; - border:1px solid #9eadc0; - background-color:#f9f9f9; + border:1px solid #ededed; + background-color:#f8f8f8; } ul.blockList ul.blockList ul.blockList li.blockList, ul.blockList ul.blockList ul.blockListLast li.blockList { padding:0 0 5px 8px; background-color:#ffffff; - border:1px solid #9eadc0; - border-top:none; + border:none; } ul.blockList ul.blockList ul.blockList ul.blockList li.blockList { margin-left:0; padding-left:0; padding-bottom:15px; border:none; - border-bottom:1px solid #9eadc0; } ul.blockList ul.blockList ul.blockList ul.blockList li.blockListLast { list-style:none; @@ -338,107 +357,155 @@ table tr td dl, table tr td dl dt, table /* Table styles */ -.contentContainer table, .classUseContainer table, .constantValuesContainer table { - border-bottom:1px solid #9eadc0; - width:100%; -} -.contentContainer ul li table, .classUseContainer ul li table, .constantValuesContainer ul li table { +.overviewSummary, .memberSummary, .typeSummary, .useSummary, .constantsSummary, .deprecatedSummary { width:100%; + border-left:1px solid #EEE; + border-right:1px solid #EEE; + border-bottom:1px solid #EEE; } -.contentContainer .description table, .contentContainer .details table { - border-bottom:none; -} -.contentContainer ul li 
table th.colOne, .contentContainer ul li table th.colFirst, .contentContainer ul li table th.colLast, .classUseContainer ul li table th, .constantValuesContainer ul li table th, .contentContainer ul li table td.colOne, .contentContainer ul li table td.colFirst, .contentContainer ul li table td.colLast, .classUseContainer ul li table td, .constantValuesContainer ul li table td{ - vertical-align:top; - padding-right:20px; -} -.contentContainer ul li table th.colLast, .classUseContainer ul li table th.colLast,.constantValuesContainer ul li table th.colLast, -.contentContainer ul li table td.colLast, .classUseContainer ul li table td.colLast,.constantValuesContainer ul li table td.colLast, -.contentContainer ul li table th.colOne, .classUseContainer ul li table th.colOne, -.contentContainer ul li table td.colOne, .classUseContainer ul li table td.colOne { - padding-right:3px; +.overviewSummary, .memberSummary { + padding:0px; } -.overviewSummary caption, .packageSummary caption, .contentContainer ul.blockList li.blockList caption, .summary caption, .classUseContainer caption, .constantValuesContainer caption { +.overviewSummary caption, .memberSummary caption, .typeSummary caption, +.useSummary caption, .constantsSummary caption, .deprecatedSummary caption { position:relative; text-align:left; background-repeat:no-repeat; - color:#FFFFFF; + color:#253441; font-weight:bold; clear:none; overflow:hidden; padding:0px; + padding-top:10px; + padding-left:1px; margin:0px; + white-space:pre; } -caption a:link, caption a:hover, caption a:active, caption a:visited { +.overviewSummary caption a:link, .memberSummary caption a:link, .typeSummary caption a:link, +.useSummary caption a:link, .constantsSummary caption a:link, .deprecatedSummary caption a:link, +.overviewSummary caption a:hover, .memberSummary caption a:hover, .typeSummary caption a:hover, +.useSummary caption a:hover, .constantsSummary caption a:hover, .deprecatedSummary caption a:hover, +.overviewSummary caption 
a:active, .memberSummary caption a:active, .typeSummary caption a:active, +.useSummary caption a:active, .constantsSummary caption a:active, .deprecatedSummary caption a:active, +.overviewSummary caption a:visited, .memberSummary caption a:visited, .typeSummary caption a:visited, +.useSummary caption a:visited, .constantsSummary caption a:visited, .deprecatedSummary caption a:visited { color:#FFFFFF; } -.overviewSummary caption span, .packageSummary caption span, .contentContainer ul.blockList li.blockList caption span, .summary caption span, .classUseContainer caption span, .constantValuesContainer caption span { +.overviewSummary caption span, .memberSummary caption span, .typeSummary caption span, +.useSummary caption span, .constantsSummary caption span, .deprecatedSummary caption span { white-space:nowrap; - padding-top:8px; - padding-left:8px; - display:block; + padding-top:5px; + padding-left:12px; + padding-right:12px; + padding-bottom:7px; + display:inline-block; float:left; - background-image:url(resources/titlebar.gif); - height:18px; + background-color:#F8981D; + border: none; + height:16px; } -.overviewSummary .tabEnd, .packageSummary .tabEnd, .contentContainer ul.blockList li.blockList .tabEnd, .summary .tabEnd, .classUseContainer .tabEnd, .constantValuesContainer .tabEnd { - width:10px; - background-image:url(resources/titlebar_end.gif); - background-repeat:no-repeat; - background-position:top right; - position:relative; +.memberSummary caption span.activeTableTab span { + white-space:nowrap; + padding-top:5px; + padding-left:12px; + padding-right:12px; + margin-right:3px; + display:inline-block; float:left; + background-color:#F8981D; + height:16px; } -ul.blockList ul.blockList li.blockList table { - margin:0 0 12px 0px; - width:100%; +.memberSummary caption span.tableTab span { + white-space:nowrap; + padding-top:5px; + padding-left:12px; + padding-right:12px; + margin-right:3px; + display:inline-block; + float:left; + background-color:#4D7A97; + 
height:16px; } -.tableSubHeadingColor { - background-color: #EEEEFF; +.memberSummary caption span.tableTab, .memberSummary caption span.activeTableTab { + padding-top:0px; + padding-left:0px; + padding-right:0px; + background-image:none; + float:none; + display:inline; } -.altColor { - background-color:#eeeeef; +.overviewSummary .tabEnd, .memberSummary .tabEnd, .typeSummary .tabEnd, +.useSummary .tabEnd, .constantsSummary .tabEnd, .deprecatedSummary .tabEnd { + display:none; + width:5px; + position:relative; + float:left; + background-color:#F8981D; } -.rowColor { - background-color:#ffffff; +.memberSummary .activeTableTab .tabEnd { + display:none; + width:5px; + margin-right:3px; + position:relative; + float:left; + background-color:#F8981D; +} +.memberSummary .tableTab .tabEnd { + display:none; + width:5px; + margin-right:3px; + position:relative; + background-color:#4D7A97; + float:left; + } -.overviewSummary td, .packageSummary td, .contentContainer ul.blockList li.blockList td, .summary td, .classUseContainer td, .constantValuesContainer td { +.overviewSummary td, .memberSummary td, .typeSummary td, +.useSummary td, .constantsSummary td, .deprecatedSummary td { text-align:left; - padding:3px 3px 3px 7px; + padding:0px 0px 12px 10px; + width:100%; } -th.colFirst, th.colLast, th.colOne, .constantValuesContainer th { +th.colOne, th.colFirst, th.colLast, .useSummary th, .constantsSummary th, +td.colOne, td.colFirst, td.colLast, .useSummary td, .constantsSummary td{ + vertical-align:top; + padding-right:0px; + padding-top:8px; + padding-bottom:3px; +} +th.colFirst, th.colLast, th.colOne, .constantsSummary th { background:#dee3e9; - border-top:1px solid #9eadc0; - border-bottom:1px solid #9eadc0; text-align:left; - padding:3px 3px 3px 7px; -} -td.colOne a:link, td.colOne a:active, td.colOne a:visited, td.colOne a:hover, td.colFirst a:link, td.colFirst a:active, td.colFirst a:visited, td.colFirst a:hover, td.colLast a:link, td.colLast a:active, td.colLast a:visited, 
td.colLast a:hover, .constantValuesContainer td a:link, .constantValuesContainer td a:active, .constantValuesContainer td a:visited, .constantValuesContainer td a:hover { - font-weight:bold; + padding:8px 3px 3px 7px; } td.colFirst, th.colFirst { - border-left:1px solid #9eadc0; white-space:nowrap; + font-size:13px; } td.colLast, th.colLast { - border-right:1px solid #9eadc0; + font-size:13px; } td.colOne, th.colOne { - border-right:1px solid #9eadc0; - border-left:1px solid #9eadc0; -} -table.overviewSummary { - padding:0px; - margin-left:0px; + font-size:13px; } -table.overviewSummary td.colFirst, table.overviewSummary th.colFirst, -table.overviewSummary td.colOne, table.overviewSummary th.colOne { +.overviewSummary td.colFirst, .overviewSummary th.colFirst, +.overviewSummary td.colOne, .overviewSummary th.colOne, +.memberSummary td.colFirst, .memberSummary th.colFirst, +.memberSummary td.colOne, .memberSummary th.colOne, +.typeSummary td.colFirst{ width:25%; - vertical-align:middle; + vertical-align:top; } -table.packageSummary td.colFirst, table.overviewSummary th.colFirst { - width:25%; - vertical-align:middle; +td.colOne a:link, td.colOne a:active, td.colOne a:visited, td.colOne a:hover, td.colFirst a:link, td.colFirst a:active, td.colFirst a:visited, td.colFirst a:hover, td.colLast a:link, td.colLast a:active, td.colLast a:visited, td.colLast a:hover, .constantValuesContainer td a:link, .constantValuesContainer td a:active, .constantValuesContainer td a:visited, .constantValuesContainer td a:hover { + font-weight:bold; +} +.tableSubHeadingColor { + background-color:#EEEEFF; +} +.altColor { + background-color:#FFFFFF; +} +.rowColor { + background-color:#EEEEEF; } /* Content styles @@ -453,6 +520,24 @@ Content styles .docSummary { padding:0; } + +ul.blockList ul.blockList ul.blockList li.blockList h3 { + font-style:normal; +} + +div.block { + font-size:14px; + font-family:'DejaVu Serif', Georgia, "Times New Roman", Times, serif; +} + +td.colLast div { + 
padding-top:0px; +} + + +td.colLast a { + padding-bottom:3px; +} /* Formatting effect styles */ @@ -463,12 +548,27 @@ Formatting effect styles h1.hidden { visibility:hidden; overflow:hidden; - font-size:.9em; + font-size:10px; } .block { display:block; - margin:3px 0 0 0; + margin:3px 10px 2px 0px; + color:#474747; } -.strong { +.deprecatedLabel, .descfrmTypeLabel, .memberNameLabel, .memberNameLink, +.overrideSpecifyLabel, .packageHierarchyLabel, .paramLabel, .returnLabel, +.seeLabel, .simpleTagLabel, .throwsLabel, .typeNameLabel, .typeNameLink { font-weight:bold; } +.deprecationComment, .emphasizedPhrase, .interfaceName { + font-style:italic; +} + +div.block div.block span.deprecationComment, div.block div.block span.emphasizedPhrase, +div.block div.block span.interfaceName { + font-style:normal; +} + +div.contentContainer ul.blockList li.blockList h2{ + padding-bottom:0px; +}
Modified: incubator/samza/site/learn/documentation/latest/container/event-loop.html URL: http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/latest/container/event-loop.html?rev=1637130&r1=1637129&r2=1637130&view=diff ============================================================================== --- incubator/samza/site/learn/documentation/latest/container/event-loop.html (original) +++ incubator/samza/site/learn/documentation/latest/container/event-loop.html Thu Nov 6 15:03:11 2014 @@ -162,21 +162,11 @@ <p>The container does this, in a loop, until it is shut down. Note that although there can be multiple task instances within a container (depending on the number of input stream partitions), their process() and window() methods are all called on the same thread, never concurrently on different threads.</p> -<h3 id="lifecycle-listeners">Lifecycle Listeners</h3> +<h3 id="lifecycle">Lifecycle</h3> -<p>Sometimes, you need to run your own code at specific points in a task’s lifecycle. For example, you might want to set up some context in the container whenever a new message arrives, or perform some operations on startup or shutdown.</p> +<p>The only way in which a developer can hook into a SamzaContainer’s lifecycle is through the standard InitableTask, ClosableTask, StreamTask, and WindowableTask. In cases where pluggable logic needs to be added to wrap a StreamTask, the StreamTask can be wrapped by another StreamTask implementation that handles the custom logic before calling into the wrapped StreamTask.</p> -<p>To receive notifications when such events happen, you can implement the <a href="../api/javadocs/org/apache/samza/task/TaskLifecycleListenerFactory.html">TaskLifecycleListenerFactory</a> interface. 
It returns a <a href="../api/javadocs/org/apache/samza/task/TaskLifecycleListener.html">TaskLifecycleListener</a>, whose methods are called by Samza at the appropriate times.</p> - -<p>You can then tell Samza to use your lifecycle listener with the following properties in your job configuration:</p> - -<div class="highlight"><pre><code class="jproperties"><span class="c"># Define a listener called "my-listener" by giving the factory class name</span> -<span class="na">task.lifecycle.listener.my-listener.class</span><span class="o">=</span><span class="s">com.example.foo.MyListenerFactory</span> - -<span class="c"># Enable it in this job (multiple listeners can be separated by commas)</span> -<span class="na">task.lifecycle.listeners</span><span class="o">=</span><span class="s">my-listener</span></code></pre></div> - -<p>The Samza container creates one instance of your <a href="../api/javadocs/org/apache/samza/task/TaskLifecycleListener.html">TaskLifecycleListener</a>. If the container has multiple task instances (processing different input stream partitions), the beforeInit, afterInit, beforeClose and afterClose methods are called for each task instance. The <a href="../api/javadocs/org/apache/samza/task/TaskContext.html">TaskContext</a> argument of those methods gives you more information about the partitions.</p> +<p>A concrete example is a set of StreamTasks that all want to share the same try/catch logic in their process() method. A StreamTask can be implemented that wraps the original StreamTasks, and surrounds the original process() call with the appropriate try/catch logic. 
For more details, see <a href="https://issues.apache.org/jira/browse/SAMZA-437">this discussion</a>.</p> <h2 id="jmx-»"><a href="jmx.html">JMX »</a></h2> Modified: incubator/samza/site/learn/documentation/latest/container/state-management.html URL: http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/latest/container/state-management.html?rev=1637130&r1=1637129&r2=1637130&view=diff ============================================================================== --- incubator/samza/site/learn/documentation/latest/container/state-management.html (original) +++ incubator/samza/site/learn/documentation/latest/container/state-management.html Thu Nov 6 15:03:11 2014 @@ -255,16 +255,16 @@ <h3 id="key-value-storage">Key-value storage</h3> -<p>Any storage engine can be plugged into Samza, as described below. Out of the box, Samza ships with a key-value store implementation that is built on <a href="https://code.google.com/p/leveldb">LevelDB</a> using a <a href="https://github.com/fusesource/leveldbjni">JNI API</a>.</p> +<p>Any storage engine can be plugged into Samza, as described below. Out of the box, Samza ships with a key-value store implementation that is built on <a href="http://rocksdb.org">RocksDB</a> using a <a href="https://github.com/facebook/rocksdb/wiki/RocksJava-Basics">JNI API</a>.</p> -<p>LevelDB has several nice properties. Its memory allocation is outside of the Java heap, which makes it more memory-efficient and less prone to garbage collection pauses than a Java-based storage engine. It is very fast for small datasets that fit in memory; datasets larger than memory are slower but still possible. It is <a href="http://www.igvita.com/2012/02/06/sstable-and-log-structured-storage-leveldb/">log-structured</a>, allowing very fast writes. It also includes support for block compression, which helps to reduce I/O and memory usage.</p> +<p>RocksDB has several nice properties. 
Its memory allocation is outside of the Java heap, which makes it more memory-efficient and less prone to garbage collection pauses than a Java-based storage engine. It is very fast for small datasets that fit in memory; datasets larger than memory are slower but still possible. It is <a href="http://www.igvita.com/2012/02/06/sstable-and-log-structured-storage-leveldb/">log-structured</a>, allowing very fast writes. It also includes support for block compression, which helps to reduce I/O and memory usage.</p> -<p>Samza includes an additional in-memory caching layer in front of LevelDB, which avoids the cost of deserialization for frequently-accessed objects and batches writes. If the same key is updated multiple times in quick succession, the batching coalesces those updates into a single write. The writes are flushed to the changelog when a task <a href="checkpointing.html">commits</a>.</p> +<p>Samza includes an additional in-memory caching layer in front of RocksDB, which avoids the cost of deserialization for frequently-accessed objects and batches writes. If the same key is updated multiple times in quick succession, the batching coalesces those updates into a single write. 
The writes are flushed to the changelog when a task <a href="checkpointing.html">commits</a>.</p> <p>To use a key-value store in your job, add the following to your job config:</p> <div class="highlight"><pre><code class="jproperties"><span class="c"># Use the key-value store implementation for a store called "my-store"</span> -<span class="na">stores.my-store.factory</span><span class="o">=</span><span class="s">org.apache.samza.storage.kv.KeyValueStorageEngineFactory</span> +<span class="na">stores.my-store.factory</span><span class="o">=</span><span class="s">org.apache.samza.storage.kv.RocksDbKeyValueStorageEngineFactory</span> <span class="c"># Use the Kafka topic "my-store-changelog" as the changelog stream for this store.</span> <span class="c"># This enables automatic recovery of the store after a failure. If you don't</span> @@ -305,11 +305,21 @@ <span class="n">KeyValueIterator</span><span class="o">&lt;</span><span class="n">K</span><span class="o">,</span><span class="n">V</span><span class="o">&gt;</span> <span class="n">all</span><span class="o">();</span> <span class="o">}</span></code></pre></div> -<p>Additional configuration properties for the key-value store are documented in the <a href="../jobs/configuration-table.html#keyvalue">configuration reference</a>.</p> +<p>Additional configuration properties for the key-value store are documented in the <a href="../jobs/configuration-table.html#keyvalue-rocksdb">configuration reference</a>.</p> + +<h4 id="known-issues">Known Issues</h4> + +<p>RocksDB has several rough edges. It’s recommended that you read the RocksDB <a href="https://github.com/facebook/rocksdb/wiki/RocksDB-Tuning-Guide">tuning guide</a>. Some other notes to be aware of are:</p> + +<ol> +<li>RocksDB is heavily optimized to run with SSD hard disks. 
Performance on non-SSDs degrades significantly.</li> +<li>Samza’s KeyValueStorageEngine.putAll() method does not currently use RocksDB’s batching-put API because it’s <a href="https://github.com/facebook/rocksdb/issues/262">non-functional in Java</a>.</li> +<li>Calling iterator.seekToFirst() is very slow <a href="https://github.com/facebook/rocksdb/issues/261">if there are a lot of deletes in the store</a>.</li> +</ol> <h3 id="implementing-common-use-cases-with-the-key-value-store">Implementing common use cases with the key-value store</h3> -<p>Earlier in this section we discussed some example use cases for stateful stream processing. Let’s look at how each of these could be implemented using a key-value storage engine such as Samza’s LevelDB.</p> +<p>Earlier in this section we discussed some example use cases for stateful stream processing. Let’s look at how each of these could be implemented using a key-value storage engine such as Samza’s RocksDB store.</p> <h4 id="windowed-aggregation">Windowed aggregation</h4> Modified: incubator/samza/site/learn/documentation/latest/jobs/configuration-table.html URL: http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/latest/jobs/configuration-table.html?rev=1637130&r1=1637129&r2=1637130&view=diff ============================================================================== --- incubator/samza/site/learn/documentation/latest/jobs/configuration-table.html (original) +++ incubator/samza/site/learn/documentation/latest/jobs/configuration-table.html Thu Nov 6 15:03:11 2014 @@ -188,8 +188,8 @@ </tr> <tr> - <td class="property" id="job-systemstreampartition-grouper-factory">job.systemstreampartition.grouper.factory</td> - <td class="default">org.apache.samza.container.grouper.stream.GroupByPartitionFactory</td> + <td class="property" id="job-systemstreampartition-grouper-factory">job.systemstreampartition.<br>grouper.factory</td> + <td 
class="default">org.apache.samza.<br>container.grouper.stream.<br>GroupByPartitionFactory</td> <td class="description"> A factory class that is used to determine how input SystemStreamPartitions are grouped together for processing in individual StreamTask instances. The factory must implement the SystemStreamPartitionGrouperFactory interface. Once this configuration is set, it can't be changed, since doing so could violate state semantics, and lead to a loss of data. @@ -354,31 +354,6 @@ </tr> <tr> - <td class="property" id="task-lifecycle-listener-class">task.lifecycle.listener.<br><span class="listener">listener-name</span>.class</td> - <td class="default"></td> - <td class="description"> - Use this property to register a - <a href="../container/event-loop.html#lifecycle-listeners">lifecycle listener</a>, which can receive - a notification when a container starts up or shuts down, or when a message is processed. - The value is the fully-qualified name of a Java class that implements - <a href="../api/javadocs/org/apache/samza/task/TaskLifecycleListenerFactory.html">TaskLifecycleListenerFactory</a>. - You can define multiple lifecycle listeners, each with a different <span class="listener">listener-name</span>, - and reference them in <a href="#task-lifecycle-listeners" class="property">task.lifecycle.listeners</a>. - </td> - </tr> - - <tr> - <td class="property" id="task-lifecycle-listeners">task.lifecycle.listeners</td> - <td class="default"></td> - <td class="description"> - If you have defined <a href="../container/event-loop.html#lifecycle-listeners">lifecycle listeners</a> with - <a href="#task-lifecycle-listener-class" class="property">task.lifecycle.listener.*.class</a>, - you need to list them here in order to enable them. The value of this property is a - comma-separated list of <span class="listener">listener-name</span> tokens. 
- </td> - </tr> - - <tr> <td class="property" id="task-drop-deserialization-errors">task.drop.deserialization.errors</td> <td class="default"></td> <td class="description"> @@ -844,13 +819,18 @@ <a href="../api/javadocs/org/apache/samza/task/InitableTask.html#init(org.apache.samza.config.Config, org.apache.samza.task.TaskContext)">init()</a> method). The value of this property is the fully-qualified name of a Java class that implements <a href="../api/javadocs/org/apache/samza/storage/StorageEngineFactory.html">StorageEngineFactory</a>. - Samza currently ships with one storage engine implementation: + Samza currently ships with two storage engine implementations: <dl> - <dt><code>org.apache.samza.storage.kv.KeyValueStorageEngineFactory</code></dt> + <dt><code>org.apache.samza.storage.kv.LevelDbKeyValueStorageEngineFactory</code></dt> <dd>An on-disk storage engine with a key-value interface, implemented using <a href="https://code.google.com/p/leveldb/">LevelDB</a>. It supports fast random-access reads and writes, as well as range queries on keys. LevelDB can be configured with - various <a href="#keyvalue">additional tuning parameters</a>.</dd> + various <a href="#keyvalue-leveldb">additional tuning parameters</a>.</dd> + <dt><code>org.apache.samza.storage.kv.RocksDbKeyValueStorageEngineFactory</code></dt> + <dd>An on-disk storage engine with a key-value interface, implemented using + <a href="http://rocksdb.org/">RocksDB</a>. It supports fast random-access + reads and writes, as well as range queries on keys. 
RocksDB can be configured with + various <a href="#keyvalue-rocksdb">additional tuning parameters</a>.</dd> </dl> </td> </tr> @@ -900,18 +880,143 @@ </tr> <tr> - <th colspan="3" class="section" id="keyvalue"> + <th colspan="3" class="section" id="keyvalue-rocksdb"> + Using RocksDB for key-value storage<br> + <span class="subtitle"> + (This section applies if you have set + <a href="#stores-factory" class="property">stores.*.factory</a> + <code>= org.apache.samza.storage.kv.RocksDbKeyValueStorageEngineFactory</code>) + </span> + </th> + </tr> + + <tr> + <td class="property" id="stores-rocksdb-write-batch-size">stores.<span class="store">store-name</span>.<br>write.batch.size</td> + <td class="default">500</td> + <td class="description"> + For better write performance, the storage engine buffers writes and applies them + to the underlying store in a batch. If the same key is written multiple times + in quick succession, this buffer also deduplicates writes to the same key. This + property is set to the number of key/value pairs that should be kept in this + in-memory buffer, per task instance. The number cannot be greater than + <a href="#stores-rocksdb-object-cache-size" class="property">stores.*.object.cache.size</a>. + </td> + </tr> + + <tr> + <td class="property" id="stores-rocksdb-object-cache-size">stores.<span class="store">store-name</span>.<br>object.cache.size</td> + <td class="default">1000</td> + <td class="description"> + Samza maintains an additional cache in front of RocksDB for frequently-accessed + objects. This cache contains deserialized objects (avoiding the deserialization + overhead on cache hits), in contrast to the RocksDB block cache + (<a href="#stores-rocksdb-container-cache-size-bytes" class="property">stores.*.container.cache.size.bytes</a>), + which caches serialized objects. This property determines the number of objects + to keep in Samza's cache, per task instance. 
This same cache is also used for + write buffering (see <a href="#stores-rocksdb-write-batch-size" class="property">stores.*.write.batch.size</a>). + A value of 0 disables all caching and batching. + </td> + </tr> + + <tr> + <td class="property" id="stores-rocksdb-container-cache-size-bytes">stores.<span class="store">store-name</span>.container.<br>cache.size.bytes</td> + <td class="default">104857600</td> + <td class="description"> + The size of RocksDB's block cache in bytes, per container. If there are several + task instances within one container, each is given a proportional share of this cache. + Note that this is an off-heap memory allocation, so the container's total memory use + is the maximum JVM heap size <em>plus</em> the size of this cache. + </td> + </tr> + + <tr> + <td class="property" id="stores-rocksdb-container-write-buffer-size-bytes">stores.<span class="store">store-name</span>.container.<br>write.buffer.size.bytes</td> + <td class="default">33554432</td> + <td class="description"> + The amount of memory (in bytes) that RocksDB uses for buffering writes before they are + written to disk, per container. If there are several task instances within one + container, each is given a proportional share of this buffer. This setting also + determines the size of RocksDB's segment files. + </td> + </tr> + + <tr> + <td class="property" id="stores-rocksdb-compression">stores.<span class="store">store-name</span>.<br>rocksdb.compression</td> + <td class="default">snappy</td> + <td class="description"> + This property controls whether RocksDB should compress data on disk and in the + block cache. 
The following values are valid: + <dl> + <dt><code>snappy</code></dt> + <dd>Compress data using the <a href="https://code.google.com/p/snappy/">Snappy</a> codec.</dd> + <dt><code>bzip2</code></dt> + <dd>Compress data using the <a href="http://en.wikipedia.org/wiki/Bzip2">bzip2</a> codec.</dd> + <dt><code>zlib</code></dt> + <dd>Compress data using the <a href="http://en.wikipedia.org/wiki/Zlib">zlib</a> codec.</dd> + <dt><code>lz4</code></dt> + <dd>Compress data using the <a href="https://code.google.com/p/lz4/">lz4</a> codec.</dd> + <dt><code>lz4hc</code></dt> + <dd>Compress data using the <a href="https://code.google.com/p/lz4/">lz4hc</a> (high compression) codec.</dd> + <dt><code>none</code></dt> + <dd>Do not compress data.</dd> + </dl> + </td> + </tr> + + <tr> + <td class="property" id="stores-rocksdb-block-size-bytes">stores.<span class="store">store-name</span>.<br>rocksdb.block.size.bytes</td> + <td class="default">4096</td> + <td class="description"> + If compression is enabled, RocksDB groups approximately this many uncompressed bytes + into one compressed block. You probably don't need to change this property. + </td> + </tr> + + <tr> + <td class="property" id="stores-rocksdb-bloomfilter-bits">stores.<span class="store">store-name</span>.<br>rocksdb.bloomfilter.bits</td> + <td class="default">10</td> + <td class="description"> + In RocksDB, every SST file <a href="https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter">contains a Bloom filter</a>, which is used to determine if the file may contain a given key. Setting the bloom filter bit size allows developers to make the trade-off between the accuracy of the bloom filter, and its memory usage. 
+ </td> + </tr> + + <tr> + <td class="property" id="stores-rocksdb-compaction-style">stores.<span class="store">store-name</span>.<br>rocksdb.compaction.style</td> + <td class="default">universal</td> + <td class="description"> + This property controls the compaction style that RocksDB will employ when compacting its levels. The following values are valid: + <dl> + <dt><code>universal</code></dt> + <dd>Use <a href="https://github.com/facebook/rocksdb/wiki/Universal-Compaction">universal</a> compaction.</dd> + <dt><code>fifo</code></dt> + <dd>Use <a href="https://github.com/facebook/rocksdb/wiki/FIFO-compaction-style">FIFO</a> compaction.</dd> + <dt><code>level</code></dt> + <dd>Use LevelDB's standard leveled compaction.</dd> + </dl> + </td> + </tr> + + <tr> + <td class="property" id="stores-rocksdb-num-write-buffers">stores.<span class="store">store-name</span>.<br>rocksdb.num.write.buffers</td> + <td class="default">3</td> + <td class="description"> + Configures the <a href="https://github.com/facebook/rocksdb/wiki/Basic-Operations#write-buffer">number of write buffers</a> that a RocksDB store uses. This allows RocksDB to continue taking writes to other buffers even while a given write buffer is being flushed to disk. 
+ </td> + </tr> + + <tr> + <th colspan="3" class="section" id="keyvalue-leveldb"> Using LevelDB for key-value storage<br> <span class="subtitle"> (This section applies if you have set <a href="#stores-factory" class="property">stores.*.factory</a> - <code>= org.apache.samza.storage.kv.KeyValueStorageEngineFactory</code>) + <code>= org.apache.samza.storage.kv.LevelDbKeyValueStorageEngineFactory</code>) </span> </th> </tr> <tr> - <td class="property" id="stores-write-batch-size">stores.<span class="store">store-name</span>.<br>write.batch.size</td> + <td class="property" id="stores-leveldb-write-batch-size">stores.<span class="store">store-name</span>.<br>write.batch.size</td> <td class="default">500</td> <td class="description"> For better write performance, the storage engine buffers writes and applies them @@ -919,27 +1024,27 @@ in quick succession, this buffer also deduplicates writes to the same key. This property is set to the number of key/value pairs that should be kept in this in-memory buffer, per task instance. The number cannot be greater than - <a href="#stores-object-cache-size" class="property">stores.*.object.cache.size</a>. + <a href="#stores-leveldb-object-cache-size" class="property">stores.*.object.cache.size</a>. </td> </tr> <tr> - <td class="property" id="stores-object-cache-size">stores.<span class="store">store-name</span>.<br>object.cache.size</td> + <td class="property" id="stores-leveldb-object-cache-size">stores.<span class="store">store-name</span>.<br>object.cache.size</td> <td class="default">1000</td> <td class="description"> Samza maintains an additional cache in front of LevelDB for frequently-accessed objects. 
This cache contains deserialized objects (avoiding the deserialization overhead on cache hits), in contrast to the LevelDB block cache - (<a href="#stores-container-cache-size-bytes" class="property">stores.*.container.cache.size.bytes</a>), + (<a href="#stores-leveldb-container-cache-size-bytes" class="property">stores.*.container.cache.size.bytes</a>), which caches serialized objects. This property determines the number of objects to keep in Samza's cache, per task instance. This same cache is also used for - write buffering (see <a href="#stores-write-batch-size" class="property">stores.*.write.batch.size</a>). + write buffering (see <a href="#stores-leveldb-write-batch-size" class="property">stores.*.write.batch.size</a>). A value of 0 disables all caching and batching. </td> </tr> <tr> - <td class="property" id="stores-container-cache-size-bytes">stores.<span class="store">store-name</span>.container.<br>cache.size.bytes</td> + <td class="property" id="stores-leveldb-container-cache-size-bytes">stores.<span class="store">store-name</span>.container.<br>cache.size.bytes</td> <td class="default">104857600</td> <td class="description"> The size of LevelDB's block cache in bytes, per container. 
If there are several @@ -950,7 +1055,7 @@ </tr> <tr> - <td class="property" id="stores-container-write-buffer-size-bytes">stores.<span class="store">store-name</span>.container.<br>write.buffer.size.bytes</td> + <td class="property" id="stores-leveldb-container-write-buffer-size-bytes">stores.<span class="store">store-name</span>.container.<br>write.buffer.size.bytes</td> <td class="default">33554432</td> <td class="description"> The amount of memory (in bytes) that LevelDB uses for buffering writes before they are @@ -961,7 +1066,7 @@ </tr> <tr> - <td class="property" id="stores-compaction-delete-threshold">stores.<span class="store">store-name</span>.<br>compaction.delete.threshold</td> + <td class="property" id="stores-leveldb-compaction-delete-threshold">stores.<span class="store">store-name</span>.<br>compaction.delete.threshold</td> <td class="default">-1</td> <td class="description"> Setting this property forces a LevelDB compaction to be performed after a certain @@ -1041,7 +1146,7 @@ memory use remains below the limit. The amount of memory used is normally the JVM heap size (configured with <a href="#task-opts" class="property">task.opts</a>), plus the size of any off-heap memory allocation (for example - <a href="#stores-container-cache-size-bytes" class="property">stores.*.container.cache.size.bytes</a>), + <a href="#stores-rocksdb-container-cache-size-bytes" class="property">stores.*.container.cache.size.bytes</a>), plus a safety margin to allow for JVM overheads. 
</td> </tr> Modified: incubator/samza/site/learn/documentation/latest/jobs/configuration.html URL: http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/latest/jobs/configuration.html?rev=1637130&r1=1637129&r2=1637130&view=diff ============================================================================== --- incubator/samza/site/learn/documentation/latest/jobs/configuration.html (original) +++ incubator/samza/site/learn/documentation/latest/jobs/configuration.html Thu Nov 6 15:03:11 2014 @@ -143,7 +143,7 @@ <p>All Samza jobs have a configuration file that defines the job. A very basic configuration file looks like this:</p> <div class="highlight"><pre><code class="jproperties"><span class="c"># Job</span> -<span class="na">job.factory.class</span><span class="o">=</span><span class="s">samza.job.local.ThreadJobFactory</span> +<span class="na">job.factory.class</span><span class="o">=</span><span class="s">org.apache.samza.job.local.ThreadJobFactory</span> <span class="na">job.name</span><span class="o">=</span><span class="s">hello-world</span> <span class="c"># Task</span> @@ -162,7 +162,7 @@ <p>There are four major sections to a configuration file:</p> <ol> -<li>The job section defines things like the name of the job, and whether to use the YarnJobFactory or ProcessJobFactory/ThreadJobFactory.</li> +<li>The job section defines things like the name of the job, and whether to use the YarnJobFactory or ProcessJobFactory/ThreadJobFactory (See the job.factory.class property in <a href="configuration-table.html">Configuration Table</a>).</li> <li>The task section is where you specify the class name for your <a href="../api/overview.html">StreamTask</a>. 
It’s also where you define what the <a href="../container/streams.html">input streams</a> are for your task.</li> <li>The serializers section defines the classes of the <a href="../container/serialization.html">serdes</a> used for serialization and deserialization of specific objects that are received and sent along different streams.</li> <li>The system section defines systems that your StreamTask can read from along with the types of serdes used for sending keys and messages from that system. Usually, you’ll define a Kafka system, if you’re reading from Kafka, although you can also specify your own self-implemented Samza-compatible systems. See the <a href="/startup/hello-samza/latest">hello-samza example project</a>’s Wikipedia system for a good example of a self-implemented system.</li> Modified: incubator/samza/site/learn/documentation/latest/jobs/logging.html URL: http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/latest/jobs/logging.html?rev=1637130&r1=1637129&r2=1637130&view=diff ============================================================================== --- incubator/samza/site/learn/documentation/latest/jobs/logging.html (original) +++ incubator/samza/site/learn/documentation/latest/jobs/logging.html Thu Nov 6 15:03:11 2014 @@ -163,7 +163,15 @@ <p>The <a href="packaging.html">run-class.sh</a> script will also set the following Java system properties:</p> -<div class="highlight"><pre><code class="bash">-Dsamza.log.dir<span class="o">=</span><span class="nv">$SAMZA_LOG_DIR</span> -Dsamza.container.name<span class="o">=</span><span class="nv">$SAMZA_CONTAINER_NAME</span></code></pre></div> +<div class="highlight"><pre><code class="bash">-Dsamza.log.dir<span class="o">=</span><span class="nv">$SAMZA_LOG_DIR</span></code></pre></div> + +<p>The <a href="packaging.html">run-container.sh</a> will also set:</p> + +<div class="highlight"><pre><code class="bash">-Dsamza.container.id<span class="o">=</span><span 
class="nv">$SAMZA_CONTAINER_ID</span> -Dsamza.container.name<span class="o">=</span>samza-container-<span class="nv">$SAMZA_CONTAINER_ID</span></code></pre></div> + +<p>Likewise, <a href="packaging.html">run-am.sh</a> sets:</p> + +<div class="highlight"><pre><code class="bash">-Dsamza.container.name<span class="o">=</span>samza-application-master</code></pre></div> <p>These settings are very useful if you’re using a file-based appender. For example, you can use a daily rolling appender by configuring log4j.xml like this:</p> Added: incubator/samza/site/learn/tutorials/latest/deploy-samza-to-CDH.html URL: http://svn.apache.org/viewvc/incubator/samza/site/learn/tutorials/latest/deploy-samza-to-CDH.html?rev=1637130&view=auto ============================================================================== --- incubator/samza/site/learn/tutorials/latest/deploy-samza-to-CDH.html (added) +++ incubator/samza/site/learn/tutorials/latest/deploy-samza-to-CDH.html Thu Nov 6 15:03:11 2014 @@ -0,0 +1,218 @@ +<!DOCTYPE html> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License.
+--> +<html lang="en"> + <head> + <meta charset="utf-8"> + <title>Samza - Deploy Samza Job To CDH</title> + <link href='/css/ropa-sans.css' rel='stylesheet' type='text/css'/> + <link href="/css/bootstrap.min.css" rel="stylesheet"/> + <link href="/css/font-awesome.min.css" rel="stylesheet"/> + <link href="/css/main.css" rel="stylesheet"/> + <link href="/css/syntax.css" rel="stylesheet"/> + <link rel="icon" type="image/png" href="/img/samza-icon.png"> + <script src="/js/jquery-1.11.1.min.js"></script> + </head> + <body> + <div class="wrapper"> + <div class="wrapper-content"> + + <div class="masthead"> + <div class="container"> + <div class="masthead-logo"> + <a href="/" class="logo">samza</a> + </div> + <div class="masthead-icons"> + <div class="pull-right"> + <a href="/startup/download"><i class="fa fa-arrow-circle-o-down masthead-icon"></i></a> + <a href="https://git-wip-us.apache.org/repos/asf?p=incubator-samza.git;a=tree" target="_blank"><i class="fa fa-code masthead-icon" style="font-weight: bold;"></i></a> + <a href="https://twitter.com/samzastream" target="_blank"><i class="fa fa-twitter masthead-icon"></i></a> + <!-- this icon only shows in versioned pages --> + + + + + <a href="http://samza.incubator.apache.org/learn/tutorials/0.7.0/deploy-samza-to-CDH.html"><i id="switch-version-button"></i></a> + <!-- links for the navigation bar --> + + + </div> + </div> + </div><!-- /.container --> + </div> + + <div class="container"> + <div class="menu"> + <h1><i class="fa fa-rocket"></i> Getting Started</h1> + <ul> + <li><a href="/startup/hello-samza/latest">Hello Samza</a></li> + <li><a href="/startup/download">Download</a></li> + </ul> + + <h1><i class="fa fa-book"></i> Learn</h1> + <ul> + <li><a href="/learn/documentation/latest">Documentation</a></li> + <li><a href="/learn/documentation/latest/jobs/configuration-table.html">Configuration</a></li> + <li><a href="/learn/documentation/latest/api/javadocs/">Javadocs</a></li> + <li><a 
href="/learn/tutorials/latest">Tutorials</a></li> + <li><a href="http://wiki.apache.org/samza/FAQ">FAQ</a></li> + <li><a href="http://wiki.apache.org/samza">Wiki</a></li> + <li><a href="http://wiki.apache.org/samza/PapersAndTalks">Papers & Talks</a></li> + <li><a href="http://blogs.apache.org/samza">Blog</a></li> + </ul> + + <h1><i class="fa fa-comments"></i> Community</h1> + <ul> + <li><a href="/community/mailing-lists.html">Mailing Lists</a></li> + <li><a href="/community/irc.html">IRC</a></li> + <li><a href="https://issues.apache.org/jira/browse/SAMZA">Bugs</a></li> + <li><a href="http://wiki.apache.org/samza/PoweredBy">Powered by</a></li> + <li><a href="http://wiki.apache.org/samza/Ecosystem">Ecosystem</a></li> + <li><a href="/community/committers.html">Committers</a></li> + </ul> + + <h1><i class="fa fa-code"></i> Contribute</h1> + <ul> + <li><a href="/contribute/rules.html">Rules</a></li> + <li><a href="/contribute/coding-guide.html">Coding Guide</a></li> + <li><a href="/contribute/projects.html">Projects</a></li> + <li><a href="/contribute/design-documents.html">Design Documents</a></li> + <li><a href="/contribute/code.html">Code</a></li> + <li><a href="https://reviews.apache.org/groups/samza">Review Board</a></li> + <li><a href="/contribute/tests.html">Tests</a></li> + <li><a href="/contribute/disclaimer.html">Disclaimer</a></li> + </ul> + + <h1><i class="fa fa-history"></i> Archive</h1> + <ul> + <li><a href="/archive/index.html">0.7.0</a></li> + </ul> + </div> + + <div class="content"> + <!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + +<h2>Deploy Samza Job To CDH</h2> + +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + +<p>The tutorial assumes you have successfully run <a href="../../../startup/hello-samza/latest/">hello-samza</a> and now you want to deploy the job to your Cloudera Data Hub (<a href="http://www.cloudera.com/content/cloudera/en/products-and-services/cdh.html">CDH</a>). This tutorial is based on CDH 5.0.0 and uses hello-samza as the example job.</p> + +<h3 id="upload-package-to-cluster">Upload Package to Cluster</h3> + +<p>There are a few ways of uploading the package to the cluster’s HDFS. If you do not have the job package in your cluster, <strong>scp</strong> from your local machine to the cluster.
Then run</p> + +<div class="highlight"><pre><code class="bash">hadoop fs -put path/to/hello-samza-0.8.0-dist.tar.gz /path/for/tgz</code></pre></div> + +<h3 id="get-deloying-scripts">Get Deployment Scripts</h3> + +<p>Untar the job package (assume you will run from the current directory)</p> + +<div class="highlight"><pre><code class="bash">tar -xvf path/to/samza-job-package-0.8.0-dist.tar.gz -C ./</code></pre></div> + +<h3 id="add-package-path-to-properties-file">Add Package Path to Properties File</h3> + +<div class="highlight"><pre><code class="bash">vim config/wikipedia-parser.properties</code></pre></div> + +<p>Change the yarn package path:</p> + +<div class="highlight"><pre><code class="jproperties"><span class="na">yarn.package.path</span><span class="o">=</span><span class="s">hdfs://&lt;hdfs name node ip&gt;:&lt;hdfs name node port&gt;/path/to/tgz</span></code></pre></div> + +<h3 id="set-yarn-environment-variable">Set Yarn Environment Variable</h3> + +<div class="highlight"><pre><code class="bash"><span class="nb">export </span><span class="nv">HADOOP_CONF_DIR</span><span class="o">=</span>/etc/hadoop/conf</code></pre></div> + +<h3 id="run-samza-job">Run Samza Job</h3> + +<div class="highlight"><pre><code class="bash">bin/run-job.sh --config-factory<span class="o">=</span>org.apache.samza.config.factories.PropertiesConfigFactory --config-path<span class="o">=</span>file://<span class="nv">$PWD</span>/config/wikipedia-parser.properties</code></pre></div> + + + </div> + </div> + + </div><!-- /.wrapper-content --> + </div><!-- /.wrapper --> + + <div class="footer"> + <div class="container"> + <!-- nothing for now.
--> + </div> + </div> + + + <script> + $( document ).ready(function() { + if ( $.fn.urlExists( "/learn/tutorials/0.7.0/deploy-samza-to-CDH.html" ) ) { + $("#switch-version-button").addClass("fa fa-history masthead-icon"); + } + }); + + /* a function to test whether the url exists or not */ + (function( $ ) { + $.fn.urlExists = function(url) { + var http = new XMLHttpRequest(); + http.open('HEAD', url, false); + http.send(); + return http.status != 404; + }; + }( jQuery )); + </script> + + + <!-- Google Analytics --> + <script> + (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ + (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), + m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) + })(window,document,'script','//www.google-analytics.com/analytics.js','ga'); + + ga('create', 'UA-43122768-1', 'apache.org'); + ga('send', 'pageview'); + + </script> + </body> +</html> Modified: incubator/samza/site/learn/tutorials/latest/index.html URL: http://svn.apache.org/viewvc/incubator/samza/site/learn/tutorials/latest/index.html?rev=1637130&r1=1637129&r2=1637130&view=diff ============================================================================== --- incubator/samza/site/learn/tutorials/latest/index.html (original) +++ incubator/samza/site/learn/tutorials/latest/index.html Thu Nov 6 15:03:11 2014 @@ -144,6 +144,8 @@ <p><a href="deploy-samza-job-from-hdfs.html">Deploying a Samza Job from HDFS</a></p> +<p><a href="deploy-samza-to-CDH.html">Deploy Samza to CDH</a></p> + <p><a href="run-in-multi-node-yarn.html">Run Hello-samza in Multi-node YARN</a></p> <p><a href="run-hello-samza-without-internet.html">Run Hello-samza without Internet</a></p> Modified: incubator/samza/site/sitemap.xml URL: http://svn.apache.org/viewvc/incubator/samza/site/sitemap.xml?rev=1637130&r1=1637129&r2=1637130&view=diff ============================================================================== ---
incubator/samza/site/sitemap.xml (original) +++ incubator/samza/site/sitemap.xml Thu Nov 6 15:03:11 2014 @@ -20,7 +20,7 @@ <url> <loc>http://samza.incubator.apache.org/</loc> - <lastmod>2014-09-24</lastmod> + <lastmod>2014-11-06</lastmod> <changefreq>daily</changefreq> <priority>1.0</priority> </url> @@ -30,329 +30,336 @@ <url> <loc>http://samza.incubator.apache.org/learn/documentation/versioned/yarn/application-master.html</loc> - <lastmod>2014-09-24</lastmod> + <lastmod>2014-11-06</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/versioned/introduction/architecture.html</loc> - <lastmod>2014-09-24</lastmod> + <lastmod>2014-11-06</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/versioned/introduction/background.html</loc> - <lastmod>2014-09-24</lastmod> + <lastmod>2014-11-06</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/versioned/container/checkpointing.html</loc> - <lastmod>2014-09-24</lastmod> + <lastmod>2014-11-06</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/contribute/code.html</loc> - <lastmod>2014-09-24</lastmod> + <lastmod>2014-11-06</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/contribute/coding-guide.html</loc> - <lastmod>2014-09-24</lastmod> + <lastmod>2014-11-06</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/community/committers.html</loc> - <lastmod>2014-09-24</lastmod> + <lastmod>2014-11-06</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/versioned/introduction/concepts.html</loc> - <lastmod>2014-09-24</lastmod> + <lastmod>2014-11-06</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/versioned/jobs/configuration.html</loc> - <lastmod>2014-09-24</lastmod> + <lastmod>2014-11-06</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/tutorials/versioned/deploy-samza-job-from-hdfs.html</loc> - <lastmod>2014-09-24</lastmod> + 
<lastmod>2014-11-06</lastmod> </url> <url> - <loc>http://samza.incubator.apache.org/contribute/design-documents.html</loc> - <lastmod>2014-09-24</lastmod> + <loc>http://samza.incubator.apache.org/learn/tutorials/versioned/deploy-samza-to-CDH.html</loc> + <lastmod>2014-11-06</lastmod> </url> <url> - <loc>http://samza.incubator.apache.org/contribute/disclaimer.html</loc> - <lastmod>2014-09-24</lastmod> + <loc>http://samza.incubator.apache.org/contribute/design-documents.html</loc> + <lastmod>2014-11-06</lastmod> </url> <url> - <loc>http://samza.incubator.apache.org/learn/documentation/versioned/container/event-loop.html</loc> - <lastmod>2014-09-24</lastmod> + <loc>http://samza.incubator.apache.org/contribute/disclaimer.html</loc> + <lastmod>2014-11-06</lastmod> </url> <url> - <loc>http://samza.incubator.apache.org/archive/index.html</loc> - <lastmod>2014-09-24</lastmod> + <loc>http://samza.incubator.apache.org/learn/documentation/versioned/container/event-loop.html</loc> + <lastmod>2014-11-06</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/versioned/index.html</loc> - <lastmod>2014-09-24</lastmod> + <lastmod>2014-11-06</lastmod> </url> <url> - <loc>http://samza.incubator.apache.org/learn/tutorials/versioned/index.html</loc> - <lastmod>2014-09-24</lastmod> + <loc>http://samza.incubator.apache.org/archive/index.html</loc> + <lastmod>2014-11-06</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/index.html</loc> - <lastmod>2014-09-24</lastmod> + <lastmod>2014-11-06</lastmod> </url> <url> - <loc>http://samza.incubator.apache.org/startup/download/index.html</loc> - <lastmod>2014-09-24</lastmod> + <loc>http://samza.incubator.apache.org/learn/tutorials/versioned/index.html</loc> + <lastmod>2014-11-06</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/startup/hello-samza/versioned/index.html</loc> - <lastmod>2014-09-24</lastmod> + <lastmod>2014-11-06</lastmod> + + + </url> + + <url> + 
<loc>http://samza.incubator.apache.org/startup/download/index.html</loc> + <lastmod>2014-11-06</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/versioned/comparisons/introduction.html</loc> - <lastmod>2014-09-24</lastmod> + <lastmod>2014-11-06</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/community/irc.html</loc> - <lastmod>2014-09-24</lastmod> + <lastmod>2014-11-06</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/versioned/yarn/isolation.html</loc> - <lastmod>2014-09-24</lastmod> + <lastmod>2014-11-06</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/versioned/container/jmx.html</loc> - <lastmod>2014-09-24</lastmod> + <lastmod>2014-11-06</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/versioned/jobs/job-runner.html</loc> - <lastmod>2014-09-24</lastmod> + <lastmod>2014-11-06</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/versioned/operations/kafka.html</loc> - <lastmod>2014-09-24</lastmod> + <lastmod>2014-11-06</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/versioned/jobs/logging.html</loc> - <lastmod>2014-09-24</lastmod> + <lastmod>2014-11-06</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/community/mailing-lists.html</loc> - <lastmod>2014-09-24</lastmod> + <lastmod>2014-11-06</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/versioned/container/metrics.html</loc> - <lastmod>2014-09-24</lastmod> + <lastmod>2014-11-06</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/versioned/comparisons/mupd8.html</loc> - <lastmod>2014-09-24</lastmod> + <lastmod>2014-11-06</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/versioned/api/overview.html</loc> - <lastmod>2014-09-24</lastmod> + <lastmod>2014-11-06</lastmod> </url> <url> 
<loc>http://samza.incubator.apache.org/learn/documentation/versioned/jobs/packaging.html</loc> - <lastmod>2014-09-24</lastmod> + <lastmod>2014-11-06</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/contribute/projects.html</loc> - <lastmod>2014-09-24</lastmod> + <lastmod>2014-11-06</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/tutorials/versioned/remote-debugging-samza.html</loc> - <lastmod>2014-09-24</lastmod> + <lastmod>2014-11-06</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/versioned/jobs/reprocessing.html</loc> - <lastmod>2014-09-24</lastmod> + <lastmod>2014-11-06</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/contribute/rules.html</loc> - <lastmod>2014-09-24</lastmod> + <lastmod>2014-11-06</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/tutorials/versioned/run-hello-samza-without-internet.html</loc> - <lastmod>2014-09-24</lastmod> + <lastmod>2014-11-06</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/tutorials/versioned/run-in-multi-node-yarn.html</loc> - <lastmod>2014-09-24</lastmod> + <lastmod>2014-11-06</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/versioned/container/samza-container.html</loc> - <lastmod>2014-09-24</lastmod> + <lastmod>2014-11-06</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/versioned/operations/security.html</loc> - <lastmod>2014-09-24</lastmod> + <lastmod>2014-11-06</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/versioned/container/serialization.html</loc> - <lastmod>2014-09-24</lastmod> + <lastmod>2014-11-06</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/versioned/comparisons/spark-streaming.html</loc> - <lastmod>2014-09-24</lastmod> + <lastmod>2014-11-06</lastmod> </url> <url> 
<loc>http://samza.incubator.apache.org/learn/documentation/versioned/container/state-management.html</loc> - <lastmod>2014-09-24</lastmod> + <lastmod>2014-11-06</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/versioned/comparisons/storm.html</loc> - <lastmod>2014-09-24</lastmod> + <lastmod>2014-11-06</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/versioned/container/streams.html</loc> - <lastmod>2014-09-24</lastmod> + <lastmod>2014-11-06</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/contribute/tests.html</loc> - <lastmod>2014-09-24</lastmod> + <lastmod>2014-11-06</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/versioned/container/windowing.html</loc> - <lastmod>2014-09-24</lastmod> + <lastmod>2014-11-06</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/versioned/jobs/yarn-jobs.html</loc> - <lastmod>2014-09-24</lastmod> + <lastmod>2014-11-06</lastmod> </url> Modified: incubator/samza/site/startup/hello-samza/latest/index.html URL: http://svn.apache.org/viewvc/incubator/samza/site/startup/hello-samza/latest/index.html?rev=1637130&r1=1637129&r2=1637130&view=diff ============================================================================== --- incubator/samza/site/startup/hello-samza/latest/index.html (original) +++ incubator/samza/site/startup/hello-samza/latest/index.html Thu Nov 6 15:03:11 2014 @@ -147,7 +147,8 @@ <p>Check out the hello-samza project:</p> <div class="highlight"><pre><code class="bash">git clone git://git.apache.org/incubator-samza-hello-samza.git hello-samza -<span class="nb">cd </span>hello-samza</code></pre></div> +<span class="nb">cd </span>hello-samza +git checkout latest</code></pre></div> <p>This project contains everything you’ll need to run your first Samza jobs.</p>
