[39/51] [partial] storm-site git commit: Update latest 1.x-branch version to 1.2.2

kabhwan Sat, 09 Jun 2018 07:09:02 -0700

http://git-wip-us.apache.org/repos/asf/storm-site/blob/6e122a12/content/releases/1.2.1/Resource_Aware_Scheduler_overview.html
----------------------------------------------------------------------
diff --git a/content/releases/1.2.1/Resource_Aware_Scheduler_overview.html 
b/content/releases/1.2.1/Resource_Aware_Scheduler_overview.html
deleted file mode 100644
index 561fb47..0000000
--- a/content/releases/1.2.1/Resource_Aware_Scheduler_overview.html
+++ /dev/null
@@ -1,695 +0,0 @@
-<!DOCTYPE html>
-<html>
-    <head>
-    <meta charset="utf-8">
-    <meta http-equiv="X-UA-Compatible" content="IE=edge">
-    <meta name="viewport" content="width=device-width, initial-scale=1">
-
-    <link rel="shortcut icon" href="/favicon.ico" type="image/x-icon">
-    <link rel="icon" href="/favicon.ico" type="image/x-icon">
-
-    <title>Resource Aware Scheduler</title>
-
-    <!-- Bootstrap core CSS -->
-    <link href="/assets/css/bootstrap.min.css" rel="stylesheet">
-    <!-- Bootstrap theme -->
-    <link href="/assets/css/bootstrap-theme.min.css" rel="stylesheet">
-
-    <!-- Custom styles for this template -->
-    <link rel="stylesheet" 
href="http://fortawesome.github.io/Font-Awesome/assets/font-awesome/css/font-awesome.css">
-    <link href="/css/style.css" rel="stylesheet">
-    <link href="/assets/css/owl.theme.css" rel="stylesheet">
-    <link href="/assets/css/owl.carousel.css" rel="stylesheet">
-    <script type="text/javascript" src="/assets/js/jquery.min.js"></script>
-    <script type="text/javascript" src="/assets/js/bootstrap.min.js"></script>
-    <script type="text/javascript" 
src="/assets/js/owl.carousel.min.js"></script>
-    <script type="text/javascript" src="/assets/js/storm.js"></script>
-    <!-- Just for debugging purposes. Don't actually copy these 2 lines! -->
-    <!--[if lt IE 9]><script 
src="../../assets/js/ie8-responsive-file-warning.js"></script><![endif]-->
-    
-    <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media 
queries -->
-    <!--[if lt IE 9]>
-      <script 
src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
-      <script 
src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
-    <![endif]-->
-  </head>
-
-
-  <body>
-    <header>
-  <div class="container-fluid">
-     <div class="row">
-          <div class="col-md-5">
-            <a href="/index.html"><img src="/images/logo.png" class="logo" 
/></a>
-          </div>
-          <div class="col-md-5">
-            
-              <h1>Version: 1.2.1</h1>
-            
-          </div>
-          <div class="col-md-2">
-            <a href="/downloads.html" class="btn-std btn-block 
btn-download">Download</a>
-          </div>
-        </div>
-    </div>
-</header>
-<!--Header End-->
-<!--Navigation Begin-->
-<div class="navbar" role="banner">
-  <div class="container-fluid">
-      <div class="navbar-header">
-          <button class="navbar-toggle" type="button" data-toggle="collapse" 
data-target=".bs-navbar-collapse">
-                <span class="icon-bar"></span>
-                <span class="icon-bar"></span>
-                <span class="icon-bar"></span>
-            </button>
-        </div>
-        <nav class="collapse navbar-collapse bs-navbar-collapse" 
role="navigation">
-          <ul class="nav navbar-nav">
-              <li><a href="/index.html" id="home">Home</a></li>
-                <li><a href="/getting-help.html" id="getting-help">Getting 
Help</a></li>
-                <li><a href="/about/integrates.html" id="project-info">Project 
Information</a></li>
-                <li class="dropdown">
-                    <a href="#" class="dropdown-toggle" data-toggle="dropdown" 
id="documentation">Documentation <b class="caret"></b></a>
-                    <ul class="dropdown-menu">
-                      
-                        
-                          <li><a 
href="/releases/2.0.0-SNAPSHOT/index.html">2.0.0-SNAPSHOT</a></li>
-                        
-                      
-                        
-                          <li><a 
href="/releases/1.2.1/index.html">1.2.1</a></li>
-                        
-                      
-                        
-                          <li><a 
href="/releases/1.1.2/index.html">1.1.2</a></li>
-                        
-                      
-                        
-                      
-                        
-                          <li><a 
href="/releases/1.0.6/index.html">1.0.6</a></li>
-                        
-                      
-                        
-                      
-                        
-                      
-                        
-                      
-                        
-                      
-                        
-                      
-                        
-                      
-                        
-                      
-                        
-                      
-                        
-                      
-                        
-                      
-                        
-                      
-                        
-                      
-                        
-                      
-                    </ul>
-                </li>
-                <li><a href="/talksAndVideos.html">Talks and 
Slideshows</a></li>
-                <li class="dropdown">
-                    <a href="#" class="dropdown-toggle" data-toggle="dropdown" 
id="contribute">Community <b class="caret"></b></a>
-                    <ul class="dropdown-menu">
-                        <li><a 
href="/contribute/Contributing-to-Storm.html">Contributing</a></li>
-                        <li><a href="/contribute/People.html">People</a></li>
-                        <li><a href="/contribute/BYLAWS.html">ByLaws</a></li>
-                    </ul>
-                </li>
-                <li><a href="/2018/06/04/storm122-released.html" 
id="news">News</a></li>
-            </ul>
-        </nav>
-    </div>
-</div>
-
-
-
-    <div class="container-fluid">
-    <h1 class="page-title">Resource Aware Scheduler</h1>
-          <div class="row">
-               <div class="col-md-12">
-                    <!-- Documentation -->
-
-<p class="post-meta"></p>
-
-<div class="documentation-content"><h1 id="introduction">Introduction</h1>
-
-<p>The purpose of this document is to provide a description of the Resource 
Aware Scheduler for the Storm distributed real-time computation system.  This 
document will provide you with a high level description of the resource 
aware scheduler in Storm.  Some of the benefits of using a resource aware 
scheduler on top of Storm are outlined in the following presentation at Hadoop 
Summit 2016:</p>
-
-<p><a 
href="http://www.slideshare.net/HadoopSummit/resource-aware-scheduling-in-apache-storm">http://www.slideshare.net/HadoopSummit/resource-aware-scheduling-in-apache-storm</a></p>
-
-<h1 id="table-of-contents">Table of Contents</h1>
-
-<ol>
-<li><a href="#Using-Resource-Aware-Scheduler">Using Resource Aware 
Scheduler</a></li>
-<li><a href="#API-Overview">API Overview</a>
-
-<ol>
-<li><a href="#Setting-Memory-Requirement">Setting Memory Requirement</a></li>
-<li><a href="#Setting-CPU-Requirement">Setting CPU Requirement</a></li>
-<li><a href="#Limiting-the-Heap-Size-per-Worker-(JVM)Process">Limiting the 
Heap Size per Worker (JVM) Process</a></li>
-<li><a href="#Setting-Available-Resources-on-Node">Setting Available Resources 
on Node</a></li>
-<li><a href="#Other-Configurations">Other Configurations</a></li>
-</ol></li>
-<li><a href="#Topology-Priorities-and-Per-User-Resource">Topology Priorities 
and Per User Resource</a>
-
-<ol>
-<li><a href="#Setup">Setup</a></li>
-<li><a href="#Specifying-Topology-Priority">Specifying Topology 
Priority</a></li>
-<li><a href="#Specifying-Scheduling-Strategy">Specifying Scheduling 
Strategy</a></li>
-<li><a href="#Specifying-Topology-Prioritization-Strategy">Specifying Topology 
Prioritization Strategy</a></li>
-<li><a href="#Specifying-Eviction-Strategy">Specifying Eviction 
Strategy</a></li>
-</ol></li>
-<li><a href="#Profiling-Resource-Usage">Profiling Resource Usage</a></li>
-<li><a 
href="#Enhancements-on-original-DefaultResourceAwareStrategy">Enhancements on 
original DefaultResourceAwareStrategy</a></li>
-</ol>
-
-<p><div id='Using-Resource-Aware-Scheduler'/></p>
-
-<h2 id="using-resource-aware-scheduler">Using Resource Aware Scheduler</h2>
-
-<p>The user can switch to using the Resource Aware Scheduler by setting the 
following in <em>conf/storm.yaml</em></p>
-<div class="highlight"><pre><code class="language-" 
data-lang="">storm.scheduler: 
"org.apache.storm.scheduler.resource.ResourceAwareScheduler"
-</code></pre></div>
-<p><div id='API-Overview'/></p>
-
-<h2 id="api-overview">API Overview</h2>
-
-<p>For use with Trident, please see the <a href="Trident-RAS-API.html">Trident 
RAS API</a></p>
-
-<p>For a Storm Topology, the user can now specify the amount of resources a 
topology component (i.e. Spout or Bolt) requires to run a single instance of 
the component.  The user can specify the resource requirement for a topology 
component by using the following API calls.</p>
-
-<p><div id='Setting-Memory-Requirement'/></p>
-
-<h3 id="setting-memory-requirement">Setting Memory Requirement</h3>
-
-<p>API to set component memory requirement:</p>
-<div class="highlight"><pre><code class="language-" data-lang="">public T 
setMemoryLoad(Number onHeap, Number offHeap)
-</code></pre></div>
-<p>Parameters:
-* Number onHeap – The amount of on heap memory an instance of this component 
will consume in megabytes
-* Number offHeap – The amount of off heap memory an instance of this 
component will consume in megabytes</p>
-
-<p>The user also has the option to just specify the on heap memory requirement 
if the component does not have an off heap memory need.</p>
-<div class="highlight"><pre><code class="language-" data-lang="">public T 
setMemoryLoad(Number onHeap)
-</code></pre></div>
-<p>Parameters:
-* Number onHeap – The amount of on heap memory an instance of this component 
will consume</p>
-
-<p>If no value is provided for offHeap, 0.0 will be used. If no value is 
provided for onHeap, or if the API is never called for a component, the default 
value will be used.</p>
-
-<p>Example of Usage:</p>
-<div class="highlight"><pre><code class="language-" data-lang="">SpoutDeclarer 
s1 = builder.setSpout("word", new TestWordSpout(), 10);
-s1.setMemoryLoad(1024.0, 512.0);
-builder.setBolt("exclaim1", new ExclamationBolt(), 3)
-            .shuffleGrouping("word").setMemoryLoad(512.0);
-</code></pre></div>
-<p>The entire memory requested for this topology is 16.5 GB. That is from 10 
spouts with 1GB on heap memory and 0.5 GB off heap memory each and 3 bolts with 
0.5 GB on heap memory each.</p>
-
-<p><div id='Setting-CPU-Requirement'/></p>
-
-<h3 id="setting-cpu-requirement">Setting CPU Requirement</h3>
-
-<p>API to set component CPU requirement:</p>
-<div class="highlight"><pre><code class="language-" data-lang="">public T 
setCPULoad(Double amount)
-</code></pre></div>
-<p>Parameters:
-* Number amount – The amount of CPU an instance of this component will 
consume.</p>
-
-<p>Currently, the amount of CPU resources a component requires or is available 
on a node is represented by a point system. CPU usage is a difficult concept to 
define. Different CPU architectures perform differently depending on the task 
at hand. They are so complex that expressing all of that in a single precise 
portable number is impossible. Instead we take a convention over configuration 
approach and are primarily concerned with rough level of CPU usage while still 
providing the possibility to specify amounts more fine grained.</p>
-
-<p>By convention a CPU core typically will get 100 points. If you feel that 
your processors are more or less powerful you can adjust this accordingly. 
Heavy tasks that are CPU bound will get 100 points, as they can consume an 
entire core. Medium tasks should get 50, light tasks 25, and tiny tasks 10. In 
some cases you have a task that spawns other threads to help with processing. 
These tasks may need to go above 100 points to express the amount of CPU they 
are using. If these conventions are followed, then in the common case of a 
single threaded task the reported Capacity * 100 should be the number of CPU 
points that the task needs.</p>
-
-<p>Example of Usage:</p>
-<div class="highlight"><pre><code class="language-" data-lang="">SpoutDeclarer 
s1 = builder.setSpout("word", new TestWordSpout(), 10);
-s1.setCPULoad(15.0);
-builder.setBolt("exclaim1", new ExclamationBolt(), 3)
-            .shuffleGrouping("word").setCPULoad(10.0);
-builder.setBolt("exclaim2", new HeavyBolt(), 1)
-                .shuffleGrouping("exclaim1").setCPULoad(450.0);
-</code></pre></div>
-<p><div id='Limiting-the-Heap-Size-per-Worker-(JVM)Process'/></p>
-
-<h3 id="limiting-the-heap-size-per-worker-jvm-process">Limiting the Heap Size 
per Worker (JVM) Process</h3>
-<div class="highlight"><pre><code class="language-" data-lang="">public void 
setTopologyWorkerMaxHeapSize(Number size)
-</code></pre></div>
-<p>Parameters:
-* Number size – The memory limit a worker process will be allocated in 
megabytes</p>
-
-<p>The user can limit the amount of memory resources the resource aware 
scheduler allocates to a single worker on a per topology basis by using the 
above API.  This API is in place so that the users can spread executors to 
multiple workers.  However, spreading executors to multiple workers may 
increase the communication latency since executors will not be able to use 
Disruptor Queue for intra-process communication.</p>
-
-<p>Example of Usage:</p>
-<div class="highlight"><pre><code class="language-" data-lang="">Config conf = 
new Config();
-conf.setTopologyWorkerMaxHeapSize(512.0);
-</code></pre></div>
-<p><div id='Setting-Available-Resources-on-Node'/></p>
-
-<h3 id="setting-available-resources-on-node">Setting Available Resources on 
Node</h3>
-
-<p>A storm administrator can specify node resource availability by modifying 
the <em>conf/storm.yaml</em> file located in the storm home directory of that 
node.</p>
-
-<p>A storm administrator can specify how much available memory a node has in 
megabytes by adding the following to <em>storm.yaml</em></p>
-<div class="highlight"><pre><code class="language-" 
data-lang="">supervisor.memory.capacity.mb: [amount&lt;Double&gt;]
-</code></pre></div>
-<p>A storm administrator can also specify how much CPU resources a 
node has available by adding the following to <em>storm.yaml</em></p>
-<div class="highlight"><pre><code class="language-" 
data-lang="">supervisor.cpu.capacity: [amount&lt;Double&gt;]
-</code></pre></div>
-<p>Note that the amount the user can specify for the available CPU is 
represented using a point system as discussed earlier.</p>
-
-<p>Example of Usage:</p>
-<div class="highlight"><pre><code class="language-" 
data-lang="">supervisor.memory.capacity.mb: 20480.0
-supervisor.cpu.capacity: 100.0
-</code></pre></div>
-<p><div id='Other-Configurations'/></p>
-
-<h3 id="other-configurations">Other Configurations</h3>
-
-<p>The user can set some default configurations for the Resource Aware 
Scheduler in <em>conf/storm.yaml</em>:</p>
-<div class="highlight"><pre><code class="language-" data-lang="">//default 
value if on heap memory requirement is not specified for a component 
-topology.component.resources.onheap.memory.mb: 128.0
-
-//default value if off heap memory requirement is not specified for a 
component 
-topology.component.resources.offheap.memory.mb: 0.0
-
-//default value if CPU requirement is not specified for a component 
-topology.component.cpu.pcore.percent: 10.0
-
-//default value for the max heap size for a worker  
-topology.worker.max.heap.size.mb: 768.0
-</code></pre></div>
-<p><div id='Topology-Priorities-and-Per-User-Resource'/></p>
-
-<h2 id="topology-priorities-and-per-user-resource">Topology Priorities and Per 
User Resource</h2>
-
-<p>The Resource Aware Scheduler or RAS also has multitenant capabilities since 
many Storm users typically share a Storm cluster.  Resource Aware Scheduler can 
allocate resources on a per user basis.  Each user can be guaranteed a certain 
amount of resources to run his or her topologies and the Resource Aware 
Scheduler will meet those guarantees when possible.  When the Storm cluster has 
extra free resources, Resource Aware Scheduler will be able to allocate 
additional resources to users in a fair manner. The importance of topologies can 
also vary.  Topologies can be used for actual production or just 
experimentation, thus Resource Aware Scheduler will take into account the 
importance of a topology when determining the order in which to schedule 
topologies or when to evict topologies.</p>
-
-<p><div id='Setup'/></p>
-
-<h3 id="setup">Setup</h3>
-
-<p>The resource guarantees of a user can be specified in 
<em>conf/user-resource-pools.yaml</em>.  Specify the resource guarantees of a 
user in the following format:</p>
-<div class="highlight"><pre><code class="language-" 
data-lang="">resource.aware.scheduler.user.pools:
-[UserId]
-    cpu: [Amount of Guarantee CPU Resources]
-    memory: [Amount of Guarantee Memory Resources]
-</code></pre></div>
-<p>An example of what <em>user-resource-pools.yaml</em> can look like:</p>
-<div class="highlight"><pre><code class="language-" 
data-lang="">resource.aware.scheduler.user.pools:
-    jerry:
-        cpu: 1000
-        memory: 8192.0
-    derek:
-        cpu: 10000.0
-        memory: 32768
-    bobby:
-        cpu: 5000.0
-        memory: 16384.0
-</code></pre></div>
-<p>Please note that the specified amount of Guaranteed CPU and Memory can be 
either an integer or a double.</p>
-
-<p><div id='Specifying-Topology-Priority'/></p>
-
-<h3 id="specifying-topology-priority">Specifying Topology Priority</h3>
-
-<p>Topology priorities can range from 0-29.  The topology 
priorities will be partitioned into several priority levels that may contain a 
range of priorities. 
-For example we can create a priority level mapping:</p>
-<div class="highlight"><pre><code class="language-" data-lang="">PRODUCTION 
=&gt; 0 – 9
-STAGING =&gt; 10 – 19
-DEV =&gt; 20 – 29
-</code></pre></div>
-<p>Thus, each priority level contains 10 sub priorities. Users can set the 
priority level of a topology by using the following API</p>
-<div class="highlight"><pre><code class="language-" 
data-lang="">conf.setTopologyPriority(int priority)
-</code></pre></div>
-<p>Parameters:
-* priority – an integer representing the priority of the topology</p>
-
-<p>Please note that the 0-29 range is not a hard limit.  Thus, a user can set 
a priority number that is higher than 29. However, the property of higher the 
priority number, lower the importance still holds</p>
-
-<p><div id='Specifying-Scheduling-Strategy'/></p>
-
-<h3 id="specifying-scheduling-strategy">Specifying Scheduling Strategy</h3>
-
-<p>A user can specify on a per topology basis what scheduling strategy to use. 
 Users can implement the IStrategy interface and define new strategies to 
schedule specific topologies.  This pluggable interface was created since we 
realize different topologies might have different scheduling needs.  A user can 
set the topology strategy within the topology definition by using the API:</p>
-<div class="highlight"><pre><code class="language-" data-lang="">public void 
setTopologyStrategy(Class&lt;? extends IStrategy&gt; clazz)
-</code></pre></div>
-<p>Parameters:
-* clazz – The strategy class that implements the IStrategy interface</p>
-
-<p>Example Usage:</p>
-<div class="highlight"><pre><code class="language-" 
data-lang="">conf.setTopologyStrategy(org.apache.storm.scheduler.resource.strategies.scheduling.DefaultResourceAwareStrategy.class);
-</code></pre></div>
-<p>A default scheduling is provided.  The DefaultResourceAwareStrategy is 
implemented based off the scheduling algorithm in the original paper describing 
resource aware scheduling in Storm:</p>
-
-<p>Peng, Boyang, Mohammad Hosseini, Zhihao Hong, Reza Farivar, and Roy 
Campbell. &quot;R-storm: Resource-aware scheduling in storm.&quot; In 
Proceedings of the 16th Annual Middleware Conference, pp. 149-161. ACM, 
2015.</p>
-
-<p><a 
href="http://dl.acm.org/citation.cfm?id=2814808">http://dl.acm.org/citation.cfm?id=2814808</a></p>
-
-<p><strong>Please Note: Enhancements have been made on top of the original 
scheduling strategy as described in the paper.  Please see section 
&quot;Enhancements on original DefaultResourceAwareStrategy&quot;</strong></p>
-
-<p><div id='Specifying-Topology-Prioritization-Strategy'/></p>
-
-<h3 id="specifying-topology-prioritization-strategy">Specifying Topology 
Prioritization Strategy</h3>
-
-<p>The order of scheduling is a pluggable interface in which a user could 
define a strategy that prioritizes topologies.  For a user to define his or her 
own prioritization strategy, he or she needs to implement the 
ISchedulingPriorityStrategy interface.  A user can set the scheduling priority 
strategy by setting the 
<em>Config.RESOURCE_AWARE_SCHEDULER_PRIORITY_STRATEGY</em> to point to the 
class that implements the strategy. For instance:</p>
-<div class="highlight"><pre><code class="language-" 
data-lang="">resource.aware.scheduler.priority.strategy: 
"org.apache.storm.scheduler.resource.strategies.priority.DefaultSchedulingPriorityStrategy"
-</code></pre></div>
-<p>A default strategy will be provided.  The following explains how the 
default scheduling priority strategy works.</p>
-
-<p><strong>DefaultSchedulingPriorityStrategy</strong></p>
-
-<p>The order of scheduling should be based on the distance between a user’s 
current resource allocation and his or her guaranteed allocation.  We should 
prioritize the users who are the furthest away from their resource guarantee. 
The difficulty of this problem is that a user may have multiple resource 
guarantees, and another user can have another set of resource guarantees, so 
how can we compare them in a fair manner?  Let&#39;s use the average percentage 
of resource guarantees satisfied as a method of comparison.</p>
-
-<p>For example:</p>
-
-<table><thead>
-<tr>
-<th>User</th>
-<th>Resource Guarantee</th>
-<th>Resource Allocated</th>
-</tr>
-</thead><tbody>
-<tr>
-<td>A</td>
-<td>&lt;10 CPU, 50GB&gt;</td>
-<td>&lt;2 CPU, 40 GB&gt;</td>
-</tr>
-<tr>
-<td>B</td>
-<td>&lt; 20 CPU, 25GB&gt;</td>
-<td>&lt;15 CPU, 10 GB&gt;</td>
-</tr>
-</tbody></table>
-
-<p>User A’s average percentage satisfied of resource guarantee: </p>
-
-<p>(2/10+40/50)/2  = 0.5</p>
-
-<p>User B’s average percentage satisfied of resource guarantee: </p>
-
-<p>(15/20+10/25)/2  = 0.575</p>
-
-<p>Thus, in this example User A has a smaller average percentage of his or her 
resource guarantee satisfied than User B.  Thus, User A should get priority to 
be allocated more resource, i.e., schedule a topology submitted by User A.</p>
-
-<p>When scheduling, RAS sorts users by the average percentage satisfied of 
resource guarantee and schedules topologies from users based on that ordering 
starting from the users with the lowest average percentage satisfied of 
resource guarantee.  When a user’s resource guarantee is completely 
satisfied, the user’s average percentage satisfied of resource guarantee will 
be greater than or equal to 1.</p>
-
-<p><div id='Specifying-Eviction-Strategy'/></p>
-
-<h3 id="specifying-eviction-strategy">Specifying Eviction Strategy</h3>
-
-<p>The eviction strategy is used when there are not enough free resources in 
the cluster to schedule new topologies. If the cluster is full, we need a 
mechanism to evict topologies so that user resource guarantees can be met and 
additional resource can be shared fairly among users. The strategy for evicting 
topologies is also a pluggable interface in which the user can implement his or 
her own topology eviction strategy.  For a user to implement his or her own 
eviction strategy, he or she needs to implement the IEvictionStrategy Interface 
and set <em>Config.RESOURCE_AWARE_SCHEDULER_EVICTION_STRATEGY</em> to point to 
the implemented strategy class. For instance:</p>
-<div class="highlight"><pre><code class="language-" 
data-lang="">resource.aware.scheduler.eviction.strategy: 
"org.apache.storm.scheduler.resource.strategies.eviction.DefaultEvictionStrategy"
-</code></pre></div>
-<p>A default eviction strategy is provided.  The following explains how the 
default topology eviction strategy works</p>
-
-<p><strong>DefaultEvictionStrategy</strong></p>
-
-<p>To determine if topology eviction should occur we should take into account 
the priority of the topology that we are trying to schedule and whether the 
resource guarantees for the owner of the topology have been met.  </p>
-
-<p>We should never evict a topology from a user that does not have his or her 
resource guarantees satisfied.  The following flow chart should describe the 
logic for the eviction process.</p>
-
-<p><img src="images/resource_aware_scheduler_default_eviction_strategy.png" 
alt="Flow chart of the default eviction strategy"></p>
-
-<p><div id='Profiling-Resource-Usage'/></p>
-
-<h2 id="profiling-resource-usage">Profiling Resource Usage</h2>
-
-<p>Figuring out resource usage for your topology:</p>
-
-<p>To get an idea of how much memory/CPU your topology is actually using you 
can add the following to your topology launch code.</p>
-<div class="highlight"><pre><code class="language-" data-lang="">//Log all 
storm metrics
-conf.registerMetricsConsumer(backtype.storm.metric.LoggingMetricsConsumer.class);
-
-//Add in per worker CPU measurement
-Map&lt;String, String&gt; workerMetrics = new HashMap&lt;String, String&gt;();
-workerMetrics.put("CPU", "org.apache.storm.metrics.sigar.CPUMetric");
-conf.put(Config.TOPOLOGY_WORKER_METRICS, workerMetrics);
-</code></pre></div>
-<p>The CPU metrics will require you to add</p>
-<div class="highlight"><pre><code class="language-" 
data-lang="">&lt;dependency&gt;
-    &lt;groupId&gt;org.apache.storm&lt;/groupId&gt;
-    &lt;artifactId&gt;storm-metrics&lt;/artifactId&gt;
-    &lt;version&gt;1.0.0&lt;/version&gt;
-&lt;/dependency&gt;
-</code></pre></div>
-<p>as a topology dependency (1.0.0 or higher).</p>
-
-<p>You can then go to your topology on the UI, turn on the system metrics, and 
find the log that the LoggingMetricsConsumer is writing to.  It will output 
results in the log like.</p>
-<div class="highlight"><pre><code class="language-" data-lang="">1454526100 
node1.nodes.com:6707 -1:__system CPU {user-ms=74480, sys-ms=10780}
-1454526100 node1.nodes.com:6707 -1:__system memory/nonHeap     
{unusedBytes=2077536, virtualFreeBytes=-64621729, initBytes=2555904, 
committedBytes=66699264, maxBytes=-1, usedBytes=64621728}
-1454526100 node1.nodes.com:6707 -1:__system memory/heap  
{unusedBytes=573861408, virtualFreeBytes=694644256, initBytes=805306368, 
committedBytes=657719296, maxBytes=778502144, usedBytes=83857888}
-</code></pre></div>
-<p>The metrics with -1:__system are generally metrics for the entire worker.  
In the example above that worker is running on node1.nodes.com:6707.  These 
metrics are collected every 60 seconds.  For the CPU you can see that over the 
60 seconds this worker used  74480 + 10780 = 85260 ms of CPU time.  This is 
equivalent to 85260/60000 or about 1.5 cores.</p>
-
-<p>The Memory usage is similar but look at the usedBytes.  offHeap is 64621728 
or about 62MB, and onHeap is 83857888 or about 80MB, but you should know what 
you set your heap to in each of your workers already.  How do you divide this 
up per bolt/spout?  That is a bit harder and may require some trial and error 
from your end.</p>
-
-<p><div id='Enhancements-on-original-DefaultResourceAwareStrategy'/></p>
-
-<h2 id="enhancements-on-original-defaultresourceawarestrategy">* Enhancements 
on original DefaultResourceAwareStrategy *</h2>
-
-<p>The default resource aware scheduling strategy as described in the paper 
above has two main scheduling phases:</p>
-
-<ol>
-<li>Task Selection - Calculate the order in which tasks/executors in a topology 
should be scheduled</li>
-<li>Node Selection - Given a task/executor, find a node to schedule the 
task/executor on.</li>
-</ol>
-
-<p>Enhancements have been made for both scheduling phases</p>
-
-<h3 id="task-selection-enhancements">Task Selection Enhancements</h3>
-
-<p>Instead of using a breadth first traversal of the topology graph to create 
an ordering of components and their executors, a new heuristic is used that orders 
components by the number of in and out edges (potential connections) of the 
component.  This is discovered to be a more effective way to colocate executors 
that communicate with each other and reduce the network latency.</p>
-
-<h3 id="node-selection-enhancements">Node Selection Enhancements</h3>
-
-<p>Node selection comes down to first selecting which rack (server rack) and then 
which node on that rack to choose. The gist of the strategy in choosing a rack and 
node is finding the rack that has the &quot;most&quot; resources available and 
in that rack finding the node with the &quot;most&quot; free resources.  The 
assumption we are making for this strategy is that the node or rack with the 
most free resources will have the highest probability of allowing us to 
colocate the largest number of executors on the node or rack to reduce 
network communication latency.</p>
-
-<p>Racks and nodes will be sorted from best choice to worst choice.  When 
finding an executor, the strategy will iterate through all racks and nodes, 
starting from best to worst, before giving up.  Racks and nodes will be sorted 
in the following manner:</p>
-
-<ol>
-<li><p>How many executors are already scheduled on the rack or node<br>
--- This is done so we schedule executors closer to executors that are 
already scheduled and running.  If a topology partially crashed and a subset of 
the topology&#39;s executors need to be rescheduled, we want to reschedule 
these executors as close (network wise) as possible to the executors that 
are healthy and running. </p></li>
-<li><p>Subordinate resource availability or the amount of &quot;effective&quot; 
resources on the rack or node<br>
--- Please refer to the section on Subordinate Resource Availability</p></li>
-<li><p>Average of all the resource availability<br>
--- This is simply taking the average of the percent available (available 
resources on node or rack divided by the available resources on rack or cluster, 
respectively).  This criterion will only be used when &quot;effective 
resources&quot; for two objects (rack or node) are the same. Then we consider 
the average of all the percentages of resources as a metric for sorting. For 
example:</p>
-<div class="highlight"><pre><code class="language-" data-lang="">Avail 
Resources:
-node 1: CPU = 50 Memory = 1024 Slots = 20
-node 2: CPU = 50 Memory = 8192 Slots = 40
-node 3: CPU = 1000 Memory = 0 Slots = 0
-
-Effective resources for nodes:
-node 1 = 50 / (50+50+1000) = 0.045 (CPU bound)
-node 2 = 50 / (50+50+1000) = 0.045 (CPU bound)
-node 3 = 0 (memory and slots are 0)
-</code></pre></div></li>
-</ol>
-
-<p>Node 1 and node 2 have the same effective resources but clearly node 2 has 
more resources (memory and slots) than node 1 and we would want to pick node 2 
first since there is a higher probability we will be able to schedule more 
executors on it. This is what the phase 2 averaging does.</p>
-
-<p>Thus the sorting follows the following progression. Compare based on 1) and 
if equal then compare based on 2) and if equal compare based on 3) and if equal 
we break ties by arbitrarily assigning ordering based on comparing the ids of 
the node or rack.</p>
-
-<p><strong>Subordinate Resource Availability</strong></p>
-
-<p>Originally the getBestClustering algorithm for RAS finds the 
&quot;Best&quot; rack based on which rack has the &quot;most available&quot; 
resources by finding the rack with the biggest sum of available memory + 
available cpu across all nodes in the rack. This method is not very accurate since 
memory and cpu usage are values on a different scale and the values are not 
normalized. This method is also not effective since it does not consider the 
number of slots available and it fails to identify racks that are not 
schedulable due to the exhaustion of one of the resources: either memory, cpu, 
or slots. Also the previous method does not consider failures of workers. When 
executors of a topology get unassigned and need to be scheduled again, the 
current logic in getBestClustering may be inadequate since it will likely 
return a cluster that is different from where the majority of executors from 
the topology were originally scheduled.</p>
-
-<p>The new strategy/algorithm to find the &quot;best&quot; rack or node, I dub 
subordinate resource availability ordering (inspired by Dominant Resource 
Fairness), sorts racks and nodes by the subordinate (not dominant) resource 
availability.</p>
-
-<p>For example given 5 racks with the following resource availabilities:</p>
-<div class="highlight"><pre><code class="language-" data-lang="">//generate 
some that has a lot of memory but little of cpu
-rack-3 Avail [ CPU 100.0 MEM 200000.0 Slots 40 ] Total [ CPU 100.0 MEM 
200000.0 Slots 40 ]
-//generate some supervisors that are depleted of one resource
-rack-2 Avail [ CPU 0.0 MEM 80000.0 Slots 40 ] Total [ CPU 0.0 MEM 80000.0 
Slots 40 ]
-//generate some that has a lot of cpu but little of memory
-rack-4 Avail [ CPU 6100.0 MEM 10000.0 Slots 40 ] Total [ CPU 6100.0 MEM 
10000.0 Slots 40 ]
-//generate another rack of supervisors with less resources than rack-0
-rack-1 Avail [ CPU 2000.0 MEM 40000.0 Slots 40 ] Total [ CPU 2000.0 MEM 
40000.0 Slots 40 ]
-//best rack to choose
-rack-0 Avail [ CPU 4000.0 MEM 80000.0 Slots 40 ] Total [ CPU 4000.0 MEM 
80000.0 Slots 40 ]
-Cluster Overall Avail [ CPU 12200.0 MEM 410000.0 Slots 200 ] Total [ CPU 
12200.0 MEM 410000.0 Slots 200 ]
-</code></pre></div>
-<p>It is clear that rack-0 is the best cluster since it&#39;s the most balanced and 
can potentially schedule the most executors, while rack-2 is the worst rack 
since rack-2 is depleted of cpu resource thus rendering it unschedulable even 
though there are other resources available.</p>
-
-<p>We first calculate the resource availability percentage of all the racks 
for each resource by computing:</p>
-<div class="highlight"><pre><code class="language-" data-lang="">(resource 
available on rack) / (resource available in cluster)
-</code></pre></div>
-<p>We do this calculation to normalize the values otherwise the resource 
values would not be comparable.</p>
-
-<p>So for our example:</p>
-<div class="highlight"><pre><code class="language-" data-lang="">rack-3 Avail 
[ CPU 0.819672131147541% MEM 48.78048780487805% Slots 20.0% ] effective 
resources: 0.00819672131147541
-rack-2 Avail [ CPU 0.0% MEM 19.51219512195122% Slots 20.0% ] effective resources: 
0.0
-rack-4 Avail [ CPU 50.0% MEM 2.4390243902439024% Slots 20.0% ] effective 
resources: 0.024390243902439025
-rack-1 Avail [ CPU 16.39344262295082% MEM 9.75609756097561% Slots 20.0% ] 
effective resources: 0.0975609756097561
-rack-0 Avail [ CPU 32.78688524590164% MEM 19.51219512195122% Slots 20.0% ] 
effective resources: 0.1951219512195122
-</code></pre></div>
-<p>The effective resource of a rack, which is also the subordinate resource, 
is computed by: </p>
-<div class="highlight"><pre><code class="language-" data-lang="">MIN(resource 
availability percentage of {CPU, Memory, # of free Slots}).
-</code></pre></div>
-<p>Then we order the racks by the effective resource.</p>
-
-<p>Thus for our example:</p>
-<div class="highlight"><pre><code class="language-" data-lang="">Sorted rack: 
[rack-0, rack-1, rack-4, rack-3, rack-2]
-</code></pre></div>
-<p>This metric is used in sorting for both nodes and racks.  When sorting 
racks, we consider resources available on the rack and in the whole cluster 
(containing all racks).  When sorting nodes, we consider resources available on 
the node and the resources available in the rack (sum of all resources 
available for all nodes in rack)</p>
-
-<p>Original Jira for this enhancement: <a 
href="https://issues.apache.org/jira/browse/STORM-1766">STORM-1766</a></p>
-
-<h3 id="improvements-in-scheduling">Improvements in Scheduling</h3>
-
-<p>This section provides some experimental results on the performance benefits 
with the enhancements on top of the original scheduling strategy.  The 
experiments are based off of running simulations using:</p>
-
-<p><a 
href="https://github.com/jerrypeng/storm-scheduler-test-framework">https://github.com/jerrypeng/storm-scheduler-test-framework</a></p>
-
-<p>Random topologies and clusters are used in the simulation as well as a 
comprehensive dataset consisting of all real topologies running in all the 
storm clusters at Yahoo.</p>
-
-<p>The graphs below provide a comparison of how well the various strategies 
schedule topologies to minimize network latency.  A network metric is 
calculated for each scheduling of a topology by each scheduling strategy.  The 
network metric is calculated based on how many connections each executor in a 
topology has to make to another executor residing in the same worker (JVM 
process), in different worker but same host, different host, different rack.  
The assumption we are making is the following</p>
-
-<ol>
-<li>Intra-worker communication is the fastest</li>
-<li>Inter-worker communication is fast</li>
-<li>Inter-node communication is slower</li>
-<li>Inter-rack communication is the slowest</li>
-</ol>
-
-<p>For this network metric, the larger the number is, the more 
potential network latency the topology will have for this scheduling.  Two 
types of experiments are performed.  One set of experiments is performed with 
randomly generated topologies and randomly generated clusters.  The other set of 
experiments is performed with a dataset containing all of the running 
topologies at Yahoo and semi-randomly generated clusters based on the size of 
the topology.  Both sets of experiments are run for millions of iterations until 
results converge.  </p>
-
-<p>For the experiments involving randomly generated topologies, an optimal 
strategy is implemented that exhaustively finds the optimal solution if a 
solution exists.  The topologies and clusters used in this experiment are 
relatively small so that the optimal strategy can traverse the solution space to 
find an optimal solution in a reasonable amount of time.  This strategy is not 
run with the Yahoo topologies since the topologies are large and would take an 
unreasonable amount of time to run, since the solution space is W^N (ordering 
doesn&#39;t matter within a worker) where W is the number of workers and N is 
the number of executors. The NextGenStrategy represents the scheduling strategy 
with these enhancements.  The DefaultResourceAwareStrategy represents the 
original scheduling strategy.  The RoundRobinStrategy represents a naive 
strategy that simply schedules executors in a round robin fashion while 
respecting the resource constraints.  The graph below presents averages of the 
network metric.  A CDF graph is also presented further down.</p>
-
-<table><thead>
-<tr>
-<th>Random Topologies</th>
-<th>Yahoo topologies</th>
-</tr>
-</thead><tbody>
-<tr>
-<td><img src="images/ras_new_strategy_network_metric_random.png" alt=""></td>
-<td><img src="images/ras_new_strategy_network_metric_yahoo_topologies.png" 
alt=""></td>
-</tr>
-</tbody></table>
-
-<p>The next graph displays how close the schedulings from the respective 
scheduling strategies are to the schedulings of the optimal strategy.  As 
explained earlier, this is only done for the randomly generated topologies and 
clusters.</p>
-
-<table><thead>
-<tr>
-<th>Random Topologies</th>
-</tr>
-</thead><tbody>
-<tr>
-<td><img src="images/ras_new_strategy_network_metric_improvement_random.png" 
alt=""></td>
-</tr>
-</tbody></table>
-
-<p>The below graph is a CDF of the network metric:</p>
-
-<table><thead>
-<tr>
-<th>Random Topologies</th>
-<th>Yahoo topologies</th>
-</tr>
-</thead><tbody>
-<tr>
-<td><img src="images/ras_new_strategy_network_cdf_random.png" alt=""></td>
-<td><img src="images/ras_new_strategy_network_metric_cdf_yahoo_topologies.png" 
alt=""></td>
-</tr>
-</tbody></table>
-
-<p>Below is a comparison of how long the strategies take to run:</p>
-
-<table><thead>
-<tr>
-<th>Random Topologies</th>
-<th>Yahoo topologies</th>
-</tr>
-</thead><tbody>
-<tr>
-<td><img src="images/ras_new_strategy_runtime_random.png" alt=""></td>
-<td><img src="images/ras_new_strategy_runtime_yahoo.png" alt=""></td>
-</tr>
-</tbody></table>
-</div>
-
-
-                 </div>
-              </div>
-         </div>
-<footer>
-    <div class="container-fluid">
-        <div class="row">
-            <div class="col-md-3">
-                <div class="footer-widget">
-                    <h5>Meetups</h5>
-                    <ul class="latest-news">
-                        
-                        <li><a 
href="http://www.meetup.com/Apache-Storm-Apache-Kafka/">Apache Storm &amp; Apache 
Kafka</a> <span class="small">(Sunnyvale, CA)</span></li>
-                        
-                        <li><a 
href="http://www.meetup.com/Apache-Storm-Kafka-Users/">Apache Storm &amp; Kafka 
Users</a> <span class="small">(Seattle, WA)</span></li>
-                        
-                        <li><a 
href="http://www.meetup.com/New-York-City-Storm-User-Group/">NYC Storm User 
Group</a> <span class="small">(New York, NY)</span></li>
-                        
-                        <li><a 
href="http://www.meetup.com/Bay-Area-Stream-Processing";>Bay Area Stream 
Processing</a> <span class="small">(Emeryville, CA)</span></li>
-                        
-                        <li><a 
href="http://www.meetup.com/Boston-Storm-Users/">Boston Realtime Data</a> <span 
class="small">(Boston, MA)</span></li>
-                        
-                        <li><a 
href="http://www.meetup.com/storm-london";>London Storm User Group</a> <span 
class="small">(London, UK)</span></li>
-                        
-                        <!-- <li><a 
href="http://www.meetup.com/Apache-Storm-Kafka-Users/";>Seatle, WA</a> <span 
class="small">(27 Jun 2015)</span></li> -->
-                    </ul>
-                </div>
-            </div>
-            <div class="col-md-3">
-                <div class="footer-widget">
-                    <h5>About Storm</h5>
-                    <p>Storm integrates with any queueing system and any 
database system. Storm's spout abstraction makes it easy to integrate a new 
queuing system. Likewise, integrating Storm with database systems is easy.</p>
-               </div>
-            </div>
-            <div class="col-md-3">
-                <div class="footer-widget">
-                    <h5>First Look</h5>
-                    <ul class="footer-list">
-                        <li><a 
href="/releases/current/Rationale.html">Rationale</a></li>
-                        <li><a 
href="/releases/current/Tutorial.html">Tutorial</a></li>
-                        <li><a 
href="/releases/current/Setting-up-development-environment.html">Setting up 
development environment</a></li>
-                        <li><a 
href="/releases/current/Creating-a-new-Storm-project.html">Creating a new Storm 
project</a></li>
-                    </ul>
-                </div>
-            </div>
-            <div class="col-md-3">
-                <div class="footer-widget">
-                    <h5>Documentation</h5>
-                    <ul class="footer-list">
-                        <li><a 
href="/releases/current/index.html">Index</a></li>
-                        <li><a 
href="/releases/current/javadocs/index.html">Javadoc</a></li>
-                        <li><a href="/releases/current/FAQ.html">FAQ</a></li>
-                    </ul>
-                </div>
-            </div>
-        </div>
-        <hr/>
-        <div class="row">   
-            <div class="col-md-12">
-                <p align="center">Copyright © 2015 <a 
href="http://www.apache.org">Apache Software Foundation</a>. All Rights 
Reserved. 
-                    <br>Apache Storm, Apache, the Apache feather logo, and the 
Apache Storm project logos are trademarks of The Apache Software Foundation. 
-                    <br>All other marks mentioned may be trademarks or 
registered trademarks of their respective owners.</p>
-            </div>
-        </div>
-    </div>
-</footer>
-<!--Footer End-->
-<!-- Scroll to top -->
-<span class="totop"><a href="#"><i class="fa fa-angle-up"></i></a></span> 
-
-</body>
-
-</html>
-


http://git-wip-us.apache.org/repos/asf/storm-site/blob/6e122a12/content/releases/1.2.1/Running-topologies-on-a-production-cluster.html
----------------------------------------------------------------------
diff --git 
a/content/releases/1.2.1/Running-topologies-on-a-production-cluster.html 
b/content/releases/1.2.1/Running-topologies-on-a-production-cluster.html
deleted file mode 100644
index 587b5a7..0000000
--- a/content/releases/1.2.1/Running-topologies-on-a-production-cluster.html
+++ /dev/null
@@ -1,290 +0,0 @@
-<!DOCTYPE html>
-<html>
-    <head>
-    <meta charset="utf-8">
-    <meta http-equiv="X-UA-Compatible" content="IE=edge">
-    <meta name="viewport" content="width=device-width, initial-scale=1">
-
-    <link rel="shortcut icon" href="/favicon.ico" type="image/x-icon">
-    <link rel="icon" href="/favicon.ico" type="image/x-icon">
-
-    <title>Running Topologies on a Production Cluster</title>
-
-    <!-- Bootstrap core CSS -->
-    <link href="/assets/css/bootstrap.min.css" rel="stylesheet">
-    <!-- Bootstrap theme -->
-    <link href="/assets/css/bootstrap-theme.min.css" rel="stylesheet">
-
-    <!-- Custom styles for this template -->
-    <link rel="stylesheet" 
href="http://fortawesome.github.io/Font-Awesome/assets/font-awesome/css/font-awesome.css">
-    <link href="/css/style.css" rel="stylesheet">
-    <link href="/assets/css/owl.theme.css" rel="stylesheet">
-    <link href="/assets/css/owl.carousel.css" rel="stylesheet">
-    <script type="text/javascript" src="/assets/js/jquery.min.js"></script>
-    <script type="text/javascript" src="/assets/js/bootstrap.min.js"></script>
-    <script type="text/javascript" 
src="/assets/js/owl.carousel.min.js"></script>
-    <script type="text/javascript" src="/assets/js/storm.js"></script>
-    <!-- Just for debugging purposes. Don't actually copy these 2 lines! -->
-    <!--[if lt IE 9]><script 
src="../../assets/js/ie8-responsive-file-warning.js"></script><![endif]-->
-    
-    <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media 
queries -->
-    <!--[if lt IE 9]>
-      <script 
src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
-      <script 
src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
-    <![endif]-->
-  </head>
-
-
-  <body>
-    <header>
-  <div class="container-fluid">
-     <div class="row">
-          <div class="col-md-5">
-            <a href="/index.html"><img src="/images/logo.png" class="logo" 
/></a>
-          </div>
-          <div class="col-md-5">
-            
-              <h1>Version: 1.2.1</h1>
-            
-          </div>
-          <div class="col-md-2">
-            <a href="/downloads.html" class="btn-std btn-block 
btn-download">Download</a>
-          </div>
-        </div>
-    </div>
-</header>
-<!--Header End-->
-<!--Navigation Begin-->
-<div class="navbar" role="banner">
-  <div class="container-fluid">
-      <div class="navbar-header">
-          <button class="navbar-toggle" type="button" data-toggle="collapse" 
data-target=".bs-navbar-collapse">
-                <span class="icon-bar"></span>
-                <span class="icon-bar"></span>
-                <span class="icon-bar"></span>
-            </button>
-        </div>
-        <nav class="collapse navbar-collapse bs-navbar-collapse" 
role="navigation">
-          <ul class="nav navbar-nav">
-              <li><a href="/index.html" id="home">Home</a></li>
-                <li><a href="/getting-help.html" id="getting-help">Getting 
Help</a></li>
-                <li><a href="/about/integrates.html" id="project-info">Project 
Information</a></li>
-                <li class="dropdown">
-                    <a href="#" class="dropdown-toggle" data-toggle="dropdown" 
id="documentation">Documentation <b class="caret"></b></a>
-                    <ul class="dropdown-menu">
-                      
-                        
-                          <li><a 
href="/releases/2.0.0-SNAPSHOT/index.html">2.0.0-SNAPSHOT</a></li>
-                        
-                      
-                        
-                          <li><a 
href="/releases/1.2.1/index.html">1.2.1</a></li>
-                        
-                      
-                        
-                          <li><a 
href="/releases/1.1.2/index.html">1.1.2</a></li>
-                        
-                      
-                        
-                      
-                        
-                          <li><a 
href="/releases/1.0.6/index.html">1.0.6</a></li>
-                        
-                      
-                        
-                      
-                        
-                      
-                        
-                      
-                        
-                      
-                        
-                      
-                        
-                      
-                        
-                      
-                        
-                      
-                        
-                      
-                        
-                      
-                        
-                      
-                        
-                      
-                        
-                      
-                    </ul>
-                </li>
-                <li><a href="/talksAndVideos.html">Talks and 
Slideshows</a></li>
-                <li class="dropdown">
-                    <a href="#" class="dropdown-toggle" data-toggle="dropdown" 
id="contribute">Community <b class="caret"></b></a>
-                    <ul class="dropdown-menu">
-                        <li><a 
href="/contribute/Contributing-to-Storm.html">Contributing</a></li>
-                        <li><a href="/contribute/People.html">People</a></li>
-                        <li><a href="/contribute/BYLAWS.html">ByLaws</a></li>
-                    </ul>
-                </li>
-                <li><a href="/2018/06/04/storm122-released.html" 
id="news">News</a></li>
-            </ul>
-        </nav>
-    </div>
-</div>
-
-
-
-    <div class="container-fluid">
-    <h1 class="page-title">Running Topologies on a Production Cluster</h1>
-          <div class="row">
-               <div class="col-md-12">
-                    <!-- Documentation -->
-
-<p class="post-meta"></p>
-
-<div class="documentation-content"><p>Running topologies on a production 
cluster is similar to running in <a href="Local-mode.html">Local mode</a>. Here 
are the steps:</p>
-
-<p>1) Define the topology (Use <a 
href="javadocs/org/apache/storm/topology/TopologyBuilder.html">TopologyBuilder</a>
 if defining using Java)</p>
-
-<p>2) Use <a 
href="javadocs/org/apache/storm/StormSubmitter.html">StormSubmitter</a> to 
submit the topology to the cluster. <code>StormSubmitter</code> takes as input 
the name of the topology, a configuration for the topology, and the topology 
itself. For example:</p>
-<div class="highlight"><pre><code class="language-java" data-lang="java"><span 
class="n">Config</span> <span class="n">conf</span> <span class="o">=</span> 
<span class="k">new</span> <span class="n">Config</span><span 
class="o">();</span>
-<span class="n">conf</span><span class="o">.</span><span 
class="na">setNumWorkers</span><span class="o">(</span><span 
class="mi">20</span><span class="o">);</span>
-<span class="n">conf</span><span class="o">.</span><span 
class="na">setMaxSpoutPending</span><span class="o">(</span><span 
class="mi">5000</span><span class="o">);</span>
-<span class="n">StormSubmitter</span><span class="o">.</span><span 
class="na">submitTopology</span><span class="o">(</span><span 
class="s">"mytopology"</span><span class="o">,</span> <span 
class="n">conf</span><span class="o">,</span> <span 
class="n">topology</span><span class="o">);</span>
-</code></pre></div>
-<p>3) Create a JAR containing your topology code. You have the option to 
either bundle all of the dependencies of your code into that JAR (except for 
Storm -- the Storm JARs will be added to the classpath on the worker nodes), or 
you can leverage the <a href="Classpath-handling.html">Classpath handling</a> 
features in Storm for using external libraries without bundling them into your 
topology JAR.</p>
-
-<p>If you&#39;re using Maven, the <a 
href="http://maven.apache.org/plugins/maven-assembly-plugin/">Maven Assembly 
Plugin</a> can do the packaging for you. Just add this to your pom.xml:</p>
-<div class="highlight"><pre><code class="language-xml" data-lang="xml">  <span 
class="nt">&lt;plugin&gt;</span>
-    <span class="nt">&lt;artifactId&gt;</span>maven-assembly-plugin<span 
class="nt">&lt;/artifactId&gt;</span>
-    <span class="nt">&lt;configuration&gt;</span>
-      <span class="nt">&lt;descriptorRefs&gt;</span>  
-        <span 
class="nt">&lt;descriptorRef&gt;</span>jar-with-dependencies<span 
class="nt">&lt;/descriptorRef&gt;</span>
-      <span class="nt">&lt;/descriptorRefs&gt;</span>
-      <span class="nt">&lt;archive&gt;</span>
-        <span class="nt">&lt;manifest&gt;</span>
-          <span class="nt">&lt;mainClass&gt;</span>com.path.to.main.Class<span 
class="nt">&lt;/mainClass&gt;</span>
-        <span class="nt">&lt;/manifest&gt;</span>
-      <span class="nt">&lt;/archive&gt;</span>
-    <span class="nt">&lt;/configuration&gt;</span>
-  <span class="nt">&lt;/plugin&gt;</span>
-</code></pre></div>
-<p>Then run mvn assembly:assembly to get an appropriately packaged jar. Make 
sure you <a 
href="http://maven.apache.org/plugins/maven-assembly-plugin/examples/single/including-and-excluding-artifacts.html">exclude</a>
 the Storm jars since the cluster already has Storm on the classpath.</p>
-
-<p>4) Submit the topology to the cluster using the <code>storm</code> client, 
specifying the path to your jar, the classname to run, and any arguments it 
will use:</p>
-
-<p><code>storm jar path/to/allmycode.jar org.me.MyTopology arg1 arg2 
arg3</code></p>
-
-<p><code>storm jar</code> will submit the jar to the cluster and configure the 
<code>StormSubmitter</code> class to talk to the right cluster. In this 
example, after uploading the jar <code>storm jar</code> calls the main function 
on <code>org.me.MyTopology</code> with the arguments &quot;arg1&quot;, 
&quot;arg2&quot;, and &quot;arg3&quot;.</p>
-
-<p>You can find out how to configure your <code>storm</code> client to talk to 
a Storm cluster on <a href="Setting-up-development-environment.html">Setting up 
development environment</a>.</p>
-
-<h3 id="common-configurations">Common configurations</h3>
-
-<p>There are a variety of configurations you can set per topology. A list of 
all the configurations you can set can be found <a 
href="javadocs/org/apache/storm/Config.html">here</a>. The ones prefixed with 
&quot;TOPOLOGY&quot; can be overridden on a topology-specific basis (the other 
ones are cluster configurations and cannot be overridden). Here are some common 
ones that are set for a topology:</p>
-
-<ol>
-<li><strong>Config.TOPOLOGY_WORKERS</strong>: This sets the number of worker 
processes to use to execute the topology. For example, if you set this to 25, 
there will be 25 Java processes across the cluster executing all the tasks. If 
you had a combined 150 parallelism across all components in the topology, each 
worker process will have 6 tasks running within it as threads.</li>
-<li><strong>Config.TOPOLOGY_ACKER_EXECUTORS</strong>: This sets the number of 
executors that will track tuple trees and detect when a spout tuple has been 
fully processed. Ackers are an integral part of Storm&#39;s reliability model 
and you can read more about them on <a 
href="Guaranteeing-message-processing.html">Guaranteeing message 
processing</a>. By not setting this variable or setting it as null, Storm will 
set the number of acker executors to be equal to the number of workers 
configured for this topology. If this variable is set to 0, then Storm will 
immediately ack tuples as soon as they come off the spout, effectively 
disabling reliability.</li>
-<li><strong>Config.TOPOLOGY_MAX_SPOUT_PENDING</strong>: This sets the maximum 
number of spout tuples that can be pending on a single spout task at once 
(pending means the tuple has not been acked or failed yet). It is highly 
recommended you set this config to prevent queue explosion.</li>
-<li><strong>Config.TOPOLOGY_MESSAGE_TIMEOUT_SECS</strong>: This is the maximum 
amount of time a spout tuple has to be fully completed before it is considered 
failed. This value defaults to 30 seconds, which is sufficient for most 
topologies. See <a href="Guaranteeing-message-processing.html">Guaranteeing 
message processing</a> for more information on how Storm&#39;s reliability 
model works.</li>
-<li><strong>Config.TOPOLOGY_SERIALIZATIONS</strong>: You can register more 
serializers to Storm using this config so that you can use custom types within 
tuples.</li>
-</ol>
-
-<h3 id="killing-a-topology">Killing a topology</h3>
-
-<p>To kill a topology, simply run:</p>
-
-<p><code>storm kill {stormname}</code></p>
-
-<p>Give the same name to <code>storm kill</code> as you used when submitting 
the topology.</p>
-
-<p>Storm won&#39;t kill the topology immediately. Instead, it deactivates all 
the spouts so that they don&#39;t emit any more tuples, and then Storm waits 
Config.TOPOLOGY_MESSAGE_TIMEOUT_SECS seconds before destroying all the workers. 
This gives the topology enough time to complete any tuples it was processing 
when it got killed.</p>
-
-<h3 id="updating-a-running-topology">Updating a running topology</h3>
-
-<p>To update a running topology, the only option currently is to kill the 
current topology and resubmit a new one. A planned feature is to implement a 
<code>storm swap</code> command that swaps a running topology with a new one, 
ensuring minimal downtime and no chance of both topologies processing tuples at 
the same time. </p>
-
-<h3 id="monitoring-topologies">Monitoring topologies</h3>
-
-<p>The best place to monitor a topology is using the Storm UI. The Storm UI 
provides information about errors happening in tasks and fine-grained stats on 
the throughput and latency performance of each component of each running 
topology.</p>
-
-<p>You can also look at the worker logs on the cluster machines.</p>
-</div>
-
-
-                 </div>
-              </div>
-         </div>
-<footer>
-    <div class="container-fluid">
-        <div class="row">
-            <div class="col-md-3">
-                <div class="footer-widget">
-                    <h5>Meetups</h5>
-                    <ul class="latest-news">
-                        
-                        <li><a 
href="http://www.meetup.com/Apache-Storm-Apache-Kafka/">Apache Storm &amp; Apache 
Kafka</a> <span class="small">(Sunnyvale, CA)</span></li>
-                        
-                        <li><a 
href="http://www.meetup.com/Apache-Storm-Kafka-Users/">Apache Storm &amp; Kafka 
Users</a> <span class="small">(Seattle, WA)</span></li>
-                        
-                        <li><a 
href="http://www.meetup.com/New-York-City-Storm-User-Group/">NYC Storm User 
Group</a> <span class="small">(New York, NY)</span></li>
-                        
-                        <li><a 
href="http://www.meetup.com/Bay-Area-Stream-Processing">Bay Area Stream 
Processing</a> <span class="small">(Emeryville, CA)</span></li>
-                        
-                        <li><a 
href="http://www.meetup.com/Boston-Storm-Users/">Boston Realtime Data</a> <span 
class="small">(Boston, MA)</span></li>
-                        
-                        <li><a 
href="http://www.meetup.com/storm-london">London Storm User Group</a> <span 
class="small">(London, UK)</span></li>
-                        
-                        <!-- <li><a 
href="http://www.meetup.com/Apache-Storm-Kafka-Users/">Seattle, WA</a> <span 
class="small">(27 Jun 2015)</span></li> -->
-                    </ul>
-                </div>
-            </div>
-            <div class="col-md-3">
-                <div class="footer-widget">
-                    <h5>About Storm</h5>
-                    <p>Storm integrates with any queueing system and any 
database system. Storm's spout abstraction makes it easy to integrate a new 
queuing system. Likewise, integrating Storm with database systems is easy.</p>
-               </div>
-            </div>
-            <div class="col-md-3">
-                <div class="footer-widget">
-                    <h5>First Look</h5>
-                    <ul class="footer-list">
-                        <li><a 
href="/releases/current/Rationale.html">Rationale</a></li>
-                        <li><a 
href="/releases/current/Tutorial.html">Tutorial</a></li>
-                        <li><a 
href="/releases/current/Setting-up-development-environment.html">Setting up 
development environment</a></li>
-                        <li><a 
href="/releases/current/Creating-a-new-Storm-project.html">Creating a new Storm 
project</a></li>
-                    </ul>
-                </div>
-            </div>
-            <div class="col-md-3">
-                <div class="footer-widget">
-                    <h5>Documentation</h5>
-                    <ul class="footer-list">
-                        <li><a 
href="/releases/current/index.html">Index</a></li>
-                        <li><a 
href="/releases/current/javadocs/index.html">Javadoc</a></li>
-                        <li><a href="/releases/current/FAQ.html">FAQ</a></li>
-                    </ul>
-                </div>
-            </div>
-        </div>
-        <hr/>
-        <div class="row">   
-            <div class="col-md-12">
-                <p align="center">Copyright © 2015 <a 
href="http://www.apache.org">Apache Software Foundation</a>. All Rights 
Reserved. 
-                    <br>Apache Storm, Apache, the Apache feather logo, and the 
Apache Storm project logos are trademarks of The Apache Software Foundation. 
-                    <br>All other marks mentioned may be trademarks or 
registered trademarks of their respective owners.</p>
-            </div>
-        </div>
-    </div>
-</footer>
-<!--Footer End-->
-<!-- Scroll to top -->
-<span class="totop"><a href="#"><i class="fa fa-angle-up"></i></a></span> 
-
-</body>
-
-</html>
-

[39/51] [partial] storm-site git commit: Update latest 1.x-branch version to 1.2.2

Reply via email to