metron#1053

cestella Mon, 11 Jun 2018 15:20:41 -0700
http://git-wip-us.apache.org/repos/asf/metron/blob/ae1d3eb9/site/current-book/metron-platform/metron-data-management/index.html
----------------------------------------------------------------------
diff --git 
a/site/current-book/metron-platform/metron-data-management/index.html 
b/site/current-book/metron-platform/metron-data-management/index.html
index df34389..dea600c 100644
--- a/site/current-book/metron-platform/metron-data-management/index.html
+++ b/site/current-book/metron-platform/metron-data-management/index.html
@@ -1,359 +1,190 @@
 <!DOCTYPE html>
 <!--
- | Generated by Apache Maven Doxia at 2018-01-03
- | Rendered using Apache Maven Fluido Skin 1.3.0
+ | Generated by Apache Maven Doxia Site Renderer 1.8 from 
src/site/markdown/metron-platform/metron-data-management/index.md at 2018-06-07
+ | Rendered using Apache Maven Fluido Skin 1.7
 -->
 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
   <head>
     <meta charset="UTF-8" />
     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
-    <meta name="Date-Revision-yyyymmdd" content="20180103" />
+    <meta name="Date-Revision-yyyymmdd" content="20180607" />
     <meta http-equiv="Content-Language" content="en" />
     <title>Metron &#x2013; Resource Data Management</title>
-    <link rel="stylesheet" href="../../css/apache-maven-fluido-1.3.0.min.css" 
/>
+    <link rel="stylesheet" href="../../css/apache-maven-fluido-1.7.min.css" />
     <link rel="stylesheet" href="../../css/site.css" />
     <link rel="stylesheet" href="../../css/print.css" media="print" />
-
-      
-    <script type="text/javascript" 
src="../../js/apache-maven-fluido-1.3.0.min.js"></script>
-
-                          
-        
-<script type="text/javascript">$( document ).ready( function() { $( 
'.carousel' ).carousel( { interval: 3500 } ) } );</script>
-          
-            </head>
-        <body class="topBarDisabled">
-          
-                
-                    
-    
-        <div class="container-fluid">
-          <div id="banner">
-        <div class="pull-left">
-                                    <a href="http://metron.apache.org/"; 
id="bannerLeft">
-                                                                               
                 <img src="../../images/metron-logo.png"  alt="Apache Metron" 
width="148px" height="48px"/>
-                </a>
-                      </div>
-        <div class="pull-right">  </div>
+    <script type="text/javascript" 
src="../../js/apache-maven-fluido-1.7.min.js"></script>
+<script type="text/javascript">
+              $( document ).ready( function() { $( '.carousel' ).carousel( { 
interval: 3500 } ) } );
+            </script>
+  </head>
+  <body class="topBarDisabled">
+    <div class="container-fluid">
+      <div id="banner">
+        <div class="pull-left"><a href="http://metron.apache.org/" 
id="bannerLeft"><img src="../../images/metron-logo.png"  alt="Apache Metron" 
width="148px" height="48px"/></a></div>
+        <div class="pull-right"></div>
         <div class="clear"><hr/></div>
       </div>
 
       <div id="breadcrumbs">
         <ul class="breadcrumb">
-                
-                    
-                              <li class="">
-                    <a href="http://www.apache.org"; class="externalLink" 
title="Apache">
-        Apache</a>
-        </li>
-      <li class="divider ">/</li>
-            <li class="">
-                    <a href="http://metron.apache.org/"; class="externalLink" 
title="Metron">
-        Metron</a>
-        </li>
-      <li class="divider ">/</li>
-            <li class="">
-                    <a href="../../index.html" title="Documentation">
-        Documentation</a>
-        </li>
-      <li class="divider ">/</li>
-        <li class="">Resource Data Management</li>
-        
-                
-                    
-                  <li id="publishDate" class="pull-right">Last Published: 
2018-01-03</li> <li class="divider pull-right">|</li>
-              <li id="projectVersion" class="pull-right">Version: 0.4.2</li>
-            
-                            </ul>
+      <li class=""><a href="http://www.apache.org" class="externalLink" 
title="Apache">Apache</a><span class="divider">/</span></li>
+      <li class=""><a href="http://metron.apache.org/" class="externalLink" 
title="Metron">Metron</a><span class="divider">/</span></li>
+      <li class=""><a href="../../index.html" 
title="Documentation">Documentation</a><span class="divider">/</span></li>
+    <li class="active ">Resource Data Management</li>
+        <li id="publishDate" class="pull-right"><span class="divider">|</span> 
Last Published: 2018-06-07</li>
+          <li id="projectVersion" class="pull-right">Version: 0.5.0</li>
+        </ul>
       </div>
-
-            
       <div class="row-fluid">
-        <div id="leftColumn" class="span3">
+        <div id="leftColumn" class="span2">
           <div class="well sidebar-nav">
-                
-                    
-                <ul class="nav nav-list">
-                    <li class="nav-header">User Documentation</li>
-                                                                               
                                                                                
                                                                                
                                                                                
                                                                                
                                                                                
                                                                                
                                                                                
                                                                                
                                                                                
                                                                                
                                                                                
                                      
                                                                          
-      <li>
-    
-                          <a href="../../index.html" title="Metron">
-          <i class="icon-chevron-down"></i>
-        Metron</a>
-                    <ul class="nav nav-list">
-                      
-      <li>
-    
-                          <a href="../../Upgrading.html" title="Upgrading">
-          <i class="none"></i>
-        Upgrading</a>
-            </li>
-                                                                               
                                                                       
-      <li>
-    
-                          <a href="../../metron-analytics/index.html" 
title="Analytics">
-          <i class="icon-chevron-right"></i>
-        Analytics</a>
-                  </li>
-                      
-      <li>
-    
-                          <a 
href="../../metron-contrib/metron-docker/index.html" title="Docker">
-          <i class="none"></i>
-        Docker</a>
-            </li>
-                                                                               
                                                                                
                                                                                
                                                                                
                                                                             
-      <li>
-    
-                          <a href="../../metron-deployment/index.html" 
title="Deployment">
-          <i class="icon-chevron-right"></i>
-        Deployment</a>
-                  </li>
-                      
-      <li>
-    
-                          <a 
href="../../metron-interface/metron-alerts/index.html" title="Alerts">
-          <i class="none"></i>
-        Alerts</a>
-            </li>
-                      
-      <li>
-    
-                          <a 
href="../../metron-interface/metron-config/index.html" title="Config">
-          <i class="none"></i>
-        Config</a>
-            </li>
-                      
-      <li>
-    
-                          <a 
href="../../metron-interface/metron-rest/index.html" title="Rest">
-          <i class="none"></i>
-        Rest</a>
-            </li>
-                                                                               
                                                                                
                                                                                
                                               
-      <li>
-    
-                          <a href="../../metron-platform/index.html" 
title="Platform">
-          <i class="icon-chevron-down"></i>
-        Platform</a>
-                    <ul class="nav nav-list">
-                      
-      <li>
-    
-                          <a 
href="../../metron-platform/Performance-tuning-guide.html" 
title="Performance-tuning-guide">
-          <i class="none"></i>
-        Performance-tuning-guide</a>
-            </li>
-                      
-      <li>
-    
-                          <a 
href="../../metron-platform/metron-api/index.html" title="Api">
-          <i class="none"></i>
-        Api</a>
-            </li>
-                      
-      <li>
-    
-                          <a 
href="../../metron-platform/metron-common/index.html" title="Common">
-          <i class="none"></i>
-        Common</a>
-            </li>
-                      
-      <li class="active">
-    
-            <a href="#"><i class="none"></i>Data-management</a>
-          </li>
-                      
-      <li>
-    
-                          <a 
href="../../metron-platform/metron-elasticsearch/index.html" 
title="Elasticsearch">
-          <i class="none"></i>
-        Elasticsearch</a>
-            </li>
-                      
-      <li>
-    
-                          <a 
href="../../metron-platform/metron-enrichment/index.html" title="Enrichment">
-          <i class="none"></i>
-        Enrichment</a>
-            </li>
-                      
-      <li>
-    
-                          <a 
href="../../metron-platform/metron-indexing/index.html" title="Indexing">
-          <i class="none"></i>
-        Indexing</a>
-            </li>
-                      
-      <li>
-    
-                          <a 
href="../../metron-platform/metron-management/index.html" title="Management">
-          <i class="none"></i>
-        Management</a>
-            </li>
-                                                                        
-      <li>
-    
-                          <a 
href="../../metron-platform/metron-parsers/index.html" title="Parsers">
-          <i class="icon-chevron-right"></i>
-        Parsers</a>
-                  </li>
-                      
-      <li>
-    
-                          <a 
href="../../metron-platform/metron-pcap-backend/index.html" 
title="Pcap-backend">
-          <i class="none"></i>
-        Pcap-backend</a>
-            </li>
-                      
-      <li>
-    
-                          <a 
href="../../metron-platform/metron-writer/index.html" title="Writer">
-          <i class="none"></i>
-        Writer</a>
-            </li>
-              </ul>
-        </li>
-                                                                               
           
-      <li>
-    
-                          <a href="../../metron-sensors/index.html" 
title="Sensors">
-          <i class="icon-chevron-right"></i>
-        Sensors</a>
-                  </li>
-                      
-      <li>
-    
-                          <a 
href="../../metron-stellar/stellar-3rd-party-example/index.html" 
title="Stellar-3rd-party-example">
-          <i class="none"></i>
-        Stellar-3rd-party-example</a>
-            </li>
-                                                                        
-      <li>
-    
-                          <a 
href="../../metron-stellar/stellar-common/index.html" title="Stellar-common">
-          <i class="icon-chevron-right"></i>
-        Stellar-common</a>
-                  </li>
-                                                                               
           
-      <li>
-    
-                          <a href="../../use-cases/index.html" 
title="Use-cases">
-          <i class="icon-chevron-right"></i>
-        Use-cases</a>
-                  </li>
-              </ul>
-        </li>
-            </ul>
-                
-                    
-                
-          <hr class="divider" />
-
-           <div id="poweredBy">
-                            <div class="clear"></div>
-                            <div class="clear"></div>
-                            <div class="clear"></div>
-                             <a href="http://maven.apache.org/"; title="Built 
by Maven" class="poweredBy">
-        <img class="builtBy" alt="Built by Maven" 
src="../../images/logos/maven-feather.png" />
-      </a>
-                  </div>
+    <ul class="nav nav-list">
+      <li class="nav-header">User Documentation</li>
+    <li><a href="../../index.html" title="Metron"><span 
class="icon-chevron-down"></span>Metron</a>
+    <ul class="nav nav-list">
+    <li><a href="../../CONTRIBUTING.html" title="CONTRIBUTING"><span 
class="none"></span>CONTRIBUTING</a></li>
+    <li><a href="../../Upgrading.html" title="Upgrading"><span 
class="none"></span>Upgrading</a></li>
+    <li><a href="../../metron-analytics/index.html" title="Analytics"><span 
class="icon-chevron-right"></span>Analytics</a></li>
+    <li><a href="../../metron-contrib/metron-docker/index.html" 
title="Docker"><span class="none"></span>Docker</a></li>
+    <li><a href="../../metron-contrib/metron-performance/index.html" 
title="Performance"><span class="none"></span>Performance</a></li>
+    <li><a href="../../metron-deployment/index.html" title="Deployment"><span 
class="icon-chevron-right"></span>Deployment</a></li>
+    <li><a href="../../metron-interface/metron-alerts/index.html" 
title="Alerts"><span class="none"></span>Alerts</a></li>
+    <li><a href="../../metron-interface/metron-config/index.html" 
title="Config"><span class="none"></span>Config</a></li>
+    <li><a href="../../metron-interface/metron-rest/index.html" 
title="Rest"><span class="none"></span>Rest</a></li>
+    <li><a href="../../metron-platform/index.html" title="Platform"><span 
class="icon-chevron-down"></span>Platform</a>
+    <ul class="nav nav-list">
+    <li><a href="../../metron-platform/Performance-tuning-guide.html" 
title="Performance-tuning-guide"><span 
class="none"></span>Performance-tuning-guide</a></li>
+    <li><a href="../../metron-platform/metron-api/index.html" 
title="Api"><span class="none"></span>Api</a></li>
+    <li><a href="../../metron-platform/metron-common/index.html" 
title="Common"><span class="none"></span>Common</a></li>
+    <li class="active"><a href="#"><span 
class="none"></span>Data-management</a></li>
+    <li><a href="../../metron-platform/metron-elasticsearch/index.html" 
title="Elasticsearch"><span class="none"></span>Elasticsearch</a></li>
+    <li><a href="../../metron-platform/metron-enrichment/index.html" 
title="Enrichment"><span class="icon-chevron-right"></span>Enrichment</a></li>
+    <li><a href="../../metron-platform/metron-indexing/index.html" 
title="Indexing"><span class="none"></span>Indexing</a></li>
+    <li><a href="../../metron-platform/metron-management/index.html" 
title="Management"><span class="none"></span>Management</a></li>
+    <li><a href="../../metron-platform/metron-parsers/index.html" 
title="Parsers"><span class="icon-chevron-right"></span>Parsers</a></li>
+    <li><a href="../../metron-platform/metron-pcap-backend/index.html" 
title="Pcap-backend"><span class="none"></span>Pcap-backend</a></li>
+    <li><a href="../../metron-platform/metron-writer/index.html" 
title="Writer"><span class="none"></span>Writer</a></li>
+    </ul>
+</li>
+    <li><a href="../../metron-sensors/index.html" title="Sensors"><span 
class="icon-chevron-right"></span>Sensors</a></li>
+    <li><a href="../../metron-stellar/stellar-3rd-party-example/index.html" 
title="Stellar-3rd-party-example"><span 
class="none"></span>Stellar-3rd-party-example</a></li>
+    <li><a href="../../metron-stellar/stellar-common/index.html" 
title="Stellar-common"><span 
class="icon-chevron-right"></span>Stellar-common</a></li>
+    <li><a href="../../metron-stellar/stellar-zeppelin/index.html" 
title="Stellar-zeppelin"><span class="none"></span>Stellar-zeppelin</a></li>
+    <li><a href="../../use-cases/index.html" title="Use-cases"><span 
class="icon-chevron-right"></span>Use-cases</a></li>
+    </ul>
+</li>
+</ul>
+          <hr />
+          <div id="poweredBy">
+            <div class="clear"></div>
+            <div class="clear"></div>
+            <div class="clear"></div>
+            <div class="clear"></div>
+<a href="http://maven.apache.org/" title="Built by Maven" 
class="poweredBy"><img class="builtBy" alt="Built by Maven" 
src="../../images/logos/maven-feather.png" /></a>
+            </div>
           </div>
         </div>
-        
-                
-        <div id="bodyColumn"  class="span9" >
-                                  
-            <h1>Resource Data Management</h1>
+        <div id="bodyColumn"  class="span10" >
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+-->
+<h1>Resource Data Management</h1>
 <p><a name="Resource_Data_Management"></a></p>
-<p>This project is a collection of classes to assist with loading of various 
enrichment and threat intelligence sources into Metron.</p>
 <div class="section">
-<h2><a name="Simple_HBase_EnrichmentsThreat_Intelligence"></a>Simple HBase 
Enrichments/Threat Intelligence</h2>
-<p>The vast majority of enrichments and threat intelligence processing tend 
toward the following pattern:</p>
+<h2><a name="Table_of_Contents"></a>Table of Contents</h2>
+<ul>
 
+<li><a href="#Overview">Overview</a></li>
+<li><a href="#Simple_HBase_Enrichments.2FThreat_Intelligence">Simple HBase 
Enrichments/Threat Intelligence</a></li>
+<li><a href="#Extractor_Framework">Extractor Framework</a></li>
+<li><a href="#Enrichment_Config">Enrichment Config</a></li>
+<li><a href="#Loading_Utilities">Loading Utilities</a></li>
+<li><a href="#Pruning_Data_from_Elasticsearch">Pruning Data from 
Elasticsearch</a></li>
+</ul></div>
+<div class="section">
+<h2><a name="Overview"></a>Overview</h2>
+<p>This project is a collection of classes to assist with loading of various 
enrichment and threat intelligence sources into Metron.</p></div>
+<div class="section">
+<h2><a name="Simple_HBase_Enrichments.2FThreat_Intelligence"></a>Simple HBase 
Enrichments/Threat Intelligence</h2>
+<p>The vast majority of enrichments and threat intelligence processing tend 
toward the following pattern:</p>
 <ul>
-  
+
 <li>Take a field</li>
-  
 <li>Look up the field in a key/value store</li>
-  
 <li>If the key exists, then either it&#x2019;s a threat to be alerted or it 
should be enriched with the value associated with the key.</li>
 </ul>
-<p>As such, we have created this capability as a default threat intel and 
enrichment adapter. The basic primitive for simple enrichments and threat 
intelligence sources is a complex key containing the following:</p>
-
+<p>As such, we have created this capability as a default threat intel and 
enrichment adapter.  The basic primitive for simple enrichments and threat 
intelligence sources is a complex key containing the following:</p>
 <ul>
-  
+
 <li>Type : The type of threat intel or enrichment (e.g. malicious_ip)</li>
-  
 <li>Indicator : The indicator in question</li>
-  
-<li>Value : The value to associate with the type, indicator pair. This is a 
JSON map.</li>
+<li>Value : The value to associate with the type, indicator pair.  This is a 
JSON map.</li>
 </ul>
 <p>At present, all of the dataloads utilities function by converting raw data 
sources to this primitive key (type, indicator) and value to be placed in 
HBase.</p>
 <p>In the case of threat intel, a hit on the threat intel table will result 
in:</p>
-
 <ul>
-  
+
 <li>The <tt>is_alert</tt> field being set to <tt>true</tt> in the index</li>
-  
 <li>A field named 
<tt>threatintels.hbaseThreatIntel.$field.$threatintel_type</tt> is set to 
<tt>alert</tt>
-  
 <ul>
-    
+
 <li><tt>$field</tt> is the field in the original document that was a match 
(e.g. <tt>src_ip_addr</tt>)</li>
-    
 <li><tt>$threatintel_type</tt> is the type of threat intel imported (defined 
in the Extractor configuration below).</li>
-  </ul></li>
 </ul>
-<p>In the case of simple hbase enrichment, a hit on the enrichments table will 
result in the following new field for each key in the 
value:<tt>enrichments.hbaseEnrichment.$field.$enrichment_type.$key</tt> </p>
-
+</li>
+</ul>
+<p>In the case of simple hbase enrichment, a hit on the enrichments table will 
result in the following new field for each key in the 
value:<tt>enrichments.hbaseEnrichment.$field.$enrichment_type.$key</tt></p>
 <ul>
-  
-<li><tt>$field</tt> is the field in the original document that was a match 
(e.g. <tt>src_ip_addr</tt>)</li>
-  
+
+<li><tt>$field</tt> is the field in the original document that was a match 
(e.g.  <tt>src_ip_addr</tt>)</li>
 <li><tt>$enrichment_type</tt> is the type of enrichment imported (defined in 
the Extractor configuration below).</li>
-  
 <li><tt>$key</tt> is a key in the JSON map associated with the row in 
HBase.</li>
 </ul>
 <p>For instance, in the situation where we had the following very silly 
key/value in HBase in the enrichment table:</p>
-
 <ul>
-  
+
 <li>indicator: <tt>127.0.0.1</tt></li>
-  
 <li>type : <tt>important_addresses</tt></li>
-  
 <li>value: <tt>{ &quot;name&quot; : &quot;localhost&quot;, 
&quot;location&quot; : &quot;home&quot; }</tt></li>
 </ul>
 <p>If we had a document whose <tt>ip_src_addr</tt> came through with a value 
of <tt>127.0.0.1</tt>, we would have the following fields added to the indexed 
document:</p>
-
 <ul>
-  
+
 <li><tt>enrichments.hbaseEnrichment.ip_src_addr.important_addresses.name</tt> 
: <tt>localhost</tt></li>
-  
 
<li><tt>enrichments.hbaseEnrichment.ip_src_addr.important_addresses.location</tt>
 : <tt>home</tt></li>
 </ul></div>
 <div class="section">
 <h2><a name="Extractor_Framework"></a>Extractor Framework</h2>
-<p>For the purpose of ingesting data of a variety of formats, we have created 
an Extractor framework which allows for common data formats to be interpreted 
as enrichment or threat intelligence sources. The formats supported at present 
are:</p>
-
+<p>For the purpose of ingesting data of a variety of formats, we have created 
an Extractor framework which allows for common data formats to be interpreted 
as enrichment or threat intelligence sources.  The formats supported at present 
are:</p>
 <ul>
-  
+
 <li>CSV (both threat intel and enrichment)</li>
-  
 <li>STIX (threat intel only)</li>
-  
 <li>Custom (pass your own class)</li>
 </ul>
-<p>All of the current utilities take a JSON file to configure how to interpret 
input data. This JSON describes the type of data and the schema if necessary 
for the data if it is not fixed (as in STIX, e.g.).</p>
+<p>All of the current utilities take a JSON file to configure how to interpret 
input data.  This JSON describes the type of data and the schema if necessary 
for the data if it is not fixed (as in STIX, e.g.).</p>
 <div class="section">
 <h3><a name="CSV_Extractor"></a>CSV Extractor</h3>
 <p>Consider the following example configuration file which describes how to 
process a CSV file.</p>
 
-<div class="source">
-<div class="source">
-<pre>{
+<div>
+<div>
+<pre class="source">{
   &quot;config&quot; : {
     &quot;columns&quot; : {
          &quot;ip&quot; : 0
@@ -366,129 +197,86 @@
   ,&quot;extractor&quot; : &quot;CSV&quot;
 }
 </pre></div></div>
-<p>In this example, we have instructed the extractor of the schema (i.e. the 
columns field), two columns at the first and third position. We have indicated 
that the <tt>ip</tt> column is the indicator type and that the enrichment type 
is named <tt>malicious_ip</tt>. We have also indicated that the extractor to 
use is the CSV Extractor. The other option is the STIX extractor or a fully 
qualified classname for your own extractor.</p>
-<p>The meta column values will show up in the value in HBase because it is 
called out as a non-indicator column. The key for the value will be 
&#x2018;meta&#x2019;. For instance, given an input string of 
<tt>123.45.123.12,something,the grapevine</tt>, the following key, value would 
be extracted:</p>
 
+<p>In this example, we have instructed the extractor of the schema (i.e. the 
columns field), two columns at the first and third position.  We have indicated 
that the <tt>ip</tt> column is the indicator type and that the enrichment type 
is named <tt>malicious_ip</tt>.  We have also indicated that the extractor to 
use is the CSV Extractor. The other option is the STIX extractor or a fully 
qualified classname for your own extractor.</p>
+<p>The meta column values will show up in the value in HBase because it is 
called out as a non-indicator column.  The key for the value will be 
&#x2018;meta&#x2019;.  For instance, given an input string of 
<tt>123.45.123.12,something,the grapevine</tt>, the following key, value would 
be extracted:</p>
 <ul>
-  
+
 <li>Indicator : <tt>123.45.123.12</tt></li>
-  
 <li>Type : <tt>malicious_ip</tt></li>
-  
 <li>Value : <tt>{ &quot;ip&quot; : &quot;123.45.123.12&quot;, 
&quot;source&quot; : &quot;the grapevine&quot; }</tt></li>
 </ul></div>
 <div class="section">
 <h3><a name="STIX_Extractor"></a>STIX Extractor</h3>
-<p>Consider the following config for importing STIX documents. This is a 
threat intelligence interchange format, so it is particularly relevant and 
attractive data to import for our purposes. Because STIX is a standard format, 
there is no need to specify the schema or how to interpret the documents.</p>
+<p>Consider the following config for importing STIX documents.  This is a 
threat intelligence interchange format, so it is particularly relevant and 
attractive data to import for our purposes.  Because STIX is a standard format, 
there is no need to specify the schema or how to interpret the documents.</p>
 <p>We support the versions of Stix and Cybox supported by <a 
class="externalLink" 
href="https://github.com/STIXProject/java-stix/tree/v1.2.0.2">java-stix</a>:</p>
-
 <ul>
-  
+
 <li>Stix - <a class="externalLink" 
href="https://github.com/STIXProject/schemas/blob/356cc4f6b06625465f0808388eb166807313b4e0/stix_core.xsd">1.2</a>
 and earlier</li>
-  
 <li>Cybox - <a class="externalLink" 
href="https://github.com/CybOXProject/schemas/blob/97beb32c376a9223e91b52cb3e4c8d2af6baf786/cybox_core.xsd">2.1</a>
 and earlier</li>
 </ul>
 <p>We support a subset of STIX messages for importation:</p>
-
 <table border="0" class="table table-striped">
-  <thead>
-    
+<thead>
+
 <tr class="a">
-      
-<th>STIX Type </th>
-      
-<th>Specific Type </th>
-      
-<th>Enrichment Type Name </th>
-    </tr>
-  </thead>
-  <tbody>
-    
+<th> STIX Type </th>
+<th> Specific Type </th>
+<th> Enrichment Type Name </th></tr>
+</thead><tbody>
+
 <tr class="b">
-      
-<td>Address </td>
-      
-<td>IPV_4_ADDR </td>
-      
-<td>address:IPV_4_ADDR </td>
-    </tr>
-    
+<td> Address   </td>
+<td> IPV_4_ADDR    </td>
+<td> address:IPV_4_ADDR   </td></tr>
 <tr class="a">
-      
-<td>Address </td>
-      
-<td>IPV_6_ADDR </td>
-      
-<td>address:IPV_6_ADDR </td>
-    </tr>
-    
+<td> Address   </td>
+<td> IPV_6_ADDR    </td>
+<td> address:IPV_6_ADDR   </td></tr>
 <tr class="b">
-      
-<td>Address </td>
-      
-<td>E_MAIL </td>
-      
-<td>address:E_MAIL </td>
-    </tr>
-    
+<td> Address   </td>
+<td> E_MAIL        </td>
+<td> address:E_MAIL       </td></tr>
 <tr class="a">
-      
-<td>Address </td>
-      
-<td>MAC </td>
-      
-<td>address:MAC </td>
-    </tr>
-    
+<td> Address   </td>
+<td> MAC           </td>
+<td> address:MAC          </td></tr>
 <tr class="b">
-      
-<td>Domain </td>
-      
-<td>FQDN </td>
-      
-<td>domain:FQDN </td>
-    </tr>
-    
+<td> Domain    </td>
+<td> FQDN          </td>
+<td> domain:FQDN          </td></tr>
 <tr class="a">
-      
-<td>Hostname </td>
-      
-<td> </td>
-      
-<td>hostname </td>
-    </tr>
-    
+<td> Hostname  </td>
+<td>               </td>
+<td> hostname             </td></tr>
 <tr class="b">
-      
-<td>URI </td>
-      
-<td> </td>
-      
-<td>uriobjecttype </td>
-    </tr>
-  </tbody>
+<td> URI       </td>
+<td>               </td>
+<td> uriobjecttype        </td></tr>
+</tbody>
 </table>
 <p>NOTE: The enrichment type will be used as the type above.</p>
 <p>Consider the following configuration for an Extractor</p>
 
-<div class="source">
-<div class="source">
-<pre>{
+<div>
+<div>
+<pre class="source">{
   &quot;config&quot; : {
     &quot;stix_address_categories&quot; : &quot;IPV_4_ADDR&quot;
   }
   ,&quot;extractor&quot; : &quot;STIX&quot;
 }
 </pre></div></div>
-<p>In here, we&#x2019;re configuring the STIX extractor to load from a series 
of STIX files, however we only want to bring in IPv4 addresses from the set of 
all possible addresses. Note that if no categories are specified for import, 
all are assumed. Also, only address and domain types allow filtering via 
<tt>stix_address_categories</tt> and <tt>stix_domain_categories</tt> config 
parameters.</p></div>
+
+<p>In here, we&#x2019;re configuring the STIX extractor to load from a series 
of STIX files, however we only want to bring in IPv4 addresses from the set of 
all possible addresses.  Note that if no categories are specified for import, 
all are assumed. Also, only address and domain types allow filtering via 
<tt>stix_address_categories</tt> and <tt>stix_domain_categories</tt> config 
parameters.</p></div>
 <div class="section">
 <h3><a name="Common_Extractor_Properties"></a>Common Extractor Properties</h3>
 <p>Users also have the ability to transform and filter enrichment and threat 
intel data using Stellar as it is loaded into HBase. This feature is available 
to all extractor types.</p>
 <p>As an example, we will be providing a CSV list of top domains as an 
enrichment and filtering the value metadata, as well as the indicator column, 
with Stellar expressions.</p>
 
-<div class="source">
-<div class="source">
-<pre>{
+<div>
+<div>
+<pre class="source">{
   &quot;config&quot; : {
     &quot;zk_quorum&quot; : &quot;node1:2181&quot;,
     &quot;columns&quot; : {
@@ -510,154 +298,117 @@
   &quot;extractor&quot; : &quot;CSV&quot;
 }
 </pre></div></div>
-<p>There are 2 property maps that work with full Stellar expressions, and 2 
properties that will work with Stellar predicates.</p>
 
+<p>There are 2 property maps that work with full Stellar expressions, and 2 
properties that will work with Stellar predicates.</p>
 <table border="0" class="table table-striped">
-  <thead>
-    
+<thead>
+
+<tr class="a">
+<th> Property             </th>
+<th> Description</th></tr>
+</thead><tbody>
+
+<tr class="b">
+<td> <tt>value_transform</tt>    </td>
+<td> Transform fields defined in the &#x201c;columns&#x201d; mapping with 
Stellar transformations. New keys introduced in the transform will be added to 
the key metadata.</td></tr>
 <tr class="a">
-      
-<th>Property </th>
-      
-<th>Description</th>
-    </tr>
-  </thead>
-  <tbody>
-    
+<td> <tt>value_filter</tt>       </td>
+<td> Allows additional filtering with Stellar predicates based on results from 
the value transformations. In this example, records whose domain property is 
empty after removing the TLD will be omitted.</td></tr>
 <tr class="b">
-      
-<td>value_transform </td>
-      
-<td>Transform fields defined in the &#x201c;columns&#x201d; mapping with 
Stellar transformations. New keys introduced in the transform will be added to 
the key metadata.</td>
-    </tr>
-    
+<td> <tt>indicator_transform</tt></td>
+<td> Transform the indicator column independent of the value transformations. 
You can refer to the original indicator value by using 
&#x201c;indicator&#x201d; as the variable name, as shown in the example above. 
In addition, if you prefer to piggyback your transformations, you can refer to 
the variable &#x201c;domain&#x201d;, which will allow your indicator transforms 
to inherit transformations done to this value during the value 
transformations.</td></tr>
 <tr class="a">
-      
-<td>value_filter </td>
-      
-<td>Allows additional filtering with Stellar predicates based on results from 
the value transformations. In this example, records whose domain property is 
empty after removing the TLD will be omitted.</td>
-    </tr>
-    
+<td> <tt>indicator_filter</tt>   </td>
+<td> Allows additional filtering with Stellar predicates based on results from 
the value transformations. In this example, records whose indicator value is 
empty after removing the TLD will be omitted.</td></tr>
 <tr class="b">
-      
-<td>indicator_transform </td>
-      
-<td>Transform the indicator column independent of the value transformations. 
You can refer to the original indicator value by using 
&#x201c;indicator&#x201d; as the variable name, as shown in the example above. 
In addition, if you prefer to piggyback your transformations, you can refer to 
the variable &#x201c;domain&#x201d;, which will allow your indicator transforms 
to inherit transformations done to this value during the value 
transformations.</td>
-    </tr>
-    
+<td> <tt>state_init</tt>         </td>
+<td> Allows a state object to be initialized.  This is a string, so a single 
expression is created.  The output of this expression will be available as the 
<tt>state</tt> variable.  This is to be used with the 
<tt>flatfile_summarizer.sh</tt> rather than the loader.</td></tr>
 <tr class="a">
-      
-<td>indicator_filter </td>
-      
-<td>Allows additional filtering with Stellar predicates based on results from 
the value transformations. In this example, records whose indicator value is 
empty after removing the TLD will be omitted.</td>
-    </tr>
-  </tbody>
+<td> <tt>state_update</tt>       </td>
+<td> Allows a state object to be updated.  This is a map, so you can have 
temporary variables here.  Note that you can reference the <tt>state</tt> 
variable from this.  This is to be used with the 
<tt>flatfile_summarizer.sh</tt> rather than the loader.</td></tr>
+<tr class="b">
+<td> <tt>state_merge</tt>        </td>
+<td> Allows a list of states to be merged. This is a string, so a single 
expression.  There is a special field called <tt>states</tt> available, which 
is a list of the states (one per thread).  This is to be used with the 
<tt>flatfile_summarizer.sh</tt> rather than the loader.</td></tr>
+</tbody>
 </table>
 <p>top-list.csv</p>
 
-<div class="source">
-<div class="source">
-<pre>1,google.com
+<div>
+<div>
+<pre class="source">1,google.com
 2,youtube.com
 ...
 </pre></div></div>
-<p>Running a file import with the above data and extractor configuration would 
result in the following 2 extracted data records:</p>
 
+<p>Running a file import with the above data and extractor configuration would 
result in the following 2 extracted data records:</p>
 <table border="0" class="table table-striped">
-  <thead>
-    
+<thead>
+
 <tr class="a">
-      
-<th>Indicator </th>
-      
-<th>Type </th>
-      
-<th>Value </th>
-    </tr>
-  </thead>
-  <tbody>
-    
+<th> Indicator </th>
+<th> Type        </th>
+<th> Value                                 </th></tr>
+</thead><tbody>
+
 <tr class="b">
-      
-<td>google </td>
-      
-<td>top_domains </td>
-      
-<td>{ &#x201c;rank&#x201d; : &#x201c;1&#x201d;, &#x201c;domain&#x201d; : 
&#x201c;google&#x201d; } </td>
-    </tr>
-    
+<td> google    </td>
+<td> top_domains </td>
+<td> { &#x201c;rank&#x201d; : &#x201c;1&#x201d;, &#x201c;domain&#x201d; : 
&#x201c;google&#x201d; } </td></tr>
 <tr class="a">
-      
-<td>yahoo </td>
-      
-<td>top_domains </td>
-      
-<td>{ &#x201c;rank&#x201d; : &#x201c;2&#x201d;, &#x201c;domain&#x201d; : 
&#x201c;yahoo&#x201d; } </td>
-    </tr>
-  </tbody>
+<td> yahoo     </td>
+<td> top_domains </td>
+<td> { &#x201c;rank&#x201d; : &#x201c;2&#x201d;, &#x201c;domain&#x201d; : 
&#x201c;yahoo&#x201d; }  </td></tr>
+</tbody>
 </table>
 <p>Similar to the parser framework, providing a Zookeeper quorum via the 
zk_quorum property will enable Stellar to access properties that reside in the 
global config. Expanding on our example above, if the global config looks as 
follows:</p>
 
-<div class="source">
-<div class="source">
-<pre>{
+<div>
+<div>
+<pre class="source">{
     &quot;global_property&quot; : &quot;metron-ftw&quot;
 }
 </pre></div></div>
+
 <p>And we expand our value_transform:</p>
 
-<div class="source">
-<div class="source">
-<pre>...
+<div>
+<div>
+<pre class="source">...
     &quot;value_transform&quot; : {
        &quot;domain&quot; : &quot;DOMAIN_REMOVE_TLD(domain)&quot;,
        &quot;a-new-prop&quot; : &quot;global_property&quot;
     },
 ...
 </pre></div></div>
-<p>The resulting value data would look like the following:</p>
 
+<p>The resulting value data would look like the following:</p>
 <table border="0" class="table table-striped">
-  <thead>
-    
+<thead>
+
 <tr class="a">
-      
-<th>Indicator </th>
-      
-<th>Type </th>
-      
-<th>Value </th>
-    </tr>
-  </thead>
-  <tbody>
-    
+<th> Indicator </th>
+<th> Type        </th>
+<th> Value                                                              
</th></tr>
+</thead><tbody>
+
 <tr class="b">
-      
-<td>google </td>
-      
-<td>top_domains </td>
-      
-<td>{ &#x201c;rank&#x201d; : &#x201c;1&#x201d;, &#x201c;domain&#x201d; : 
&#x201c;google&#x201d;, &#x201c;a-new-prop&#x201d; : &#x201c;metron-ftw&#x201d; 
} </td>
-    </tr>
-    
+<td> google    </td>
+<td> top_domains </td>
+<td> { &#x201c;rank&#x201d; : &#x201c;1&#x201d;, &#x201c;domain&#x201d; : 
&#x201c;google&#x201d;, &#x201c;a-new-prop&#x201d; : &#x201c;metron-ftw&#x201d; 
} </td></tr>
 <tr class="a">
-      
-<td>yahoo </td>
-      
-<td>top_domains </td>
-      
-<td>{ &#x201c;rank&#x201d; : &#x201c;2&#x201d;, &#x201c;domain&#x201d; : 
&#x201c;yahoo&#x201d;, &#x201c;a-new-prop&#x201d; : &#x201c;metron-ftw&#x201d; 
} </td>
-    </tr>
-  </tbody>
+<td> yahoo     </td>
+<td> top_domains </td>
+<td> { &#x201c;rank&#x201d; : &#x201c;2&#x201d;, &#x201c;domain&#x201d; : 
&#x201c;yahoo&#x201d;, &#x201c;a-new-prop&#x201d; : &#x201c;metron-ftw&#x201d; 
}  </td></tr>
+</tbody>
 </table></div></div>
 <div class="section">
 <h2><a name="Enrichment_Config"></a>Enrichment Config</h2>
-<p>In order to automatically add new enrichment and threat intel types to 
existing, running enrichment topologies, you will need to add new fields and 
new types to the zookeeper configuration. A convenience parameter has been made 
to assist in this when doing an import. Namely, you can specify the enrichment 
configs and how they associate with the fields of the documents flowing through 
the enrichment topology.</p>
-<p>Consider the following Enrichment Configuration JSON. This one is for a 
threat intelligence type:</p>
+<p>In order to automatically add new enrichment and threat intel types to 
existing, running enrichment topologies, you will need to add new fields and 
new types to the zookeeper configuration.  A convenience parameter has been 
made to assist in this when doing an import.  Namely, you can specify the 
enrichment configs and how they associate with the fields of the documents 
flowing through the enrichment topology.</p>
+<p>Consider the following Enrichment Configuration JSON.  This one is for a 
threat intelligence type:</p>
 
-<div class="source">
-<div class="source">
-<pre>{
+<div>
+<div>
+<pre class="source">{
   &quot;zkQuorum&quot; : &quot;localhost:2181&quot;
  ,&quot;sensorToFieldList&quot; : {
     &quot;bro&quot; : {
@@ -670,35 +421,32 @@
                         }
 }
 </pre></div></div>
-<p>We have to specify the following:</p>
 
+<p>We have to specify the following:</p>
 <ul>
-  
+
 <li>The zookeeper quorum which holds the cluster configuration</li>
-  
 <li>The mapping between the fields in the enriched documents and the 
enrichment types.</li>
 </ul>
 <p>This configuration allows the ingestion tools to update zookeeper 
post-ingestion so that the enrichment topology can take advantage immediately 
of the new type.</p></div>
 <div class="section">
 <h2><a name="Loading_Utilities"></a>Loading Utilities</h2>
 <p>The two configurations above are used in the three separate ingestion 
tools:</p>
-
 <ul>
-  
+
 <li>Taxii Loader</li>
-  
 <li>Bulk load from HDFS via MapReduce</li>
-  
 <li>Flat File ingestion</li>
 </ul>
 <div class="section">
 <h3><a name="Taxii_Loader"></a>Taxii Loader</h3>
-<p>The shell script <tt>$METRON_HOME/bin/threatintel_taxii_load.sh</tt> can be 
used to poll a Taxii server for STIX documents and ingest them into HBase.<br 
/>It is quite common for this Taxii server to be an aggregation server such as 
Soltra Edge.</p>
-<p>In addition to the Enrichment and Extractor configs described above, this 
loader requires a configuration file describing the connection information to 
the Taxii server. An illustrative example of such a configuration file is:</p>
+<p>The shell script <tt>$METRON_HOME/bin/threatintel_taxii_load.sh</tt> can be 
used to poll a Taxii server for STIX documents and ingest them into HBase.<br />
+It is quite common for this Taxii server to be an aggregation server such as 
Soltra Edge.</p>
+<p>In addition to the Enrichment and Extractor configs described above, this 
loader requires a configuration file describing the connection information to 
the Taxii server.  An illustrative example of such a configuration file is:</p>
 
-<div class="source">
-<div class="source">
-<pre>{
+<div>
+<div>
+<pre class="source">{
    &quot;endpoint&quot; : 
&quot;http://localhost:8282/taxii-discovery-service&quot;
   ,&quot;type&quot; : &quot;DISCOVER&quot;
   ,&quot;collection&quot; : &quot;guest.Abuse_ch&quot;
@@ -707,363 +455,379 @@
   ,&quot;allowedIndicatorTypes&quot; : [ &quot;domainname:FQDN&quot;, 
&quot;address:IPV_4_ADDR&quot; ]
 }
 </pre></div></div>
-<p>As you can see, we are specifying the following information:</p>
 
+<p>As you can see, we are specifying the following information:</p>
 <ul>
-  
+
 <li>endpoint : The URL of the endpoint</li>
-  
 <li>type : <tt>POLL</tt> or <tt>DISCOVER</tt> depending on the endpoint.</li>
-  
 <li>collection : The Taxii collection to ingest</li>
-  
 <li>table : The HBase table to import into</li>
-  
 <li>columnFamily : The column family to import into</li>
-  
 <li>allowedIndicatorTypes : an array of acceptable threat intel types (see the 
&#x201c;Enrichment Type Name&#x201d; column of the Stix table above for the 
possibilities).</li>
 </ul>
 <p>The parameters for the utility are as follows:</p>
-
 <table border="0" class="table table-striped">
-  <thead>
-    
+<thead>
+
 <tr class="a">
-      
-<th>Short Code </th>
-      
-<th>Long Code </th>
-      
-<th>Is Required? </th>
-      
-<th>Description </th>
-    </tr>
-  </thead>
-  <tbody>
-    
+<th> Short Code </th>
+<th> Long Code                 </th>
+<th> Is Required? </th>
+<th> Description                                                               
                                                                         
</th></tr>
+</thead><tbody>
+
 <tr class="b">
-      
-<td>-h </td>
-      
-<td> </td>
-      
-<td>No </td>
-      
-<td>Generate the help screen/set of options </td>
-    </tr>
-    
+<td> -h         </td>
+<td>                           </td>
+<td> No           </td>
+<td> Generate the help screen/set of options                                   
                                                                         
</td></tr>
 <tr class="a">
-      
-<td>-e </td>
-      
-<td>&#x2013;extractor_config </td>
-      
-<td>Yes </td>
-      
-<td>JSON Document describing the extractor for this input data source </td>
-    </tr>
-    
+<td> -e         </td>
+<td> &#x2013;extractor_config        </td>
+<td> Yes          </td>
+<td> JSON Document describing the extractor for this input data source         
                                                                         
</td></tr>
 <tr class="b">
-      
-<td>-c </td>
-      
-<td>&#x2013;taxii_connection_config </td>
-      
-<td>Yes </td>
-      
-<td>The JSON config file to configure the connection </td>
-    </tr>
-    
+<td> -c         </td>
+<td> &#x2013;taxii_connection_config </td>
+<td> Yes          </td>
+<td> The JSON config file to configure the connection                          
                                                                         
</td></tr>
 <tr class="a">
-      
-<td>-p </td>
-      
-<td>&#x2013;time_between_polls </td>
-      
-<td>No </td>
-      
-<td>The time between polling the Taxii server in milliseconds. (default: 1 
hour) </td>
-    </tr>
-    
+<td> -p         </td>
+<td> &#x2013;time_between_polls      </td>
+<td> No           </td>
+<td> The time between polling the Taxii server in milliseconds. (default: 1 
hour)                                                                       
</td></tr>
 <tr class="b">
-      
-<td>-b </td>
-      
-<td>&#x2013;begin_time </td>
-      
-<td>No </td>
-      
-<td>Start time to poll the Taxii server (all data from that point will be 
gathered in the first pull). The format for the date is yyyy-MM-dd HH:mm:ss 
</td>
-    </tr>
-    
+<td> -b         </td>
+<td> &#x2013;begin_time              </td>
+<td> No           </td>
+<td> Start time to poll the Taxii server (all data from that point will be 
gathered in the first pull).  The format for the date is yyyy-MM-dd HH:mm:ss 
</td></tr>
 <tr class="a">
-      
-<td>-l </td>
-      
-<td>&#x2013;log4j </td>
-      
-<td>No </td>
-      
-<td>The Log4j Properties to load </td>
-    </tr>
-    
+<td> -l         </td>
+<td> &#x2013;log4j                   </td>
+<td> No           </td>
+<td> The Log4j Properties to load                                              
                                                                         
</td></tr>
 <tr class="b">
-      
-<td>-n </td>
-      
-<td>&#x2013;enrichment_config </td>
-      
-<td>No </td>
-      
-<td>The JSON document describing the enrichments to configure. Unlike other 
loaders, this is run first if specified. </td>
-    </tr>
-  </tbody>
+<td> -n         </td>
+<td> &#x2013;enrichment_config       </td>
+<td> No           </td>
+<td> The JSON document describing the enrichments to configure.  Unlike other 
loaders, this is run first if specified.                                  
</td></tr>
+</tbody>
 </table></div>
 <div class="section">
 <h3><a name="Flatfile_Loader"></a>Flatfile Loader</h3>
-<p>The shell script <tt>$METRON_HOME/bin/flatfile_loader.sh</tt> will read 
data from local disk, HDFS or URLs and load the enrichment or threat intel data 
into an HBase table.<br />Note: This utility works for enrichment as well as 
threat intel due to the underlying infrastructure being the same.</p>
+<p>The shell script <tt>$METRON_HOME/bin/flatfile_loader.sh</tt> will read 
data from local disk, HDFS or URLs and load the enrichment or threat intel data 
into an HBase table.<br />
+Note: This utility works for enrichment as well as threat intel due to the 
underlying infrastructure being the same.</p>
 <p>One special thing to note here is that there is a special configuration 
parameter to the Extractor config that is only considered during this 
loader:</p>
-
 <ul>
-  
-<li>inputFormat : This specifies how to consider the data. The two 
implementations are <tt>BY_LINE</tt> and <tt>WHOLE_FILE</tt>.</li>
+
+<li>inputFormat : This specifies how to consider the data.  The two 
implementations are <tt>BY_LINE</tt> and <tt>WHOLE_FILE</tt>.</li>
 </ul>
 <p>The default is <tt>BY_LINE</tt>, which makes sense for a list of CSVs where 
each line indicates a unit of information which can be imported. However, if 
you are importing a set of STIX documents, then you want each document to be 
considered as input to the Extractor.</p>
 <p>The parameters for the utility are as follows:</p>
-
 <table border="0" class="table table-striped">
-  <thead>
-    
+<thead>
+
 <tr class="a">
-      
-<th>Short Code </th>
-      
-<th>Long Code </th>
-      
-<th>Is Required? </th>
-      
-<th>Description </th>
-    </tr>
-  </thead>
-  <tbody>
-    
+<th> Short Code </th>
+<th> Long Code           </th>
+<th> Is Required? </th>
+<th> Description                                                               
                                                                                
                          </th></tr>
+</thead><tbody>
+
 <tr class="b">
-      
-<td>-h </td>
-      
-<td> </td>
-      
-<td>No </td>
-      
-<td>Generate the help screen/set of options </td>
-    </tr>
-    
+<td> -h         </td>
+<td>                     </td>
+<td> No           </td>
+<td> Generate the help screen/set of options                                   
                                                                                
                          </td></tr>
 <tr class="a">
-      
-<td>-q </td>
-      
-<td>&#x2013;quiet </td>
-      
-<td>No </td>
-      
-<td>Do not update progress </td>
-    </tr>
-    
+<td> -q         </td>
+<td> &#x2013;quiet             </td>
+<td> No           </td>
+<td> Do not update progress                                                    
                                                                                
                          </td></tr>
 <tr class="b">
-      
-<td>-e </td>
-      
-<td>&#x2013;extractor_config </td>
-      
-<td>Yes </td>
-      
-<td>JSON Document describing the extractor for this input data source </td>
-    </tr>
-    
+<td> -e         </td>
+<td> &#x2013;extractor_config  </td>
+<td> Yes          </td>
+<td> JSON Document describing the extractor for this input data source         
                                                                                
                          </td></tr>
 <tr class="a">
-      
-<td>-m </td>
-      
-<td>&#x2013;import_mode </td>
-      
-<td>No </td>
-      
-<td>The Import mode to use: LOCAL, MR. Default: LOCAL </td>
-    </tr>
-    
+<td> -m         </td>
+<td> &#x2013;import_mode       </td>
+<td> No           </td>
+<td> The Import mode to use: LOCAL, MR.  Default: LOCAL                        
                                                                                
                          </td></tr>
 <tr class="b">
-      
-<td>-t </td>
-      
-<td>&#x2013;hbase_table </td>
-      
-<td>Yes </td>
-      
-<td>The HBase table to import into </td>
-    </tr>
-    
+<td> -t         </td>
+<td> &#x2013;hbase_table       </td>
+<td> Yes          </td>
+<td> The HBase table to import into                                            
                                                                                
                          </td></tr>
 <tr class="a">
-      
-<td>-c </td>
-      
-<td>&#x2013;hbase_cf </td>
-      
-<td>Yes </td>
-      
-<td>The HBase table column family to import into </td>
-    </tr>
-    
+<td> -c         </td>
+<td> &#x2013;hbase_cf          </td>
+<td> Yes          </td>
+<td> The HBase table column family to import into                              
                                                                                
                          </td></tr>
 <tr class="b">
-      
-<td>-i </td>
-      
-<td>&#x2013;input </td>
-      
-<td>Yes </td>
-      
-<td>The input data location on local disk. If this is a file, then that file 
will be loaded. If this is a directory, then the files will be loaded 
recursively under that directory. </td>
-    </tr>
-    
+<td> -i         </td>
+<td> &#x2013;input             </td>
+<td> Yes          </td>
+<td> The input data location on local disk.  If this is a file, then that file 
will be loaded.  If this is a directory, then the files will be loaded 
recursively under that directory.  </td></tr>
 <tr class="a">
-      
-<td>-l </td>
-      
-<td>&#x2013;log4j </td>
-      
-<td>No </td>
-      
-<td>The log4j properties file to load </td>
-    </tr>
-    
+<td> -l         </td>
+<td> &#x2013;log4j             </td>
+<td> No           </td>
+<td> The log4j properties file to load                                         
                                                                                
                          </td></tr>
 <tr class="b">
-      
-<td>-n </td>
-      
-<td>&#x2013;enrichment_config </td>
-      
-<td>No </td>
-      
-<td>The JSON document describing the enrichments to configure. Unlike other 
loaders, this is run first if specified. </td>
-    </tr>
-    
+<td> -n         </td>
+<td> &#x2013;enrichment_config </td>
+<td> No           </td>
+<td> The JSON document describing the enrichments to configure.  Unlike other 
loaders, this is run first if specified.                                        
                           </td></tr>
 <tr class="a">
-      
-<td>-p </td>
-      
-<td>&#x2013;threads </td>
-      
-<td>No </td>
-      
-<td>The number of threads to use when extracting data. The default is the 
number of cores. </td>
-    </tr>
-    
+<td> -p         </td>
+<td> &#x2013;threads           </td>
+<td> No           </td>
+<td> The number of threads to use when extracting data.  The default is the 
number of cores.                                                                
                             </td></tr>
 <tr class="b">
-      
-<td>-b </td>
-      
-<td>&#x2013;batchSize </td>
-      
-<td>No </td>
-      
-<td>The batch size to use for HBase puts </td>
-    </tr>
-  </tbody>
+<td> -b         </td>
+<td> &#x2013;batchSize         </td>
+<td> No           </td>
+<td> The batch size to use for HBase puts                                      
                                                                                
                          </td></tr>
+</tbody>
 </table></div>
 <div class="section">
 <h3><a name="GeoLite2_Loader"></a>GeoLite2 Loader</h3>
 <p>The shell script <tt>$METRON_HOME/bin/geo_enrichment_load.sh</tt> will 
retrieve MaxMind GeoLite2 data and load data into HDFS, and update the 
configuration.</p>
-<p>THIS SCRIPT WILL NOT UPDATE AMBARI&#x2019;S GLOBAL.JSON, JUST THE ZK 
CONFIGS. CHANGES WILL GO INTO EFFECT, BUT WILL NOT PERSIST PAST AN AMBARI 
RESTART UNTIL UPDATED THERE.</p>
+<p>THIS SCRIPT WILL NOT UPDATE AMBARI&#x2019;S GLOBAL.JSON, JUST THE ZK 
CONFIGS.  CHANGES WILL GO INTO EFFECT, BUT WILL NOT PERSIST PAST AN AMBARI 
RESTART UNTIL UPDATED THERE.</p>
 <p>The parameters for the utility are as follows:</p>
+<table border="0" class="table table-striped">
+<thead>
 
+<tr class="a">
+<th> Short Code </th>
+<th> Long Code           </th>
+<th> Is Required? </th>
+<th> Description                                                               
                       </th></tr>
+</thead><tbody>
+
+<tr class="b">
+<td> -h         </td>
+<td>                     </td>
+<td> No           </td>
+<td> Generate the help screen/set of options                                   
                       </td></tr>
+<tr class="a">
+<td> -g         </td>
+<td> &#x2013;geo_url           </td>
+<td> No           </td>
+<td> GeoIP URL - defaults to <a class="externalLink" 
href="http://geolite.maxmind.com/download/geoip/database/GeoLite2-City.mmdb.gz">http://geolite.maxmind.com/download/geoip/database/GeoLite2-City.mmdb.gz</a>
 </td></tr>
+<tr class="b">
+<td> -r         </td>
+<td> &#x2013;remote_dir        </td>
+<td> No           </td>
+<td> HDFS directory to land formatted GeoIP file - defaults to 
/apps/metron/geo/&lt;epoch millis&gt;/     </td></tr>
+<tr class="a">
+<td> -t         </td>
+<td> &#x2013;tmp_dir           </td>
+<td> No           </td>
+<td> Directory for landing the temporary GeoIP data - defaults to /tmp         
                       </td></tr>
+<tr class="b">
+<td> -z         </td>
+<td> &#x2013;zk_quorum         </td>
+<td> Yes          </td>
+<td> Zookeeper Quorum URL (zk1:port,zk2:port,&#x2026;)                         
                            </td></tr>
+</tbody>
+</table></div>
+<div class="section">
+<h3><a name="Flatfile_Summarizer"></a>Flatfile Summarizer</h3>
+<p>The shell script <tt>$METRON_HOME/bin/flatfile_summarizer.sh</tt> will read 
data from local disk, HDFS or URLs and generate a summary object. The object 
will be serialized and written to disk, either HDFS or local disk depending on 
the output mode specified.</p>
+<p>It should be noted that this utility uses the same extractor config as the 
<tt>flatfile_loader.sh</tt>, but as the output target is not a key value store 
(but rather a summary object), it is not necessary to specify certain 
configs:</p>
+<ul>
+
+<li><tt>indicator</tt>, <tt>indicator_filter</tt> and 
<tt>indicator_transform</tt> are not required, but will be executed if present. 
As in the loader, there will be an indicator field available if you so specify 
it (by using <tt>indicator</tt> in the config).</li>
+<li><tt>type</tt> is neither required nor used</li>
+</ul>
+<p>However, some new configs are expected:</p>
+<ul>
+
+<li><tt>state_init</tt> : Executed once to initialize the state object (the 
object written out).</li>
+<li><tt>state_update</tt> : Called once per message.  The fields available are 
the fields for the row as well as:
+<ul>
+
+<li><tt>indicator</tt> - the indicator value if you&#x2019;ve specified it in 
the config</li>
+<li><tt>state</tt> - the current state.  Useful for adding to the state (e.g. 
<tt>BLOOM_ADD(state, val)</tt> where <tt>val</tt> is the name of a field).</li>
+</ul>
+</li>
+<li><tt>state_merge</tt> : If you are running this multi-threaded and your 
objects can be merged, this is the statement that will merge the state objects 
created per thread.  There is a special field available to this config:
+<ul>
+
+<li><tt>states</tt> - a list of the state objects</li>
+</ul>
+</li>
+</ul>
+<p>One special thing to note here is that there is a special configuration 
parameter to the Extractor config that is only considered during this 
loader:</p>
+<ul>
+
+<li>inputFormat : This specifies how to consider the data.  The two 
implementations are <tt>BY_LINE</tt> and <tt>WHOLE_FILE</tt>.</li>
+</ul>
+<p>The default is <tt>BY_LINE</tt>, which makes sense for a list of CSVs where 
each line indicates a unit of information which can be imported. However, if 
you are importing a set of STIX documents, then you want each document to be 
considered as input to the Extractor.</p>
+<div class="section">
+<h4><a name="Example"></a>Example</h4>
+<p>Consider the possibility that you want to generate a bloom filter with all 
of the domains in a CSV structured similarly to the Alexa top 1M domains, so 
the columns are:</p>
+<ul>
+
+<li>rank</li>
+<li>domain name</li>
+</ul>
+<p>You want to generate a bloom filter with just the domains, not considering 
the TLD. You would execute the following to:</p>
+<ul>
+
+<li>read data from <tt>./top-1m.csv</tt></li>
+<li>write data to <tt>./filter.ser</tt></li>
+<li>use 5 threads</li>
+</ul>
+
+<div>
+<div>
+<pre class="source">$METRON_HOME/bin/flatfile_summarizer.sh -i ./top-1m.csv -o 
./filter.ser -e ./extractor.json -p 5 -b 128
+</pre></div></div>
+
+<p>To configure this, <tt>extractor.json</tt> would look like:</p>
+
+<div>
+<div>
+<pre class="source">{
+  &quot;config&quot; : {
+    &quot;columns&quot; : {
+      &quot;rank&quot; : 0,
+      &quot;domain&quot; : 1
+    },
+    &quot;value_transform&quot; : {
+      &quot;domain&quot; : &quot;DOMAIN_REMOVE_TLD(domain)&quot;
+    },
+    &quot;value_filter&quot; : &quot;LENGTH(domain) &gt; 0&quot;,
+    &quot;state_init&quot; : &quot;BLOOM_INIT()&quot;,
+    &quot;state_update&quot; : {
+      &quot;state&quot; : &quot;BLOOM_ADD(state, domain)&quot;
+    },
+    &quot;state_merge&quot; : &quot;BLOOM_MERGE(states)&quot;,
+    &quot;separator&quot; : &quot;,&quot;
+  },
+  &quot;extractor&quot; : &quot;CSV&quot;
+}
+</pre></div></div>
+</div>
+<div class="section">
+<h4><a name="Parameters"></a>Parameters</h4>
+<p>The parameters for the utility are as follows:</p>
 <table border="0" class="table table-striped">
-  <thead>
-    
+<thead>
+
 <tr class="a">
-      
-<th>Short Code </th>
-      
-<th>Long Code </th>
-      
-<th>Is Required? </th>
-      
-<th>Description </th>
-    </tr>
-  </thead>
-  <tbody>
-    
+<th> Short Code </th>
+<th> Long Code           </th>
+<th> Is Required? </th>
+<th> Description                                                               
                                                                                
                          </th></tr>
+</thead><tbody>
+
 <tr class="b">
-      
-<td>-h </td>
-      
-<td> </td>
-      
-<td>No </td>
-      
-<td>Generate the help screen/set of options </td>
-    </tr>
-    
+<td> -h         </td>
+<td>                     </td>
+<td> No           </td>
+<td> Generate the help screen/set of options                                   
                                                                                
                          </td></tr>
 <tr class="a">
-      
-<td>-g </td>
-      
-<td>&#x2013;geo_url </td>
-      
-<td>No </td>
-      
-<td>GeoIP URL - defaults to <a class="externalLink" 
href="http://geolite.maxmind.com/download/geoip/database/GeoLite2-City.mmdb.gz">http://geolite.maxmind.com/download/geoip/database/GeoLite2-City.mmdb.gz</a>
 </td>
-    </tr>
-    
+<td> -q         </td>
+<td> &#x2013;quiet             </td>
+<td> No           </td>
+<td> Do not update progress                                                    
                                                                                
                          </td></tr>
 <tr class="b">
-      
-<td>-r </td>
-      
-<td>&#x2013;remote_dir </td>
-      
-<td>No </td>
-      
-<td>HDFS directory to land formatted GeoIP file - defaults to 
/apps/metron/geo/&lt;epoch millis&gt;/ </td>
-    </tr>
-    
+<td> -e         </td>
+<td> &#x2013;extractor_config  </td>
+<td> Yes          </td>
+<td> JSON Document describing the extractor for this input data source         
                                                                                
                          </td></tr>
 <tr class="a">
-      
-<td>-t </td>
-      
-<td>&#x2013;tmp_dir </td>
-      
-<td>No </td>
-      
-<td>Directory for landing the temporary GeoIP data - defaults to /tmp </td>
-    </tr>
-    
+<td> -m         </td>
+<td> --import_mode       </td>
+<td> No           </td>
+<td> The Import mode to use: LOCAL, MR.  Default: LOCAL                        
                                                                                
                          </td></tr>
 <tr class="b">
-      
-<td>-z </td>
-      
-<td>&#x2013;zk_quorum </td>
-      
-<td>Yes </td>
-      
-<td>Zookeeper Quorum URL (zk1:port,zk2:port,&#x2026;) </td>
-    </tr>
-  </tbody>
-</table></div></div>
-                  </div>
-            </div>
-          </div>
+<td> -om        </td>
+<td> --output_mode       </td>
+<td> No           </td>
+<td> The Output mode to use: LOCAL, HDFS.  Default: LOCAL                      
                                                                                
                            </td></tr>
+<tr class="a">
+<td> -i         </td>
+<td> --input             </td>
+<td> Yes          </td>
+<td> The input data location on local disk.  If this is a file, then that file 
will be loaded.  If this is a directory, then the files will be loaded 
recursively under that directory.  </td></tr>
+<tr class="b">
+<td> -o         </td>
+<td> --output            </td>
+<td> Yes          </td>
+<td> The output data location.    </td></tr>
+<tr class="a">
+<td> -l         </td>
+<td> --log4j             </td>
+<td> No           </td>
+<td> The log4j properties file to load                                         
                                                                                
                          </td></tr>
+<tr class="b">
+<td> -p         </td>
+<td> --threads           </td>
+<td> No           </td>
+<td> The number of threads to use when extracting data.  The default is the 
number of cores.                                                                
                             </td></tr>
+<tr class="a">
+<td> -b         </td>
+<td> --batchSize         </td>
+<td> No           </td>
+<td> The batch size to use for HBase puts                                      
                                                                                
                          </td></tr>
+</tbody>
+</table></div></div></div>
+<div class="section">
+<h2><a name="Pruning_Data_from_Elasticsearch"></a>Pruning Data from 
Elasticsearch</h2>
+<p><b>Note</b> - As of the Metron upgrade from Elasticsearch 2.3.3 to 5.6.2, 
the included Data Pruner is no longer supported. It has been replaced by the 
Curator utility provided by Elasticsearch. The current Curator version is 5.4 
as of this version of Metron and does not exactly match the ES and Kibana versions.</p>
+<p>Elasticsearch provides tooling to prune index data through <a 
class="externalLink" 
href="https://www.elastic.co/guide/en/elasticsearch/client/curator/5.4/index.html">Curator</a>.</p>
+<p>Here is a sample invocation that you can configure through Cron to prune 
indexes based on timestamp in the index name.</p>
+
+<div>
+<div>
+<pre class="source">/opt/elasticsearch-curator/curator_cli --host localhost 
delete_indices --filter_list '
+    {
+      &quot;filtertype&quot;: &quot;age&quot;,
+      &quot;source&quot;: &quot;name&quot;,
+      &quot;timestring&quot;: &quot;%Y.%m.%d&quot;,
+      &quot;unit&quot;: &quot;days&quot;,
+      &quot;unit_count&quot;: 10,
+      &quot;direction&quot;: &quot;older&quot;
+    }'
+</pre></div></div>
 
-    <hr/>
+<p>From the ES documentation:</p>
+<blockquote>
+
+<p>Using name as the source tells Curator to look for a timestring within the 
index or snapshot name, and convert that into an epoch timestamp (epoch implies 
UTC).</p>
+</blockquote>
+<p>You can also provide multiple filters as an array of JSON objects to 
filter_list if you want finer-grained control over the indexes that will be 
pruned. There is an implicit logical AND when chaining multiple filters.</p>
+
+<div>
+<div>
+<pre class="source">--filter_list 
'[{&quot;filtertype&quot;:&quot;age&quot;,&quot;source&quot;:&quot;creation_date&quot;,&quot;direction&quot;:&quot;older&quot;,&quot;unit&quot;:&quot;days&quot;,&quot;unit_count&quot;:13},{&quot;filtertype&quot;:&quot;pattern&quot;,&quot;kind&quot;:&quot;prefix&quot;,&quot;value&quot;:&quot;logstash&quot;}]'
+</pre></div></div>
 
+<div class="section">
+<h3><a name="Reference"></a>Reference</h3>
+<ul>
+
+<li><a class="externalLink" 
href="https://www.elastic.co/guide/en/elasticsearch/client/curator/5.4/index.html">https://www.elastic.co/guide/en/elasticsearch/client/curator/5.4/index.html</a></li>
+<li><a class="externalLink" 
href="https://www.elastic.co/guide/en/elasticsearch/client/curator/5.4/filtertype_age.html">https://www.elastic.co/guide/en/elasticsearch/client/curator/5.4/filtertype_age.html</a></li>
+<li><a class="externalLink" 
href="https://www.elastic.co/guide/en/elasticsearch/client/curator/5.4/singleton-cli.html">https://www.elastic.co/guide/en/elasticsearch/client/curator/5.4/singleton-cli.html</a></li>
+</ul></div></div>
+        </div>
+      </div>
+    </div>
+    <hr/>
     <footer>
-            <div class="container-fluid">
-              <div class="row span12">Copyright &copy;                    2018
-                        <a href="https://www.apache.org";>The Apache Software 
Foundation</a>.
-            All Rights Reserved.      
-                    
+      <div class="container-fluid">
+        <div class="row-fluid">
+&copy; 2015-2016 The Apache Software Foundation. Apache Metron, Metron, Apache, 
the Apache feather logo,
+            and the Apache Metron project logo are trademarks of The Apache 
Software Foundation.
+        </div>
       </div>
-
-                          
-        
-                </div>
     </footer>
   </body>
 </html>
[19/50] [abbrv] metron git commit: METRON-1607 update public web site to point at 0.5.0 new release (justinleet) closes apache/metron#1053

Reply via email to