Jekyll build from master:5679ae1 Wrote blog post about security and performance.
Closes apache/accumulo-website#8 Signed-off-by: Josh Elser <els...@apache.org> Project: http://git-wip-us.apache.org/repos/asf/accumulo-website/repo Commit: http://git-wip-us.apache.org/repos/asf/accumulo-website/commit/803c95d0 Tree: http://git-wip-us.apache.org/repos/asf/accumulo-website/tree/803c95d0 Diff: http://git-wip-us.apache.org/repos/asf/accumulo-website/diff/803c95d0 Branch: refs/heads/asf-site Commit: 803c95d0cd3065eead750ad2027bc49f57c11c87 Parents: f183655 Author: Josh Elser <els...@apache.org> Authored: Mon Mar 6 17:58:45 2017 -0500 Committer: Josh Elser <els...@apache.org> Committed: Mon Mar 6 17:58:45 2017 -0500 ---------------------------------------------------------------------- README.md | 1 - .../23/security-performance-implications.html | 449 +++++++++++++++++++ feed.xml | 385 ++++++++++++---- images/blog/201702_security/accumuloFiles.png | Bin 0 -> 313628 bytes images/blog/201702_security/avgFilesTab.png | Bin 0 -> 288346 bytes images/blog/201702_security/figure1.png | Bin 0 -> 78725 bytes images/blog/201702_security/tableRecs.png | Bin 0 -> 159556 bytes images/blog/201702_security/tableRecsInMem.png | Bin 0 -> 223097 bytes images/blog/201702_security/totalIngest.png | Bin 0 -> 245159 bytes index.html | 14 +- news/index.html | 7 + 11 files changed, 754 insertions(+), 102 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/accumulo-website/blob/803c95d0/README.md ---------------------------------------------------------------------- diff --git a/README.md b/README.md index 9d4b0ef..13e7cc0 100644 --- a/README.md +++ b/README.md @@ -28,4 +28,3 @@ This can be done easily by invoking the post-commit hook (either by hand, or aut Git to invoke the post-commit hook). 
`./_devtools/git-hooks/post-commit` - http://git-wip-us.apache.org/repos/asf/accumulo-website/blob/803c95d0/blog/2017/02/23/security-performance-implications.html ---------------------------------------------------------------------- diff --git a/blog/2017/02/23/security-performance-implications.html b/blog/2017/02/23/security-performance-implications.html new file mode 100644 index 0000000..fd23824 --- /dev/null +++ b/blog/2017/02/23/security-performance-implications.html @@ -0,0 +1,449 @@ +<!DOCTYPE html> +<html lang="en"> +<head> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+--> +<meta charset="utf-8"> +<meta http-equiv="X-UA-Compatible" content="IE=edge"> +<meta name="viewport" content="width=device-width, initial-scale=1"> +<link href="https://maxcdn.bootstrapcdn.com/bootswatch/3.3.7/paper/bootstrap.min.css" rel="stylesheet" integrity="sha384-awusxf8AUojygHf2+joICySzB780jVvQaVCAt1clU3QsyAitLGul28Qxb2r1e5g+" crossorigin="anonymous"> +<link href="//netdna.bootstrapcdn.com/font-awesome/4.0.3/css/font-awesome.css" rel="stylesheet"> +<link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/v/bs/jq-2.2.3/dt-1.10.12/datatables.min.css"> +<link href="/css/accumulo.css" rel="stylesheet" type="text/css"> + +<title>Security Performance Implications</title> + +<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.2.4/jquery.min.js"></script> +<script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/js/bootstrap.min.js" integrity="sha384-Tc5IQib027qvyjSMfHjOMaLkfuWVxZxUPnCJA7l2mCWNIpG9mGCD8wGNIcPD7Txa" crossorigin="anonymous"></script> +<script type="text/javascript" src="https://cdn.datatables.net/v/bs/jq-2.2.3/dt-1.10.12/datatables.min.js"></script> +<script> + // show location of canonical site if not currently on the canonical site + $(function() { + var host = window.location.host; + if (typeof host !== 'undefined' && host !== 'accumulo.apache.org') { + $('#non-canonical').show(); + } + }); + + $(function() { + // decorate section headers with anchors + return $("h2, h3, h4, h5, h6").each(function(i, el) { + var $el, icon, id; + $el = $(el); + id = $el.attr('id'); + icon = '<i class="fa fa-link"></i>'; + if (id) { + return $el.append($("<a />").addClass("header-link").attr("href", "#" + id).html(icon)); + } + }); + }); + + // configure Google Analytics + (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ + (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), + m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) + 
})(window,document,'script','//www.google-analytics.com/analytics.js','ga'); + + if (ga.hasOwnProperty('loaded') && ga.loaded === true) { + ga('create', 'UA-50934829-1', 'apache.org'); + ga('send', 'pageview'); + } +</script> + +</head> +<body style="padding-top: 100px"> + + <nav class="navbar navbar-default navbar-fixed-top"> + <div class="container"> + <div class="navbar-header"> + <button type="button" class="navbar-toggle" data-toggle="collapse" data-target="#navbar-items"> + <span class="sr-only">Toggle navigation</span> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + </button> + <a href="/"><img id="nav-logo" alt="Apache Accumulo" class="img-responsive" src="/images/accumulo-logo.png" width="200"/></a> + </div> + <div class="collapse navbar-collapse" id="navbar-items"> + <ul class="nav navbar-nav"> + <li class="nav-link"><a href="/downloads">Download</a></li> + <li class="dropdown"> + <a class="dropdown-toggle" data-toggle="dropdown" href="#">Releases<span class="caret"></span></a> + <ul class="dropdown-menu"> + <li><a href="/release/accumulo-1.8.1/">1.8.1 (Latest)</a></li> + <li><a href="/release/accumulo-1.7.2/">1.7.2</a></li> + <li><a href="/release/accumulo-1.6.6/">1.6.6</a></li> + <li><a href="/release/">Archive</a></li> + </ul> + </li> + <li class="dropdown"> + <a class="dropdown-toggle" data-toggle="dropdown" href="#">Documentation<span class="caret"></span></a> + <ul class="dropdown-menu"> + <li><a href="/1.8/accumulo_user_manual.html">User Manual (1.8)</a></li> + <li><a href="/1.8/apidocs">Javadocs (1.8)</a></li> + <li><a href="/1.8/examples">Examples (1.8)</a></li> + <li><a href="/features">Features</a></li> + <li><a href="/glossary">Glossary</a></li> + <li><a href="/external-docs">External Docs</a></li> + <li><a href="/docs-archive/">Archive</a></li> + </ul> + </li> + <li class="dropdown"> + <a class="dropdown-toggle" data-toggle="dropdown" href="#">Community<span class="caret"></span></a> + <ul 
class="dropdown-menu"> + <li><a href="/get_involved">Get Involved</a></li> + <li><a href="/mailing_list">Mailing Lists</a></li> + <li><a href="/people">People</a></li> + <li><a href="/related-projects">Related Projects</a></li> + <li><a href="/contributor/">Contributor Guide</a></li> + </ul> + </li> + </ul> + <ul class="nav navbar-nav navbar-right"> + <li class="dropdown"> + <a class="dropdown-toggle" data-toggle="dropdown" href="#">Apache Software Foundation<span class="caret"></span></a> + <ul class="dropdown-menu"> + <li><a href="https://www.apache.org">Apache Homepage <i class="fa fa-external-link"></i></a></li> + <li><a href="https://www.apache.org/licenses/LICENSE-2.0">License <i class="fa fa-external-link"></i></a></li> + <li><a href="https://www.apache.org/foundation/sponsorship">Sponsorship <i class="fa fa-external-link"></i></a></li> + <li><a href="https://www.apache.org/security">Security <i class="fa fa-external-link"></i></a></li> + <li><a href="https://www.apache.org/foundation/thanks">Thanks <i class="fa fa-external-link"></i></a></li> + <li><a href="https://www.apache.org/foundation/policies/conduct">Code of Conduct <i class="fa fa-external-link"></i></a></li> + </ul> + </li> + </ul> + </div> + </div> +</nav> + + + <div class="container"> + <div class="row"> + <div class="col-md-12"> + + <div id="non-canonical" style="display: none; background-color: #F0E68C; padding-left: 1em;"> + Visit the official site at: <a href="https://accumulo.apache.org">https://accumulo.apache.org</a> + </div> + <div id="content"> + + <h1 class="title">Security Performance Implications</h1> + + <table> + +<tr><td><b>Date </b></td><td> 23 Feb 2017 </td></tr> + +</table> +<p> + + +<p>The purpose of this two part series was to measure the performance impact of +various security configurations on a cluster running Apache Accumuloâs +continuous ingest suite. The tests were performed using Amazon Web +Services (AWS), Hortonworks Data Platform 2.4 and Accumulo 1.7. 
Each of +the five different security settings in Accumulo 1.7 was tested including +no security, SSL, and SASL with Kerberos authentication for the three quality +of protection levels (auth, auth-int, auth-conf). KDC was MIT. HDFS was +configured to use Kerberos for authentication and had service level +authorization on. Other than that, no other security settings (HTTPS, RPC +protection, data transfer encryption, etc) were enabled. Timely was a +separate, single node HDFS/Zookeeper/Accumulo instance.</p> + +<h2 id="intro">Intro</h2> + +<p>All runs utilized the continuous ingest suite that ships with Accumulo (a +standard method to measure performance in Accumulo). It generates random +graph data and inserts it into Accumulo, creating +a long linked list of entries. Part 1 was run with just continuous ingest.<br /> +Based on the test results, there was a measurable performance impact as each additional security configuration was put in place.</p> + +<h2 id="methodology">Methodology</h2> + +<p>We ran 5 tests, one for each security configuration. Each iteration of each test inserted 2 billion entries. Batch writers were configured with 500K max mem +to artificially inflate the overall write overhead. This was performed on a +small cluster on AWS.</p> + +<p>Each test used one of the following security configurations:</p> + +<ul> + <li>No security - Default</li> + <li>Two way SSL</li> + <li>Kerberos/SASL with auth + <ul> + <li>auth is just Kerberos authentication between client and server. Each end of the RPC definitively knows who the other is.</li> + </ul> + </li> + <li>Kerberos/SASL with auth-int + <ul> + <li>Builds on auth, also providing message integrity checks of the data going across the wire. 
You also know that the message you received was not altered.</li> + </ul> + </li> + <li>Kerberos/SASL with auth-conf + <ul> + <li>Builds on auth-int, also providing confidentiality of the message that was sent to prevent others from reading it (aka wire-encryption).</li> + </ul> + </li> +</ul> + +<p>For each test, five iterations were run to obtain a min, max, and median +time elapsed at each security configuration. After each iteration, +Hadoop, and Zookeeper processes were restarted, Accumulo tables are +wiped clean and tables are recreated. In addition, pagecache, dentries +and inodes are dropped by issuing a “3” command on +/proc/sys/vm/drop_caches to ensure that the OS is not caching things to disk +that might affect the benchmark. The following sequence was performed +between iterations:</p> + +<ol> + <li>Bring down Accumulo</li> + <li>Bring down Zookeeper</li> + <li>Bring down Hadoop</li> + <li>Run sync command</li> + <li>Drop OS cache</li> + <li>Bring up Hadoop</li> + <li>Bring up Zookeeper</li> + <li>Bring up Accumulo</li> + <li>Drop tables</li> + <li>Create tables</li> +</ol> + +<p>For each iteration, the results were stored, fed into <a href="https://nationalsecurityagency.github.io/timely/">Timely</a>, and viewed with Grafana. +Since the runs were executed sequentially, the start epochs for each run did not align. +To mitigate, the entries for each run were inserted +with the same relative epoch for convenient comparison in Grafana.</p> + +<p>The table configurations for Accumulo remain the same throughout the +different iterations and security levels. The Accumulo site +configurations differ only due to the different settings for the +security level configurations.</p> + +<h2 id="environment">Environment</h2> + +<p>In order to perform the testing, a small AWS cluster was setup using 14 +hosts on EC2. Two i2.xlarge instances were used as master nodes and eight +d2.xlarge instances were used for workers. 
In addition, two c4.4xlarge +instances were used for ingesters, one m4.2xlarge instance was used for +Timely, and one m4.xlarge instance was used for Apache Ambari. A logical +diagram of the setup is depicted below:</p> + +<p><img src="/images/blog/201702_security/figure1.png" alt="" width="400px" /></p> + +<p>Figure 1 - Cluster Layout, Roles, and Instance Types on AWS.</p> + +<p>The types of nodes and their function are given below:</p> + +<table id="instance_types" class="table"> + <thead> + <tr> + <th style="text-align: left">Node Type</th> + <th style="text-align: left">AWS EC2 Type</th> + <th style="text-align: left">EC2 Type Details</th> + <th style="text-align: left">Quantity</th> + </tr> + </thead> + <tbody> + <tr> + <td style="text-align: left">Ingest Nodes</td> + <td style="text-align: left">c4.4xlarge</td> + <td style="text-align: left">16 core, 30 GB RAM</td> + <td style="text-align: left">2</td> + </tr> + <tr> + <td style="text-align: left">Worker Node</td> + <td style="text-align: left">d2.xlarge</td> + <td style="text-align: left">4 cores, 30.5 GB RAM, 3x2T GB HD</td> + <td style="text-align: left">8</td> + </tr> + <tr> + <td style="text-align: left">Master Node</td> + <td style="text-align: left">i2.xlarge</td> + <td style="text-align: left">4 cores, 30.5 GB RAM, 1x800GB SSD</td> + <td style="text-align: left">2</td> + </tr> + <tr> + <td style="text-align: left">Admin Node</td> + <td style="text-align: left">m4.xlarge</td> + <td style="text-align: left">4 cores, 16 GB RAM</td> + <td style="text-align: left">1</td> + </tr> + <tr> + <td style="text-align: left">Timely Node</td> + <td style="text-align: left">m4.2xlarge</td> + <td style="text-align: left">8 cores, 32 GB RAM</td> + <td style="text-align: left">1</td> + </tr> + </tbody> +</table> + +<p>Table 1 â AWS Instance Types, Role, Details, and Quantities</p> + +<h2 id="results">Results</h2> + +<p>The median, max, and min of the milliseconds elapsed +time of all iterations for each test is 
displayed below. The percentage change +columns compare the Median, Max, and Min respectively from the no +security level to each security configuration (e.g. no security Median +vs. auth-int Median, no security Max vs. auth-int Max).</p> + +<table id="results" class="table"> + <thead> + <tr> + <th>Security Level</th> + <th style="text-align: right">Median</th> + <th style="text-align: right">Standard Deviation</th> + <th style="text-align: right">Max</th> + <th style="text-align: right">Min</th> + <th style="text-align: right">% Change (nosec Median vs. Median)</th> + <th style="text-align: right">% Change (nosec Max vs. Max)</th> + <th style="text-align: right">% Change (nosec Min vs. Min)</th> + <th style="text-align: right">Delta from Previous Level (Median)</th> + </tr> + </thead> + <tbody> + <tr> + <td>no security</td> + <td style="text-align: right">7829394</td> + <td style="text-align: right">139340</td> + <td style="text-align: right">8143035</td> + <td style="text-align: right">7764309</td> + <td style="text-align: right">0.00%</td> + <td style="text-align: right">0.00%</td> + <td style="text-align: right">0.00%</td> + <td style="text-align: right">0.00%</td> + </tr> + <tr> + <td>ssl</td> + <td style="text-align: right">8292760</td> + <td style="text-align: right">87012</td> + <td style="text-align: right">8464060</td> + <td style="text-align: right">8204955</td> + <td style="text-align: right">5.92%</td> + <td style="text-align: right">3.94%</td> + <td style="text-align: right">5.68%</td> + <td style="text-align: right">5.92%</td> + </tr> + <tr> + <td>auth</td> + <td style="text-align: right">8859552</td> + <td style="text-align: right">134109</td> + <td style="text-align: right">9047971</td> + <td style="text-align: right">8657618</td> + <td style="text-align: right">13.16%</td> + <td style="text-align: right">11.11%</td> + <td style="text-align: right">11.51%</td> + <td style="text-align: right">6.83%</td> + </tr> + <tr> + <td>auth-int</td> + <td 
style="text-align: right">9500737</td> + <td style="text-align: right">155968</td> + <td style="text-align: right">9753424</td> + <td style="text-align: right">9282371</td> + <td style="text-align: right">21.34%</td> + <td style="text-align: right">19.78%</td> + <td style="text-align: right">19.55%</td> + <td style="text-align: right">7.24%</td> + </tr> + <tr> + <td>auth-conf</td> + <td style="text-align: right">9479635</td> + <td style="text-align: right">170823</td> + <td style="text-align: right">9776580</td> + <td style="text-align: right">9282189</td> + <td style="text-align: right">21.08%</td> + <td style="text-align: right">20.06%</td> + <td style="text-align: right">19.55%</td> + <td style="text-align: right">-0.22%</td> + </tr> + </tbody> +</table> + +<p>Table 2 â Summarized Time Elapsed for Each Security Level</p> + +<h2 id="plots">Plots</h2> + +<p>Below are some snapshots of *stats.out elements via Grafana that were inserted +into Timely with the same relative start time. Each graph represents a field +in the output generated by <a href="https://github.com/apache/accumulo/blob/1.7/test/src/main/java/org/apache/accumulo/test/continuous/ContinuousStatsCollector.java">ContinuousStatsCollector</a></p> + +<h3 id="tablerecshttpsgithubcomapacheaccumuloblob17coresrcmainjavaorgapacheaccumulocoremasterthrifttableinfojaval73"><a href="https://github.com/apache/accumulo/blob/1.7/core/src/main/java/org/apache/accumulo/core/master/thrift/TableInfo.java#L73">TABLE_RECS</a></h3> +<p>(Number of records in the continuous ingest table. 
Down sample=1m, aggregate=avg)</p> + +<p><a href="/images/blog/201702_security/tableRecs.png"><img src="/images/blog/201702_security/tableRecs.png" alt="" width="800px" /></a></p> + +<h3 id="totalingesthttpsgithubcomapacheaccumuloblob17coresrcmainjavaorgapacheaccumulocoremasterthrifttableinfojaval77"><a href="https://github.com/apache/accumulo/blob/1.7/core/src/main/java/org/apache/accumulo/core/master/thrift/TableInfo.java#L77">TOTAL_INGEST</a></h3> +<p>(Ingest rate for Accumulo instance. Down sample=5m, aggregate=avg)</p> + +<p><a href="/images/blog/201702_security/totalIngest.png"><img src="/images/blog/201702_security/totalIngest.png" alt="" width="800px" /></a></p> + +<h3 id="avgfilestablethttpsgithubcomapacheaccumuloblob17coresrcmainjavaorgapacheaccumulocoreutilstatjaval63"><a href="https://github.com/apache/accumulo/blob/1.7/core/src/main/java/org/apache/accumulo/core/util/Stat.java#L63">AVG_FILES/TABLET</a></h3> +<p>(Average number of files per Accumulo tablet. Down sample=1m, aggregate=avg)</p> + +<p><a href="/images/blog/201702_security/avgFilesTab.png"><img src="/images/blog/201702_security/avgFilesTab.png" alt="" width="800px" /></a></p> + +<h3 id="accumulofileshttpsgithubcomapacheaccumuloblob17testsrcmainjavaorgapacheaccumulotestcontinuouscontinuousstatscollectorjaval127"><a href="https://github.com/apache/accumulo/blob/1.7/test/src/main/java/org/apache/accumulo/test/continuous/ContinuousStatsCollector.java#L127">ACCUMULO_FILES</a></h3> +<p>(Total number of files for Accumulo. Down sample=1m, aggregate=avg)</p> + +<p><a href="/images/blog/201702_security/accumuloFiles.png"><img src="/images/blog/201702_security/accumuloFiles.png" alt="" width="800px" /></a></p> + +<p>As can be seen in the plots above, the different security settings have +relatively consistent, discernable median run characteristics. 
The big +dip in each TOTAL_INGEST coincides with a large number of major +compactions, a rate decrease for TABLE_RECS, and a decrease in +AVG_FILES/TABLET.</p> + +<h2 id="final-thoughts">Final Thoughts</h2> + +<p>The biggest performance +hits to run duration median (compared to default security) were ~21% for +auth-int and auth-conf. Interesting to note that SSL’s median run duration was +lower than all SASL configs and that auth-conf’s was lower than auth-int. +Initial speculation for these oddities revolved around the +<a href="https://github.com/m1ch1/mapkeeper/wiki/Thrift-Java-Servers-Compared">Thrift server</a> +implementations, but the Thrift differences will not explain the auth-conf/int +disparity since both utilize TThreadPoolServer. It was certainly unexpected that the +addition of wire encryption would yield a faster median run duration. This result +prompted, as a sanity check, sniffing the net traffic (in a contrived example +not during a timed run) in both auth-conf and auth-int to ensure that the message +contents were actually obfuscated in auth-conf (they were) and not obfuscated in +auth-int (they weren’t).</p> + +<h2 id="future-work">Future Work</h2> + +<p>Part 2 of this series will consist of the same continuous ingest loads and +configurations with the addition of a query load on the system.</p> + + + +<p><strong>View all posts in the <a href="/news">news archive</a></strong></p> + + </div> + + +<footer> + + <p><a href="https://www.apache.org"><img src="/images/feather-small.gif" alt="Apache Software Foundation" id="asf-logo" height="100" /></a></p> + + <p>Copyright © 2011-2017 The Apache Software Foundation. 
Licensed under the <a href="https://www.apache.org/licenses/LICENSE-2.0">Apache License, Version 2.0</a>.</p> + +</footer> + + + </div> + </div> + </div> +</body> +</html> http://git-wip-us.apache.org/repos/asf/accumulo-website/blob/803c95d0/feed.xml ---------------------------------------------------------------------- diff --git a/feed.xml b/feed.xml index 6b10896..db56da6 100644 --- a/feed.xml +++ b/feed.xml @@ -6,8 +6,8 @@ </description> <link>https://accumulo.apache.org/</link> <atom:link href="https://accumulo.apache.org/feed.xml" rel="self" type="application/rss+xml"/> - <pubDate>Mon, 27 Feb 2017 11:22:59 -0500</pubDate> - <lastBuildDate>Mon, 27 Feb 2017 11:22:59 -0500</lastBuildDate> + <pubDate>Mon, 06 Mar 2017 17:58:36 -0500</pubDate> + <lastBuildDate>Mon, 06 Mar 2017 17:58:36 -0500</lastBuildDate> <generator>Jekyll v3.3.1</generator> <item> @@ -182,6 +182,295 @@ HDFS High-Availability instances, forcing NameNode failover.</p> </item> <item> + <title>Security Performance Implications</title> + <description> +<p>The purpose of this two part series was to measure the performance impact of +various security configurations on a cluster running Apache Accumuloâs +continuous ingest suite. The tests were performed using Amazon Web +Services (AWS), Hortonworks Data Platform 2.4 and Accumulo 1.7. Each of +the five different security settings in Accumulo 1.7 was tested including +no security, SSL, and SASL with Kerberos authentication for the three quality +of protection levels (auth, auth-int, auth-conf). KDC was MIT. HDFS was +configured to use Kerberos for authentication and had service level +authorization on. Other than that, no other security settings (HTTPS, RPC +protection, data transfer encryption, etc) were enabled. Timely was a +separate, single node HDFS/Zookeeper/Accumulo instance.</p> + +<h2 id="intro">Intro</h2> + +<p>All runs utilized the continuous ingest suite that ships with Accumulo (a +standard method to measure performance in Accumulo). 
It generates random +graph data and inserts it into Accumulo, creating +a long linked list of entries. Part 1 was run with just continuous ingest.<br /> +Based on the test results, there was a measurable performance impact as each additional security configuration was put in place.</p> + +<h2 id="methodology">Methodology</h2> + +<p>We ran 5 tests, one for each security configuration. Each iteration of each test inserted 2 billion entries. Batch writers were configured with 500K max mem +to artificially inflate the overall write overhead. This was performed on a +small cluster on AWS.</p> + +<p>Each test used one of the following security configurations:</p> + +<ul> + <li>No security - Default</li> + <li>Two way SSL</li> + <li>Kerberos/SASL with auth + <ul> + <li>auth is just Kerberos authentication between client and server. Each end of the RPC definitively knows who the other is.</li> + </ul> + </li> + <li>Kerberos/SASL with auth-int + <ul> + <li>Builds on auth, also providing message integrity checks of the data going across the wire. You also know that the message you received was not altered.</li> + </ul> + </li> + <li>Kerberos/SASL with auth-conf + <ul> + <li>Builds on auth-int, also providing confidentiality of the message that was sent to prevent others from reading it (aka wire-encryption).</li> + </ul> + </li> +</ul> + +<p>For each test, five iterations were run to obtain a min, max, and median +time elapsed at each security configuration. After each iteration, +Hadoop, and Zookeeper processes were restarted, Accumulo tables are +wiped clean and tables are recreated. In addition, pagecache, dentries +and inodes are dropped by issuing a “3” command on +/proc/sys/vm/drop_caches to ensure that the OS is not caching things to disk +that might affect the benchmark. 
The following sequence was performed +between iterations:</p> + +<ol> + <li>Bring down Accumulo</li> + <li>Bring down Zookeeper</li> + <li>Bring down Hadoop</li> + <li>Run sync command</li> + <li>Drop OS cache</li> + <li>Bring up Hadoop</li> + <li>Bring up Zookeeper</li> + <li>Bring up Accumulo</li> + <li>Drop tables</li> + <li>Create tables</li> +</ol> + +<p>For each iteration, the results were stored, fed into <a href="https://nationalsecurityagency.github.io/timely/">Timely</a>, and viewed with Grafana. +Since the runs were executed sequentially, the start epochs for each run did not align. +To mitigate, the entries for each run were inserted +with the same relative epoch for convenient comparison in Grafana.</p> + +<p>The table configurations for Accumulo remain the same throughout the +different iterations and security levels. The Accumulo site +configurations differ only due to the different settings for the +security level configurations.</p> + +<h2 id="environment">Environment</h2> + +<p>In order to perform the testing, a small AWS cluster was setup using 14 +hosts on EC2. Two i2.xlarge instances were used as master nodes and eight +d2.xlarge instances were used for workers. In addition, two c4.4xlarge +instances were used for ingesters, one m4.2xlarge instance was used for +Timely, and one m4.xlarge instance was used for Apache Ambari. 
A logical +diagram of the setup is depicted below:</p> + +<p><img src="/images/blog/201702_security/figure1.png" alt="" width="400px" /></p> + +<p>Figure 1 - Cluster Layout, Roles, and Instance Types on AWS.</p> + +<p>The types of nodes and their function are given below:</p> + +<table id="instance_types" class="table"> + <thead> + <tr> + <th style="text-align: left">Node Type</th> + <th style="text-align: left">AWS EC2 Type</th> + <th style="text-align: left">EC2 Type Details</th> + <th style="text-align: left">Quantity</th> + </tr> + </thead> + <tbody> + <tr> + <td style="text-align: left">Ingest Nodes</td> + <td style="text-align: left">c4.4xlarge</td> + <td style="text-align: left">16 core, 30 GB RAM</td> + <td style="text-align: left">2</td> + </tr> + <tr> + <td style="text-align: left">Worker Node</td> + <td style="text-align: left">d2.xlarge</td> + <td style="text-align: left">4 cores, 30.5 GB RAM, 3x2T GB HD</td> + <td style="text-align: left">8</td> + </tr> + <tr> + <td style="text-align: left">Master Node</td> + <td style="text-align: left">i2.xlarge</td> + <td style="text-align: left">4 cores, 30.5 GB RAM, 1x800GB SSD</td> + <td style="text-align: left">2</td> + </tr> + <tr> + <td style="text-align: left">Admin Node</td> + <td style="text-align: left">m4.xlarge</td> + <td style="text-align: left">4 cores, 16 GB RAM</td> + <td style="text-align: left">1</td> + </tr> + <tr> + <td style="text-align: left">Timely Node</td> + <td style="text-align: left">m4.2xlarge</td> + <td style="text-align: left">8 cores, 32 GB RAM</td> + <td style="text-align: left">1</td> + </tr> + </tbody> +</table> + +<p>Table 1 â AWS Instance Types, Role, Details, and Quantities</p> + +<h2 id="results">Results</h2> + +<p>The median, max, and min of the milliseconds elapsed +time of all iterations for each test is displayed below. The percentage change +columns compare the Median, Max, and Min respectively from the no +security level to each security configuration (e.g. 
no security Median +vs. auth-int Median, no security Max vs. auth-int Max).</p> + +<table id="results" class="table"> + <thead> + <tr> + <th>Security Level</th> + <th style="text-align: right">Median</th> + <th style="text-align: right">Standard Deviation</th> + <th style="text-align: right">Max</th> + <th style="text-align: right">Min</th> + <th style="text-align: right">% Change (nosec Median vs. Median)</th> + <th style="text-align: right">% Change (nosec Max vs. Max)</th> + <th style="text-align: right">% Change (nosec Min vs. Min)</th> + <th style="text-align: right">Delta from Previous Level (Median)</th> + </tr> + </thead> + <tbody> + <tr> + <td>no security</td> + <td style="text-align: right">7829394</td> + <td style="text-align: right">139340</td> + <td style="text-align: right">8143035</td> + <td style="text-align: right">7764309</td> + <td style="text-align: right">0.00%</td> + <td style="text-align: right">0.00%</td> + <td style="text-align: right">0.00%</td> + <td style="text-align: right">0.00%</td> + </tr> + <tr> + <td>ssl</td> + <td style="text-align: right">8292760</td> + <td style="text-align: right">87012</td> + <td style="text-align: right">8464060</td> + <td style="text-align: right">8204955</td> + <td style="text-align: right">5.92%</td> + <td style="text-align: right">3.94%</td> + <td style="text-align: right">5.68%</td> + <td style="text-align: right">5.92%</td> + </tr> + <tr> + <td>auth</td> + <td style="text-align: right">8859552</td> + <td style="text-align: right">134109</td> + <td style="text-align: right">9047971</td> + <td style="text-align: right">8657618</td> + <td style="text-align: right">13.16%</td> + <td style="text-align: right">11.11%</td> + <td style="text-align: right">11.51%</td> + <td style="text-align: right">6.83%</td> + </tr> + <tr> + <td>auth-int</td> + <td style="text-align: right">9500737</td> + <td style="text-align: right">155968</td> + <td style="text-align: right">9753424</td> + <td style="text-align: 
right">9282371</td> + <td style="text-align: right">21.34%</td> + <td style="text-align: right">19.78%</td> + <td style="text-align: right">19.55%</td> + <td style="text-align: right">7.24%</td> + </tr> + <tr> + <td>auth-conf</td> + <td style="text-align: right">9479635</td> + <td style="text-align: right">170823</td> + <td style="text-align: right">9776580</td> + <td style="text-align: right">9282189</td> + <td style="text-align: right">21.08%</td> + <td style="text-align: right">20.06%</td> + <td style="text-align: right">19.55%</td> + <td style="text-align: right">-0.22%</td> + </tr> + </tbody> +</table> + +<p>Table 2 â Summarized Time Elapsed for Each Security Level</p> + +<h2 id="plots">Plots</h2> + +<p>Below are some snapshots of *stats.out elements via Grafana that were inserted +into Timely with the same relative start time. Each graph represents a field +in the output generated by <a href="https://github.com/apache/accumulo/blob/1.7/test/src/main/java/org/apache/accumulo/test/continuous/ContinuousStatsCollector.java">ContinuousStatsCollector</a></p> + +<h3 id="tablerecshttpsgithubcomapacheaccumuloblob17coresrcmainjavaorgapacheaccumulocoremasterthrifttableinfojaval73"><a href="https://github.com/apache/accumulo/blob/1.7/core/src/main/java/org/apache/accumulo/core/master/thrift/TableInfo.java#L73">TABLE_RECS</a></h3> +<p>(Number of records in the continuous ingest table. Down sample=1m, aggregate=avg)</p> + +<p><a href="/images/blog/201702_security/tableRecs.png"><img src="/images/blog/201702_security/tableRecs.png" alt="" width="800px" /></a></p> + +<h3 id="totalingesthttpsgithubcomapacheaccumuloblob17coresrcmainjavaorgapacheaccumulocoremasterthrifttableinfojaval77"><a href="https://github.com/apache/accumulo/blob/1.7/core/src/main/java/org/apache/accumulo/core/master/thrift/TableInfo.java#L77">TOTAL_INGEST</a></h3> +<p>(Ingest rate for Accumulo instance. 
Down sample=5m, aggregate=avg)</p> + +<p><a href="/images/blog/201702_security/totalIngest.png"><img src="/images/blog/201702_security/totalIngest.png" alt="" width="800px" /></a></p> + +<h3 id="avgfilestablethttpsgithubcomapacheaccumuloblob17coresrcmainjavaorgapacheaccumulocoreutilstatjaval63"><a href="https://github.com/apache/accumulo/blob/1.7/core/src/main/java/org/apache/accumulo/core/util/Stat.java#L63">AVG_FILES/TABLET</a></h3> +<p>(Average number of files per Accumulo tablet. Down sample=1m, aggregate=avg)</p> + +<p><a href="/images/blog/201702_security/avgFilesTab.png"><img src="/images/blog/201702_security/avgFilesTab.png" alt="" width="800px" /></a></p> + +<h3 id="accumulofileshttpsgithubcomapacheaccumuloblob17testsrcmainjavaorgapacheaccumulotestcontinuouscontinuousstatscollectorjaval127"><a href="https://github.com/apache/accumulo/blob/1.7/test/src/main/java/org/apache/accumulo/test/continuous/ContinuousStatsCollector.java#L127">ACCUMULO_FILES</a></h3> +<p>(Total number of files for Accumulo. Down sample=1m, aggregate=avg)</p> + +<p><a href="/images/blog/201702_security/accumuloFiles.png"><img src="/images/blog/201702_security/accumuloFiles.png" alt="" width="800px" /></a></p> + +<p>As can be seen in the plots above, the different security settings have +relatively consistent, discernable median run characteristics. The big +dip in each TOTAL_INGEST coincides with a large number of major +compactions, a rate decrease for TABLE_RECS, and a decrease in +AVG_FILES/TABLET.</p> + +<h2 id="final-thoughts">Final Thoughts</h2> + +<p>The biggest performance +hits to run duration median (compared to default security) were ~21% for +auth-int and auth-conf. Interesting to note that SSL’s median run duration was +lower than all SASL configs and that auth-conf’s was lower than auth-int. 
+Initial speculation for these oddities revolved around the +<a href="https://github.com/m1ch1/mapkeeper/wiki/Thrift-Java-Servers-Compared">Thrift server</a> +implementations, but the Thrift differences will not explain the auth-conf/int +disparity since both utilize TThreadPoolServer. It was certainly unexpected that the +addition of wire encryption would yield a faster median run duration. This result +prompted, as a sanity check, sniffing the net traffic (in a contrived example +not during a timed run) in both auth-conf and auth-int to ensure that the message +contents were actually obfuscated in auth-conf (they were) and not obfuscated in +auth-int (they weren’t).</p> + +<h2 id="future-work">Future Work</h2> + +<p>Part 2 of this series will consist of the same continuous ingest loads and +configurations with the addition of a query load on the system.</p> + +</description> + <pubDate>Thu, 23 Feb 2017 00:00:00 -0500</pubDate> + <link>https://accumulo.apache.org/blog/2017/02/23/security-performance-implications.html</link> + <guid isPermaLink="true">https://accumulo.apache.org/blog/2017/02/23/security-performance-implications.html</guid> + + + <category>blog</category> + + </item> + + <item> <title>Running Accumulo on Fedora 25</title> <description><p>Apache Accumulo has been available in <a href="https://getfedora.org/">Fedora</a> since F20. Recently, the Fedora packages have been updated to Accumulo version <code class="highlighter-rouge">1.6.6</code> and have made some @@ -1725,97 +2014,5 @@ HDFS High-Availability instances, forcing NameNode failover.</p> </item> - <item> - <title>Apache Accumulo 1.6.4</title> - <description><p>Apache Accumulo 1.6.4 is a maintenance release on the 1.6 version branch. -This release contains changes from 21 issues, comprised of bug-fixes, -performance improvements and better test cases. 
See <a href="https://issues.apache.org/jira/browse/ACCUMULO/fixforversion/12332840">JIRA</a> for a -complete list.</p> - -<p>Below are resources for this release:</p> - -<ul> - <li><a href="/1.6/accumulo_user_manual.html">User Manual</a></li> - <li><a href="/1.6/apidocs">Javadocs</a></li> - <li><a href="/1.6/examples">Examples</a></li> -</ul> - -<p>Users of any previous 1.6.x release are strongly encouraged to update as soon as -possible to benefit from the improvements with very little concern in change -of underlying functionality. Users of 1.4 or 1.5 that are seeking to upgrade -to 1.6 should consider 1.6.4 as a starting point.</p> - -<h2 id="silent-data-loss-via-bulk-imported-files">Silent data-loss via bulk imported files</h2> - -<p>A user recently reported that a simple bulk-import application would occasionally -lose some records. Through investigation, it was found that when bulk imports into -a table failed the initial assignment, the logic that automatically retries the -imports was incorrectly choosing the tablets to import the files into. <a href="https://issues.apache.org/jira/browse/ACCUMULO-3967">ACCUMULO-3967</a> -contains more information on the cause and identification of the bug. The data-loss -condition would only affect entire files. 
If records from a file exist in Accumulo, -it is still guaranteed that all records within that imported file were successful.</p> - -<p>As such, users who have bulk import applications using previous versions of Accumulo -should verify that all of their data was correctly ingested into Accumulo and -immediately update to Accumulo 1.6.4.</p> - -<h2 id="other-bug-fixes">Other bug fixes</h2> - -<ul> - <li><a href="https://issues.apache.org/jira/browse/ACCUMULO-3979">ACCUMULO-3979</a> Fixed an issue where the BulkImporter failed -with an error message “QUERY_METADATA already started”.</li> - <li><a href="https://issues.apache.org/jira/browse/ACCUMULO-3965">ACCUMULO-3965</a> The <code class="highlighter-rouge">listscans</code> shell command did not contain -the <code class="highlighter-rouge">scanId</code> attribute for currently running scans.</li> - <li><a href="https://issues.apache.org/jira/browse/ACCUMULO-3946">ACCUMULO-3946</a> Verified that all user-facing operations contained -appropriate audit messages.</li> - <li><a href="https://issues.apache.org/jira/browse/ACCUMULO-3977">ACCUMULO-3977</a> Isolated scans with Iterators in use incorrectly -fail around invocation of <code class="highlighter-rouge">deepCopy</code>.</li> - <li><a href="https://issues.apache.org/jira/browse/ACCUMULO-3905">ACCUMULO-3905</a> RowDeletingIterator functions incorrectly when -columns are provided by the client. 
This restores intended functionality without -the need for a <a href="https://issues.apache.org/jira/browse/ACCUMULO-1801?focusedCommentId=13970204&amp;page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-13970204">workaround</a>.</li> - <li><a href="https://issues.apache.org/jira/browse/ACCUMULO-3959">ACCUMULO-3959</a> <a href="https://issues.apache.org/jira/browse/ACCUMULO-3934">ACCUMULO-3934</a> Multiple documentation -improvements to <code class="highlighter-rouge">BatchScanner</code>.</li> -</ul> - -<h2 id="testing">Testing</h2> - -<p>Each unit and functional test only runs on a single node, while the RandomWalk -and Continuous Ingest tests run on any number of nodes. <em>Agitation</em> refers to -randomly restarting Accumulo processes and Hadoop Datanode processes, and, in -HDFS High-Availability instances, forcing NameNode failover.</p> - -<table id="release_notes_testing" class="table"> - <thead> - <tr> - <th>OS</th> - <th>Hadoop</th> - <th>Nodes</th> - <th>ZooKeeper</th> - <th>HDFS HA</th> - <th>Tests</th> - </tr> - </thead> - <tbody> - <tr> - <td>Amazon Linux 2014.09</td> - <td>2.6.0</td> - <td>20</td> - <td>3.4.5</td> - <td>No</td> - <td>ContinuousIngest w/ verification w/ and w/o agitation (37B entries)</td> - </tr> - </tbody> -</table> - -</description> - <pubDate>Sat, 03 Oct 2015 00:00:00 -0400</pubDate> - <link>https://accumulo.apache.org/release/accumulo-1.6.4/</link> - <guid isPermaLink="true">https://accumulo.apache.org/release/accumulo-1.6.4/</guid> - - - <category>release</category> - - </item> - </channel> </rss> http://git-wip-us.apache.org/repos/asf/accumulo-website/blob/803c95d0/images/blog/201702_security/accumuloFiles.png ---------------------------------------------------------------------- diff --git a/images/blog/201702_security/accumuloFiles.png b/images/blog/201702_security/accumuloFiles.png new file mode 100644 index 0000000..4c23277 Binary files /dev/null and b/images/blog/201702_security/accumuloFiles.png 
differ http://git-wip-us.apache.org/repos/asf/accumulo-website/blob/803c95d0/images/blog/201702_security/avgFilesTab.png ---------------------------------------------------------------------- diff --git a/images/blog/201702_security/avgFilesTab.png b/images/blog/201702_security/avgFilesTab.png new file mode 100644 index 0000000..8251bca Binary files /dev/null and b/images/blog/201702_security/avgFilesTab.png differ http://git-wip-us.apache.org/repos/asf/accumulo-website/blob/803c95d0/images/blog/201702_security/figure1.png ---------------------------------------------------------------------- diff --git a/images/blog/201702_security/figure1.png b/images/blog/201702_security/figure1.png new file mode 100644 index 0000000..f28715c Binary files /dev/null and b/images/blog/201702_security/figure1.png differ http://git-wip-us.apache.org/repos/asf/accumulo-website/blob/803c95d0/images/blog/201702_security/tableRecs.png ---------------------------------------------------------------------- diff --git a/images/blog/201702_security/tableRecs.png b/images/blog/201702_security/tableRecs.png new file mode 100644 index 0000000..b7db4ee Binary files /dev/null and b/images/blog/201702_security/tableRecs.png differ http://git-wip-us.apache.org/repos/asf/accumulo-website/blob/803c95d0/images/blog/201702_security/tableRecsInMem.png ---------------------------------------------------------------------- diff --git a/images/blog/201702_security/tableRecsInMem.png b/images/blog/201702_security/tableRecsInMem.png new file mode 100644 index 0000000..b480372 Binary files /dev/null and b/images/blog/201702_security/tableRecsInMem.png differ http://git-wip-us.apache.org/repos/asf/accumulo-website/blob/803c95d0/images/blog/201702_security/totalIngest.png ---------------------------------------------------------------------- diff --git a/images/blog/201702_security/totalIngest.png b/images/blog/201702_security/totalIngest.png new file mode 100644 index 0000000..3bc3626 Binary files /dev/null 
and b/images/blog/201702_security/totalIngest.png differ http://git-wip-us.apache.org/repos/asf/accumulo-website/blob/803c95d0/index.html ---------------------------------------------------------------------- diff --git a/index.html b/index.html index cef80f8..0ff029d 100644 --- a/index.html +++ b/index.html @@ -164,29 +164,29 @@ <div class="row latest-news-item"> <div class="col-sm-12" style="margin-bottom: 5px"> - <span style="font-size: 12px; margin-right: 5px;">Dec 2016</span> - <a href="/blog/2016/12/19/running-on-fedora-25.html">Running Accumulo on Fedora 25</a> + <span style="font-size: 12px; margin-right: 5px;">Feb 2017</span> + <a href="/blog/2017/02/23/security-performance-implications.html">Security Performance Implications</a> </div> </div> <div class="row latest-news-item"> <div class="col-sm-12" style="margin-bottom: 5px"> - <span style="font-size: 12px; margin-right: 5px;">Nov 2016</span> - <a href="/blog/2016/11/16/simpler-scripts-and-config.html">Simpler scripts and configuration coming in Accumulo 2.0.0</a> + <span style="font-size: 12px; margin-right: 5px;">Dec 2016</span> + <a href="/blog/2016/12/19/running-on-fedora-25.html">Running Accumulo on Fedora 25</a> </div> </div> <div class="row latest-news-item"> <div class="col-sm-12" style="margin-bottom: 5px"> <span style="font-size: 12px; margin-right: 5px;">Nov 2016</span> - <a href="/blog/2016/11/02/durability-performance.html">Durability Performance Implications</a> + <a href="/blog/2016/11/16/simpler-scripts-and-config.html">Simpler scripts and configuration coming in Accumulo 2.0.0</a> </div> </div> <div class="row latest-news-item"> <div class="col-sm-12" style="margin-bottom: 5px"> - <span style="font-size: 12px; margin-right: 5px;">Sep 2016</span> - <a href="/release/accumulo-1.6.6/">Apache Accumulo 1.6.6</a> + <span style="font-size: 12px; margin-right: 5px;">Nov 2016</span> + <a href="/blog/2016/11/02/durability-performance.html">Durability Performance Implications</a> </div> </div> 
http://git-wip-us.apache.org/repos/asf/accumulo-website/blob/803c95d0/news/index.html ---------------------------------------------------------------------- diff --git a/news/index.html b/news/index.html index bd07ba2..2ffbe5c 100644 --- a/news/index.html +++ b/news/index.html @@ -155,6 +155,13 @@ + <div class="row" style="margin-top: 15px"> + <div class="col-md-1">Feb 23</div> + <div class="col-md-10"><a href="/blog/2017/02/23/security-performance-implications.html">Security Performance Implications</a></div> + </div> + + + <hr /> <h3>2016</h3>