http://git-wip-us.apache.org/repos/asf/flink-web/blob/a16dddeb/site/blog/page2/index.html
----------------------------------------------------------------------
diff --git a/site/blog/page2/index.html b/site/blog/page2/index.html
deleted file mode 100644
index 306b5ef..0000000
--- a/site/blog/page2/index.html
+++ /dev/null
@@ -1,998 +0,0 @@
-<!DOCTYPE html>
-<html lang="en">
-    <head>
-           <meta charset="utf-8">
-           <meta http-equiv="X-UA-Compatible" content="IE=edge">
-           <meta name="viewport" content="width=device-width, initial-scale=1">
-
-           <title>Apache Flink: Blog</title>
-           <link rel="shortcut icon" href="favicon.ico" type="image/x-icon">
-           <link rel="icon" href="favicon.ico" type="image/x-icon">
-           <link rel="stylesheet" href="/css/bootstrap.css">
-           <link rel="stylesheet" href="/css/bootstrap-lumen-custom.css">
-           <link rel="stylesheet" href="/css/syntax.css">
-           <link rel="stylesheet" href="/css/custom.css">
-           <link href="/css/main/main.css" rel="stylesheet">
-            <link href="/blog/feed.xml" rel="alternate" 
type="application/rss+xml" title="Flink Blog RSS feed" />
-           <!-- <link 
href="//maxcdn.bootstrapcdn.com/font-awesome/4.1.0/css/font-awesome.min.css" 
rel="stylesheet"> -->
-           <script 
src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.0/jquery.min.js";></script>
-           <script src="/js/bootstrap.min.js"></script>
-           <script src="/js/codetabs.js"></script>
-    </head>
-    <body>
-    <div class="af-header-container af-inner-pages-navigation">
-       <header>
-               <div class="container">
-                       <div class="row">
-                               <div class="col-md-1 af-mobile-nav-bar">
-                                       <a href="/" title="Home">
-                                       <img class="hidden-xs hidden-sm 
img-responsive"
-                                               src="/img/main/logo.png" 
alt="Apache Flink Logo">
-                                       </a>
-                                       <div class="row visible-xs">
-                                               <div class="col-xs-3">
-                                                   <a href="/" title="Home">
-                                                       <img class="hidden-x 
hidden-sm img-responsive"
-                                                               
src="/img/main/logo.png" alt="Apache Flink Logo">
-                                                       </a>
-                                               </div>
-                                               <div class="col-xs-5"></div>
-                                               <div class="col-xs-4">
-                                                       <div 
class="af-mobile-btn">
-                                                               <span 
class="glyphicon glyphicon-plus"></span>
-                                                       </div>
-                                               </div>
-                                       </div>
-                               </div>
-                               <!-- Navigation -->
-                               <div class="col-md-11">
-                                       <nav class="af-main-nav" 
role="navigation">
-                                               <ul>
-                                                       <li><a href="#" 
class="af-nav-links">Quickstart
-                                                                       <b 
class="caret"></b>
-                                                       </a>
-                                                               <ul 
class="af-dropdown-menu">
-                                                                       <li><a 
href="http://ci.apache.org/projects/flink/flink-docs-release-0.8/setup_quickstart.html";>Setup
-                                                                               
        Flink</a></li>
-                                                                       <li><a
-                                                                               
href="http://ci.apache.org/projects/flink/flink-docs-release-0.8/java_api_quickstart.html";>Java
-                                                                               
        API</a></li>
-                                                                       <li><a
-                                                                               
href="http://ci.apache.org/projects/flink/flink-docs-release-0.8/scala_api_quickstart.html";>Scala
-                                                                               
        API</a></li>
-                                                               </ul></li>
-                                                       <li><a 
href="/downloads.html">Download</a></li>
-                                                       <li><a 
href="http://ci.apache.org/projects/flink/flink-docs-release-0.8/faq.html";>FAQ</a></li>
-                                                       <li><a href="#" 
class="af-nav-links">Documentation <b
-                                                                       
class="caret"></b></a>
-                                                         <ul 
class="af-dropdown-menu">
-                                                                       <li 
class="af-separator">Current Snapshot:</li>
-                                                                       
<li></li>
-                                                                       <li><a 
href="http://ci.apache.org/projects/flink/flink-docs-master/";>0.9</a></li>
-                                                                       <li><a 
href="http://ci.apache.org/projects/flink/flink-docs-master/api/java";>0.9 
Javadocs</a></li>
-                                                                       <li><a 
href="http://ci.apache.org/projects/flink/flink-docs-master/api/scala/index.html#org.apache.flink.api.scala.package";>0.9
 Scaladocs</a></li>
-                                                                       <li 
class="divider"></li>
-                                                                       <li 
class="af-separator">Current Stable:</li>
-                                                                       
<li></li>
-                                                                       <li><a 
href="http://ci.apache.org/projects/flink/flink-docs-release-0.8/";>0.8.1</a></li>
-                                                                       <li><a 
href="http://ci.apache.org/projects/flink/flink-docs-release-0.8/api/java";>0.8.1
 Javadocs</a></li>
-                                                                       <li><a 
href="http://ci.apache.org/projects/flink/flink-docs-release-0.8/api/scala/index.html#org.apache.flink.api.scala.package";>0.8.1
 Scaladocs</a></li>
-                                                                       <li 
class="divider"></li>
-                                                                       
<li></li>
-                                                                       <li><a 
href="/archive.html">Archive</a></li>
-                                                                       
<li></li>
-                                                               </ul></li>
-                                                       <li><a href="#" 
class="af-nav-links">Community <b
-                                                                       
class="caret"></b></a>
-                                                               <ul 
class="af-dropdown-menu">
-                                                                       <li><a 
href="/community.html#getting-help">Getting Help</a></li>
-                                                                       <li><a 
href="/community.html#mailing-lists">Mailing Lists</a></li>
-                                                                       <li><a 
href="/community.html#issues">Issues</a></li>
-                                                                       <li><a 
href="/community.html#team">Team</a></li>
-                                                                       <li 
class="divider"></li>
-                                                                       <li><a 
href="/how-to-contribute.html">How To
-                                                                               
        Contribute</a></li>
-                                                                       <li><a 
href="/coding_guidelines.html">Coding
-                                                                               
        Guidelines</a></li>
-                                                               </ul></li>
-                                                       <li><a href="#" 
class="af-nav-links">Project <b
-                                                                       
class="caret"></b></a>
-                                                               <ul 
class="af-dropdown-menu">
-                                                                       <li><a 
href="/material.html">Material</a></li>
-                                                                       <li><a 
href="http://www.apache.org/";>Apache Software
-                                                                               
        Foundation <span class="glyphicon glyphicon-new-window"></span>
-                                                                       
</a></li>
-                                                                       <li><a
-                                                                               
href="https://cwiki.apache.org/confluence/display/FLINK";>Wiki
-                                                                               
        <span class="glyphicon glyphicon-new-window"></span>
-                                                                       
</a></li>
-                                                                       <li><a
-                                                                               
href="https://wiki.apache.org/incubator/StratosphereProposal";>Incubator
-                                                                               
        Proposal <span class="glyphicon glyphicon-new-window"></span>
-                                                                       
</a></li>
-                                                                       <li><a 
href="http://www.apache.org/licenses/LICENSE-2.0";>License
-                                                                               
        <span class="glyphicon glyphicon-new-window"></span>
-                                                                       
</a></li>
-                                                                       <li><a 
href="https://github.com/apache/incubator-flink";>Source
-                                                                               
        Code <span class="glyphicon glyphicon-new-window"></span>
-                                                                       
</a></li>
-                                                               </ul></li>
-                                                       <li><a 
href="/blog/index.html" class="">Blog</a></li>
-                                               </ul>
-                                       </nav>
-                               </div>
-                       </div>
-               </div>
-       </header>
-</div>
-
-
-    <div style="padding-top:50px" class="container">
-        <div class="container">
-       <div class="row">
-               <div class="col-md-2"></div>
-               <div class="col-md-8">
-                       
-                       <article>
-                               <h2><a 
href="/news/2014/10/03/upcoming_events.html">Upcoming Events</a></h2>
-                               <p class="meta">03 Oct 2014</p>
-
-                               <div><p>We are happy to announce several upcoming Flink events both in Europe and the US, starting with a <strong>Flink hackathon in Stockholm</strong> (Oct 8-9) and a talk about Flink at the <strong>Stockholm Hadoop User Group</strong> (Oct 8). This is followed by the very first <strong>Flink Meetup in Berlin</strong> (Oct 15). In the US, there will be two Flink Meetup talks: the first at the <strong>Pasadena Big Data User Group</strong> (Oct 29) and the second at <strong>Silicon Valley Hands On Programming Events</strong> (Nov 4).</p>
-
-<p>We are looking forward to seeing you at any of these events. The following 
is an overview of each event and links to the respective Meetup pages.</p>
-
-<h3 id="flink-hackathon,-stockholm-(oct-8-9)">Flink Hackathon, Stockholm (Oct 
8-9)</h3>
-
-<p>The hackathon will take place at KTH/SICS from Oct 8th-9th. You can sign up 
here: <a 
href="https://docs.google.com/spreadsheet/viewform?formkey=dDZnMlRtZHJ3Z0hVTlFZVjU2MWtoX0E6MA";>https://docs.google.com/spreadsheet/viewform?formkey=dDZnMlRtZHJ3Z0hVTlFZVjU2MWtoX0E6MA</a>.</p>
-
-<p>Here is a rough agenda and a list of topics to work upon or look into. 
Suggestions and more topics are welcome.</p>
-
-<h4 id="wednesday-(8th)">Wednesday (8th)</h4>
-
-<p>9:00 - 10:00  Introduction to Apache Flink, System overview, and Dev
-environment (by Stephan)</p>
-
-<p>10:15 - 11:00 Introduction to the topics (Streaming API and system by Gyula
-&amp; Marton), (Graphs by Vasia / Martin / Stephan)</p>
-
-<p>11:00 - 12:30 Happy hacking (part 1)</p>
-
-<p>12:30 - Lunch (Food will be provided by KTH / SICS. A big thank you to them
-and also to Paris, for organizing that)</p>
-
-<p>13:xx - Happy hacking (part 2)</p>
-
-<h4 id="thursday-(9th)">Thursday (9th)</h4>
-
-<p>Happy hacking (continued)</p>
-
-<h4 id="suggestions-for-topics">Suggestions for topics</h4>
-
-<h5 id="streaming">Streaming</h5>
-
-<ul>
-<li><p>Sample streaming applications (e.g. continuous heavy hitters and topics on the Twitter stream)</p></li>
-<li><p>Implement a simple SQL-to-streaming program parser, possibly using Apache Calcite (<a href="http://optiq.incubator.apache.org/">http://optiq.incubator.apache.org/</a>)</p></li>
-<li><p>Implement different windowing methods (count-based, time-based, 
...)</p></li>
-<li><p>Implement different windowed operations (windowed-stream-join,
-windowed-stream-co-group)</p></li>
-<li><p>Streaming state, and interaction with other programs (that access state
-of a stream program)</p></li>
-</ul>
-
-<h5 id="graph-analysis">Graph Analysis</h5>
-
-<ul>
-<li><p>Prototype a Graph DSL (simple graph building, filters, graph
-properties, some algorithms)</p></li>
-<li><p>Prototype abstractions for different graph processing paradigms (vertex-centric, partition-centric).</p></li>
-<li><p>Generalize the delta iterations, allow flexible state access.</p></li>
-</ul>
-
-<h3 id="meetup:-hadoop-user-group-talk,-stockholm-(oct-8)">Meetup: Hadoop User 
Group Talk, Stockholm (Oct 8)</h3>
-
-<p>Hosted by Spotify, opens at 6 PM.</p>
-
-<p><a 
href="http://www.meetup.com/stockholm-hug/events/207323222/";>http://www.meetup.com/stockholm-hug/events/207323222/</a></p>
-
-<h3 id="1st-flink-meetup,-berlin-(oct-15)">1st Flink Meetup, Berlin (Oct 
15)</h3>
-
-<p>We are happy to announce the first Flink meetup in Berlin. You are very welcome to sign up and attend. The event will be held at the Betahaus Cafe.</p>
-
-<p><a 
href="http://www.meetup.com/Apache-Flink-Meetup/events/208227422/";>http://www.meetup.com/Apache-Flink-Meetup/events/208227422/</a></p>
-
-<h3 id="meetup:-pasadena-big-data-user-group-(oct-29)">Meetup: Pasadena Big 
Data User Group (Oct 29)</h3>
-
-<p><a 
href="http://www.meetup.com/Pasadena-Big-Data-Users-Group/";>http://www.meetup.com/Pasadena-Big-Data-Users-Group/</a></p>
-
-<h3 id="meetup:-silicon-valley-hands-on-programming-events-(nov-4)">Meetup: 
Silicon Valley Hands On Programming Events (Nov 4)</h3>
-
-<p><a 
href="http://www.meetup.com/HandsOnProgrammingEvents/events/210504392/";>http://www.meetup.com/HandsOnProgrammingEvents/events/210504392/</a></p>
-</div>
-                               <a 
href="/news/2014/10/03/upcoming_events.html#disqus_thread">Upcoming Events</a>
-                       </article>
-                       
-                       <article>
-                               <h2><a 
href="/news/2014/09/26/release-0.6.1.html">Apache Flink 0.6.1 available</a></h2>
-                               <p class="meta">26 Sep 2014</p>
-
-                               <div><p>We are happy to announce the 
availability of Flink 0.6.1.</p>
-
-<p>0.6.1 is a maintenance release, which includes minor fixes across several parts of the system. We suggest that all users of Flink upgrade to this newest version.</p>
-
-<p><a href="/downloads.html">Download</a> the release today.</p>
-</div>
-                               <a 
href="/news/2014/09/26/release-0.6.1.html#disqus_thread">Apache Flink 0.6.1 
available</a>
-                       </article>
-                       
-                       <article>
-                               <h2><a 
href="/news/2014/08/26/release-0.6.html">Apache Flink 0.6 available</a></h2>
-                               <p class="meta">26 Aug 2014</p>
-
-                               <div><p>We are happy to announce the 
availability of Flink 0.6. This is the
-first release of the system inside the Apache Incubator and under the
-name Flink. Releases up to 0.5 were under the name Stratosphere, the
-academic and open source project that Flink originates from.</p>
-
-<h2 id="what-is-flink?">What is Flink?</h2>
-
-<p>Apache Flink is a general-purpose data processing engine for
-clusters. It runs on YARN clusters on top of data stored in Hadoop, as
-well as stand-alone. Flink currently has programming APIs in Java and
-Scala. Jobs are executed via Flink&#39;s own runtime engine. Flink
-features:</p>
-
-<p><strong>Robust in-memory and out-of-core processing:</strong> once read, 
data stays
-  in memory as much as possible, and is gracefully de-staged to disk in
-  the presence of memory pressure from limited memory or other
-  applications. The runtime is designed to perform very well both in
-  setups with abundant memory and in setups where memory is scarce.</p>
-
-<p><strong>POJO-based APIs:</strong> when programming, you do not have to pack 
your
-  data into key-value pairs or some other framework-specific data
-  model. Rather, you can use arbitrary Java and Scala types to model
-  your data.</p>
-
-<p><strong>Efficient iterative processing:</strong> Flink contains explicit 
&quot;iterate&quot; operators
-  that enable very efficient loops over data sets, e.g., for machine
-  learning and graph applications.</p>
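-
-<p>For illustration, a trivial bulk iteration in the Java API could look like the following sketch; the method names (<code>iterate</code>, <code>closeWith</code>) are those of later Apache Flink releases and should be treated as an assumption here:</p>
-<div class="highlight"><pre><code class="language-java" data-lang="java">import org.apache.flink.api.common.functions.MapFunction;
-import org.apache.flink.api.java.DataSet;
-import org.apache.flink.api.java.ExecutionEnvironment;
-import org.apache.flink.api.java.operators.IterativeDataSet;
-
-public class BulkIterationSketch {
-    public static void main(String[] args) throws Exception {
-        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
-        // Start with the value 0 and run 10 supersteps.
-        IterativeDataSet&lt;Integer&gt; loop = env.fromElements(0).iterate(10);
-        // The step function: add 1 in every superstep.
-        DataSet&lt;Integer&gt; plusOne = loop.map(new MapFunction&lt;Integer, Integer&gt;() {
-            @Override
-            public Integer map(Integer value) {
-                return value + 1;
-            }
-        });
-        // closeWith feeds the step result back into the next superstep
-        // and defines the final result of the loop.
-        loop.closeWith(plusOne).print();
-    }
-}</code></pre></div>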
-
-<p><strong>A modular system stack:</strong> Flink is not a direct 
implementation of its
-  APIs but a layered system. All programming APIs are translated to an
-  intermediate program representation that is compiled and optimized
-  via a cost-based optimizer. Lower-level layers of Flink also expose
-  programming APIs for extending the system.</p>
-
-<p><strong>Data pipelining/streaming:</strong> Flink&#39;s runtime is designed as a pipelined data processing engine rather than a batch processing engine. Operators do not wait for their predecessors to finish in order to start processing data. This results in very efficient handling of large data sets.</p>
-
-<h2 id="release-0.6">Release 0.6</h2>
-
-<p>Flink 0.6 builds on the latest Stratosphere 0.5 release. It includes
-many bug fixes and improvements that make the system more stable and
-robust, as well as breaking API changes.</p>
-
-<p>The full release notes are available <a 
href="https://issues.apache.org/jira/secure/ReleaseNote.jspa?projectId=12315522&amp;version=12327101";>here</a>.</p>
-
-<p>Download the release <a 
href="http://flink.incubator.apache.org/downloads.html";>here</a>.</p>
-
-<h2 id="contributors">Contributors</h2>
-
-<ul>
-<li>Wilson Cao</li>
-<li>Ufuk Celebi</li>
-<li>Stephan Ewen</li>
-<li>Jonathan Hasenburg</li>
-<li>Markus Holzemer</li>
-<li>Fabian Hueske</li>
-<li>Sebastian Kunert</li>
-<li>Vikhyat Korrapati</li>
-<li>Aljoscha Krettek</li>
-<li>Sebastian Kruse</li>
-<li>Raymond Liu</li>
-<li>Robert Metzger</li>
-<li>Mingliang Qi</li>
-<li>Till Rohrmann</li>
-<li>Henry Saputra</li>
-<li>Chesnay Schepler</li>
-<li>Kostas Tzoumas</li>
-<li>Robert Waury</li>
-<li>Timo Walther</li>
-<li>Daniel Warneke</li>
-<li>Tobias Wiens</li>
-</ul>
-</div>
-                               <a 
href="/news/2014/08/26/release-0.6.html#disqus_thread">Apache Flink 0.6 
available</a>
-                       </article>
-                       
-                       <article>
-                               <h2><a 
href="/news/2014/05/31/release-0.5.html">Stratosphere version 0.5 
available</a></h2>
-                               <p class="meta">31 May 2014</p>
-
-                               <div><p>We are happy to announce a new major 
Stratosphere release, version 0.5. This release adds many new features and 
improves the interoperability, stability, and performance of the system. The 
major theme of the release is the completely new Java API that makes it easy to 
write powerful distributed programs.</p>
-
-<p>The release can be downloaded from the <a 
href="http://stratosphere.eu/downloads/";>Stratosphere website</a> and from <a 
href="https://github.com/stratosphere/stratosphere/releases/tag/release-0.5";>GitHub</a>.
 All components are available as Apache Maven dependencies, making it simple to 
include Stratosphere in other projects. The website provides <a 
href="http://stratosphere.eu/docs/0.5/";>extensive documentation</a> of the 
system and the new features.</p>
-
-<h2 id="shortlist-of-new-features">Shortlist of new Features</h2>
-
-<p>Below is a short list of the most important additions to the Stratosphere 
system.</p>
-
-<h4 id="new-java-api">New Java API</h4>
-
-<p>This release introduces a completely new <strong>data set-centric Java API</strong>. This programming model significantly eases the development of Stratosphere programs, supports flexible use of regular Java classes as data types, and adds many new built-in operators to simplify the writing of powerful programs. The results are programs that need less code, are more readable, interoperate better with existing code, and execute faster.</p>
-
-<p>Take a look at the <a 
href="http://stratosphere.eu/docs/0.5/programming_guides/examples_java.html";>examples</a>
  to get a feel for the API.</p>
-
-<h4 id="general-api-improvements">General API Improvements</h4>
-
-<p><strong>Broadcast Variables:</strong> Publish a data set to all instances of another operator. This is handy if your operator depends on the result of a computation, e.g., filter all values smaller than the average.</p>
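-
-<p>As a minimal sketch of that below-average filter (written against the data set-centric Java API under the package and method names it later carries in Apache Flink, such as <code>withBroadcastSet</code> and <code>getBroadcastVariable</code>; the exact names are an assumption for this release):</p>
-<div class="highlight"><pre><code class="language-java" data-lang="java">import java.util.List;
-
-import org.apache.flink.api.common.functions.MapFunction;
-import org.apache.flink.api.common.functions.ReduceFunction;
-import org.apache.flink.api.common.functions.RichFilterFunction;
-import org.apache.flink.api.java.DataSet;
-import org.apache.flink.api.java.ExecutionEnvironment;
-import org.apache.flink.configuration.Configuration;
-
-public class BelowAverageFilter {
-    public static void main(String[] args) throws Exception {
-        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
-        DataSet&lt;Double&gt; values = env.fromElements(1.0, 2.0, 3.0, 10.0);
-
-        // Compute the average as a one-element data set
-        // (the element count is hard-coded here for brevity).
-        DataSet&lt;Double&gt; average = values
-            .reduce(new ReduceFunction&lt;Double&gt;() {
-                @Override
-                public Double reduce(Double a, Double b) { return a + b; }
-            })
-            .map(new MapFunction&lt;Double, Double&gt;() {
-                @Override
-                public Double map(Double sum) { return sum / 4.0; }
-            });
-
-        // Publish the average to every instance of the filter operator.
-        DataSet&lt;Double&gt; smaller = values
-            .filter(new RichFilterFunction&lt;Double&gt;() {
-                private double avg;
-
-                @Override
-                public void open(Configuration parameters) {
-                    List&lt;Double&gt; bc = getRuntimeContext().getBroadcastVariable(&quot;avg&quot;);
-                    avg = bc.get(0);
-                }
-
-                @Override
-                public boolean filter(Double v) {
-                    return v &lt; avg;
-                }
-            })
-            .withBroadcastSet(average, &quot;avg&quot;);
-
-        smaller.print();
-    }
-}</code></pre></div>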
-
-<p><strong>Distributed Cache:</strong> Make (local and HDFS) files locally 
available on each machine processing a task.</p>
-
-<p><strong>Iteration Termination Improvements:</strong> Iterative algorithms can now terminate based on intermediate data sets, not only through aggregated statistics.</p>
-
-<p><strong>Collection data sources and sinks:</strong> Speed up the development and testing of Stratosphere programs by reading data from regular Java collections and inserting results back into them.</p>
-
-<p><strong>JDBC data sources and sinks:</strong> Read data from and write data 
to relational databases using a JDBC driver.</p>
-
-<p><strong>Hadoop input format and output format support:</strong> Read and 
write data with any Hadoop input or output format.</p>
-
-<p><strong>Support for Avro encoded data:</strong> Read data that has been 
materialized using Avro.</p>
-
-<p><strong>Deflate Files:</strong> Stratosphere now transparently reads 
<code>.deflate</code> compressed files.</p>
-
-<h4 id="runtime-and-optimizer-improvements">Runtime and Optimizer 
Improvements</h4>
-
-<p><strong>DAG Runtime Streaming:</strong> Detection and resolution of 
streaming data flow deadlocks in the data flow optimizer.</p>
-
-<p><strong>Intermediate results across iteration boundaries:</strong> 
Intermediate results computed outside iterative parts can be used inside 
iterative parts of the program.</p>
-
-<p><strong>Stability fixes:</strong> Various stability fixes in both optimizer 
and runtime.</p>
-
-<h4 id="setup-&amp;-tooling">Setup &amp; Tooling</h4>
-
-<p><strong>Improved YARN support:</strong> Many improvements based on user feedback: packaging, permissions, error handling.</p>
-
-<p><strong>Java 8 compatibility</strong></p>
-
-<h2 id="contributors">Contributors</h2>
-
-<p>In total, 26 people have contributed to Stratosphere since the last 
release. Thank you for making this project possible!</p>
-
-<ul>
-<li>Alexander Alexandrov</li>
-<li>Jesus Camacho</li>
-<li>Ufuk Celebi</li>
-<li>Mikhail Erofeev</li>
-<li>Stephan Ewen</li>
-<li>Alexandr Ferodov</li>
-<li>Filip Haase</li>
-<li>Jonathan Hasenberg</li>
-<li>Markus Holzemer</li>
-<li>Fabian Hueske</li>
-<li>Vasia Kalavri</li>
-<li>Aljoscha Krettek</li>
-<li>Rajika Kumarasiri</li>
-<li>Sebastian Kunert</li>
-<li>Aaron Lam</li>
-<li>Robert Metzger</li>
-<li>Faisal Moeen</li>
-<li>Martin Neumann</li>
-<li>Mingliang Qi</li>
-<li>Till Rohrmann</li>
-<li>Chesnay Schepler</li>
-<li>Vyachislav Soludev</li>
-<li>Tuan Trieu</li>
-<li>Artem Tsikiridis</li>
-<li>Timo Walther</li>
-<li>Robert Waury</li>
-</ul>
-
-<h2 id="stratosphere-is-going-apache">Stratosphere is going Apache</h2>
-
-<p>The Stratosphere project has been accepted to the Apache Incubator and will 
continue its work under the umbrella of the Apache Software Foundation. Due to 
a name conflict, we are switching the name of the project. We will make future 
releases of Stratosphere through the Apache foundation under a new name.</p>
-</div>
-                               <a 
href="/news/2014/05/31/release-0.5.html#disqus_thread">Stratosphere version 0.5 
available</a>
-                       </article>
-                       
-                       <article>
-                               <h2><a 
href="/news/2014/04/16/stratosphere-goes-apache-incubator.html">Stratosphere 
accepted as Apache Incubator Project</a></h2>
-                               <p class="meta">16 Apr 2014</p>
-
-                               <div><p>We are happy to announce that 
Stratosphere has been accepted as a project for the <a 
href="https://incubator.apache.org/";>Apache Incubator</a>. The <a 
href="https://wiki.apache.org/incubator/StratosphereProposal";>proposal</a> has 
been accepted by the Incubator PMC members earlier this week. The Apache 
Incubator is the first step in the process of giving a project to the <a 
href="http://apache.org";>Apache Software Foundation</a>. While under 
incubation, the project will move to the Apache infrastructure and adopt the 
community-driven development principles of the Apache Foundation. Projects can 
graduate from incubation to become top-level projects if they show activity, a 
healthy community dynamic, and releases.</p>
-
-<p>We are glad to have Alan Gates as champion on board, as well as a set of 
great mentors, including Sean Owen, Ted Dunning, Owen O&#39;Malley, Henry 
Saputra, and Ashutosh Chauhan. We are confident that we will make this a great 
open source effort.</p>
-</div>
-                               <a 
href="/news/2014/04/16/stratosphere-goes-apache-incubator.html#disqus_thread">Stratosphere
 accepted as Apache Incubator Project</a>
-                       </article>
-                       
-                       <article>
-                               <h2><a 
href="/news/2014/02/24/stratosphere-google-summer-of-code-2014.html">Stratosphere
 got accepted for Google Summer of Code 2014</a></h2>
-                               <p class="meta">24 Feb 2014</p>
-
-                               <div><div class="lead">Students: Apply now for 
exciting summer projects in the Big Data / Analytics field</div>
-
-<p>We are pleased to announce that Stratosphere got accepted to <a 
href="http://www.google-melange.com/gsoc/homepage/google/gsoc2014";>Google 
Summer of Code 2014</a> as a mentoring organization. This means that we will 
host a bunch of students to conduct projects within Stratosphere over the 
summer. <a href="http://en.flossmanuals.net/GSoCStudentGuide/";>Read more on the 
GSoC manual for students</a> and the <a 
href="http://www.google-melange.com/gsoc/document/show/gsoc_program/google/gsoc2014/help_page";>official
 FAQ</a>. Students can improve their coding skills, learn to work with 
open-source projects, improve their CV and get a nice paycheck from Google.</p>
-
-<p>If you are an interested student, check out our <a 
href="https://github.com/stratosphere/stratosphere/wiki/Google-Summer-of-Code-2014";>idea
 list</a> in the wiki. It contains different projects with varying ranges of 
difficulty and requirement profiles. Students can also suggest their own 
projects.</p>
-
-<p>We welcome students to sign up at our <a 
href="https://groups.google.com/forum/#!forum/stratosphere-dev";>developer 
mailing list</a> to discuss their ideas.
-Applying students can use our wiki (create a new page) to create a project 
proposal. We are happy to have a look at it.</p>
-</div>
-                               <a 
href="/news/2014/02/24/stratosphere-google-summer-of-code-2014.html#disqus_thread">Stratosphere
 got accepted for Google Summer of Code 2014</a>
-                       </article>
-                       
-                       <article>
-                               <h2><a 
href="/news/2014/02/18/amazon-elastic-mapreduce-cloud-yarn.html">Use 
Stratosphere with Amazon Elastic MapReduce</a></h2>
-                               <p class="meta">18 Feb 2014</p>
-
-                               <div><div class="lead">Get started with 
Stratosphere within 10 minutes using Amazon Elastic MapReduce.</div>
-
-<p>This step-by-step tutorial will guide you through the setup of Stratosphere 
using Amazon Elastic MapReduce.</p>
-
-<h3 id="background">Background</h3>
-
-<p><a href="http://aws.amazon.com/elasticmapreduce/";>Amazon Elastic 
MapReduce</a> (Amazon EMR) is part of Amazon Web services. EMR allows to create 
Hadoop clusters that analyze data stored in Amazon S3 (AWS&#39; cloud storage). 
Stratosphere runs on top of Hadoop using the <a 
href="http://hadoop.apache.org/docs/r2.2.0/hadoop-project-dist/hadoop-common/releasenotes.html";>recently</a>
 released cluster resource manager <a 
href="http://hadoop.apache.org/docs/current2/hadoop-yarn/hadoop-yarn-site/YARN.html";>YARN</a>.
 YARN allows to use many different data analysis tools in your cluster side by 
side. Tools that run with YARN are, for example <a 
href="https://giraph.apache.org/";>Apache Giraph</a>, <a 
href="http://spark.incubator.apache.org/";>Spark</a> or <a 
href="http://hortonworks.com/blog/introducing-hoya-hbase-on-yarn/";>HBase</a>. 
Stratosphere also <a href="/docs/0.4/setup/yarn.html">runs on YARN</a> and 
that&#39;s the approach for this tutorial.</p>
-
-<h3 id="1.-step:-login-to-aws-and-prepare-secure-access">1. Step: Login to AWS 
and prepare secure access</h3>
-
-<ul>
-<li>Log in to the <a href="https://console.aws.amazon.com/console/home";>AWS 
Console</a></li>
-</ul>
-
-<p>You need to have SSH keys to access the Hadoop master node. If you do not 
have keys for your computer, generate them:</p>
-
-<div class="row" style="padding-top:15px">
-    <div class="col-md-6">
-<a data-lightbox="inputs" href="/img/blog/emr-security.png" 
data-lightbox="example-1"><img class="img-responsive" 
src="/img/blog/emr-security.png" /></a>
-    </div>
-    <div class="col-md-6">
-        <ul>
-            <li>Select <a 
href="https://console.aws.amazon.com/ec2/v2/home";>EC2</a> and click on "Key 
Pairs" in the "NETWORK & SECURITY" section.</li>
-            <li>Click on "Create Key Pair" and give it a name</li>
-            <li>After pressing "Yes" it will download a .pem file.</li>
-            <li>Change the permissions of the .pem file</li>
-
-<div class="highlight"><pre><code class="language-bash" data-lang="bash">chmod 
og-rwx ~/work-laptop.pem</code></pre></div>
-
-        </ul>
-    </div>
-</div>
-
-<h3 id="2.-step:-create-your-hadoop-cluster-in-the-cloud">2. Step: Create your 
Hadoop Cluster in the cloud</h3>
-
-<ul>
-<li>Select <a 
href="https://console.aws.amazon.com/elasticmapreduce/vnext/";>Elastic 
MapReduce</a> from the AWS console</li>
-<li>Click the blue &quot;Create cluster&quot; button.</li>
-</ul>
-
-<div class="row" style="padding-top:15px">
-    <div class="col-md-6">
-<a data-lightbox="inputs" href="/img/blog/emr-hadoopversion.png" 
data-lightbox="example-1"><img class="img-responsive" 
src="/img/blog/emr-hadoopversion.png" /></a>
-    </div>
-    <div class="col-md-6">
-        <ul>
-            <li>Choose a Cluster name</li>
-            <li>You can let the other settings remain unchanged (termination 
protection, logging, debugging)</li>
-            <li>For the Hadoop distribution, it is very important to choose 
one with YARN support. We use <b>3.0.3 (Hadoop 2.2.0)</b> (the minor version 
might change over time)</li>
-            <li>Remove all applications to be installed (unless you want to 
use them)</li>
-            <li>Choose the instance types you want to start. Stratosphere runs 
fine with m1.large instances. Core and Task instances both run Stratosphere, 
but only core instances contain HDFS data nodes.</li>
-            <li>Choose the <b>EC2 key pair</b> you've created in the previous 
step!</li>
-        </ul>
-    </div>
-</div>
-
-<ul>
-<li>That&#39;s it! You can now press the &quot;Create cluster&quot; button at the end of the form to boot it!</li>
-</ul>
-
-<h3 id="3.-step:-launch-stratosphere">3. Step: Launch Stratosphere</h3>
-
-<p>You might need to wait a few minutes until Amazon has started your cluster. (You can monitor the progress of the instances in EC2.) Use the refresh button in the top right corner.</p>
-
-<p>You can see that the master is up when the field <b>Master public DNS</b> contains a value (first line). Connect to it using SSH.</p>
-
-<div class="highlight"><pre><code class="language-bash" data-lang="bash">ssh 
hadoop@&lt;your master public DNS&gt; -i &lt;path to your .pem&gt;
-<span class="c"># for my example, it looks like this:</span>
-ssh had...@ec2-54-213-61-105.us-west-2.compute.amazonaws.com -i 
~/Downloads/work-laptop.pem</code></pre></div>
-
-
-
-(Windows users have to follow <a href="http://docs.aws.amazon.com/ElasticMapReduce/latest/DeveloperGuide/emr-connect-master-node-ssh.html">these instructions</a> to SSH into the machine running the master.) <br><br>
-Once connected to the master, download and start Stratosphere for YARN:
-<ul>
-    <li>Download and extract Stratosphere-YARN</li>
-
-<div class="highlight"><pre><code class="language-bash" data-lang="bash">wget 
http://stratosphere-bin.s3-website-us-east-1.amazonaws.com/stratosphere-dist-0.5-SNAPSHOT-yarn.tar.gz
-<span class="c"># extract it</span>
-tar xvzf stratosphere-dist-0.5-SNAPSHOT-yarn.tar.gz</code></pre></div>
-
-    <li>Start Stratosphere in the cluster using Hadoop YARN</li>
-
-
-<div class="highlight"><pre><code class="language-bash" data-lang="bash"><span 
class="nb">cd </span>stratosphere-yarn-0.5-SNAPSHOT/
-./bin/yarn-session.sh -n <span class="m">4</span> -jm <span 
class="m">1024</span> -tm 3000</code></pre></div>
-
-
-The arguments have the following meaning:
-    <ul>
-            <li><code>-n</code> number of TaskManagers (= workers). This number must not exceed the number of task instances</li>
-            <li><code>-jm</code> memory (heap space) for the JobManager</li>
-            <li><code>-tm</code> memory for the TaskManagers</li>
-    </ul>
-</ul>
-
-Once the output has changed from 
-
-<div class="highlight"><pre><code class="language-bash" 
data-lang="bash">JobManager is now running on N/A:6123</code></pre></div>
-
-to 
-
-<div class="highlight"><pre><code class="language-bash" 
data-lang="bash">JobManager is now running on 
ip-172-31-13-68.us-west-2.compute.internal:6123</code></pre></div>
-
-Stratosphere has started the JobManager. It will take a few seconds until the TaskManagers (workers) have connected to the JobManager. To see how many TaskManagers have connected, you have to access the JobManager's web interface. Follow the steps below to do that ...
-
-
-
-
-<h3>Step 4: Launch a Stratosphere Job</h3>
-
-This step shows how to submit and monitor a Stratosphere Job in the Amazon 
Cloud.
-
-<ul>
-<li> Open an additional terminal and connect again to the master of your 
cluster. </li>
-
-We recommend creating a SOCKS proxy with SSH that allows you to easily connect into the cluster. (If you already have a VPN set up with EC2, you can probably use that as well.)
-
-
-<div class="highlight"><pre><code class="language-bash" data-lang="bash">ssh 
-D localhost:2001 hadoop@&lt;your master dns name&gt; -i &lt;your pem 
file&gt;</code></pre></div>
-
-
-Notice the <code>-D localhost:2001</code> argument: It opens a SOCKS proxy on 
your computer allowing any application to use it to communicate through the 
proxy via an SSH tunnel to the master node. This allows you to access all 
services in your EMR cluster, such as the HDFS NameNode or the YARN web 
interface.
-
-<li>Configure a browser to use the SOCKS proxy. Open a browser with SOCKS 
proxy support (such as Firefox). Ideally, do not use your primary browser for 
this, since ALL traffic will be routed through Amazon.</li>
-
-<div class="row" style="padding-top:15px">
-    <div class="col-md-6">
-<a data-lightbox="inputs" href="/img/blog/emr-firefoxsettings.png" 
data-lightbox="example-1"><img class="img-responsive" 
src="/img/blog/emr-firefoxsettings.png" /></a>
-    </div>
-    <div class="col-md-6">
-        <ul>
-            <li>To configure the SOCKS proxy with Firefox, click on "Edit", 
"Preferences", choose the "Advanced" tab and press the "Settings ..." 
button.</li>
-            <li>Enter the details of the SOCKS proxy <b>localhost:2001</b>. 
Choose SOCKS v4.</li>
-            <li>Close the settings; your browser is now talking to the master node of your cluster.</li>
-        </ul>
-    </div>
-</div>
-
-</ul>
-
-<p>Since you&#39;re connected to the master now, you can open several web 
interfaces: <br>
-<b>YARN Resource Manager</b>: 
<code>http://&lt;masterIPAddress&gt;:9026/</code> <br>
-<b>HDFS NameNode</b>: <code>http://&lt;masterIPAddress&gt;:9101/</code></p>
-
-<p>You find the <code>masterIPAddress</code> by entering <code>ifconfig</code> 
into the terminal:</p>
-
-<div class="highlight"><pre><code class="language-bash" data-lang="bash"><span 
class="o">[</span>hadoop@ip-172-31-38-95 ~<span class="o">]</span><span 
class="nv">$ </span>ifconfig
-eth0      Link encap:Ethernet  HWaddr 02:CF:8E:CB:28:B2  
-          inet addr:172.31.38.95  Bcast:172.31.47.255  Mask:255.255.240.0
-          inet6 addr: fe80::cf:8eff:fecb:28b2/64 Scope:Link
-          RX bytes:166314967 <span class="o">(</span>158.6 MiB<span 
class="o">)</span>  TX bytes:89319246 <span class="o">(</span>85.1 MiB<span 
class="o">)</span></code></pre></div>
-
-<p><strong>Optional:</strong> If you want to use the hostnames within your 
Firefox (that also makes the NameNode links work), you have to enable DNS 
resolution over the SOCKS proxy. Open the Firefox config 
<code>about:config</code> and set <code>network.proxy.socks_remote_dns</code> 
to <code>true</code>.</p>
-
-<p>The YARN ResourceManager also allows you to connect to 
<b>Stratosphere&#39;s JobManager web interface</b>. Click the 
<b>ApplicationMaster</b> link in the &quot;Tracking UI&quot; column.</p>
-
-<p>To run the Wordcount example, you have to upload some sample data.</p>
-
-<div class="highlight"><pre><code class="language-bash" data-lang="bash"><span 
class="c"># download a text</span>
-wget http://www.gnu.org/licenses/gpl.txt
-<span class="c"># upload it to HDFS:</span>
-hadoop fs -copyFromLocal gpl.txt /input</code></pre></div>
-
-<p>To run a Job, enter the following command into the master&#39;s command 
line:</p>
-
-<div class="highlight"><pre><code class="language-bash" data-lang="bash"><span 
class="c"># optional: go to the extracted directory</span>
-<span class="nb">cd </span>stratosphere-yarn-0.5-SNAPSHOT/
-<span class="c"># run the wordcount example</span>
-./bin/stratosphere run -w -j 
examples/stratosphere-java-examples-0.5-SNAPSHOT-WordCount.jar  -a <span 
class="m">16</span> hdfs:///input hdfs:///output</code></pre></div>
-
-<p>Make sure that the expected number of TaskManagers has connected to the JobManager.</p>
-
-<p>Let&#39;s go through the command in detail:</p>
-
-<ul>
-<li><code>./bin/stratosphere</code> is the standard launcher for Stratosphere 
jobs from the command line</li>
-<li>The <code>-w</code> flag stands for &quot;wait&quot;. It is very useful for tracking the progress of the job.</li>
-<li><code>-j examples/stratosphere-java-examples-0.5-SNAPSHOT-WordCount.jar</code>: the <code>-j</code> option sets the jar file containing the job. If you have your own application, place your jar file here.</li>
-<li><code>-a 16 hdfs:///input hdfs:///output</code>: the <code>-a</code> option specifies the job-specific arguments. In this case, the wordcount expects the following input: <code>&lt;numSubTasks&gt; &lt;input&gt; &lt;output&gt;</code>.</li>
-</ul>
-
-<p>You can monitor the progress of your job in the JobManager web interface. Once the job has finished (which should be the case after less than 10 seconds), you can analyze it there.
-Inspect the result in HDFS using:</p>
-
-<div class="highlight"><pre><code class="language-bash" 
data-lang="bash">hadoop fs -tail /output</code></pre></div>
-
-<p>If you want to shut down the whole cluster in the cloud, use Amazon&#39;s web interface and click on &quot;Terminate cluster&quot;. If you just want to stop the YARN session, press CTRL+C in the terminal. The Stratosphere instances will then be killed by YARN.</p>
-
-<p><br><br>
-<small>Written by Robert Metzger (<a 
href="https://twitter.com/rmetzger_";>@rmetzger_</a>).</small></p>
-</div>
-                               <a 
href="/news/2014/02/18/amazon-elastic-mapreduce-cloud-yarn.html#disqus_thread">Use
 Stratosphere with Amazon Elastic MapReduce</a>
-                       </article>
-                       
-                       <article>
-                               <h2><a 
href="/news/2014/01/28/querying_mongodb.html">Accessing Data Stored in MongoDB 
with Stratosphere</a></h2>
-                               <p class="meta">28 Jan 2014</p>
-
-                               <div><p>We recently merged a <a 
href="https://github.com/stratosphere/stratosphere/pull/437";>pull request</a> 
that allows you to use any existing Hadoop <a 
href="http://developer.yahoo.com/hadoop/tutorial/module5.html#inputformat";>InputFormat</a>
 with Stratosphere. So you can now (in the <code>0.5-SNAPSHOT</code> and 
upwards versions) define a Hadoop-based data source:</p>
-<div class="highlight"><pre><code class="language-java" data-lang="java"><span 
class="n">HadoopDataSource</span> <span class="n">source</span> <span 
class="o">=</span> <span class="k">new</span> <span 
class="nf">HadoopDataSource</span><span class="o">(</span><span 
class="k">new</span> <span class="nf">TextInputFormat</span><span 
class="o">(),</span> <span class="k">new</span> <span 
class="nf">JobConf</span><span class="o">(),</span> <span class="s">&quot;Input 
Lines&quot;</span><span class="o">);</span>
-<span class="n">TextInputFormat</span><span class="o">.</span><span 
class="na">addInputPath</span><span class="o">(</span><span 
class="n">source</span><span class="o">.</span><span 
class="na">getJobConf</span><span class="o">(),</span> <span 
class="k">new</span> <span class="nf">Path</span><span class="o">(</span><span 
class="n">dataInput</span><span class="o">));</span>
-</code></pre></div>
-<p>We describe in the following article how to access data stored in <a href="http://www.mongodb.org/">MongoDB</a> with Stratosphere. This allows users to join data from multiple sources (e.g. MongoDB and HDFS) or perform machine learning with the documents stored in MongoDB.</p>
-
-<p>The approach here is to use the <code>MongoInputFormat</code> that was 
developed for Apache Hadoop but now also runs with Stratosphere.</p>
-<div class="highlight"><pre><code class="language-java" data-lang="java"><span 
class="n">JobConf</span> <span class="n">conf</span> <span class="o">=</span> 
<span class="k">new</span> <span class="nf">JobConf</span><span 
class="o">();</span>
-<span class="n">conf</span><span class="o">.</span><span 
class="na">set</span><span class="o">(</span><span 
class="s">&quot;mongo.input.uri&quot;</span><span class="o">,</span><span 
class="s">&quot;mongodb://localhost:27017/enron_mail.messages&quot;</span><span 
class="o">);</span>
-<span class="n">HadoopDataSource</span> <span class="n">src</span> <span 
class="o">=</span> <span class="k">new</span> <span 
class="nf">HadoopDataSource</span><span class="o">(</span><span 
class="k">new</span> <span class="nf">MongoInputFormat</span><span 
class="o">(),</span> <span class="n">conf</span><span class="o">,</span> <span 
class="s">&quot;Read from Mongodb&quot;</span><span class="o">,</span> <span 
class="k">new</span> <span class="nf">WritableWrapperConverter</span><span 
class="o">());</span>
-</code></pre></div>
-<h3 id="example-program">Example Program</h3>
-
-<p>The example program reads data from the <a 
href="http://www.cs.cmu.edu/%7Eenron/";>enron dataset</a> that contains about 
500k internal e-mails. The data is stored in MongoDB and the Stratosphere 
program counts the number of e-mails per day.</p>
-
-<p>The complete code of this sample program is available on <a 
href="https://github.com/stratosphere/stratosphere-mongodb-example";>GitHub</a>.</p>
-
-<h4 id="prepare-mongodb-and-the-data">Prepare MongoDB and the Data</h4>
-
-<ul>
-<li>Install MongoDB</li>
-<li>Download the enron dataset from <a 
href="http://mongodb-enron-email.s3-website-us-east-1.amazonaws.com/";>their 
website</a>.</li>
-<li>Unpack and load it</li>
-</ul>
-<div class="highlight"><pre><code class="language-bash" data-lang="bash"> 
bunzip2 enron_mongo.tar.bz2
- tar xvf enron_mongo.tar
- mongorestore dump/enron_mail/messages.bson
-</code></pre></div>
-<p>We used <a href="http://robomongo.org/";>Robomongo</a> to visually examine 
the dataset stored in MongoDB.</p>
-
-<p><img src="/img/blog/robomongo.png" style="width:90%;margin:15px"></p>
-
-<h4 id="build-mongoinputformat">Build <code>MongoInputFormat</code></h4>
-
-<p>MongoDB offers an InputFormat for Hadoop on their <a 
href="https://github.com/mongodb/mongo-hadoop";>GitHub page</a>. The code is not 
available in any Maven repository, so we have to build the jar file on our 
own.</p>
-
-<ul>
-<li>Check out the repository</li>
-</ul>
-<div class="highlight"><pre><code class="language-text" data-lang="text">git 
clone https://github.com/mongodb/mongo-hadoop.git
-cd mongo-hadoop
-</code></pre></div>
-<ul>
-<li>Set the appropriate Hadoop version in the <code>build.sbt</code>; we used <code>1.1</code>.</li>
-</ul>
-<div class="highlight"><pre><code class="language-bash" 
data-lang="bash">hadoopRelease in ThisBuild :<span class="o">=</span> <span 
class="s2">&quot;1.1&quot;</span>
-</code></pre></div>
-<ul>
-<li>Build the input format</li>
-</ul>
-<div class="highlight"><pre><code class="language-bash" data-lang="bash">./sbt 
package
-</code></pre></div>
-<p>The jar-file is now located in <code>core/target</code>.</p>
-
-<h4 id="the-stratosphere-program">The Stratosphere Program</h4>
-
-<p>Now we have everything prepared to run the Stratosphere program. I only ran 
it on my local computer, out of Eclipse. To do that, check out the code ...</p>
-<div class="highlight"><pre><code class="language-bash" data-lang="bash">git 
clone https://github.com/stratosphere/stratosphere-mongodb-example.git
-</code></pre></div>
-<p>... and import it as a Maven project into Eclipse. You have to manually add the previously built mongo-hadoop jar file as a dependency.
-You can now press the &quot;Run&quot; button and see how Stratosphere executes the little program. It ran for about 8 seconds on the 1.5 GB dataset.</p>
-
-<p>The result (located in <code>/tmp/enronCountByDay</code>) now looks like 
this.</p>
-<div class="highlight"><pre><code class="language-text" 
data-lang="text">11,Fri Sep 26 10:00:00 CEST 1997
-154,Tue Jun 29 10:56:00 CEST 1999
-292,Tue Aug 10 12:11:00 CEST 1999
-185,Thu Aug 12 18:35:00 CEST 1999
-26,Fri Mar 19 12:33:00 CET 1999
-</code></pre></div>
-<p>There is one thing left I want to point out here. MongoDB represents objects stored in the database as JSON documents. Since Stratosphere&#39;s standard types do not support JSON documents, I used the <code>WritableWrapper</code> here. This wrapper allows you to use any Hadoop datatype with Stratosphere.</p>
-
-<p>The following code example shows how the JSON documents are accessed in Stratosphere.</p>
-<div class="highlight"><pre><code class="language-java" data-lang="java"><span 
class="kd">public</span> <span class="kt">void</span> <span 
class="nf">map</span><span class="o">(</span><span class="n">Record</span> 
<span class="n">record</span><span class="o">,</span> <span 
class="n">Collector</span><span class="o">&lt;</span><span 
class="n">Record</span><span class="o">&gt;</span> <span 
class="n">out</span><span class="o">)</span> <span class="kd">throws</span> 
<span class="n">Exception</span> <span class="o">{</span>
-    <span class="n">Writable</span> <span class="n">valWr</span> <span 
class="o">=</span> <span class="n">record</span><span class="o">.</span><span 
class="na">getField</span><span class="o">(</span><span 
class="mi">1</span><span class="o">,</span> <span 
class="n">WritableWrapper</span><span class="o">.</span><span 
class="na">class</span><span class="o">).</span><span 
class="na">value</span><span class="o">();</span>
-    <span class="n">BSONWritable</span> <span class="n">value</span> <span 
class="o">=</span> <span class="o">(</span><span 
class="n">BSONWritable</span><span class="o">)</span> <span 
class="n">valWr</span><span class="o">;</span>
-    <span class="n">Object</span> <span class="n">headers</span> <span 
class="o">=</span> <span class="n">value</span><span class="o">.</span><span 
class="na">getDoc</span><span class="o">().</span><span 
class="na">get</span><span class="o">(</span><span 
class="s">&quot;headers&quot;</span><span class="o">);</span>
-    <span class="n">BasicDBObject</span> <span class="n">headerOb</span> <span 
class="o">=</span> <span class="o">(</span><span 
class="n">BasicDBObject</span><span class="o">)</span> <span 
class="n">headers</span><span class="o">;</span>
-    <span class="n">String</span> <span class="n">date</span> <span 
class="o">=</span> <span class="o">(</span><span class="n">String</span><span 
class="o">)</span> <span class="n">headerOb</span><span class="o">.</span><span 
class="na">get</span><span class="o">(</span><span 
class="s">&quot;Date&quot;</span><span class="o">);</span>
-    <span class="c1">// further date processing</span>
-<span class="o">}</span>
-</code></pre></div>
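-<p>For illustration, the &quot;further date processing&quot; left open above could reduce the RFC 822 style &quot;Date&quot; header to a per-day key using plain JDK classes; the format pattern below is an assumption, not taken from the original program:</p>
-<div class="highlight"><pre><code class="language-java" data-lang="java">import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.Locale;
-
-public class DateKeyExtractor {
-    // E-mail headers carry RFC 822 style dates such as
-    // &quot;Fri, 26 Sep 1997 10:00:00 -0700 (PDT)&quot;.
-    private static final SimpleDateFormat IN =
-        new SimpleDateFormat(&quot;EEE, d MMM yyyy HH:mm:ss Z&quot;, Locale.US);
-    // Keep only the day, e.g. &quot;1997-09-26&quot; (formatted in the local time zone).
-    private static final SimpleDateFormat OUT =
-        new SimpleDateFormat(&quot;yyyy-MM-dd&quot;, Locale.US);
-
-    public static String dayKey(String dateHeader) throws ParseException {
-        return OUT.format(IN.parse(dateHeader));
-    }
-
-    public static void main(String[] args) throws ParseException {
-        System.out.println(dayKey(&quot;Fri, 26 Sep 1997 10:00:00 -0700 (PDT)&quot;));
-    }
-}</code></pre></div>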
-<p>Please use the comments if you have questions or if you want to showcase 
your own MongoDB-Stratosphere integration.
-<br><br>
-<small>Written by Robert Metzger (<a 
href="https://twitter.com/rmetzger_";>@rmetzger_</a>).</small></p>
-</div>
-                               <a 
href="/news/2014/01/28/querying_mongodb.html#disqus_thread">Accessing Data 
Stored in MongoDB with Stratosphere</a>
-                       </article>
-                       
-                       <article>
-                               <h2><a 
href="/news/2014/01/26/optimizer_plan_visualization_tool.html">Optimizer Plan 
Visualization Tool</a></h2>
-                               <p class="meta">26 Jan 2014</p>
-
-                               <div><p>Stratosphere&#39;s hybrid approach 
combines <strong>MapReduce</strong> and <strong>MPP database</strong> 
techniques. One central part of this approach is to have a <strong>separation 
between the programming (API) and the way programs are executed</strong> 
<em>(execution plans)</em>. The <strong>compiler/optimizer</strong> decides the 
details concerning caching or when to partition/broadcast with a holistic view 
of the program. The same program may actually be executed differently in 
different scenarios (input data of different sizes, different number of 
machines).</p>
-
-<p><strong>If you want to know how exactly the system executes your program, 
you can find it out in two ways</strong>:</p>
-
-<ol>
-<li><p>The <strong>browser-based webclient UI</strong>, which takes programs 
packaged into JARs and draws the execution plan as a visual data flow (check 
out the <a 
href="http://stratosphere.eu/docs/0.4/program_execution/web_interface.html";>documentation</a>
 for details).</p></li>
-<li><p>For <strong>programs using the <a href="http://stratosphere.eu/docs/0.4/program_execution/local_executor.html">Local</a> or <a href="http://stratosphere.eu/docs/0.4/program_execution/remote_executor.html">Remote Executor</a></strong>, you can get the optimizer plan using the method <code>LocalExecutor.optimizerPlanAsJSON(plan)</code> (a short sketch of this follows the list). The <strong>resulting JSON</strong> string describes the execution strategies chosen by the optimizer. Naturally, you do not want to parse that yourself, especially for longer programs.</p></li>
-</ol>
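-
-<p>A minimal sketch of that second option: write the JSON string to a file and then paste it into the visualization tool. The package names below are assumptions based on the 0.4/0.5 code layout, not a definitive reference:</p>
-<div class="highlight"><pre><code class="language-java" data-lang="java">import java.io.FileWriter;
-
-import eu.stratosphere.api.common.Plan;
-import eu.stratosphere.client.LocalExecutor;
-
-public class DumpPlanAsJson {
-    /** Writes the optimizer plan of the given program plan to a JSON file. */
-    public static void dump(Plan plan, String targetFile) throws Exception {
-        String json = LocalExecutor.optimizerPlanAsJSON(plan);
-        try (FileWriter out = new FileWriter(targetFile)) {
-            out.write(json);
-        }
-    }
-
-    // Pass in whatever Plan your program assembles, e.g. the one returned by a
-    // Program#getPlan(...) implementation such as the bundled examples:
-    //
-    //   dump(new WorksetConnectedComponents().getPlan(args), &quot;plan.json&quot;);
-}</code></pre></div>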
-
-<p>The builds <em>0.5-SNAPSHOT</em> and later come with a <strong>tool that visualizes the JSON</strong> string. It is a standalone version of the webclient&#39;s visualization, packaged as an HTML document <code>tools/planVisualizer.html</code>.</p>
-
-<p>If you open it in a browser (for example with <code>chromium-browser tools/planVisualizer.html</code>), it shows a text area where you can paste the JSON string, and it renders that string as a dataflow plan (assuming it was a valid JSON string and plan). The pictures below show how that looks for the <a href="https://github.com/stratosphere/stratosphere/blob/release-0.4/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/record/connectedcomponents/WorksetConnectedComponents.java?source=cc">included sample program</a> that uses delta iterations to compute the connected components of a graph.</p>
-
-<p><img src="/img/blog/plan_visualizer1.png" style="width:100%;"></p>
-
-<p><img src="/img/blog/plan_visualizer2.png" style="width:100%;"></p>
-</div>
-                               <a 
href="/news/2014/01/26/optimizer_plan_visualization_tool.html#disqus_thread">Optimizer
 Plan Visualization Tool</a>
-                       </article>
-                       
-                       <article>
-                               <h2><a 
href="/news/2014/01/13/stratosphere-release-0.4.html">Stratosphere 0.4 
Released</a></h2>
-                               <p class="meta">13 Jan 2014</p>
-
-                               <div><p>We are pleased to announce that version 
0.4 of the Stratosphere system has been released. </p>
-
-<p>Our team has been working hard during the last few months to create an improved and stable Stratosphere version. The new version comes with many new features, usability and performance improvements at all levels, including a new Scala API for the concise specification of programs, a Pregel-like API, support for YARN clusters, and major performance improvements. The system now features first-class support for iterative programs and thus covers traditional analytical use cases as well as data mining and graph processing use cases with great performance.</p>
-
-<p>In the course of the transition from v0.2 to v0.4 of the system, we have 
changed pre-existing APIs based on valuable user feedback. This means that, in 
the interest of easier programming, we have broken backwards compatibility and 
existing jobs must be adapted, as described in <a 
href="/blog/tutorial/2014/01/12/0.4-migration-guide.html">the migration 
guide</a>.</p>
-
-<p>This article will guide you through the feature list of the new release.</p>
-
-<h3 id="scala-programming-interface">Scala Programming Interface</h3>
-
-<p>The new Stratosphere version comes with a new programming API in Scala that supports fluent and efficient programs expressed in very few lines of code. The API uses Scala&#39;s native type system (no special boxed data types) and supports grouping and joining on types beyond key/value pairs. We use code analysis and code generation to map Scala&#39;s data model to the Stratosphere runtime. Stratosphere Scala programs are optimized by Stratosphere&#39;s optimizer before execution, just like Stratosphere Java programs.</p>
-
-<p>Learn more about the Scala API in the <a href="/docs/0.4/programming_guides/scala.html">Scala Programming Guide</a>.</p>
-
-<h3 id="iterations">Iterations</h3>
-
-<p>Stratosphere v0.4 introduces deep support for iterative algorithms, which are required by a large class of advanced analysis tasks. In contrast to most other systems, &quot;looping over the data&quot; is done inside the system&#39;s runtime, rather than in the client. Individual iterations (supersteps) can complete in sub-second time. Loop-invariant data is automatically cached in memory.</p>
-
-<p>We support a special form of iterations called &quot;delta iterations&quot; that selectively modify only some elements of the intermediate solution in each iteration. These are applicable to a variety of applications, e.g., the use cases of Apache Giraph. We have observed speedups of 70x when using delta iterations instead of regular iterations.</p>
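-
-<p>To make the idea concrete, the following self-contained sketch (plain Java, deliberately not the Stratosphere API) mimics a delta iteration for connected components: only vertices whose component id changed in the previous superstep are re-examined, so the per-superstep work shrinks as the solution converges.</p>
-
-<pre><code>import java.util.*;
-
-// Plain-Java illustration of the delta-iteration idea (not the Stratosphere API):
-// the workset holds only the elements that changed in the last superstep, and
-// only those are re-processed, so supersteps get cheaper as the job converges.
-public class DeltaIterationSketch {
-    public static void main(String[] args) {
-        Map&lt;Integer, List&lt;Integer>> edges = Map.of(
-                1, List.of(2), 2, List.of(1, 3), 3, List.of(2), 4, List.of(5), 5, List.of(4));
-        Map&lt;Integer, Integer> solution = new HashMap&lt;>();     // vertex -> component id
-        edges.keySet().forEach(v -> solution.put(v, v));
-        Set&lt;Integer> workset = new HashSet&lt;>(edges.keySet()); // vertices changed last superstep
-
-        while (!workset.isEmpty()) {                           // one pass = one superstep
-            Set&lt;Integer> nextWorkset = new HashSet&lt;>();
-            for (int v : workset) {
-                for (int n : edges.get(v)) {
-                    int candidate = solution.get(v);
-                    if (candidate &lt; solution.get(n)) {         // propagate the smaller id
-                        solution.put(n, candidate);
-                        nextWorkset.add(n);                    // only changed vertices re-enter
-                    }
-                }
-            }
-            workset = nextWorkset;
-        }
-        System.out.println(solution);                          // prints {1=1, 2=1, 3=1, 4=4, 5=4}
-    }
-}
-</code></pre>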
-
-<p>Read more about the new iteration feature in <a href="/docs/0.4/programming_guides/iterations.html">the documentation</a>.</p>
-
-<h3 id="hadoop-yarn-support">Hadoop YARN Support</h3>
-
-<p>YARN (Yet Another Resource Negotiator) is the major new feature of the recently announced <a href="http://hadoop.apache.org/docs/r2.2.0/">Hadoop 2.2</a>. It allows existing clusters to be shared among different runtimes, so you can run MapReduce alongside Storm and other systems. With the 0.4 release, Stratosphere supports YARN. Follow <a href="/docs/0.4/setup/yarn.html">our guide</a> on how to start a Stratosphere YARN session.</p>
-
-<h3 id="improved-scripting-language-meteor">Improved Scripting Language 
Meteor</h3>
-
-<p>The high-level language Meteor now natively serializes JSON trees for greater performance and offers additional operators and file formats. We have made it much easier to write concise scripts by adding second-order functions, multi-output operators, and other syntactic sugar. For developers of Meteor packages, the API is much more comprehensive and allows defining custom data types that can easily be embedded in JSON trees through ad-hoc bytecode generation.</p>
-
-<h3 id="spargel:-pregel-inspired-graph-processing">Spargel: Pregel Inspired 
Graph Processing</h3>
-
-<p>Spargel is a vertex-centric API similar to the interface proposed in Google&#39;s Pregel paper and implemented in Apache Giraph. Spargel is implemented in 500 lines of code (including comments) on top of Stratosphere&#39;s delta iterations feature. This demonstrates the flexibility of Stratosphere&#39;s architecture.</p>
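-
-<p>As a plain-Java illustration of the vertex-centric model (again a conceptual sketch, not the Spargel API itself), the following program computes unweighted distances from a source vertex: in every superstep, vertices that received messages update their value and send new messages to their neighbors, and the computation halts once no messages remain.</p>
-
-<pre><code>import java.util.*;
-
-// Plain-Java illustration of vertex-centric ("think like a vertex") processing
-// in the spirit of Pregel/Spargel; it is not the Spargel API itself.
-public class VertexCentricSketch {
-    public static void main(String[] args) {
-        // unweighted, directed graph: vertex -> neighbors
-        Map&lt;Integer, List&lt;Integer>> edges = Map.of(
-                1, List.of(2, 3), 2, List.of(4), 3, List.of(4), 4, List.of());
-        Map&lt;Integer, Integer> distance = new HashMap&lt;>();           // the vertex value
-        edges.keySet().forEach(v -> distance.put(v, Integer.MAX_VALUE));
-
-        Map&lt;Integer, Integer> inbox = new HashMap&lt;>(Map.of(1, 0));  // seed message to the source
-
-        while (!inbox.isEmpty()) {                                   // one pass = one superstep
-            Map&lt;Integer, Integer> nextInbox = new HashMap&lt;>();
-            for (Map.Entry&lt;Integer, Integer> msg : inbox.entrySet()) {
-                int v = msg.getKey(), candidate = msg.getValue();
-                if (candidate &lt; distance.get(v)) {                   // vertex update function
-                    distance.put(v, candidate);
-                    for (int n : edges.get(v)) {                     // messaging function
-                        nextInbox.merge(n, candidate + 1, Math::min);
-                    }
-                }
-            }
-            inbox = nextInbox;
-        }
-        System.out.println(distance);                                // prints {1=0, 2=1, 3=1, 4=2}
-    }
-}
-</code></pre>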
-
-<h3 id="web-frontend">Web Frontend</h3>
-
-<p>Using the new web frontend, you can monitor the progress of Stratosphere 
jobs. For finished jobs, the frontend shows a breakdown of the execution times 
for each operator. The webclient also visualizes the execution strategies 
chosen by the optimizer.</p>
-
-<h3 id="accumulators">Accumulators</h3>
-
-<p>Stratosphere&#39;s accumulators allow program developers to compute simple statistics, such as counts, sums, min/max values, or histograms, as a side effect of the processing functions. An example application would be to count the total number of records/tuples processed by a function. Stratosphere will not launch additional tasks (reducers), but will compute the number &quot;on the fly&quot; as a side product of the function&#39;s application to the data. The concept is similar to Hadoop&#39;s counters, but supports more types of aggregation.</p>
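-
-<p>A hedged sketch of the pattern (class and method names follow the commonly known shape of this API and are assumptions here, not verbatim 0.4 signatures): register the accumulator when the function is opened, update it while processing, and read the aggregated value from the job result after execution.</p>
-
-<pre><code>// Hedged sketch of the accumulator pattern; the exact imports and signatures
-// of the 0.4 record API are assumptions here, so consult the 0.4 documentation
-// for the precise names.
-public class LineCounter extends MapFunction {
-    private final IntCounter numLines = new IntCounter();
-
-    @Override
-    public void open(Configuration parameters) {
-        // register the accumulator under a user-chosen name
-        getRuntimeContext().addAccumulator("num-lines", this.numLines);
-    }
-
-    @Override
-    public void map(Record record, Collector&lt;Record> out) {
-        this.numLines.add(1); // counted on the fly, no extra reduce task is launched
-        out.collect(record);
-    }
-}
-// After the job finishes, the aggregated count can be read from the execution
-// result, e.g. result.getAccumulatorResult("num-lines").
-</code></pre>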
-
-<h3 id="refactored-apis">Refactored APIs</h3>
-
-<p>Based on valuable user feedback, we refactored the Java programming interface to make it more intuitive and easier to use. The basic concepts are still the same; however, the naming of most interfaces has changed and the structure of the code has been adapted. When updating to the 0.4 release you will need to adapt your jobs and dependencies. A previous blog post has a guide to the changes necessary to adapt programs to Stratosphere 0.4.</p>
-
-<h3 id="local-debugging">Local Debugging</h3>
-
-<p>You can now test and debug Stratosphere jobs locally. The <a href="/docs/0.4/program_execution/local_executor.html">LocalExecutor</a> allows you to execute Stratosphere jobs from within an IDE. The same code that runs on clusters also runs multi-threaded in a single JVM. This mode supports the full debugging capabilities known from regular applications (placing breakpoints and stepping through the program&#39;s functions). An advanced mode supports simulating fully distributed operation locally.</p>
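-
-<p>A tiny sketch of what this looks like in practice (as before, the package names and the <code>getPlan(...)</code> entry point are assumptions): run the plan through the LocalExecutor from a plain <code>main()</code> method, set breakpoints in your functions, and step through them as usual.</p>
-
-<pre><code>// Sketch: execute a job inside the current JVM for debugging purposes.
-// Names are assumptions; consult the LocalExecutor documentation linked above.
-import eu.stratosphere.api.common.Plan;
-import eu.stratosphere.client.LocalExecutor;
-
-public class WordCountDebugRun {
-    public static void main(String[] args) throws Exception {
-        Plan plan = new WordCount().getPlan("1", "file:///tmp/in.txt", "file:///tmp/out");
-        LocalExecutor.execute(plan); // runs multi-threaded inside this JVM
-    }
-}
-</code></pre>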
-
-<h3 id="miscellaneous">Miscellaneous</h3>
-
-<ul>
-<li>The configuration of Stratosphere has been changed to YAML</li>
-<li>HBase support</li>
-<li>JDBC input format</li>
-<li>Improved Windows compatibility: batch files to start Stratosphere on Windows, and all unit tests pass on Windows</li>
-<li>Stratosphere is available in Maven Central and the Sonatype Snapshot Repository</li>
-<li>Improved build system that supports different Hadoop versions using Maven profiles</li>
-<li>Maven archetypes for Stratosphere jobs</li>
-<li>Stability and usability improvements with many bug fixes</li>
-</ul>
-
-<h3 id="download-and-get-started-with-stratosphere-v0.4">Download and get 
started with Stratosphere v0.4</h3>
-
-<p>There are several options for getting started with Stratosphere. </p>
-
-<ul>
-<li>Download it on the <a href="/downloads">download page</a></li>
-<li>Start your program with the <a href="/quickstart/">Quick-start 
guides</a>.</li>
-<li>Complete <a href="/docs/0.4/">documentation and set-up guides</a></li>
-</ul>
-
-<h3 id="tell-us-what-you-think!">Tell us what you think!</h3>
-
-<p>Are you using, or planning to use, Stratosphere? Sign up for our <a href="https://groups.google.com/forum/#!forum/stratosphere-dev">mailing list</a> and drop us a line.</p>
-
-<p>Have you found a bug? <a 
href="https://github.com/stratosphere/stratosphere";>Post an issue</a> on 
GitHub.</p>
-
-<p>Follow us on <a href="https://twitter.com/stratosphere_eu";>Twitter</a> and 
<a href="https://github.com/stratosphere/stratosphere";>GitHub</a> to stay in 
touch with the latest news!</p>
-</div>
-                               <a 
href="/news/2014/01/13/stratosphere-release-0.4.html#disqus_thread">Stratosphere
 0.4 Released</a>
-                       </article>
-                       
-               </div>
-               <div class="col-md-2"></div>
-       </div>
-</div>
-
-<script type="text/javascript">
-/* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * */
-var disqus_shortname = 'stratosphere-eu'; // required: replace example with 
your forum shortname
-
-/* * * DON'T EDIT BELOW THIS LINE * * */
-(function () {
-    var s = document.createElement('script'); s.async = true;
-    s.type = 'text/javascript';
-    s.src = '//' + disqus_shortname + '.disqus.com/count.js';
-    (document.getElementsByTagName('HEAD')[0] || 
document.getElementsByTagName('BODY')[0]).appendChild(s);
-}());
-</script>
-
-
-
-<!-- Pagination links -->
-<ul class="pager">
-       <li>
-       
-               <a href="/blog" class="previous">Previous</a>
-       
-       </li>
-       <li>
-               <span class="page_number ">Page: 2 of 3</span>
-       </li>
-       <li>
-       
-               <a href="/blog/page3" class="next">Next</a>
-       
-       </li>
-</ul>
-
-
-    </div>
-    <!--<section id="af-upfooter" class="af-section">
-       <div class="container">
-               <p>Apache Flink is an effort undergoing incubation at The Apache
-                       Software Foundation (ASF), sponsored by the Apache 
Incubator PMC.
-                       Incubation is required of all newly accepted projects 
until a further
-                       review indicates that the infrastructure, 
communications, and
-                       decision making process have stabilized in a manner 
consistent with
-                       other successful ASF projects. While incubation status 
is not
-                       necessarily a reflection of the completeness or 
stability of the
-                       code, it does indicate that the project has yet to be 
fully endorsed
-                       by the ASF.</p>
-               <a href="http://incubator.apache.org";> <img 
class="img-responsive"
-                       src="/img/main/apache-incubator-logo.png" alt="Apache 
Flink" />
-               </a>
-               <p class="text-center">
-                       <a href="/privacy-policy.html" title="Privacy Policy"
-                               class="af-privacy-policy">Privacy Policy</a>
-               </p>
-       </div>
-</section>-->
-
-<footer id="af-footer">
-       <div class="container">
-               <div class="row">
-                       <div class="col-md-3">
-                               <h3>Documentation</h3>
-                               <ul class="af-footer-menu">
-
-                                       <li><a 
href="http://ci.apache.org/projects/flink/flink-docs-release-0.8/";>0.8.1</a></li>
-                                       <li><a 
href="http://ci.apache.org/projects/flink/flink-docs-release-0.8/api/java/";>0.8.1
 Javadocs</a></li>
-                                       <li><a 
href="http://ci.apache.org/projects/flink/flink-docs-release-0.8/api/scala/index.html#org.apache.flink.api.scala.package";>0.8.1
 Scaladocs</a></li>
-                               </ul>
-                       </div>
-                       <div class="col-md-3">
-                               <h3>Community</h3>
-                               <ul class="af-footer-menu">
-                                       <li><a 
href="/community.html#mailing-lists">Mailing Lists</a></li>
-                                       <li><a 
href="https://issues.apache.org/jira/browse/FLINK";
-                                               target="blank">Issues <span
-                                                       class="glyphicon 
glyphicon-new-window"></span></a></li>
-                                       <li><a 
href="/community.html#team">Team</a></li>
-                                       <li><a 
href="/how-to-contribute.html">How to contribute</a></li>
-                                       <li><a 
href="/coding_guidelines.html">Coding Guidelines</a></li>
-                               </ul>
-                       </div>
-                       <div class="col-md-3">
-                               <h3>ASF</h3>
-                               <ul class="af-footer-menu">
-                                       <li><a href="http://www.apache.org/"; 
target="blank">Apache
-                                                       Software foundation 
<span class="glyphicon glyphicon-new-window"></span>
-                                       </a></li>
-                                       <li><a
-                                               
href="http://www.apache.org/foundation/how-it-works.html";
-                                               target="blank">How it works 
<span
-                                                       class="glyphicon 
glyphicon-new-window"></span></a></li>
-                                       <li><a 
href="http://www.apache.org/foundation/thanks.html";
-                                               target="blank">Thanks <span
-                                                       class="glyphicon 
glyphicon-new-window"></span></a></li>
-                                       <li><a
-                                               
href="http://www.apache.org/foundation/sponsorship.html";
-                                               target="blank">Become a sponsor 
<span
-                                                       class="glyphicon 
glyphicon-new-window"></span></a></li>
-                                       <li><a 
href="http://incubator.apache.org/projects/flink.html";
-                                               target="blank">Incubation 
status page <span
-                                                       class="glyphicon 
glyphicon-new-window"></span></a></li>
-                               </ul>
-                       </div>
-                       <div class="col-md-3">
-                               <h3>Project</h3>
-                               <ul class="af-footer-menu">
-                                       <li><a href="/material.html" 
target="blank">Material <span
-                                                       class="glyphicon 
glyphicon-new-window"></span></a></li>
-                                       <li><a
-                                               
href="https://cwiki.apache.org/confluence/display/FLINK";
-                                               target="blank">Wiki <span
-                                                       class="glyphicon 
glyphicon-new-window"></span></a></li>
-                                       <li><a
-                                               
href="https://wiki.apache.org/incubator/StratosphereProposal";
-                                               target="blank">Incubator 
proposal <span
-                                                       class="glyphicon 
glyphicon-new-window"></span></a></li>
-                                       <li><a 
href="http://www.apache.org/licenses/LICENSE-2.0";
-                                               target="blank">License <span
-                                                       class="glyphicon 
glyphicon-new-window"></span></a></li>
-                                       <li><a 
href="https://github.com/apache/incubator-flink";
-                                               target="blank">Source code <span
-                                                       class="glyphicon 
glyphicon-new-window"></span></a></li>
-                               </ul>
-                       </div>
-               </div>
-       </div>
-       <div class="af-footer-bar">
-               <div class="container">
-                 <p>Copyright &copy 2014-2015, <a 
href="http://www.apache.org";>The Apache Software Foundation</a>. All Rights 
Reserved. Apache and the Apache feather logo are trademarks of the Apache 
Software Foundation.
-                  </p>
-                  <div>
-                    <div style="float:left">
-                      <p>
-                        <a href="/privacy-policy.html" title="Privacy Policy" 
class="af-privacy-policy">Privacy Policy</a>
-                    </p>
-                    </div>
-                    <div style="float:right">
-                    <p>
-                      <a href="/blog/feed.xml" class="af-privacy-policy">RSS 
Feed</a>
-                    </p>
-                    </div>
-                   </div>
-               </div>
-       </div>
-</footer>
-
-    <!-- Google Analytics -->
-    <script>
-      
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
-      (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
-      
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
-      
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
-
-      ga('create', 'UA-52545728-1', 'auto');
-      ga('send', 'pageview');
-    </script>
-    <script src="/js/main/jquery.mobile.events.min.js"></script>
-    <script src="/js/main/main.js"></script>
-  </body>
-</html>
