http://git-wip-us.apache.org/repos/asf/flink-web/blob/24f3ba5a/content/news/2014/01/12/0.4-migration-guide.html ---------------------------------------------------------------------- diff --git a/content/news/2014/01/12/0.4-migration-guide.html b/content/news/2014/01/12/0.4-migration-guide.html deleted file mode 100644 index 876d022..0000000 --- a/content/news/2014/01/12/0.4-migration-guide.html +++ /dev/null @@ -1,286 +0,0 @@ -<!DOCTYPE html> -<html lang="en"> - <head> - <meta charset="utf-8"> - <meta http-equiv="X-UA-Compatible" content="IE=edge"> - <meta name="viewport" content="width=device-width, initial-scale=1"> - <!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags --> - <title>Apache Flink: Stratosphere Version 0.4 Migration Guide</title> - <link rel="shortcut icon" href="/favicon.ico" type="image/x-icon"> - <link rel="icon" href="/favicon.ico" type="image/x-icon"> - - <!-- Bootstrap --> - <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/css/bootstrap.min.css"> - <link rel="stylesheet" href="/css/flink.css"> - <link rel="stylesheet" href="/css/syntax.css"> - - <!-- Blog RSS feed --> - <link href="/blog/feed.xml" rel="alternate" type="application/rss+xml" title="Apache Flink Blog: RSS feed" /> - - <!-- jQuery (necessary for Bootstrap's JavaScript plugins) --> - <!-- We need to load Jquery in the header for custom google analytics event tracking--> - <script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.2/jquery.min.js"></script> - - <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries --> - <!-- WARNING: Respond.js doesn't work if you view the page via file:// --> - <!--[if lt IE 9]> - <script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script> - <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script> - <![endif]--> - </head> - <body> - - - <!-- Top navbar. 
--> - <nav class="navbar navbar-default navbar-fixed-top"> - <div class="container"> - <!-- The logo. --> - <div class="navbar-header"> - <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1"> - <span class="icon-bar"></span> - <span class="icon-bar"></span> - <span class="icon-bar"></span> - </button> - <div class="navbar-logo"> - <a href="/"> - <img alt="Apache Flink" src="/img/navbar-brand-logo.jpg" width="78px" height="40px"> - </a> - </div> - </div><!-- /.navbar-header --> - - <!-- The navigation links. --> - <div class="collapse navbar-collapse" id="bs-example-navbar-collapse-1"> - <ul class="nav navbar-nav"> - - <!-- Overview --> - <li><a href="/index.html">Overview</a></li> - - <!-- Features --> - <li><a href="/features.html">Features</a></li> - - <!-- Downloads --> - <li><a href="/downloads.html">Downloads</a></li> - - <!-- FAQ --> - <li><a href="/faq.html">FAQ</a></li> - - - <!-- Quickstart --> - <li class="dropdown"> - <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false"><small><span class="glyphicon glyphicon-new-window"></span></small> Quickstart <span class="caret"></span></a> - <ul class="dropdown-menu" role="menu"> - <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-1.0/quickstart/setup_quickstart.html">Setup</a></li> - <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-1.0/quickstart/run_example_quickstart.html">Example: Wikipedia Edit Stream</a></li> - <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-1.0/quickstart/java_api_quickstart.html">Java API</a></li> - <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-1.0/quickstart/scala_api_quickstart.html">Scala API</a></li> - </ul> - </li> - - <!-- Documentation --> - <li class="dropdown"> - <a href="" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false"><small><span class="glyphicon 
glyphicon-new-window"></span></small> Documentation <span class="caret"></span></a> - <ul class="dropdown-menu" role="menu"> - <!-- Latest stable release --> - <li role="presentation" class="dropdown-header"><strong>Latest Release</strong> (Stable)</li> - <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-1.0">1.0 Documentation</a></li> - <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-1.0/api/java" class="active">1.0 Javadocs</a></li> - <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-1.0/api/scala/index.html" class="active">1.0 ScalaDocs</a></li> - - <!-- Snapshot docs --> - <li class="divider"></li> - <li role="presentation" class="dropdown-header"><strong>Snapshot</strong> (Development)</li> - <li><a href="http://ci.apache.org/projects/flink/flink-docs-master">1.1 Documentation</a></li> - <li><a href="http://ci.apache.org/projects/flink/flink-docs-master/api/java" class="active">1.1 Javadocs</a></li> - <li><a href="http://ci.apache.org/projects/flink/flink-docs-master/api/scala/index.html" class="active">1.1 ScalaDocs</a></li> - - <!-- Wiki --> - <li class="divider"></li> - <li><a href="/visualizer/"><small><span class="glyphicon glyphicon-new-window"></span></small> Plan Visualizer</a></li> - <li><a href="https://cwiki.apache.org/confluence/display/FLINK/Apache+Flink+Home"><small><span class="glyphicon glyphicon-new-window"></span></small> Wiki</a></li> - </ul> - </li> - - </ul> - - <ul class="nav navbar-nav navbar-right"> - <!-- Blog --> - <li class=" active hidden-md hidden-sm"><a href="/blog/">Blog</a></li> - - <li class="dropdown hidden-md hidden-sm"> - <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Community <span class="caret"></span></a> - <ul class="dropdown-menu" role="menu"> - <!-- Community --> - <li role="presentation" class="dropdown-header"><strong>Community</strong></li> - <li><a href="/community.html#mailing-lists">Mailing Lists</a></li> - 
<li><a href="/community.html#irc">IRC</a></li> - <li><a href="/community.html#stack-overflow">Stack Overflow</a></li> - <li><a href="/community.html#issue-tracker">Issue Tracker</a></li> - <li><a href="/community.html#third-party-packages">Third Party Packages</a></li> - <li><a href="/community.html#source-code">Source Code</a></li> - <li><a href="/community.html#people">People</a></li> - <li><a href="https://cwiki.apache.org/confluence/display/FLINK/Powered+by+Flink"><small><span class="glyphicon glyphicon-new-window"></span></small> Powered by Flink</a></li> - - <!-- Contribute --> - <li class="divider"></li> - <li role="presentation" class="dropdown-header"><strong>Contribute</strong></li> - <li><a href="/how-to-contribute.html">How to Contribute</a></li> - <li><a href="/contribute-code.html">Contribute Code</a></li> - <li><a href="/contribute-documentation.html">Contribute Documentation</a></li> - <li><a href="/improve-website.html">Improve the Website</a></li> - </ul> - </li> - - <li class="dropdown hidden-md hidden-sm"> - <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Project <span class="caret"></span></a> - <ul class="dropdown-menu" role="menu"> - <!-- Project --> - <li role="presentation" class="dropdown-header"><strong>Project</strong></li> - <li><a href="/slides.html">Slides</a></li> - <li><a href="/material.html">Material</a></li> - <li><a href="https://twitter.com/apacheflink"><small><span class="glyphicon glyphicon-new-window"></span></small> Twitter</a></li> - <li><a href="https://github.com/apache/flink"><small><span class="glyphicon glyphicon-new-window"></span></small> GitHub</a></li> - <li><a href="https://cwiki.apache.org/confluence/display/FLINK/Apache+Flink+Home"><small><span class="glyphicon glyphicon-new-window"></span></small> Wiki</a></li> - </ul> - </li> - </ul> - </div><!-- /.navbar-collapse --> - </div><!-- /.container --> - </nav> - - - <!-- Main content. 
--> - <div class="container"> - - -<div class="row"> - <div class="col-sm-8 col-sm-offset-2"> - <div class="row"> - <h1>Stratosphere Version 0.4 Migration Guide</h1> - - <article> - <p>12 Jan 2014</p> - -<p>This guide is intended to help users of previous Stratosphere versions to migrate their programs to the new API of v0.4.</p> - -<p>Version <code>0.4-rc1</code>, <code>0.4</code> and all newer versions have the new API. If you want to have the most recent version before the code change, please set the version to <code>0.4-alpha.3-SNAPSHOT</code>. (Note that the <code>0.4-alpha</code> versions are only available in the snapshot repository).</p> - -<h4 id="maven-dependencies">Maven Dependencies</h4> -<p>Since we also reorganized the Maven project structure, existing programs need to update the Maven dependencies to <code>stratosphere-java</code> (and <code>stratosphere-clients</code>, for examples and executors).</p> - -<p>The typical set of Maven dependencies for Stratosphere Java programs is:</p> - -<div class="highlight"><pre><code class="language-diff"> <groupId>eu.stratosphere</groupId> -<span class="gd">- <artifactId>pact-common</artifactId></span> -<span class="gd">- <version>0.4-SNAPSHOT</version></span> -<span class="gi">+ <artifactId>stratosphere-java</artifactId></span> -<span class="gi">+ <version>0.4</version></span> - -<span class="gd">- <artifactId>pact-clients</artifactId></span> -<span class="gd">- <version>0.4-SNAPSHOT</version></span> -<span class="gi">+ <artifactId>stratosphere-clients</artifactId></span> -<span class="gi">+ <version>0.4</version></span></code></pre></div> - -<h4 id="renamed-classes">Renamed classes</h4> - -<p>We renamed many of the most commonly used classes to make their names more intuitive:</p> - -<table class="table table-striped"> - <thead> - <tr> - <th>Old Name (before <code>0.4</code>)</th> - <th>New Name (<code>0.4</code> and after)</th> - </tr> - </thead> - <tbody> - <tr> - <td>Contract</td> - <td>Operator</td> - </tr> 
- <tr> - <td>MatchContract</td> - <td>JoinOperator</td> - </tr> - - <tr> - <td>[Map, Reduce, ...]Stub</td> - <td>[Map, Reduce, ...]Function</td> - </tr> - <tr> - <td>MatchStub</td> - <td>JoinFunction</td> - </tr> - <tr> - <td>Pact[Integer, Double, ...]</td> - <td>IntValue, DoubleValue, ...</td> - </tr> - <tr> - <td>PactRecord</td> - <td>Record</td> - </tr> - <tr> - <td>PlanAssembler</td> - <td>Program</td> - </tr> - <tr> - <td>PlanAssemblerDescription</td> - <td>ProgramDescription</td> - </tr> - <tr> - <td>RecordOutputFormat</td> - <td>CsvOutputFormat</td> - </tr> - </tbody> -</table> - -<p>Package names have been adapted as well. -For a complete overview of the renamings, have a look at <a href="https://github.com/stratosphere/stratosphere/issues/257">issue #257 on GitHub</a>.</p> - -<p>We suggest that Eclipse users adjust their programs as follows: Delete all old Stratosphere imports, then rename the classes (<code>PactRecord</code> to <code>Record</code> and so on). Finally, use the “Organize Imports” function (<code>CTRL+SHIFT+O</code>) to choose the right imports. 
The names should be unique so always pick the classes that are in the <code>eu.stratosphere</code> package.</p> - -<p>Please contact us in the comments below, on the mailing list or on GitHub if you have any issues migrating to the latest Stratosphere release.</p> - - </article> - </div> - - <div class="row"> - <div id="disqus_thread"></div> - <script type="text/javascript"> - /* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * */ - var disqus_shortname = 'stratosphere-eu'; // required: replace example with your forum shortname - - /* * * DON'T EDIT BELOW THIS LINE * * */ - (function() { - var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true; - dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js'; - (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq); - })(); - </script> - </div> - </div> -</div> - - <hr /> - <div class="footer text-center"> - <p>Copyright © 2014-2015 <a href="http://apache.org">The Apache Software Foundation</a>. 
All Rights Reserved.</p> - <p>Apache Flink, Apache, and the Apache feather logo are trademarks of The Apache Software Foundation.</p> - <p><a href="/privacy-policy.html">Privacy Policy</a> · <a href="/blog/feed.xml">RSS feed</a></p> - </div> - - </div><!-- /.container --> - - <!-- Include all compiled plugins (below), or include individual files as needed --> - <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/js/bootstrap.min.js"></script> - <script src="/js/codetabs.js"></script> - - <!-- Google Analytics --> - <script> - (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ - (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), - m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) - })(window,document,'script','//www.google-analytics.com/analytics.js','ga'); - - ga('create', 'UA-52545728-1', 'auto'); - ga('send', 'pageview'); - </script> - </body> -</html>
http://git-wip-us.apache.org/repos/asf/flink-web/blob/24f3ba5a/content/news/2014/01/13/stratosphere-release-0.4.html ---------------------------------------------------------------------- diff --git a/content/news/2014/01/13/stratosphere-release-0.4.html b/content/news/2014/01/13/stratosphere-release-0.4.html deleted file mode 100644 index 4021fcd..0000000 --- a/content/news/2014/01/13/stratosphere-release-0.4.html +++ /dev/null @@ -1,279 +0,0 @@ -<!DOCTYPE html> -<html lang="en"> - <head> - <meta charset="utf-8"> - <meta http-equiv="X-UA-Compatible" content="IE=edge"> - <meta name="viewport" content="width=device-width, initial-scale=1"> - <!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags --> - <title>Apache Flink: Stratosphere 0.4 Released</title> - <link rel="shortcut icon" href="/favicon.ico" type="image/x-icon"> - <link rel="icon" href="/favicon.ico" type="image/x-icon"> - - <!-- Bootstrap --> - <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/css/bootstrap.min.css"> - <link rel="stylesheet" href="/css/flink.css"> - <link rel="stylesheet" href="/css/syntax.css"> - - <!-- Blog RSS feed --> - <link href="/blog/feed.xml" rel="alternate" type="application/rss+xml" title="Apache Flink Blog: RSS feed" /> - - <!-- jQuery (necessary for Bootstrap's JavaScript plugins) --> - <!-- We need to load Jquery in the header for custom google analytics event tracking--> - <script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.2/jquery.min.js"></script> - - <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries --> - <!-- WARNING: Respond.js doesn't work if you view the page via file:// --> - <!--[if lt IE 9]> - <script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script> - <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script> - <![endif]--> - </head> - <body> - - - <!-- Top navbar. 
--> - <nav class="navbar navbar-default navbar-fixed-top"> - <div class="container"> - <!-- The logo. --> - <div class="navbar-header"> - <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1"> - <span class="icon-bar"></span> - <span class="icon-bar"></span> - <span class="icon-bar"></span> - </button> - <div class="navbar-logo"> - <a href="/"> - <img alt="Apache Flink" src="/img/navbar-brand-logo.jpg" width="78px" height="40px"> - </a> - </div> - </div><!-- /.navbar-header --> - - <!-- The navigation links. --> - <div class="collapse navbar-collapse" id="bs-example-navbar-collapse-1"> - <ul class="nav navbar-nav"> - - <!-- Overview --> - <li><a href="/index.html">Overview</a></li> - - <!-- Features --> - <li><a href="/features.html">Features</a></li> - - <!-- Downloads --> - <li><a href="/downloads.html">Downloads</a></li> - - <!-- FAQ --> - <li><a href="/faq.html">FAQ</a></li> - - - <!-- Quickstart --> - <li class="dropdown"> - <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false"><small><span class="glyphicon glyphicon-new-window"></span></small> Quickstart <span class="caret"></span></a> - <ul class="dropdown-menu" role="menu"> - <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-1.0/quickstart/setup_quickstart.html">Setup</a></li> - <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-1.0/quickstart/run_example_quickstart.html">Example: Wikipedia Edit Stream</a></li> - <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-1.0/quickstart/java_api_quickstart.html">Java API</a></li> - <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-1.0/quickstart/scala_api_quickstart.html">Scala API</a></li> - </ul> - </li> - - <!-- Documentation --> - <li class="dropdown"> - <a href="" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false"><small><span class="glyphicon 
glyphicon-new-window"></span></small> Documentation <span class="caret"></span></a> - <ul class="dropdown-menu" role="menu"> - <!-- Latest stable release --> - <li role="presentation" class="dropdown-header"><strong>Latest Release</strong> (Stable)</li> - <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-1.0">1.0 Documentation</a></li> - <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-1.0/api/java" class="active">1.0 Javadocs</a></li> - <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-1.0/api/scala/index.html" class="active">1.0 ScalaDocs</a></li> - - <!-- Snapshot docs --> - <li class="divider"></li> - <li role="presentation" class="dropdown-header"><strong>Snapshot</strong> (Development)</li> - <li><a href="http://ci.apache.org/projects/flink/flink-docs-master">1.1 Documentation</a></li> - <li><a href="http://ci.apache.org/projects/flink/flink-docs-master/api/java" class="active">1.1 Javadocs</a></li> - <li><a href="http://ci.apache.org/projects/flink/flink-docs-master/api/scala/index.html" class="active">1.1 ScalaDocs</a></li> - - <!-- Wiki --> - <li class="divider"></li> - <li><a href="/visualizer/"><small><span class="glyphicon glyphicon-new-window"></span></small> Plan Visualizer</a></li> - <li><a href="https://cwiki.apache.org/confluence/display/FLINK/Apache+Flink+Home"><small><span class="glyphicon glyphicon-new-window"></span></small> Wiki</a></li> - </ul> - </li> - - </ul> - - <ul class="nav navbar-nav navbar-right"> - <!-- Blog --> - <li class=" active hidden-md hidden-sm"><a href="/blog/">Blog</a></li> - - <li class="dropdown hidden-md hidden-sm"> - <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Community <span class="caret"></span></a> - <ul class="dropdown-menu" role="menu"> - <!-- Community --> - <li role="presentation" class="dropdown-header"><strong>Community</strong></li> - <li><a href="/community.html#mailing-lists">Mailing Lists</a></li> - 
<li><a href="/community.html#irc">IRC</a></li> - <li><a href="/community.html#stack-overflow">Stack Overflow</a></li> - <li><a href="/community.html#issue-tracker">Issue Tracker</a></li> - <li><a href="/community.html#third-party-packages">Third Party Packages</a></li> - <li><a href="/community.html#source-code">Source Code</a></li> - <li><a href="/community.html#people">People</a></li> - <li><a href="https://cwiki.apache.org/confluence/display/FLINK/Powered+by+Flink"><small><span class="glyphicon glyphicon-new-window"></span></small> Powered by Flink</a></li> - - <!-- Contribute --> - <li class="divider"></li> - <li role="presentation" class="dropdown-header"><strong>Contribute</strong></li> - <li><a href="/how-to-contribute.html">How to Contribute</a></li> - <li><a href="/contribute-code.html">Contribute Code</a></li> - <li><a href="/contribute-documentation.html">Contribute Documentation</a></li> - <li><a href="/improve-website.html">Improve the Website</a></li> - </ul> - </li> - - <li class="dropdown hidden-md hidden-sm"> - <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Project <span class="caret"></span></a> - <ul class="dropdown-menu" role="menu"> - <!-- Project --> - <li role="presentation" class="dropdown-header"><strong>Project</strong></li> - <li><a href="/slides.html">Slides</a></li> - <li><a href="/material.html">Material</a></li> - <li><a href="https://twitter.com/apacheflink"><small><span class="glyphicon glyphicon-new-window"></span></small> Twitter</a></li> - <li><a href="https://github.com/apache/flink"><small><span class="glyphicon glyphicon-new-window"></span></small> GitHub</a></li> - <li><a href="https://cwiki.apache.org/confluence/display/FLINK/Apache+Flink+Home"><small><span class="glyphicon glyphicon-new-window"></span></small> Wiki</a></li> - </ul> - </li> - </ul> - </div><!-- /.navbar-collapse --> - </div><!-- /.container --> - </nav> - - - <!-- Main content. 
--> - <div class="container"> - - -<div class="row"> - <div class="col-sm-8 col-sm-offset-2"> - <div class="row"> - <h1>Stratosphere 0.4 Released</h1> - - <article> - <p>13 Jan 2014</p> - -<p>We are pleased to announce that version 0.4 of the Stratosphere system has been released. </p> - -<p>Our team has been working hard during the last few months to create an improved and stable Stratosphere version. The new version comes with many new features, usability and performance improvements at all levels, including a new Scala API for the concise specification of programs, a Pregel-like API, support for Yarn clusters, and major performance improvements. The system now features first-class support for iterative programs and thus covers traditional analytical use cases as well as data mining and graph processing use cases with great performance.</p> - -<p>In the course of the transition from v0.2 to v0.4 of the system, we have changed pre-existing APIs based on valuable user feedback. This means that, in the interest of easier programming, we have broken backwards compatibility and existing jobs must be adapted, as described in <a href="/blog/tutorial/2014/01/12/0.4-migration-guide.html">the migration guide</a>.</p> - -<p>This article will guide you through the feature list of the new release.</p> - -<h3 id="scala-programming-interface">Scala Programming Interface</h3> -<p>The new Stratosphere version comes with a new programming API in Scala that supports very fluent and efficient programs that can be expressed with very few lines of code. The API uses Scala’s native type system (no special boxed data types) and supports grouping and joining on types beyond key/value pairs. We use code analysis and code generation to transform Scala’s data model to the Stratosphere runtime. 
Stratosphere Scala programs are optimized before execution by Stratosphere’s optimizer just like Stratosphere Java programs.</p> - -<p>Learn more about the Scala API at the <a href="/docs/0.4/programming_guides/scala.html">Scala Programming Guide</a>.</p> - -<h3 id="iterations">Iterations</h3> -<p>Stratosphere v0.4 introduces deep support for iterative algorithms, required by a large class of advanced analysis algorithms. In contrast to most other systems, “looping over the data” is done inside the system’s runtime, rather than in the client. Individual iterations (supersteps) can be as fast as sub-second times. Loop-invariant data is automatically cached in memory.</p> - -<p>We support a special form of iterations called “delta iterations” that selectively modify only some elements of the intermediate solution in each iteration. These are applicable to a variety of applications, e.g., use cases of Apache Giraph. We have observed speedups of 70x when using delta iterations instead of regular iterations.</p> - -<p>Read more about the new iteration feature in <a href="/docs/0.4/programming_guides/iterations.html">the documentation</a>.</p> - -<h3 id="hadoop-yarn-support">Hadoop YARN Support</h3> -<p>YARN (Yet Another Resource Negotiator) is the major new feature of the recently announced <a href="http://hadoop.apache.org/docs/r2.2.0/">Hadoop 2.2</a>. It allows existing clusters to be shared among different runtimes, so you can run MapReduce alongside Storm and others. With the 0.4 release, Stratosphere supports YARN. -Follow <a href="/docs/0.4/setup/yarn.html">our guide</a> on how to start a Stratosphere YARN session.</p> - -<h3 id="improved-scripting-language-meteor">Improved Scripting Language Meteor</h3> -<p>The high-level language Meteor now natively serializes JSON trees for greater performance and offers additional operators and file formats. 
We greatly empowered the user to write crisper scripts by adding second-order functions, multi-output operators, and other syntactic sugar. For developers of Meteor packages, the API is much more comprehensive and allows defining custom data types that can be easily embedded in JSON trees through ad-hoc byte code generation.</p> - -<h3 id="spargel-pregel-inspired-graph-processing">Spargel: Pregel Inspired Graph Processing</h3> -<p>Spargel is a vertex-centric API similar to the interface proposed in Google’s Pregel paper and implemented in Apache Giraph. Spargel is implemented in 500 lines of code (including comments) on top of Stratosphere’s delta iterations feature. This confirms the flexibility of Stratosphere’s architecture. </p> - -<h3 id="web-frontend">Web Frontend</h3> -<p>Using the new web frontend, you can monitor the progress of Stratosphere jobs. For finished jobs, the frontend shows a breakdown of the execution times for each operator. The webclient also visualizes the execution strategies chosen by the optimizer.</p> - -<h3 id="accumulators">Accumulators</h3> -<p>Stratosphere’s accumulators allow program developers to compute simple statistics, such as counts, sums, min/max values, or histograms, as a side effect of the processing functions. An example application would be to count the total number of records/tuples processed by a function. Stratosphere will not launch additional tasks (reducers), but will compute the number “on the fly” as a side-product of the function’s application to the data. The concept is similar to Hadoop’s counters, but supports more types of aggregation.</p> - -<h3 id="refactored-apis">Refactored APIs</h3> -<p>Based on valuable user feedback, we refactored the Java programming interface to make it more intuitive and easier to use. The basic concepts are still the same; however, the naming of most interfaces changed and the structure of the code was adapted. 
When updating to the 0.4 release, you will need to adapt your jobs and dependencies. A previous blog post has a guide to the necessary changes to adapt programs to Stratosphere 0.4.</p> - -<h3 id="local-debugging">Local Debugging</h3> -<p>You can now test and debug Stratosphere jobs locally. The <a href="/docs/0.4/program_execution/local_executor.html">LocalExecutor</a> allows you to execute Stratosphere jobs from IDEs. The same code that runs on clusters also runs multi-threaded in a single JVM. The mode supports full debugging capabilities known from regular applications (placing breakpoints and stepping through the program’s functions). An advanced mode supports simulating fully distributed operation locally.</p> - -<h3 id="miscellaneous">Miscellaneous</h3> - -<ul> - <li>The configuration of Stratosphere has been changed to YAML</li> - <li>HBase support</li> - <li>JDBC Input format</li> - <li>Improved Windows Compatibility: Batch-files to start Stratosphere on Windows and all unit tests passing on Windows.</li> - <li>Stratosphere is available in Maven Central and Sonatype Snapshot Repository</li> - <li>Improved build system that supports different Hadoop versions using Maven profiles</li> - <li>Maven Archetypes for Stratosphere Jobs.</li> - <li>Stability and Usability improvements with many bug fixes.</li> -</ul> - -<h3 id="download-and-get-started-with-stratosphere-v04">Download and get started with Stratosphere v0.4</h3> -<p>There are several options for getting started with Stratosphere. </p> - -<ul> - <li>Download it on the <a href="/downloads">download page</a></li> - <li>Start your program with the <a href="/quickstart/">Quick-start guides</a>.</li> - <li>Complete <a href="/docs/0.4/">documentation and set-up guides</a></li> -</ul> - -<h3 id="tell-us-what-you-think">Tell us what you think!</h3> -<p>Are you using, or planning to use, Stratosphere? 
Sign up in our <a href="https://groups.google.com/forum/#!forum/stratosphere-dev">mailing list</a> and drop us a line.</p> - -<p>Have you found a bug? <a href="https://github.com/stratosphere/stratosphere">Post an issue</a> on GitHub.</p> - -<p>Follow us on <a href="https://twitter.com/stratosphere_eu">Twitter</a> and <a href="https://github.com/stratosphere/stratosphere">GitHub</a> to stay in touch with the latest news!</p> - - - </article> - </div> - - <div class="row"> - <div id="disqus_thread"></div> - <script type="text/javascript"> - /* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * */ - var disqus_shortname = 'stratosphere-eu'; // required: replace example with your forum shortname - - /* * * DON'T EDIT BELOW THIS LINE * * */ - (function() { - var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true; - dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js'; - (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq); - })(); - </script> - </div> - </div> -</div> - - <hr /> - <div class="footer text-center"> - <p>Copyright © 2014-2015 <a href="http://apache.org">The Apache Software Foundation</a>. 
All Rights Reserved.</p> - <p>Apache Flink, Apache, and the Apache feather logo are trademarks of The Apache Software Foundation.</p> - <p><a href="/privacy-policy.html">Privacy Policy</a> · <a href="/blog/feed.xml">RSS feed</a></p> - </div> - - </div><!-- /.container --> - - <!-- Include all compiled plugins (below), or include individual files as needed --> - <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/js/bootstrap.min.js"></script> - <script src="/js/codetabs.js"></script> - - <!-- Google Analytics --> - <script> - (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ - (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), - m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) - })(window,document,'script','//www.google-analytics.com/analytics.js','ga'); - - ga('create', 'UA-52545728-1', 'auto'); - ga('send', 'pageview'); - </script> - </body> -</html> http://git-wip-us.apache.org/repos/asf/flink-web/blob/24f3ba5a/content/news/2014/01/26/optimizer_plan_visualization_tool.html ---------------------------------------------------------------------- diff --git a/content/news/2014/01/26/optimizer_plan_visualization_tool.html b/content/news/2014/01/26/optimizer_plan_visualization_tool.html deleted file mode 100644 index bb0003f..0000000 --- a/content/news/2014/01/26/optimizer_plan_visualization_tool.html +++ /dev/null @@ -1,228 +0,0 @@ -<!DOCTYPE html> -<html lang="en"> - <head> - <meta charset="utf-8"> - <meta http-equiv="X-UA-Compatible" content="IE=edge"> - <meta name="viewport" content="width=device-width, initial-scale=1"> - <!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags --> - <title>Apache Flink: Optimizer Plan Visualization Tool</title> - <link rel="shortcut icon" href="/favicon.ico" type="image/x-icon"> - <link rel="icon" href="/favicon.ico" type="image/x-icon"> - - <!-- Bootstrap --> - <link 
rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/css/bootstrap.min.css"> - <link rel="stylesheet" href="/css/flink.css"> - <link rel="stylesheet" href="/css/syntax.css"> - - <!-- Blog RSS feed --> - <link href="/blog/feed.xml" rel="alternate" type="application/rss+xml" title="Apache Flink Blog: RSS feed" /> - - <!-- jQuery (necessary for Bootstrap's JavaScript plugins) --> - <!-- We need to load Jquery in the header for custom google analytics event tracking--> - <script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.2/jquery.min.js"></script> - - <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries --> - <!-- WARNING: Respond.js doesn't work if you view the page via file:// --> - <!--[if lt IE 9]> - <script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script> - <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script> - <![endif]--> - </head> - <body> - - - <!-- Top navbar. --> - <nav class="navbar navbar-default navbar-fixed-top"> - <div class="container"> - <!-- The logo. --> - <div class="navbar-header"> - <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1"> - <span class="icon-bar"></span> - <span class="icon-bar"></span> - <span class="icon-bar"></span> - </button> - <div class="navbar-logo"> - <a href="/"> - <img alt="Apache Flink" src="/img/navbar-brand-logo.jpg" width="78px" height="40px"> - </a> - </div> - </div><!-- /.navbar-header --> - - <!-- The navigation links. 
--> - <div class="collapse navbar-collapse" id="bs-example-navbar-collapse-1"> - <ul class="nav navbar-nav"> - - <!-- Overview --> - <li><a href="/index.html">Overview</a></li> - - <!-- Features --> - <li><a href="/features.html">Features</a></li> - - <!-- Downloads --> - <li><a href="/downloads.html">Downloads</a></li> - - <!-- FAQ --> - <li><a href="/faq.html">FAQ</a></li> - - - <!-- Quickstart --> - <li class="dropdown"> - <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false"><small><span class="glyphicon glyphicon-new-window"></span></small> Quickstart <span class="caret"></span></a> - <ul class="dropdown-menu" role="menu"> - <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-1.0/quickstart/setup_quickstart.html">Setup</a></li> - <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-1.0/quickstart/run_example_quickstart.html">Example: Wikipedia Edit Stream</a></li> - <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-1.0/quickstart/java_api_quickstart.html">Java API</a></li> - <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-1.0/quickstart/scala_api_quickstart.html">Scala API</a></li> - </ul> - </li> - - <!-- Documentation --> - <li class="dropdown"> - <a href="" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false"><small><span class="glyphicon glyphicon-new-window"></span></small> Documentation <span class="caret"></span></a> - <ul class="dropdown-menu" role="menu"> - <!-- Latest stable release --> - <li role="presentation" class="dropdown-header"><strong>Latest Release</strong> (Stable)</li> - <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-1.0">1.0 Documentation</a></li> - <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-1.0/api/java" class="active">1.0 Javadocs</a></li> - <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-1.0/api/scala/index.html" class="active">1.0 
ScalaDocs</a></li> - - <!-- Snapshot docs --> - <li class="divider"></li> - <li role="presentation" class="dropdown-header"><strong>Snapshot</strong> (Development)</li> - <li><a href="http://ci.apache.org/projects/flink/flink-docs-master">1.1 Documentation</a></li> - <li><a href="http://ci.apache.org/projects/flink/flink-docs-master/api/java" class="active">1.1 Javadocs</a></li> - <li><a href="http://ci.apache.org/projects/flink/flink-docs-master/api/scala/index.html" class="active">1.1 ScalaDocs</a></li> - - <!-- Wiki --> - <li class="divider"></li> - <li><a href="/visualizer/"><small><span class="glyphicon glyphicon-new-window"></span></small> Plan Visualizer</a></li> - <li><a href="https://cwiki.apache.org/confluence/display/FLINK/Apache+Flink+Home"><small><span class="glyphicon glyphicon-new-window"></span></small> Wiki</a></li> - </ul> - </li> - - </ul> - - <ul class="nav navbar-nav navbar-right"> - <!-- Blog --> - <li class=" active hidden-md hidden-sm"><a href="/blog/">Blog</a></li> - - <li class="dropdown hidden-md hidden-sm"> - <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Community <span class="caret"></span></a> - <ul class="dropdown-menu" role="menu"> - <!-- Community --> - <li role="presentation" class="dropdown-header"><strong>Community</strong></li> - <li><a href="/community.html#mailing-lists">Mailing Lists</a></li> - <li><a href="/community.html#irc">IRC</a></li> - <li><a href="/community.html#stack-overflow">Stack Overflow</a></li> - <li><a href="/community.html#issue-tracker">Issue Tracker</a></li> - <li><a href="/community.html#third-party-packages">Third Party Packages</a></li> - <li><a href="/community.html#source-code">Source Code</a></li> - <li><a href="/community.html#people">People</a></li> - <li><a href="https://cwiki.apache.org/confluence/display/FLINK/Powered+by+Flink"><small><span class="glyphicon glyphicon-new-window"></span></small> Powered by Flink</a></li> - - <!-- Contribute --> - 
<li class="divider"></li> - <li role="presentation" class="dropdown-header"><strong>Contribute</strong></li> - <li><a href="/how-to-contribute.html">How to Contribute</a></li> - <li><a href="/contribute-code.html">Contribute Code</a></li> - <li><a href="/contribute-documentation.html">Contribute Documentation</a></li> - <li><a href="/improve-website.html">Improve the Website</a></li> - </ul> - </li> - - <li class="dropdown hidden-md hidden-sm"> - <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Project <span class="caret"></span></a> - <ul class="dropdown-menu" role="menu"> - <!-- Project --> - <li role="presentation" class="dropdown-header"><strong>Project</strong></li> - <li><a href="/slides.html">Slides</a></li> - <li><a href="/material.html">Material</a></li> - <li><a href="https://twitter.com/apacheflink"><small><span class="glyphicon glyphicon-new-window"></span></small> Twitter</a></li> - <li><a href="https://github.com/apache/flink"><small><span class="glyphicon glyphicon-new-window"></span></small> GitHub</a></li> - <li><a href="https://cwiki.apache.org/confluence/display/FLINK/Apache+Flink+Home"><small><span class="glyphicon glyphicon-new-window"></span></small> Wiki</a></li> - </ul> - </li> - </ul> - </div><!-- /.navbar-collapse --> - </div><!-- /.container --> - </nav> - - - <!-- Main content. --> - <div class="container"> - - -<div class="row"> - <div class="col-sm-8 col-sm-offset-2"> - <div class="row"> - <h1>Optimizer Plan Visualization Tool</h1> - - <article> - <p>26 Jan 2014</p> - -<p>Stratosphere’s hybrid approach combines <strong>MapReduce</strong> and <strong>MPP database</strong> techniques. One central part of this approach is to have a <strong>separation between the programming (API) and the way programs are executed</strong> <em>(execution plans)</em>. The <strong>compiler/optimizer</strong> decides the details concerning caching or when to partition/broadcast with a holistic view of the program.
The same program may actually be executed differently in different scenarios (input data of different sizes, different number of machines).</p> - -<p><strong>If you want to know how exactly the system executes your program, you can find it out in two ways</strong>:</p> - -<ol> - <li> - <p>The <strong>browser-based webclient UI</strong>, which takes programs packaged into JARs and draws the execution plan as a visual data flow (check out the <a href="http://stratosphere.eu/docs/0.4/program_execution/web_interface.html">documentation</a> for details).</p> - </li> - <li> - <p>For <strong>programs using the <a href="http://stratosphere.eu/docs/0.4/program_execution/local_executor.html">Local-</a> or <a href="http://stratosphere.eu/docs/0.4/program_execution/remote_executor.html">Remote Executor</a></strong>, you can get the optimizer plan using the method <code>LocalExecutor.optimizerPlanAsJSON(plan)</code>. The <strong>resulting JSON</strong> string describes the execution strategies chosen by the optimizer. Naturally, you do not want to parse that yourself, especially for longer programs.</p> - </li> -</ol> - -<p>The builds <em>0.5-SNAPSHOT</em> and later come with a <strong>tool that visualizes the JSON</strong> string. It is a standalone version of the webclient’s visualization, packed as an html document <code>tools/planVisualizer.html</code>.</p> - -<p>If you open it in a browser (for example <code>chromium-browser tools/planVisualizer.html</code>) it shows a text area where you can paste the JSON string and it renders that string as a dataflow plan (assuming it was a valid JSON string and plan).
The pictures below show how that looks for the <a href="https://github.com/stratosphere/stratosphere/blob/release-0.4/stratosphere-examples/stratosphere-java-examples/src/main/java/eu/stratosphere/example/java/record/connectedcomponents/WorksetConnectedComponents.java?source=cc">included sample program</a> that uses delta iterations to compute the connected components of a graph.</p> - -<p><img src="/img/blog/plan_visualizer1.png" style="width:100%;" /></p> - -<p><img src="/img/blog/plan_visualizer2.png" style="width:100%;" /></p> - - </article> - </div> - - <div class="row"> - <div id="disqus_thread"></div> - <script type="text/javascript"> - /* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * */ - var disqus_shortname = 'stratosphere-eu'; // required: replace example with your forum shortname - - /* * * DON'T EDIT BELOW THIS LINE * * */ - (function() { - var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true; - dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js'; - (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq); - })(); - </script> - </div> - </div> -</div> - - <hr /> - <div class="footer text-center"> - <p>Copyright © 2014-2015 <a href="http://apache.org">The Apache Software Foundation</a>. 
All Rights Reserved.</p> - <p>Apache Flink, Apache, and the Apache feather logo are trademarks of The Apache Software Foundation.</p> - <p><a href="/privacy-policy.html">Privacy Policy</a> · <a href="/blog/feed.xml">RSS feed</a></p> - </div> - - </div><!-- /.container --> - - <!-- Include all compiled plugins (below), or include individual files as needed --> - <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/js/bootstrap.min.js"></script> - <script src="/js/codetabs.js"></script> - - <!-- Google Analytics --> - <script> - (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ - (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), - m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) - })(window,document,'script','//www.google-analytics.com/analytics.js','ga'); - - ga('create', 'UA-52545728-1', 'auto'); - ga('send', 'pageview'); - </script> - </body> -</html> http://git-wip-us.apache.org/repos/asf/flink-web/blob/24f3ba5a/content/news/2014/01/28/querying_mongodb.html ---------------------------------------------------------------------- diff --git a/content/news/2014/01/28/querying_mongodb.html b/content/news/2014/01/28/querying_mongodb.html deleted file mode 100644 index 19708b0..0000000 --- a/content/news/2014/01/28/querying_mongodb.html +++ /dev/null @@ -1,301 +0,0 @@ -<!DOCTYPE html> -<html lang="en"> - <head> - <meta charset="utf-8"> - <meta http-equiv="X-UA-Compatible" content="IE=edge"> - <meta name="viewport" content="width=device-width, initial-scale=1"> - <!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags --> - <title>Apache Flink: Accessing Data Stored in MongoDB with Stratosphere</title> - <link rel="shortcut icon" href="/favicon.ico" type="image/x-icon"> - <link rel="icon" href="/favicon.ico" type="image/x-icon"> - - <!-- Bootstrap --> - <link rel="stylesheet" 
href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/css/bootstrap.min.css"> - <link rel="stylesheet" href="/css/flink.css"> - <link rel="stylesheet" href="/css/syntax.css"> - - <!-- Blog RSS feed --> - <link href="/blog/feed.xml" rel="alternate" type="application/rss+xml" title="Apache Flink Blog: RSS feed" /> - - <!-- jQuery (necessary for Bootstrap's JavaScript plugins) --> - <!-- We need to load Jquery in the header for custom google analytics event tracking--> - <script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.2/jquery.min.js"></script> - - <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries --> - <!-- WARNING: Respond.js doesn't work if you view the page via file:// --> - <!--[if lt IE 9]> - <script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script> - <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script> - <![endif]--> - </head> - <body> - - - <!-- Top navbar. --> - <nav class="navbar navbar-default navbar-fixed-top"> - <div class="container"> - <!-- The logo. --> - <div class="navbar-header"> - <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1"> - <span class="icon-bar"></span> - <span class="icon-bar"></span> - <span class="icon-bar"></span> - </button> - <div class="navbar-logo"> - <a href="/"> - <img alt="Apache Flink" src="/img/navbar-brand-logo.jpg" width="78px" height="40px"> - </a> - </div> - </div><!-- /.navbar-header --> - - <!-- The navigation links. 
--> - <div class="collapse navbar-collapse" id="bs-example-navbar-collapse-1"> - <ul class="nav navbar-nav"> - - <!-- Overview --> - <li><a href="/index.html">Overview</a></li> - - <!-- Features --> - <li><a href="/features.html">Features</a></li> - - <!-- Downloads --> - <li><a href="/downloads.html">Downloads</a></li> - - <!-- FAQ --> - <li><a href="/faq.html">FAQ</a></li> - - - <!-- Quickstart --> - <li class="dropdown"> - <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false"><small><span class="glyphicon glyphicon-new-window"></span></small> Quickstart <span class="caret"></span></a> - <ul class="dropdown-menu" role="menu"> - <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-1.0/quickstart/setup_quickstart.html">Setup</a></li> - <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-1.0/quickstart/run_example_quickstart.html">Example: Wikipedia Edit Stream</a></li> - <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-1.0/quickstart/java_api_quickstart.html">Java API</a></li> - <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-1.0/quickstart/scala_api_quickstart.html">Scala API</a></li> - </ul> - </li> - - <!-- Documentation --> - <li class="dropdown"> - <a href="" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false"><small><span class="glyphicon glyphicon-new-window"></span></small> Documentation <span class="caret"></span></a> - <ul class="dropdown-menu" role="menu"> - <!-- Latest stable release --> - <li role="presentation" class="dropdown-header"><strong>Latest Release</strong> (Stable)</li> - <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-1.0">1.0 Documentation</a></li> - <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-1.0/api/java" class="active">1.0 Javadocs</a></li> - <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-1.0/api/scala/index.html" class="active">1.0 
ScalaDocs</a></li> - - <!-- Snapshot docs --> - <li class="divider"></li> - <li role="presentation" class="dropdown-header"><strong>Snapshot</strong> (Development)</li> - <li><a href="http://ci.apache.org/projects/flink/flink-docs-master">1.1 Documentation</a></li> - <li><a href="http://ci.apache.org/projects/flink/flink-docs-master/api/java" class="active">1.1 Javadocs</a></li> - <li><a href="http://ci.apache.org/projects/flink/flink-docs-master/api/scala/index.html" class="active">1.1 ScalaDocs</a></li> - - <!-- Wiki --> - <li class="divider"></li> - <li><a href="/visualizer/"><small><span class="glyphicon glyphicon-new-window"></span></small> Plan Visualizer</a></li> - <li><a href="https://cwiki.apache.org/confluence/display/FLINK/Apache+Flink+Home"><small><span class="glyphicon glyphicon-new-window"></span></small> Wiki</a></li> - </ul> - </li> - - </ul> - - <ul class="nav navbar-nav navbar-right"> - <!-- Blog --> - <li class=" active hidden-md hidden-sm"><a href="/blog/">Blog</a></li> - - <li class="dropdown hidden-md hidden-sm"> - <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Community <span class="caret"></span></a> - <ul class="dropdown-menu" role="menu"> - <!-- Community --> - <li role="presentation" class="dropdown-header"><strong>Community</strong></li> - <li><a href="/community.html#mailing-lists">Mailing Lists</a></li> - <li><a href="/community.html#irc">IRC</a></li> - <li><a href="/community.html#stack-overflow">Stack Overflow</a></li> - <li><a href="/community.html#issue-tracker">Issue Tracker</a></li> - <li><a href="/community.html#third-party-packages">Third Party Packages</a></li> - <li><a href="/community.html#source-code">Source Code</a></li> - <li><a href="/community.html#people">People</a></li> - <li><a href="https://cwiki.apache.org/confluence/display/FLINK/Powered+by+Flink"><small><span class="glyphicon glyphicon-new-window"></span></small> Powered by Flink</a></li> - - <!-- Contribute --> - 
<li class="divider"></li> - <li role="presentation" class="dropdown-header"><strong>Contribute</strong></li> - <li><a href="/how-to-contribute.html">How to Contribute</a></li> - <li><a href="/contribute-code.html">Contribute Code</a></li> - <li><a href="/contribute-documentation.html">Contribute Documentation</a></li> - <li><a href="/improve-website.html">Improve the Website</a></li> - </ul> - </li> - - <li class="dropdown hidden-md hidden-sm"> - <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Project <span class="caret"></span></a> - <ul class="dropdown-menu" role="menu"> - <!-- Project --> - <li role="presentation" class="dropdown-header"><strong>Project</strong></li> - <li><a href="/slides.html">Slides</a></li> - <li><a href="/material.html">Material</a></li> - <li><a href="https://twitter.com/apacheflink"><small><span class="glyphicon glyphicon-new-window"></span></small> Twitter</a></li> - <li><a href="https://github.com/apache/flink"><small><span class="glyphicon glyphicon-new-window"></span></small> GitHub</a></li> - <li><a href="https://cwiki.apache.org/confluence/display/FLINK/Apache+Flink+Home"><small><span class="glyphicon glyphicon-new-window"></span></small> Wiki</a></li> - </ul> - </li> - </ul> - </div><!-- /.navbar-collapse --> - </div><!-- /.container --> - </nav> - - - <!-- Main content. --> - <div class="container"> - - -<div class="row"> - <div class="col-sm-8 col-sm-offset-2"> - <div class="row"> - <h1>Accessing Data Stored in MongoDB with Stratosphere</h1> - - <article> - <p>28 Jan 2014 by Robert Metzger (<a href="https://twitter.com/rmetzger_">@rmetzger_</a>)</p> - -<p>We recently merged a <a href="https://github.com/stratosphere/stratosphere/pull/437">pull request</a> that allows you to use any existing Hadoop <a href="http://developer.yahoo.com/hadoop/tutorial/module5.html#inputformat">InputFormat</a> with Stratosphere. 
So you can now (in the <code>0.5-SNAPSHOT</code> and upwards versions) define a Hadoop-based data source:</p> - -<div class="highlight"><pre><code class="language-java"><span class="n">HadoopDataSource</span> <span class="n">source</span> <span class="o">=</span> <span class="k">new</span> <span class="nf">HadoopDataSource</span><span class="o">(</span><span class="k">new</span> <span class="nf">TextInputFormat</span><span class="o">(),</span> <span class="k">new</span> <span class="nf">JobConf</span><span class="o">(),</span> <span class="s">"Input Lines"</span><span class="o">);</span> -<span class="n">TextInputFormat</span><span class="o">.</span><span class="na">addInputPath</span><span class="o">(</span><span class="n">source</span><span class="o">.</span><span class="na">getJobConf</span><span class="o">(),</span> <span class="k">new</span> <span class="nf">Path</span><span class="o">(</span><span class="n">dataInput</span><span class="o">));</span></code></pre></div> - -<p>We describe in the following article how to access data stored in <a href="http://www.mongodb.org/">MongoDB</a> with Stratosphere. This allows users to join data from multiple sources (e.g. 
MongoDB and HDFS) or perform machine learning with the documents stored in MongoDB.</p> - -<p>The approach here is to use the <code>MongoInputFormat</code> that was developed for Apache Hadoop but now also runs with Stratosphere.</p> - -<div class="highlight"><pre><code class="language-java"><span class="n">JobConf</span> <span class="n">conf</span> <span class="o">=</span> <span class="k">new</span> <span class="nf">JobConf</span><span class="o">();</span> -<span class="n">conf</span><span class="o">.</span><span class="na">set</span><span class="o">(</span><span class="s">"mongo.input.uri"</span><span class="o">,</span><span class="s">"mongodb://localhost:27017/enron_mail.messages"</span><span class="o">);</span> -<span class="n">HadoopDataSource</span> <span class="n">src</span> <span class="o">=</span> <span class="k">new</span> <span class="nf">HadoopDataSource</span><span class="o">(</span><span class="k">new</span> <span class="nf">MongoInputFormat</span><span class="o">(),</span> <span class="n">conf</span><span class="o">,</span> <span class="s">"Read from Mongodb"</span><span class="o">,</span> <span class="k">new</span> <span class="nf">WritableWrapperConverter</span><span class="o">());</span></code></pre></div> - -<h3 id="example-program">Example Program</h3> -<p>The example program reads data from the <a href="http://www.cs.cmu.edu/~enron/">enron dataset</a> that contains about 500k internal e-mails.
The data is stored in MongoDB and the Stratosphere program counts the number of e-mails per day.</p> - -<p>The complete code of this sample program is available on <a href="https://github.com/stratosphere/stratosphere-mongodb-example">GitHub</a>.</p> - -<h4 id="prepare-mongodb-and-the-data">Prepare MongoDB and the Data</h4> - -<ul> - <li>Install MongoDB</li> - <li>Download the enron dataset from <a href="http://mongodb-enron-email.s3-website-us-east-1.amazonaws.com/">their website</a>.</li> - <li>Unpack and load it</li> -</ul> - -<p><code>bash - bunzip2 enron_mongo.tar.bz2 - tar xvf enron_mongo.tar - mongorestore dump/enron_mail/messages.bson -</code></p> - -<p>We used <a href="http://robomongo.org/">Robomongo</a> to visually examine the dataset stored in MongoDB.</p> - -<p><img src="/img/blog/robomongo.png" style="width:90%;margin:15px" /></p> - -<h4 id="build-mongoinputformat">Build <code>MongoInputFormat</code></h4> - -<p>MongoDB offers an InputFormat for Hadoop on their <a href="https://github.com/mongodb/mongo-hadoop">GitHub page</a>. The code is not available in any Maven repository, so we have to build the jar file on our own.</p> - -<ul> - <li>Check out the repository</li> -</ul> - -<div class="highlight"><pre><code>git clone https://github.com/mongodb/mongo-hadoop.git -cd mongo-hadoop -</code></pre></div> - -<ul> - <li>Set the appropriate Hadoop version in the <code>build.sbt</code>, we used <code>1.1</code>.</li> -</ul> - -<div class="highlight"><pre><code class="language-bash">hadoopRelease in ThisBuild :<span class="o">=</span> <span class="s2">"1.1"</span></code></pre></div> -<ul> - <li>Build the input format</li> -</ul> - -<div class="highlight"><pre><code class="language-bash">./sbt package</code></pre></div> - -<p>The jar-file is now located in <code>core/target</code>.</p> - -<h4 id="the-stratosphere-program">The Stratosphere Program</h4> - -<p>Now we have everything prepared to run the Stratosphere program. 
I only ran it on my local computer, out of Eclipse. To do that, check out the code …</p> - -<div class="highlight"><pre><code class="language-bash">git clone https://github.com/stratosphere/stratosphere-mongodb-example.git</code></pre></div> - -<p>… and import it as a Maven project into your Eclipse. You have to manually add the previously built mongo-hadoop jar-file as a dependency. -You can now press the “Run” button and see how Stratosphere executes the little program. It was running for about 8 seconds on the 1.5 GB dataset.</p> - -<p>The result (located in <code>/tmp/enronCountByDay</code>) now looks like this.</p> - -<div class="highlight"><pre><code>11,Fri Sep 26 10:00:00 CEST 1997 -154,Tue Jun 29 10:56:00 CEST 1999 -292,Tue Aug 10 12:11:00 CEST 1999 -185,Thu Aug 12 18:35:00 CEST 1999 -26,Fri Mar 19 12:33:00 CET 1999 -</code></pre></div> - -<p>There is one thing left I want to point out here. MongoDB represents objects stored in the database as JSON-documents. Since Stratosphere’s standard types do not support JSON documents, I was using the <code>WritableWrapper</code> here.
This wrapper allows you to use any Hadoop data type with Stratosphere.</p> - -<p>The following code example shows how the JSON-documents are accessed in Stratosphere.</p> - -<div class="highlight"><pre><code class="language-java"><span class="kd">public</span> <span class="kt">void</span> <span class="nf">map</span><span class="o">(</span><span class="n">Record</span> <span class="n">record</span><span class="o">,</span> <span class="n">Collector</span><span class="o">&lt;</span><span class="n">Record</span><span class="o">&gt;</span> <span class="n">out</span><span class="o">)</span> <span class="kd">throws</span> <span class="n">Exception</span> <span class="o">{</span> - <span class="n">Writable</span> <span class="n">valWr</span> <span class="o">=</span> <span class="n">record</span><span class="o">.</span><span class="na">getField</span><span class="o">(</span><span class="mi">1</span><span class="o">,</span> <span class="n">WritableWrapper</span><span class="o">.</span><span class="na">class</span><span class="o">).</span><span class="na">value</span><span class="o">();</span> - <span class="n">BSONWritable</span> <span class="n">value</span> <span class="o">=</span> <span class="o">(</span><span class="n">BSONWritable</span><span class="o">)</span> <span class="n">valWr</span><span class="o">;</span> - <span class="n">Object</span> <span class="n">headers</span> <span class="o">=</span> <span class="n">value</span><span class="o">.</span><span class="na">getDoc</span><span class="o">().</span><span class="na">get</span><span class="o">(</span><span class="s">"headers"</span><span class="o">);</span> - <span class="n">BasicDBObject</span> <span class="n">headerOb</span> <span class="o">=</span> <span class="o">(</span><span class="n">BasicDBObject</span><span class="o">)</span> <span class="n">headers</span><span class="o">;</span> - <span class="n">String</span> <span class="n">date</span> <span class="o">=</span> <span class="o">(</span><span
class="n">String</span><span class="o">)</span> <span class="n">headerOb</span><span class="o">.</span><span class="na">get</span><span class="o">(</span><span class="s">"Date"</span><span class="o">);</span> - <span class="c1">// further date processing</span> -<span class="o">}</span></code></pre></div> - -<p>Please use the comments if you have questions or if you want to showcase your own MongoDB-Stratosphere integration.</p> - - </article> - </div> - - <div class="row"> - <div id="disqus_thread"></div> - <script type="text/javascript"> - /* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * */ - var disqus_shortname = 'stratosphere-eu'; // required: replace example with your forum shortname - - /* * * DON'T EDIT BELOW THIS LINE * * */ - (function() { - var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true; - dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js'; - (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq); - })(); - </script> - </div> - </div> -</div> - - <hr /> - <div class="footer text-center"> - <p>Copyright © 2014-2015 <a href="http://apache.org">The Apache Software Foundation</a>. 
All Rights Reserved.</p> - <p>Apache Flink, Apache, and the Apache feather logo are trademarks of The Apache Software Foundation.</p> - <p><a href="/privacy-policy.html">Privacy Policy</a> · <a href="/blog/feed.xml">RSS feed</a></p> - </div> - - </div><!-- /.container --> - - <!-- Include all compiled plugins (below), or include individual files as needed --> - <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/js/bootstrap.min.js"></script> - <script src="/js/codetabs.js"></script> - - <!-- Google Analytics --> - <script> - (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ - (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), - m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) - })(window,document,'script','//www.google-analytics.com/analytics.js','ga'); - - ga('create', 'UA-52545728-1', 'auto'); - ga('send', 'pageview'); - </script> - </body> -</html>