http://git-wip-us.apache.org/repos/asf/flink-web/blob/9ec0a879/content/faq.html ---------------------------------------------------------------------- diff --git a/content/faq.html b/content/faq.html new file mode 100644 index 0000000..7653ff8 --- /dev/null +++ b/content/faq.html @@ -0,0 +1,683 @@ +<!DOCTYPE html> +<html lang="en"> + <head> + <meta charset="utf-8"> + <meta http-equiv="X-UA-Compatible" content="IE=edge"> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags --> + <title>Apache Flink: Frequently Asked Questions (FAQ)</title> + <link rel="shortcut icon" href="/favicon.ico" type="image/x-icon"> + <link rel="icon" href="/favicon.ico" type="image/x-icon"> + + <!-- Bootstrap --> + <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/css/bootstrap.min.css"> + <link rel="stylesheet" href="/css/flink.css"> + <link rel="stylesheet" href="/css/syntax.css"> + + <!-- Blog RSS feed --> + <link href="/blog/feed.xml" rel="alternate" type="application/rss+xml" title="Apache Flink Blog: RSS feed" /> + + <!-- jQuery (necessary for Bootstrap's JavaScript plugins) --> + <!-- We need to load Jquery in the header for custom google analytics event tracking--> + <script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.2/jquery.min.js"></script> + + <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries --> + <!-- WARNING: Respond.js doesn't work if you view the page via file:// --> + <!--[if lt IE 9]> + <script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script> + <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script> + <![endif]--> + </head> + <body> + + + <!-- Main content. --> + <div class="container"> + <div class="row"> + + + <div id="sidebar" class="col-sm-3"> + <!-- Top navbar. --> + <nav class="navbar navbar-default"> + <!-- The logo. --> + <div class="navbar-header"> + <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1"> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + </button> + <div class="navbar-logo"> + <a href="/"> + <img alt="Apache Flink" src="/img/navbar-brand-logo.png" width="147px" height="73px"> + </a> + </div> + </div><!-- /.navbar-header --> + + <!-- The navigation links. 
--> + <div class="collapse navbar-collapse" id="bs-example-navbar-collapse-1"> + <ul class="nav navbar-nav navbar-main"> + + <!-- Downloads --> + <li class=""><a class="btn btn-info" href="/downloads.html">Download Flink</a></li> + + <!-- Overview --> + <li><a href="/index.html">Home</a></li> + + <!-- Intro --> + <li><a href="/introduction.html">Introduction to Flink</a></li> + + <!-- Use cases --> + <li><a href="/usecases.html">Flink Use Cases</a></li> + + <!-- Powered by --> + <li><a href="/poweredby.html">Powered by Flink</a></li> + + <!-- Ecosystem --> + <li><a href="/ecosystem.html">Ecosystem</a></li> + + <!-- Community --> + <li><a href="/community.html">Community & Project Info</a></li> + + <!-- Contribute --> + <li><a href="/how-to-contribute.html">How to Contribute</a></li> + + <!-- Blog --> + <li class=" hidden-md hidden-sm"><a href="/blog/"><b>Flink Blog</b></a></li> + + <hr /> + + + + <!-- Documentation --> + <!-- <li> + <a href="http://ci.apache.org/projects/flink/flink-docs-release-1.1" target="_blank">Documentation <small><span class="glyphicon glyphicon-new-window"></span></small></a> + </li> --> + <li class="dropdown"> + <a class="dropdown-toggle" data-toggle="dropdown" href="#">Documentation + <span class="caret"></span></a> + <ul class="dropdown-menu"> + <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-1.1" target="_blank">1.1 (Latest stable release) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li> + <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-1.2" target="_blank">1.2 (Snapshot) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li> + </ul> + </li> + + <!-- Quickstart --> + <li> + <a href="http://ci.apache.org/projects/flink/flink-docs-release-1.1/quickstart/setup_quickstart.html" target="_blank">Quickstart <small><span class="glyphicon glyphicon-new-window"></span></small></a> + </li> + + <!-- GitHub --> + <li> + <a href="https://github.com/apache/flink" target="_blank">Flink on GitHub <small><span class="glyphicon glyphicon-new-window"></span></small></a> + </li> + + + + + + + </ul> + + + + <ul class="nav navbar-nav navbar-bottom"> + <hr /> + + <!-- FAQ --> + <li class="hidden-sm active"><a href="/faq.html">Project FAQ</a></li> + + <!-- Twitter --> + <li><a href="https://twitter.com/apacheflink" target="_blank">@ApacheFlink <small><span class="glyphicon glyphicon-new-window"></span></small></a></li> + + <!-- Visualizer --> + <li class=" hidden-md hidden-sm"><a href="/visualizer/" target="_blank">Plan Visualizer <small><span class="glyphicon glyphicon-new-window"></span></small></a></li> + + </ul> + </div><!-- /.navbar-collapse --> + </nav> + + </div> + <div class="col-sm-9"> + <div class="row-fluid"> + <div class="col-sm-12"> + <h1>Frequently Asked Questions (FAQ)</h1> + + <!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. 
See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<p>The following questions are frequently asked with regard to the Flink project <strong>in general</strong>. If you have further questions, make sure to consult the <a href="http://ci.apache.org/projects/flink/flink-docs-release-1.1">documentation</a> or <a href="/community.html">ask the community</a>.</p>
+
+<div class="page-toc">
+<ul id="markdown-toc">
+  <li><a href="#general" id="markdown-toc-general">General</a>    <ul>
+      <li><a href="#is-flink-a-hadoop-project" id="markdown-toc-is-flink-a-hadoop-project">Is Flink a Hadoop Project?</a></li>
+      <li><a href="#do-i-have-to-install-apache-hadoop-to-use-flink" id="markdown-toc-do-i-have-to-install-apache-hadoop-to-use-flink">Do I have to install Apache Hadoop to use Flink?</a></li>
+    </ul>
+  </li>
+  <li><a href="#usage" id="markdown-toc-usage">Usage</a>    <ul>
+      <li><a href="#how-do-i-assess-the-progress-of-a-flink-program" id="markdown-toc-how-do-i-assess-the-progress-of-a-flink-program">How do I assess the progress of a Flink program?</a></li>
+      <li><a href="#how-can-i-figure-out-why-a-program-failed" id="markdown-toc-how-can-i-figure-out-why-a-program-failed">How can I figure out why a program failed?</a></li>
+      <li><a href="#how-do-i-debug-flink-programs" id="markdown-toc-how-do-i-debug-flink-programs">How do I debug Flink programs?</a></li>
+      <li><a href="#what-is-the-parallelism-how-do-i-set-it" id="markdown-toc-what-is-the-parallelism-how-do-i-set-it">What is the parallelism? How do I set it?</a></li>
+    </ul>
+  </li>
+  <li><a href="#errors" id="markdown-toc-errors">Errors</a>    <ul>
+      <li><a href="#why-am-i-getting-a-nonserializableexception-" id="markdown-toc-why-am-i-getting-a-nonserializableexception-">Why am I getting a “NonSerializableException”?</a></li>
+      <li><a href="#in-scala-api-i-get-an-error-about-implicit-values-and-evidence-parameters" id="markdown-toc-in-scala-api-i-get-an-error-about-implicit-values-and-evidence-parameters">In the Scala API, I get an error about implicit values and evidence parameters</a></li>
+      <li><a href="#i-get-an-error-message-saying-that-not-enough-buffers-are-available-how-do-i-fix-this" id="markdown-toc-i-get-an-error-message-saying-that-not-enough-buffers-are-available-how-do-i-fix-this">I get an error message saying that not enough buffers are available. How do I fix this?</a></li>
+      <li><a href="#my-job-fails-early-with-a-javaioeofexception-what-could-be-the-cause" id="markdown-toc-my-job-fails-early-with-a-javaioeofexception-what-could-be-the-cause">My job fails early with a java.io.EOFException. What could be the cause?</a></li>
+      <li><a href="#my-job-fails-with-various-exceptions-from-the-hdfshadoop-code-what-can-i-do" id="markdown-toc-my-job-fails-with-various-exceptions-from-the-hdfshadoop-code-what-can-i-do">My job fails with various exceptions from the HDFS/Hadoop code. What can I do?</a></li>
+      <li><a href="#in-eclipse-i-get-compilation-errors-in-the-scala-projects" id="markdown-toc-in-eclipse-i-get-compilation-errors-in-the-scala-projects">In Eclipse, I get compilation errors in the Scala projects</a></li>
+      <li><a href="#my-program-does-not-compute-the-correct-result-why-are-my-custom-key-types" id="markdown-toc-my-program-does-not-compute-the-correct-result-why-are-my-custom-key-types">My program does not compute the correct result. Why are my custom key types not grouped/joined correctly?</a></li>
+      <li><a href="#i-get-a-javalanginstantiationexception-for-my-data-type-what-is-wrong" id="markdown-toc-i-get-a-javalanginstantiationexception-for-my-data-type-what-is-wrong">I get a java.lang.InstantiationException for my data type, what is wrong?</a></li>
+      <li><a href="#i-cant-stop-flink-with-the-provided-stop-scripts-what-can-i-do" id="markdown-toc-i-cant-stop-flink-with-the-provided-stop-scripts-what-can-i-do">I can’t stop Flink with the provided stop-scripts. What can I do?</a></li>
+      <li><a href="#i-got-an-outofmemoryexception-what-can-i-do" id="markdown-toc-i-got-an-outofmemoryexception-what-can-i-do">I got an OutOfMemoryException. What can I do?</a></li>
+      <li><a href="#why-do-the-taskmanager-log-files-become-so-huge" id="markdown-toc-why-do-the-taskmanager-log-files-become-so-huge">Why do the TaskManager log files become so huge?</a></li>
+      <li><a href="#the-slot-allocated-for-my-task-manager-has-been-released-what-should-i-do" id="markdown-toc-the-slot-allocated-for-my-task-manager-has-been-released-what-should-i-do">The slot allocated for my task manager has been released. What should I do?</a></li>
+    </ul>
+  </li>
+  <li><a href="#yarn-deployment" id="markdown-toc-yarn-deployment">YARN Deployment</a>    <ul>
+      <li><a href="#the-yarn-session-runs-only-for-a-few-seconds" id="markdown-toc-the-yarn-session-runs-only-for-a-few-seconds">The YARN session runs only for a few seconds</a></li>
+      <li><a href="#my-yarn-containers-are-killed-because-they-use-too-much-memory" id="markdown-toc-my-yarn-containers-are-killed-because-they-use-too-much-memory">My YARN containers are killed because they use too much memory</a></li>
+      <li><a href="#the-yarn-session-crashes-with-a-hdfs-permission-exception-during-startup" id="markdown-toc-the-yarn-session-crashes-with-a-hdfs-permission-exception-during-startup">The YARN session crashes with an HDFS permission exception during startup</a></li>
+      <li><a href="#my-job-is-not-reacting-to-a-job-cancellation" id="markdown-toc-my-job-is-not-reacting-to-a-job-cancellation">My job is not reacting to a job cancellation?</a></li>
+    </ul>
+  </li>
+  <li><a href="#features" id="markdown-toc-features">Features</a>    <ul>
+      <li><a href="#what-kind-of-fault-tolerance-does-flink-provide" id="markdown-toc-what-kind-of-fault-tolerance-does-flink-provide">What kind of fault-tolerance does Flink provide?</a></li>
+      <li><a href="#are-hadoop-like-utilities-such-as-counters-and-the-distributedcache-supported" id="markdown-toc-are-hadoop-like-utilities-such-as-counters-and-the-distributedcache-supported">Are Hadoop-like utilities, such as Counters and the DistributedCache, supported?</a></li>
+    </ul>
+  </li>
+</ul>
+
+</div>
+
+<h2 id="general">General</h2>
+
+<h3 id="is-flink-a-hadoop-project">Is Flink a Hadoop Project?</h3>
+
+<p>Flink is a data processing system and an <strong>alternative to Hadoop’s
+MapReduce component</strong>. It comes with its <em>own runtime</em> rather than building on top
+of MapReduce. As such, it can work completely independently of the Hadoop
+ecosystem. However, Flink can also access Hadoop’s distributed file
+system (HDFS) to read and write data, and Hadoop’s next-generation resource
+manager (YARN) to provision cluster resources. Since most Flink users
+use Hadoop HDFS to store their data, Flink already ships the required libraries to
+access HDFS.</p>
+
+<h3 id="do-i-have-to-install-apache-hadoop-to-use-flink">Do I have to install Apache Hadoop to use Flink?</h3>
+
+<p><strong>No</strong>. Flink can run <strong>without</strong> a Hadoop installation. However, a <em>very common</em>
+setup is to use Flink to analyze data stored in the Hadoop Distributed
+File System (HDFS). To make these setups work out of the box, Flink bundles the
+Hadoop client libraries by default.</p>
+
+<p>Additionally, we provide a special YARN-enabled download of Flink for
+users with an existing Hadoop YARN cluster. <a href="http://hadoop.apache.org/docs/r2.2.0/hadoop-yarn/hadoop-yarn-site/YARN.html">Apache Hadoop
+YARN</a>
+is Hadoop’s cluster resource manager that allows different
+execution engines to run next to each other on a cluster.</p>
+
+<h2 id="usage">Usage</h2>
+
+<h3 id="how-do-i-assess-the-progress-of-a-flink-program">How do I assess the progress of a Flink program?</h3>
+
+<p>There are multiple ways to track the progress of a Flink program:</p>
+
+<ul>
+  <li>The JobManager (the master of the distributed system) starts a web interface
+to observe program execution. It runs on port 8081 by default (configured in
+<code>conf/flink-conf.yaml</code>).</li>
+  <li>When you start a program from the command line, it will print the status
+changes of all operators as the program progresses through the operations.</li>
+  <li>All status changes are also logged to the JobManager’s log file.</li>
+</ul>
+
+<h3 id="how-can-i-figure-out-why-a-program-failed">How can I figure out why a program failed?</h3>
+
+<ul>
+  <li>The JobManager web frontend (by default on port 8081) displays the exceptions
+of failed tasks.</li>
+  <li>If you run the program from the command line, task exceptions are printed to
+the standard error stream and shown on the console.</li>
+  <li>Both the command line and the web interface allow you to figure out which
+parallel task first failed and caused the other tasks to cancel the execution.</li>
+  <li>Failing tasks and the corresponding exceptions are reported in the log files
+of the master and the worker where the exception occurred
+(<code>log/flink-<user>-jobmanager-<host>.log</code> and
+<code>log/flink-<user>-taskmanager-<host>.log</code>).</li>
+</ul>
+
+<h3 id="how-do-i-debug-flink-programs">How do I debug Flink programs?</h3>
+
+<ul>
+  <li>When you start a program locally with the <a href="http://ci.apache.org/projects/flink/flink-docs-release-1.2/apis/local_execution.html">LocalExecutor</a>,
+you can place breakpoints in your functions and debug them like normal
+Java/Scala programs.</li>
+  <li>The <a href="http://ci.apache.org/projects/flink/flink-docs-release-1.2/apis/programming_guide.html#accumulators--counters">Accumulators</a> are very helpful in
+tracking the behavior of the parallel execution. They allow you to gather
+information inside the program’s operations and show it after the program
+has finished, as the sketch below illustrates.</li>
+</ul>
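+<p>As a quick illustration of the accumulator approach, here is a minimal sketch
+(the accumulator name <code>num-lines</code> and the class <code>CountingMapper</code> are
+illustrative, not part of Flink’s API):</p>
+
+<div class="highlight"><pre><code class="language-scala">import org.apache.flink.api.common.accumulators.IntCounter
+import org.apache.flink.api.common.functions.RichMapFunction
+import org.apache.flink.configuration.Configuration
+
+class CountingMapper extends RichMapFunction[String, String] {
+  private val counter = new IntCounter()
+
+  override def open(parameters: Configuration): Unit = {
+    // Register the accumulator under a job-wide name.
+    getRuntimeContext.addAccumulator("num-lines", counter)
+  }
+
+  override def map(value: String): String = {
+    counter.add(1) // Aggregated across all parallel instances.
+    value
+  }
+}</code></pre></div>
+
+<p>The aggregated value is shown in the web frontend and can also be read from the
+<code>JobExecutionResult</code> returned by <code>env.execute()</code>.</p>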
+<h3 id="what-is-the-parallelism-how-do-i-set-it">What is the parallelism? How do I set it?</h3>
+
+<p>In Flink programs, the parallelism determines how operations are split into
+individual tasks which are assigned to task slots. Each node in a cluster has at
+least one task slot. The total number of task slots is the sum of all task slots
+on all machines. If the parallelism is set to <code>N</code>, Flink tries to divide an
+operation into <code>N</code> parallel tasks which can be computed concurrently using the
+available task slots. The number of task slots should be equal to the
+parallelism to ensure that all tasks can be computed in a task slot concurrently.</p>
+
+<p><strong>Note</strong>: Not all operations can be divided into multiple tasks. For example, a
+<code>GroupReduce</code> operation without a grouping has to be performed with a
+parallelism of 1 because the entire group needs to be present at exactly one
+node to perform the reduce operation. Flink will determine whether the
+parallelism has to be 1 and set it accordingly.</p>
+
+<p>The parallelism can be set in numerous ways to ensure fine-grained control
+over the execution of a Flink program. See
+the <a href="http://ci.apache.org/projects/flink/flink-docs-release-1.2/setup/config.html#common-options">Configuration guide</a> for detailed instructions on how to
+set the parallelism. Also check out <a href="http://ci.apache.org/projects/flink/flink-docs-release-1.2/setup/config.html#configuring-taskmanager-processing-slots">this figure</a> detailing
+how the processing slots and parallelism are related to each other.</p>
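+<p>For example, here is a minimal sketch of setting a default parallelism for the
+whole program and overriding it for a single operator (paths and values are
+illustrative):</p>
+
+<div class="highlight"><pre><code class="language-scala">import org.apache.flink.api.scala._
+
+val env = ExecutionEnvironment.getExecutionEnvironment
+env.setParallelism(8) // default parallelism for all operators
+
+env.readTextFile("hdfs:///path/to/input")
+  .flatMap { _.toLowerCase.split("\\W+") }
+  .map { word => (word, 1) }
+  .groupBy(0)
+  .sum(1)
+  .setParallelism(4) // override for this operator only
+  .writeAsText("hdfs:///path/to/output")
+
+env.execute("Word Count")</code></pre></div>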
+<h2 id="errors">Errors</h2>
+
+<h3 id="why-am-i-getting-a-nonserializableexception-">Why am I getting a “NonSerializableException”?</h3>
+
+<p>All functions in Flink must be serializable, as defined by <a href="http://docs.oracle.com/javase/7/docs/api/java/io/Serializable.html">java.io.Serializable</a>.
+Since all function interfaces are serializable, the exception means that one
+of the fields used in your function is not serializable.</p>
+
+<p>In particular, if your function is an inner class, or anonymous inner class,
+it contains a hidden reference to the enclosing class (usually called <code>this$0</code>, if you look
+at the function in the debugger). If the enclosing class is not serializable, this is probably
+the source of the error. Solutions are to</p>
+
+<ul>
+  <li>make the function a standalone class, or a static inner class (no more reference to the enclosing class), as sketched below</li>
+  <li>make the enclosing class serializable</li>
+  <li>use a Java 8 lambda function.</li>
+</ul>
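+<p>For illustration, a sketch of the first solution: moving the logic into a
+standalone function class that carries no hidden reference to an enclosing
+object (<code>ExtractUserId</code> is a made-up example class):</p>
+
+<div class="highlight"><pre><code class="language-scala">import org.apache.flink.api.common.functions.MapFunction
+
+// A standalone (top-level) function class is serializable by itself;
+// unlike an anonymous inner class, it has no this$0 field pointing to a
+// potentially non-serializable enclosing class.
+class ExtractUserId extends MapFunction[String, String] {
+  override def map(line: String): String = line.split(",")(0)
+}
+
+// usage: dataSet.map(new ExtractUserId)</code></pre></div>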
+<h3 id="in-scala-api-i-get-an-error-about-implicit-values-and-evidence-parameters">In the Scala API, I get an error about implicit values and evidence parameters</h3>
+
+<p>This error means that the implicit value for the type information could not be provided.
+Make sure that you have an <code>import org.apache.flink.api.scala._</code> statement in your code.</p>
+
+<p>If you are using Flink operations inside functions or classes that take
+generic parameters, a <code>TypeInformation</code> must be available for that parameter.
+This can be achieved by using a context bound:</p>
+
+<div class="highlight"><pre><code class="language-scala"><span class="k">def</span> <span class="n">myFunction</span><span class="o">[</span><span class="kt">T:</span> <span class="kt">TypeInformation</span><span class="o">](</span><span class="n">input</span><span class="k">:</span> <span class="kt">DataSet</span><span class="o">[</span><span class="kt">T</span><span class="o">])</span><span class="k">:</span> <span class="kt">DataSet</span><span class="o">[</span><span class="kt">Seq</span><span class="o">[</span><span class="kt">T</span><span class="o">]]</span> <span class="k">=</span> <span class="o">{</span>
+  <span class="n">input</span><span class="o">.</span><span class="n">reduceGroup</span><span class="o">(</span> <span class="n">i</span> <span class="k">=></span> <span class="n">i</span><span class="o">.</span><span class="n">toSeq</span> <span class="o">)</span>
+<span class="o">}</span></code></pre></div>
+
+<p>See <a href="http://ci.apache.org/projects/flink/flink-docs-release-1.2/internals/types_serialization.html">Type Extraction and Serialization</a> for
+an in-depth discussion of how Flink handles types.</p>
+
+<h3 id="i-get-an-error-message-saying-that-not-enough-buffers-are-available-how-do-i-fix-this">I get an error message saying that not enough buffers are available. How do I fix this?</h3>
+
+<p>If you run Flink in a massively parallel setting (100+ parallel threads),
+you need to adapt the number of network buffers via the config parameter
+<code>taskmanager.network.numberOfBuffers</code>.
+As a rule of thumb, the number of buffers should be at least
+<code>4 * numberOfTaskManagers * numberOfSlotsPerTaskManager^2</code>. See
+<a href="http://ci.apache.org/projects/flink/flink-docs-release-1.2/setup/config.html#configuring-the-network-buffers">Configuration Reference</a> for details.</p>
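+<p>For example, applying that rule of thumb to a hypothetical cluster of 25
+TaskManagers with 4 slots each gives 4 * 25 * 4^2 = 1600 buffers, set in
+<code>conf/flink-conf.yaml</code>:</p>
+
+<div class="highlight"><pre><code># rule of thumb: 4 * numberOfTaskManagers * numberOfSlotsPerTaskManager^2
+taskmanager.network.numberOfBuffers: 1600
+</code></pre></div>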
+<h3 id="my-job-fails-early-with-a-javaioeofexception-what-could-be-the-cause">My job fails early with a java.io.EOFException. What could be the cause?</h3>
+
+<p>The most common cause of these exceptions is that Flink was set up with the
+wrong HDFS version. Because different HDFS versions are often not compatible
+with each other, the connection between the filesystem master and the client
+breaks.</p>
+
+<div class="highlight"><pre><code class="language-bash">Call to <host:port> failed on <span class="nb">local </span>exception: java.io.EOFException
+    at org.apache.hadoop.ipc.Client.wrapException<span class="o">(</span>Client.java:775<span class="o">)</span>
+    at org.apache.hadoop.ipc.Client.call<span class="o">(</span>Client.java:743<span class="o">)</span>
+    at org.apache.hadoop.ipc.RPC<span class="nv">$Invoker</span>.invoke<span class="o">(</span>RPC.java:220<span class="o">)</span>
+    at <span class="nv">$Proxy0</span>.getProtocolVersion<span class="o">(</span>Unknown Source<span class="o">)</span>
+    at org.apache.hadoop.ipc.RPC.getProxy<span class="o">(</span>RPC.java:359<span class="o">)</span>
+    at org.apache.hadoop.hdfs.DFSClient.createRPCNamenode<span class="o">(</span>DFSClient.java:106<span class="o">)</span>
+    at org.apache.hadoop.hdfs.DFSClient.<init><span class="o">(</span>DFSClient.java:207<span class="o">)</span>
+    at org.apache.hadoop.hdfs.DFSClient.<init><span class="o">(</span>DFSClient.java:170<span class="o">)</span>
+    at org.apache.hadoop.hdfs.DistributedFileSystem.initialize<span class="o">(</span>DistributedFileSystem.java:82<span class="o">)</span>
+    at org.apache.flink.runtime.fs.hdfs.DistributedFileSystem.initialize<span class="o">(</span>DistributedFileSystem.java:276<span class="o">)</span></code></pre></div>
+
+<p>Please refer to the <a href="/downloads.html#maven">download page</a> and
+the <a href="https://github.com/apache/flink/tree/master/README.md">build instructions</a>
+for details on how to set up Flink for different Hadoop and HDFS versions.</p>
+
+<h3 id="my-job-fails-with-various-exceptions-from-the-hdfshadoop-code-what-can-i-do">My job fails with various exceptions from the HDFS/Hadoop code. What can I do?</h3>
+
+<p>Flink ships with the Hadoop 2.2 binaries by default. These binaries are used
+to connect to HDFS or YARN.
+It seems that there are some bugs in the HDFS client which cause exceptions while writing to HDFS
+(in particular under high load).
+Among the exceptions are the following:</p>
+
+<ul>
+  <li><code>HDFS client trying to connect to the standby Namenode "org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.ipc.StandbyException): Operation category READ is not supported in state standby"</code></li>
+  <li>
+    <p><code>java.io.IOException: Bad response ERROR for block BP-1335380477-172.22.5.37-1424696786673:blk_1107843111_34301064 from datanode 172.22.5.81:50010
+at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer$ResponseProcessor.run(DFSOutputStream.java:732)</code></p>
+  </li>
+  <li><code>Caused by: org.apache.hadoop.ipc.RemoteException(java.lang.ArrayIndexOutOfBoundsException): 0
+        at org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager.getDatanodeStorageInfos(DatanodeManager.java:478)
+        at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.updatePipelineInternal(FSNamesystem.java:6039)
+        at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.updatePipeline(FSNamesystem.java:6002)</code></li>
+</ul>
+
+<p>If you are experiencing any of these, we recommend using a Flink build with a Hadoop version matching
+your local HDFS version.
+You can also manually build Flink against the exact Hadoop version (for example
+when using a Hadoop distribution with a custom patch level).</p>
+
+<h3 id="in-eclipse-i-get-compilation-errors-in-the-scala-projects">In Eclipse, I get compilation errors in the Scala projects</h3>
+
+<p>Flink uses a new feature of the Scala compiler (called “quasiquotes”) that has not yet been properly
+integrated with the Eclipse Scala plugin. In order to make this feature available in Eclipse, you
+need to manually configure the <em>flink-scala</em> project to use a <em>compiler plugin</em>:</p>
+
+<ul>
+  <li>Right-click on <em>flink-scala</em> and choose “Properties”</li>
+  <li>Select “Scala Compiler” and click on the “Advanced” tab. (If you do not have that, you probably have not set up Eclipse for Scala properly.)</li>
+  <li>Check the box “Use Project Settings”</li>
+  <li>In the field “Xplugin”, put the path “/home/<user-name>/.m2/repository/org/scalamacros/paradise_2.10.4/2.0.1/paradise_2.10.4-2.0.1.jar”</li>
+  <li>NOTE: You have to build Flink with Maven on the command line first, to make sure the plugin is downloaded.</li>
+</ul>
+
+<h3 id="my-program-does-not-compute-the-correct-result-why-are-my-custom-key-types">My program does not compute the correct result. Why are my custom key types not grouped/joined correctly?</h3>
+
+<p>Keys must correctly implement the methods <code>java.lang.Object#hashCode()</code>,
+<code>java.lang.Object#equals(Object o)</code>, and <code>java.util.Comparable#compareTo(...)</code>.
+These methods are backed by default implementations which are usually
+inadequate. Therefore, all keys must override <code>hashCode()</code> and <code>equals(Object o)</code>.</p>
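+<p>A minimal sketch of a well-behaved key type (a hypothetical <code>CustomerId</code>
+class; note that a Scala case class would generate <code>equals()</code> and
+<code>hashCode()</code> automatically):</p>
+
+<div class="highlight"><pre><code class="language-scala">class CustomerId(val id: Long) extends Comparable[CustomerId] with Serializable {
+  // equals() and hashCode() must agree: equal objects get equal hash codes.
+  override def equals(other: Any): Boolean = other match {
+    case that: CustomerId => this.id == that.id
+    case _ => false
+  }
+  override def hashCode(): Int = id.hashCode()
+  // Consistent ordering for sort-based operations.
+  override def compareTo(that: CustomerId): Int = java.lang.Long.compare(this.id, that.id)
+}</code></pre></div>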
+<h3 id="i-get-a-javalanginstantiationexception-for-my-data-type-what-is-wrong">I get a java.lang.InstantiationException for my data type, what is wrong?</h3>
+
+<p>All data type classes must be public and have a public nullary constructor
+(constructor with no arguments). Furthermore, the classes must not be abstract
+or interfaces. If the classes are internal classes, they must be public and
+static.</p>
+
+<h3 id="i-cant-stop-flink-with-the-provided-stop-scripts-what-can-i-do">I can’t stop Flink with the provided stop-scripts. What can I do?</h3>
+
+<p>Stopping the processes sometimes takes a few seconds, because the shutdown may
+do some cleanup work.</p>
+
+<p>In some error cases it happens that the JobManager or TaskManager cannot be
+stopped with the provided stop-scripts (<code>bin/stop-local.sh</code> or
+<code>bin/stop-cluster.sh</code>). You can kill their processes on Linux/Mac as follows:</p>
+
+<ul>
+  <li>Determine the process id (pid) of the JobManager / TaskManager process. You
+can use the <code>jps</code> command on Linux (if you have OpenJDK installed) or the command
+<code>ps -ef | grep java</code> to find all Java processes.</li>
+  <li>Kill the process with <code>kill -9 <pid></code>, where <code>pid</code> is the process id of the
+affected JobManager or TaskManager process.</li>
+</ul>
+
+<p>On Windows, the Task Manager shows a table of all processes and allows you to
+destroy a process by right-clicking its entry.</p>
+
+<p>Both the JobManager and TaskManager services log received signals (such as SIGKILL
+and SIGTERM) in their respective log files. This can be helpful for
+debugging issues with the stopping behavior.</p>
+
+<h3 id="i-got-an-outofmemoryexception-what-can-i-do">I got an OutOfMemoryException. What can I do?</h3>
+
+<p>These exceptions usually occur when the functions in the program consume a lot
+of memory by collecting large numbers of objects, for example in lists or maps.
+OutOfMemoryExceptions in Java are tricky: the exception is not
+necessarily thrown by the component that allocated most of the memory but by the
+component that tried to request the last bit of memory that could not be
+provided.</p>
+
+<p>There are two ways to go about this:</p>
+
+<ol>
+  <li>
+    <p>See whether you can use less memory inside the functions. For example, use
+arrays of primitive types instead of object types.</p>
+  </li>
+  <li>
+    <p>Reduce the memory that Flink reserves for its own processing. The
+TaskManager reserves a certain portion of the available memory for sorting,
+hashing, caching, network buffering, etc. That part of the memory is unavailable
+to the user-defined functions. By reserving it, the system can guarantee to not
+run out of memory on large inputs, but to plan with the available memory and
+destage operations to disk, if necessary. By default, the system reserves around
+70% of the memory. If you frequently run applications that need more memory in
+the user-defined functions, you can reduce that value using the configuration
+entries <code>taskmanager.memory.fraction</code> or <code>taskmanager.memory.size</code>. See the
+<a href="http://ci.apache.org/projects/flink/flink-docs-release-1.2/setup/config.html">Configuration Reference</a> for details. This will leave more memory for the JVM heap,
+but may cause data processing tasks to go to disk more often.</p>
+  </li>
+</ol>
+
+<p>Another reason for OutOfMemoryExceptions is the use of the wrong state backend.
+By default, Flink uses a heap-based state backend for operator state in
+streaming jobs. The <code>RocksDBStateBackend</code> allows state sizes larger than the
+available heap space.</p>
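+<p>A minimal sketch of switching to RocksDB (this requires the
+<code>flink-statebackend-rocksdb</code> dependency; the checkpoint URI is illustrative):</p>
+
+<div class="highlight"><pre><code class="language-scala">import org.apache.flink.contrib.streaming.state.RocksDBStateBackend
+import org.apache.flink.streaming.api.scala._
+
+val env = StreamExecutionEnvironment.getExecutionEnvironment
+// Operator state is kept in RocksDB on local disk instead of on the JVM heap;
+// only checkpoints go to the given file system URI.
+env.setStateBackend(new RocksDBStateBackend("hdfs:///flink/checkpoints"))</code></pre></div>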
+<h3 id="why-do-the-taskmanager-log-files-become-so-huge">Why do the TaskManager log files become so huge?</h3>
+
+<p>Check the logging behavior of your jobs. Emitting log statements per object or tuple may be
+helpful to debug jobs in small setups with tiny data sets, but can limit performance
+and consume substantial disk space if used for large input data.</p>
+
+<h3 id="the-slot-allocated-for-my-task-manager-has-been-released-what-should-i-do">The slot allocated for my task manager has been released. What should I do?</h3>
+
+<p>If you see a <code>java.lang.Exception: The slot in which the task was executed has been released. Probably loss of TaskManager</code> even though the TaskManager did not actually crash, it
+means that the TaskManager was unresponsive for a while. That can be due to network issues, but is frequently due to long garbage collection stalls.
+In this case, a quick fix is to use an incremental garbage collector, like the G1 garbage collector, which usually leads to shorter pauses. Furthermore, you can dedicate more memory to
+the user code by reducing the amount of memory Flink grabs for its internal operations (see the configuration of TaskManager managed memory).</p>
+
+<p>If both of these approaches fail and the error persists, increase the TaskManager’s heartbeat pause by setting AKKA_WATCH_HEARTBEAT_PAUSE (<code>akka.watch.heartbeat.pause</code>) to a greater value (e.g. 600s).
+This will cause the JobManager to wait for a heartbeat for a longer time interval before considering the TaskManager lost.</p>
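+<p>Regarding the G1 suggestion above: assuming you start Flink through the standard
+scripts, JVM options such as the G1 collector can be passed via the
+<code>env.java.opts</code> entry in <code>conf/flink-conf.yaml</code> (a sketch; the
+effective options depend on your JVM):</p>
+
+<div class="highlight"><pre><code>env.java.opts: -XX:+UseG1GC
+</code></pre></div>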
+<h2 id="yarn-deployment">YARN Deployment</h2>
+
+<h3 id="the-yarn-session-runs-only-for-a-few-seconds">The YARN session runs only for a few seconds</h3>
+
+<p>The <code>./bin/yarn-session.sh</code> script is intended to run while the YARN session is
+open. In some error cases, however, the script stops running immediately. The
+output looks like this:</p>
+
+<div class="highlight"><pre><code>07:34:27,004 INFO org.apache.hadoop.yarn.client.api.impl.YarnClientImpl - Submitted application application_1395604279745_273123 to ResourceManager at jobtracker-host
+Flink JobManager is now running on worker1:6123
+JobManager Web Interface: http://jobtracker-host:54311/proxy/application_1295604279745_273123/
+07:34:51,528 INFO org.apache.flink.yarn.Client - Application application_1295604279745_273123 finished with state FINISHED at 1398152089553
+07:34:51,529 INFO org.apache.flink.yarn.Client - Killing the Flink-YARN application.
+07:34:51,529 INFO org.apache.hadoop.yarn.client.api.impl.YarnClientImpl - Killing application application_1295604279745_273123
+07:34:51,534 INFO org.apache.flink.yarn.Client - Deleting files in hdfs://user/marcus/.flink/application_1295604279745_273123
+07:34:51,559 INFO org.apache.flink.yarn.Client - YARN Client is shutting down
+</code></pre></div>
+
+<p>The problem here is that the Application Master (AM) is stopping and the YARN client assumes that the application has finished.</p>
+
+<p>There are three possible reasons for that behavior:</p>
+
+<ul>
+  <li>
+    <p>The ApplicationMaster exited with an exception. To debug that error, have a
+look at the log files of the container. The <code>yarn-site.xml</code> file contains the
+configured path. The key for the path is <code>yarn.nodemanager.log-dirs</code>, the
+default value is <code>${yarn.log.dir}/userlogs</code>.</p>
+  </li>
+  <li>
+    <p>YARN has killed the container that runs the ApplicationMaster. This case
+happens when the AM used too much memory or other resources beyond YARN’s
+limits. In this case, you’ll find error messages in the NodeManager logs on
+the host.</p>
+  </li>
+  <li>
+    <p>The operating system has shut down the JVM of the AM. This can happen if the
+YARN configuration is wrong and more memory than physically available is
+configured. Execute <code>dmesg</code> on the machine where the AM was running to see if
+this happened. You will see messages from Linux’s <a href="http://linux-mm.org/OOM_Killer">OOM killer</a>.</p>
+  </li>
+</ul>
+
+<h3 id="my-yarn-containers-are-killed-because-they-use-too-much-memory">My YARN containers are killed because they use too much memory</h3>
+
+<p>This is usually indicated by a log message like the following one:</p>
+
+<div class="highlight"><pre><code>Container container_e05_1467433388200_0136_01_000002 is completed with diagnostics: Container [pid=5832,containerID=container_e05_1467433388200_0136_01_000002] is running beyond physical memory limits. Current usage: 2.3 GB of 2 GB physical memory used; 6.1 GB of 4.2 GB virtual memory used. Killing container.
+</code></pre></div>
+
+<p>In that case, the JVM process grew too large. Because the Java heap size is always limited, the extra memory typically comes from non-heap sources:</p>
+
+<ul>
+  <li>Libraries that use off-heap memory. (Flink’s own off-heap memory is limited and taken into account when calculating the allowed heap size.)</li>
+  <li>PermGen space (strings and classes), code caches, memory-mapped jar files</li>
+  <li>Native libraries (RocksDB)</li>
+</ul>
+
+<p>You can activate the <a href="https://ci.apache.org/projects/flink/flink-docs-release-1.0/setup/config.html#memory-and-performance-debugging">memory debug logger</a> to get more insight into which memory pool is actually using up too much memory.</p>
+
+<h3 id="the-yarn-session-crashes-with-a-hdfs-permission-exception-during-startup">The YARN session crashes with an HDFS permission exception during startup</h3>
+
+<p>While starting the YARN session, you are receiving an exception like this:</p>
+<div class="highlight"><pre><code>Exception in thread "main" org.apache.hadoop.security.AccessControlException: Permission denied: user=robert, access=WRITE, inode="/user/robert":hdfs:supergroup:drwxr-xr-x
+  at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.check(FSPermissionChecker.java:234)
+  at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.check(FSPermissionChecker.java:214)
+  at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkPermission(FSPermissionChecker.java:158)
+  at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkPermission(FSNamesystem.java:5193)
+  at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkPermission(FSNamesystem.java:5175)
+  at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkAncestorAccess(FSNamesystem.java:5149)
+  at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInternal(FSNamesystem.java:2090)
+  at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInt(FSNamesystem.java:2043)
+  at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFile(FSNamesystem.java:1996)
+  at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.create(NameNodeRpcServer.java:491)
+  at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.create(ClientNamenodeProtocolServerSideTranslatorPB.java:301)
+  at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java:59570)
+  at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:585)
+  at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:928)
+  at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2053)
+  at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2049)
+  at java.security.AccessController.doPrivileged(Native Method)
+  at javax.security.auth.Subject.doAs(Subject.java:396)
+  at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1491)
+  at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2047)
+
+  at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
+  at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:39)
+  at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:27)
+  at java.lang.reflect.Constructor.newInstance(Constructor.java:513)
+  at org.apache.hadoop.ipc.RemoteException.instantiateException(RemoteException.java:106)
+  at org.apache.hadoop.ipc.RemoteException.unwrapRemoteException(RemoteException.java:73)
+  at org.apache.hadoop.hdfs.DFSOutputStream.newStreamForCreate(DFSOutputStream.java:1393)
+  at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:1382)
+  at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:1307)
+  at org.apache.hadoop.hdfs.DistributedFileSystem$6.doCall(DistributedFileSystem.java:384)
+  at org.apache.hadoop.hdfs.DistributedFileSystem$6.doCall(DistributedFileSystem.java:380)
+  at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
+  at org.apache.hadoop.hdfs.DistributedFileSystem.create(DistributedFileSystem.java:380)
+  at org.apache.hadoop.hdfs.DistributedFileSystem.create(DistributedFileSystem.java:324)
+  at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:905)
+  at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:886)
+  at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:783)
+  at org.apache.hadoop.fs.FileUtil.copy(FileUtil.java:365)
+  at org.apache.hadoop.fs.FileUtil.copy(FileUtil.java:338)
+  at org.apache.hadoop.fs.FileSystem.copyFromLocalFile(FileSystem.java:2021)
+  at org.apache.hadoop.fs.FileSystem.copyFromLocalFile(FileSystem.java:1989)
+  at org.apache.hadoop.fs.FileSystem.copyFromLocalFile(FileSystem.java:1954)
+  at org.apache.flink.yarn.Utils.setupLocalResource(Utils.java:176)
+  at org.apache.flink.yarn.Client.run(Client.java:362)
+  at org.apache.flink.yarn.Client.main(Client.java:568)
+</code></pre></div>
+
+<p>The reason for this error is that the home directory of the user <strong>in HDFS</strong>
+has the wrong permissions. The user (in this case <code>robert</code>) cannot create
+directories in their own home directory.</p>
+
+<p>Flink creates a <code>.flink/</code> directory in the user’s home directory
+where it stores the Flink jar and configuration file.</p>
+
+<h3 id="my-job-is-not-reacting-to-a-job-cancellation">My job is not reacting to a job cancellation?</h3>
+
+<p>Flink cancels a job by calling the <code>cancel()</code> method on all user tasks. Ideally,
+the tasks properly react to the call and stop what they are currently doing, so that
+all threads can shut down.</p>
+
+<p>If the tasks do not react within a certain amount of time, Flink will start interrupting
+the thread periodically.</p>
+
+<p>The TaskManager logs will also contain the current stack trace of the method where the user
+code is blocked.</p>
+
+<h2 id="features">Features</h2>
+
+<h3 id="what-kind-of-fault-tolerance-does-flink-provide">What kind of fault-tolerance does Flink provide?</h3>
+
+<p>For streaming programs, Flink has a novel approach that draws periodic snapshots of the streaming dataflow state and uses them for recovery.
+This mechanism is both efficient and flexible. See the documentation on <a href="http://ci.apache.org/projects/flink/flink-docs-release-1.2/internals/stream_checkpointing.html">streaming fault tolerance</a> for details.</p>
+
+<p>For batch processing programs, Flink remembers the program’s sequence of transformations and can restart failed jobs.</p>
+
+<h3 id="are-hadoop-like-utilities-such-as-counters-and-the-distributedcache-supported">Are Hadoop-like utilities, such as Counters and the DistributedCache, supported?</h3>
+
+<p><a href="http://ci.apache.org/projects/flink/flink-docs-release-1.2/apis/programming_guide.html#accumulators--counters">Flink’s Accumulators</a> work very similarly to
+Hadoop’s counters, but are more powerful.</p>
+
+<p>Flink has a <a href="https://github.com/apache/flink/tree/master/flink-core/src/main/java/org/apache/flink/api/common/cache/DistributedCache.java">Distributed Cache</a> that is deeply integrated with the APIs. Please refer to the <a href="https://github.com/apache/flink/tree/master/flink-java/src/main/java/org/apache/flink/api/java/ExecutionEnvironment.java#L831">JavaDocs</a> for details on how to use it.</p>
+
+<p>In order to make data sets available on all tasks, we encourage you to use <a href="http://ci.apache.org/projects/flink/flink-docs-release-1.2/apis/programming_guide.html#broadcast-variables">Broadcast Variables</a> instead. They are more efficient and easier to use than the distributed cache.</p>
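+<p>A minimal sketch of the broadcast variable pattern (the name
+<code>"thresholds"</code> is arbitrary; it only links the producing and consuming side):</p>
+
+<div class="highlight"><pre><code class="language-scala">import org.apache.flink.api.common.functions.RichMapFunction
+import org.apache.flink.api.scala._
+import org.apache.flink.configuration.Configuration
+import scala.collection.JavaConverters._
+
+val env = ExecutionEnvironment.getExecutionEnvironment
+val thresholds = env.fromElements(1, 2, 3)
+
+val result = env.fromElements(10, 20, 30)
+  .map(new RichMapFunction[Int, Int] {
+    private var broadcastSet: Seq[Int] = _
+
+    override def open(config: Configuration): Unit = {
+      // Fetch the broadcast data set by name; it is available on every task.
+      broadcastSet = getRuntimeContext
+        .getBroadcastVariable[Int]("thresholds").asScala.toSeq
+    }
+
+    override def map(value: Int): Int = value + broadcastSet.max
+  })
+  .withBroadcastSet(thresholds, "thresholds")</code></pre></div>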
+
+  </div>
+</div>
+
+      </div>
+    </div>
+
+    <hr />
+
+    <div class="row">
+      <div class="footer text-center col-sm-12">
+        <p>Copyright © 2014-2016 <a href="http://apache.org">The Apache Software Foundation</a>. All Rights Reserved.</p>
+        <p>Apache Flink, Apache, and the Apache feather logo are either registered trademarks or trademarks of The Apache Software Foundation.</p>
+        <p><a href="/privacy-policy.html">Privacy Policy</a> &middot; <a href="/blog/feed.xml">RSS feed</a></p>
+      </div>
+    </div>
+  </div><!-- /.container -->
+
+  <!-- Include all compiled plugins (below), or include individual files as needed -->
+  <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/js/bootstrap.min.js"></script>
+  <script src="/js/codetabs.js"></script>
+  <script src="/js/stickysidebar.js"></script>
+
+
+  <!-- Google Analytics -->
+  <script>
+    (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+    (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
+    m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+    })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+
+    ga('create', 'UA-52545728-1', 'auto');
+    ga('send', 'pageview');
+  </script>
+  </body>
+</html>
http://git-wip-us.apache.org/repos/asf/flink-web/blob/9ec0a879/content/favicon.ico ---------------------------------------------------------------------- diff --git a/content/favicon.ico b/content/favicon.ico new file mode 100755 index 0000000..34a467a Binary files /dev/null and b/content/favicon.ico differ http://git-wip-us.apache.org/repos/asf/flink-web/blob/9ec0a879/content/features.html ---------------------------------------------------------------------- diff --git a/content/features.html b/content/features.html new file mode 100644 index 0000000..5a5d0c5 --- /dev/null +++ b/content/features.html @@ -0,0 +1,511 @@ +<!DOCTYPE html> +<html lang="en"> + <head> + <meta charset="utf-8"> + <meta http-equiv="X-UA-Compatible" content="IE=edge"> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags --> + <title>Apache Flink: Features</title> + <link rel="shortcut icon" href="/favicon.ico" type="image/x-icon"> + <link rel="icon" href="/favicon.ico" type="image/x-icon"> + + <!-- Bootstrap --> + <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/css/bootstrap.min.css"> + <link rel="stylesheet" href="/css/flink.css"> + <link rel="stylesheet" href="/css/syntax.css"> + + <!-- Blog RSS feed --> + <link href="/blog/feed.xml" rel="alternate" type="application/rss+xml" title="Apache Flink Blog: RSS feed" /> + + <!-- jQuery (necessary for Bootstrap's JavaScript plugins) --> + <!-- We need to load Jquery in the header for custom google analytics event tracking--> + <script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.2/jquery.min.js"></script> + + <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries --> + <!-- WARNING: Respond.js doesn't work if you view the page via file:// --> + <!--[if lt IE 9]> + <script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script> + <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script> + <![endif]--> + </head> + <body> + + + <!-- Main content. --> + <div class="container"> + <div class="row"> + + + <div id="sidebar" class="col-sm-3"> + <!-- Top navbar. --> + <nav class="navbar navbar-default"> + <!-- The logo. --> + <div class="navbar-header"> + <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1"> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + </button> + <div class="navbar-logo"> + <a href="/"> + <img alt="Apache Flink" src="/img/navbar-brand-logo.png" width="147px" height="73px"> + </a> + </div> + </div><!-- /.navbar-header --> + + <!-- The navigation links. 
--> + <div class="collapse navbar-collapse" id="bs-example-navbar-collapse-1"> + <ul class="nav navbar-nav navbar-main"> + + <!-- Downloads --> + <li class=""><a class="btn btn-info" href="/downloads.html">Download Flink</a></li> + + <!-- Overview --> + <li><a href="/index.html">Home</a></li> + + <!-- Intro --> + <li><a href="/introduction.html">Introduction to Flink</a></li> + + <!-- Use cases --> + <li><a href="/usecases.html">Flink Use Cases</a></li> + + <!-- Powered by --> + <li><a href="/poweredby.html">Powered by Flink</a></li> + + <!-- Ecosystem --> + <li><a href="/ecosystem.html">Ecosystem</a></li> + + <!-- Community --> + <li><a href="/community.html">Community & Project Info</a></li> + + <!-- Contribute --> + <li><a href="/how-to-contribute.html">How to Contribute</a></li> + + <!-- Blog --> + <li class=" hidden-md hidden-sm"><a href="/blog/"><b>Flink Blog</b></a></li> + + <hr /> + + + + <!-- Documentation --> + <!-- <li> + <a href="http://ci.apache.org/projects/flink/flink-docs-release-1.1" target="_blank">Documentation <small><span class="glyphicon glyphicon-new-window"></span></small></a> + </li> --> + <li class="dropdown"> + <a class="dropdown-toggle" data-toggle="dropdown" href="#">Documentation + <span class="caret"></span></a> + <ul class="dropdown-menu"> + <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-1.1" target="_blank">1.1 (Latest stable release) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li> + <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-1.2" target="_blank">1.2 (Snapshot) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li> + </ul> + </li> + + <!-- Quickstart --> + <li> + <a href="http://ci.apache.org/projects/flink/flink-docs-release-1.1/quickstart/setup_quickstart.html" target="_blank">Quickstart <small><span class="glyphicon glyphicon-new-window"></span></small></a> + </li> + + <!-- GitHub --> + <li> + <a href="https://github.com/apache/flink" target="_blank">Flink on GitHub <small><span class="glyphicon glyphicon-new-window"></span></small></a> + </li> + + + + + + + </ul> + + + + <ul class="nav navbar-nav navbar-bottom"> + <hr /> + + <!-- FAQ --> + <li ><a href="/faq.html">Project FAQ</a></li> + + <!-- Twitter --> + <li><a href="https://twitter.com/apacheflink" target="_blank">@ApacheFlink <small><span class="glyphicon glyphicon-new-window"></span></small></a></li> + + <!-- Visualizer --> + <li class=" hidden-md hidden-sm"><a href="/visualizer/" target="_blank">Plan Visualizer <small><span class="glyphicon glyphicon-new-window"></span></small></a></li> + + </ul> + </div><!-- /.navbar-collapse --> + </nav> + + </div> + <div class="col-sm-9"> + <div class="row-fluid"> + <div class="col-sm-12"> + +<!-- --------------------------------------------- --> +<!-- Streaming +<!-- --------------------------------------------- --> + +<hr /> + +<div class="row" style="padding: 0 0 0 0"> + <div class="col-sm-12" style="text-align: center;"> + <h1 id="streaming"><b>Streaming</b></h1> + </div> +</div> + +<hr /> + +<!-- High Performance --> +<div class="row" style="padding: 0 0 2em 0"> + <div class="col-sm-12"> + <h1 id="performance"><i>High Performance & Low Latency</i></h1> + </div> +</div> +<div class="row"> + <div class="col-sm-12"> + <p class="lead">Flink's data streaming runtime achieves high throughput rates and low latency with little configuration. 
+ The charts below show the performance of a distributed item counting task, requiring streaming data shuffles.</p> + </div> +</div> +<div class="row" style="padding: 0 0 2em 0"> + <div class="col-sm-12 img-column"> + <img src="/img/features/streaming_performance.png" alt="Performance of data streaming applications" style="width:75%" /> + </div> +</div> + +<hr /> + +<!-- Event Time Streaming --> +<div class="row" style="padding: 0 0 2em 0"> + <div class="col-sm-12"> + <h1 id="event_time"><i>Support for Event Time and Out-of-Order Events</i></h1> + </div> +</div> +<div class="row"> + <div class="col-sm-6"> + <p class="lead">Flink supports stream processing and windowing with <b>Event Time</b> semantics.</p> + <p class="lead">Event time makes it easy to compute over streams where events arrive out of order, and where events may arrive delayed.</p> + </div> + <div class="col-sm-6 img-column"> + <img src="/img/features/out_of_order_stream.png" alt="Event Time and Out-of-Order Streams" style="width:100%" /> + </div> +</div> + +<hr /> + +<!-- Exactly-once Semantics --> +<div class="row" style="padding: 0 0 2em 0"> + <div class="col-sm-12"> + <h1 id="exactly_once"><i>Exactly-once Semantics for Stateful Computations</i></h1> + </div> +</div> +<div class="row"> + <div class="col-sm-6"> + <p class="lead">Streaming applications can maintain custom state during their computation.</p> + <p class="lead">Flink's checkpointing mechanism ensures <i>exactly once</i> semantics for the state in the presence of failures.</p> + </div> + <div class="col-sm-6 img-column"> + <img src="/img/features/exactly_once_state.png" alt="Exactly-once Semantics for Stateful Computations" style="width:50%" /> + </div> +</div> + +<hr /> + +<!-- Windowing --> +<div class="row" style="padding: 0 0 2em 0"> + <div class="col-sm-12"> + <h1 id="windows"><i>Highly flexible Streaming Windows</i></h1> + </div> +</div> +<div class="row"> + <div class="col-sm-6"> + <p class="lead">Flink supports windows over time, count, or sessions, as well as data-driven windows.</p> + <p class="lead">Windows can be customized with flexible triggering conditions, to support sophisticated streaming patterns.</p> + </div> + <div class="col-sm-6 img-column"> + <img src="/img/features/windows.png" alt="Windows" style="width:100%" /> + </div> +</div> + +<hr /> + +<!-- Continuous streaming --> +<div class="row" style="padding: 0 0 2em 0"> + <div class="col-sm-12"> + <h1 id="streaming_model"><i>Continuous Streaming Model with Backpressure</i></h1> + </div> +</div> + +<div class="row"> + <div class="col-sm-6"> + <p class="lead">Data streaming applications are executed with continuous (long lived) operators.</p> + <p class="lead">Flink's streaming runtime has natural flow control: Slow data sinks backpressure faster sources.</p> + </div> + <div class="col-sm-6 img-column"> + <img src="/img/features/continuous_streams.png" alt="Continuous Streaming Model" style="width:60%" /> + </div> +</div> + +<hr /> + +<!-- Lightweight distributed snapshots --> +<div class="row" style="padding: 0 0 2em 0"> + <div class="col-sm-12"> + <h1 id="snapshots"><i>Fault-tolerance via Lightweight Distributed Snapshots</i></h1> + </div> +</div> +<div class="row"> + <div class="col-sm-6"> + <p class="lead">Flink's fault tolerance mechanism is based on Chandy-Lamport distributed snapshots.</p> + <p class="lead">The mechanism is lightweight, allowing the system to maintain high throughput rates and provide strong consistency guarantees at the same time.</p> + </div> + <div class="col-sm-6 
img-column"> + <img src="/img/features/distributed_snapshots.png" alt="Lightweight Distributed Snapshots" style="width:40%" /> + </div> +</div> + +<hr /> + +<!-- --------------------------------------------- --> +<!-- Batch +<!-- --------------------------------------------- --> + +<div class="row" style="padding: 0 0 0 0"> + <div class="col-sm-12" style="text-align: center;"> + <h1 id="batch-on-streaming"><b>Batch and Streaming in One System</b></h1> + </div> +</div> + +<hr /> + +<!-- One Runtime for Streaming and Batch Processing --> +<div class="row" style="padding: 0 0 2em 0"> + <div class="col-sm-12"> + <h1 id="one_runtime"><i>One Runtime for Streaming and Batch Processing</i></h1> + </div> +</div> +<div class="row"> + <div class="col-sm-6"> + <p class="lead">Flink uses one common runtime for data streaming applications and batch processing applications.</p> + <p class="lead">Batch processing applications run efficiently as special cases of stream processing applications.</p> + </div> + <div class="col-sm-6 img-column"> + <img src="/img/features/one_runtime.png" alt="Unified Runtime for Batch and Stream Data Analysis" style="width:50%" /> + </div> +</div> + +<hr /> + +<!-- Memory Management --> +<div class="row" style="padding: 0 0 2em 0"> + <div class="col-sm-12"> + <h1 id="memory_management"><i>Memory Management</i></h1> + </div> +</div> +<div class="row"> + <div class="col-sm-6"> + <p class="lead">Flink implements its own memory management inside the JVM.</p> + <p class="lead">Applications scale to data sizes beyond main memory and experience less garbage collection overhead.</p> + </div> + <div class="col-sm-6 img-column"> + <img src="/img/features/memory_heap_division.png" alt="Managed JVM Heap" style="width:50%" /> + </div> +</div> + +<hr /> + +<!-- Iterations --> +<div class="row" style="padding: 0 0 2em 0"> + <div class="col-sm-12"> + <h1 id="iterations"><i>Iterations and Delta Iterations</i></h1> + </div> +</div> +<div class="row"> + <div class="col-sm-6"> + <p class="lead">Flink has dedicated support for iterative computations (as in machine learning and graph analysis).</p> + <p class="lead">Delta iterations can exploit computational dependencies for faster convergence.</p> + </div> + <div class="col-sm-6 img-column"> + <img src="/img/features/iterations.png" alt="Performance of iterations and delta iterations" style="width:75%" /> + </div> +</div> + +<hr /> + +<!-- Optimizer --> +<div class="row" style="padding: 0 0 2em 0"> + <div class="col-sm-12"> + <h1 id="optimizer"><i>Program Optimizer</i></h1> + </div> +</div> +<div class="row"> + <div class="col-sm-6"> + <p class="lead">Batch programs are automatically optimized to exploit situations where expensive operations (like shuffles and sorts) can be avoided, and when intermediate data should be cached.</p> + </div> + <div class="col-sm-6 img-column"> + <img src="/img/features/optimizer_choice.png" alt="Optimizer choosing between different execution strategies" style="width:100%" /> + </div> +</div> + +<hr /> + +<!-- --------------------------------------------- --> +<!-- APIs and Libraries +<!-- --------------------------------------------- --> + +<div class="row" style="padding: 0 0 0 0"> + <div class="col-sm-12" style="text-align: center;"> + <h1 id="apis-and-libs"><b>APIs and Libraries</b></h1> + </div> +</div> + +<hr /> + +<!-- Data Streaming API --> +<div class="row" style="padding: 0 0 2em 0"> + <div class="col-sm-12"> + <h1 id="streaming_api"><i>Streaming Data Applications</i></h1> + </div> +</div> +<div 
class="row">
+  <div class="col-sm-5">
+    <p class="lead">The <i>DataStream</i> API supports functional transformations on data streams, with user-defined state, and flexible windows.</p>
+    <p class="lead">The example shows how to compute a sliding histogram of word occurrences of a data stream of texts.</p>
+  </div>
+  <div class="col-sm-7">
+    <p class="lead">WindowWordCount in Flink's DataStream API</p>
+
+<div class="highlight"><pre><code class="language-scala" data-lang="scala"><span class="k">case</span> <span class="k">class</span> <span class="nc">Word</span><span class="o">(</span><span class="n">word</span><span class="k">:</span> <span class="kt">String</span><span class="o">,</span> <span class="n">freq</span><span class="k">:</span> <span class="kt">Long</span><span class="o">)</span>
+
+<span class="k">val</span> <span class="n">texts</span><span class="k">:</span> <span class="kt">DataStream</span><span class="o">[</span><span class="kt">String</span><span class="o">]</span> <span class="k">=</span> <span class="o">...</span>
+
+<span class="k">val</span> <span class="n">counts</span> <span class="k">=</span> <span class="n">texts</span>
+  <span class="o">.</span><span class="n">flatMap</span> <span class="o">{</span> <span class="n">line</span> <span class="k">=></span> <span class="n">line</span><span class="o">.</span><span class="n">split</span><span class="o">(</span><span class="s">"\\W+"</span><span class="o">)</span> <span class="o">}</span>
+  <span class="o">.</span><span class="n">map</span> <span class="o">{</span> <span class="n">token</span> <span class="k">=></span> <span class="nc">Word</span><span class="o">(</span><span class="n">token</span><span class="o">,</span> <span class="mi">1</span><span class="o">)</span> <span class="o">}</span>
+  <span class="o">.</span><span class="n">keyBy</span><span class="o">(</span><span class="s">"word"</span><span class="o">)</span>
+  <span class="o">.</span><span class="n">timeWindow</span><span class="o">(</span><span class="nc">Time</span><span class="o">.</span><span class="n">seconds</span><span class="o">(</span><span class="mi">5</span><span class="o">),</span> <span class="nc">Time</span><span class="o">.</span><span class="n">seconds</span><span class="o">(</span><span class="mi">1</span><span class="o">))</span>
+  <span class="o">.</span><span class="n">sum</span><span class="o">(</span><span class="s">"freq"</span><span class="o">)</span></code></pre></div>
+
+  </div>
+</div>
+
+<hr />
+
+<!-- Batch Processing API -->
+<div class="row" style="padding: 0 0 2em 0">
+  <div class="col-sm-12">
+    <h1 id="batch_api"><i>Batch Processing Applications</i></h1>
+  </div>
+</div>
+<div class="row">
+  <div class="col-sm-5">
+    <p class="lead">Flink's <i>DataSet</i> API lets you write beautiful, type-safe, and maintainable code in Java or Scala.
+
+<!-- Batch Processing API -->
+<div class="row" style="padding: 0 0 2em 0">
+  <div class="col-sm-12">
+    <h1 id="batch_api"><i>Batch Processing Applications</i></h1>
+  </div>
+</div>
+<div class="row">
+  <div class="col-sm-5">
+    <p class="lead">Flink's <i>DataSet</i> API lets you write beautiful, type-safe, and maintainable code in Java or Scala. It supports a wide range of data types beyond key/value pairs, and a wealth of operators.</p>
+    <p class="lead">The example shows the core loop of the PageRank algorithm for graphs.</p>
+  </div>
+  <div class="col-sm-7">
+
+<div class="highlight"><pre><code class="language-scala" data-lang="scala"><span class="k">case</span> <span class="k">class</span> <span class="nc">Page</span><span class="o">(</span><span class="n">pageId</span><span class="k">:</span> <span class="kt">Long</span><span class="o">,</span> <span class="n">rank</span><span class="k">:</span> <span class="kt">Double</span><span class="o">)</span>
+<span class="k">case</span> <span class="k">class</span> <span class="nc">Adjacency</span><span class="o">(</span><span class="n">id</span><span class="k">:</span> <span class="kt">Long</span><span class="o">,</span> <span class="n">neighbors</span><span class="k">:</span> <span class="kt">Array</span><span class="o">[</span><span class="kt">Long</span><span class="o">])</span>
+
+<span class="k">val</span> <span class="n">result</span> <span class="k">=</span> <span class="n">initialRanks</span><span class="o">.</span><span class="n">iterate</span><span class="o">(</span><span class="mi">30</span><span class="o">)</span> <span class="o">{</span> <span class="n">pages</span> <span class="k">=></span>
+  <span class="n">pages</span><span class="o">.</span><span class="n">join</span><span class="o">(</span><span class="n">adjacency</span><span class="o">).</span><span class="n">where</span><span class="o">(</span><span class="s">"pageId"</span><span class="o">).</span><span class="n">equalTo</span><span class="o">(</span><span class="s">"id"</span><span class="o">)</span> <span class="o">{</span>
+
+    <span class="o">(</span><span class="n">page</span><span class="o">,</span> <span class="n">adj</span><span class="o">,</span> <span class="n">out</span><span class="k">:</span> <span class="kt">Collector</span><span class="o">[</span><span class="kt">Page</span><span class="o">])</span> <span class="k">=></span> <span class="o">{</span>
+      <span class="n">out</span><span class="o">.</span><span class="n">collect</span><span class="o">(</span><span class="nc">Page</span><span class="o">(</span><span class="n">page</span><span class="o">.</span><span class="n">pageId</span><span class="o">,</span> <span class="mf">0.15</span> <span class="o">/</span> <span class="n">numPages</span><span class="o">))</span>
+
+      <span class="k">val</span> <span class="n">nLen</span> <span class="k">=</span> <span class="n">adj</span><span class="o">.</span><span class="n">neighbors</span><span class="o">.</span><span class="n">length</span>
+      <span class="k">for</span> <span class="o">(</span><span class="n">n</span> <span class="k"><-</span> <span class="n">adj</span><span class="o">.</span><span class="n">neighbors</span><span class="o">)</span> <span class="o">{</span>
+        <span class="n">out</span><span class="o">.</span><span class="n">collect</span><span class="o">(</span><span class="nc">Page</span><span class="o">(</span><span class="n">n</span><span class="o">,</span> <span class="mf">0.85</span> <span class="o">*</span> <span class="n">page</span><span class="o">.</span><span class="n">rank</span> <span class="o">/</span> <span class="n">nLen</span><span class="o">))</span>
+      <span class="o">}</span>
+    <span class="o">}</span>
+  <span class="o">}</span>
+  <span class="o">.</span><span class="n">groupBy</span><span class="o">(</span><span class="s">"pageId"</span><span class="o">).</span><span class="n">sum</span><span class="o">(</span><span class="s">"rank"</span><span class="o">)</span>
+<span class="o">}</span></code></pre></div>
+
+  </div>
+</div>
+
+<hr />
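+
+<!-- Batch Example Harness -->
+<div class="row">
+  <div class="col-sm-12">
+    <p class="lead">For context, a minimal sketch of a toy harness around the PageRank loop above. The three-page graph, the value of <code>numPages</code>, and the use of <code>fromElements</code> are assumptions for illustration.</p>
+
+<div class="highlight"><pre><code class="language-scala" data-lang="scala">import org.apache.flink.api.scala._
+
+val env = ExecutionEnvironment.getExecutionEnvironment
+
+// toy input: three pages with a uniform initial rank, and their out-links
+val numPages = 3
+val initialRanks = env.fromElements(
+  Page(1L, 1.0 / numPages), Page(2L, 1.0 / numPages), Page(3L, 1.0 / numPages))
+val adjacency = env.fromElements(
+  Adjacency(1L, Array(2L, 3L)), Adjacency(2L, Array(3L)), Adjacency(3L, Array(1L)))
+
+// ... run the iteration from the example above to obtain `result` ...
+
+result.print()  // print() collects the final ranks and triggers execution</code></pre></div>
+
+  </div>
+</div>
+
+<hr />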
class="n">sum</span><span class="o">(</span><span class="s">"rank"</span><span class="o">)</span> +<span class="o">}</span></code></pre></div> + + </div> +</div> + +<hr /> + +<!-- Library Ecosystem --> +<div class="row" style="padding: 0 0 2em 0"> + <div class="col-sm-12"> + <h1 id="libraries"><i>Library Ecosystem</i></h1> + </div> +</div> +<div class="row"> + <div class="col-sm-6"> + <p class="lead">Flink's stack offers libraries with high-level APIs for different use cases: Complex Event Processing, Machine Learning, and Graph Analytics.</p> + <p class="lead">The libraries are currently in <i>beta</i> status and are heavily developed.</p> + </div> + <div class="col-sm-6 img-column"> + <img src="/img/flink-stack-frontpage.png" alt="Flink Stack with Libraries" style="width:100%" /> + </div> +</div> + +<hr /> + +<!-- --------------------------------------------- --> +<!-- Ecosystem +<!-- --------------------------------------------- --> + +<div class="row" style="padding: 0 0 0 0"> + <div class="col-sm-12" style="text-align: center;"> + <h1><b>Ecosystem</b></h1> + </div> +</div> + +<hr /> + +<!-- Ecosystem --> +<div class="row" style="padding: 0 0 2em 0"> + <div class="col-sm-12"> + <h1 id="ecosystem"><i>Broad Integration</i></h1> + </div> +</div> +<div class="row"> + <div class="col-sm-6"> + <p class="lead">Flink is integrated with many other projects in the open-source data processing ecosystem.</p> + <p class="lead">Flink runs on YARN, works with HDFS, streams data from Kafka, can execute Hadoop program code, and connects to various other data storage systems.</p> + </div> + <div class="col-sm-6 img-column"> + <img src="/img/features/ecosystem_logos.png" alt="Other projects that Flink is integrated with" style="width:75%" /> + </div> +</div> + + + </div> +</div> + + </div> + </div> + + <hr /> + + <div class="row"> + <div class="footer text-center col-sm-12"> + <p>Copyright © 2014-2016 <a href="http://apache.org">The Apache Software Foundation</a>. All Rights Reserved.</p> + <p>Apache Flink, Apache, and the Apache feather logo are either registered trademarks or trademarks of The Apache Software Foundation.</p> + <p><a href="/privacy-policy.html">Privacy Policy</a> · <a href="/blog/feed.xml">RSS feed</a></p> + </div> + </div> + </div><!-- /.container --> + + <!-- Include all compiled plugins (below), or include individual files as needed --> + <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/js/bootstrap.min.js"></script> + <script src="/js/codetabs.js"></script> + <script src="/js/stickysidebar.js"></script> + + + <!-- Google Analytics --> + <script> + (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ + (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), + m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) + })(window,document,'script','//www.google-analytics.com/analytics.js','ga'); + + ga('create', 'UA-52545728-1', 'auto'); + ga('send', 'pageview'); + </script> + </body> +</html>