Remove references to Java 7 and Hadoop support before 2.5
Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/ae58782b Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/ae58782b Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/ae58782b Branch: refs/heads/asf-site Commit: ae58782baafff924a904e01197400104d85471f1 Parents: fe49ab1 Author: Sean Owen <so...@cloudera.com> Authored: Tue Feb 14 21:33:56 2017 +0000 Committer: Sean Owen <so...@cloudera.com> Committed: Tue Feb 14 21:35:47 2017 +0000 ---------------------------------------------------------------------- developer-tools.md | 7 ++-- js/downloads.js | 18 +++------ release-process.md | 6 +-- site/committers.html | 40 +++++++++++--------- site/community.html | 8 ++-- site/contributing.html | 20 +++++----- site/developer-tools.html | 21 +++++----- site/documentation.html | 5 ++- site/js/downloads.js | 18 +++------ site/news/index.html | 10 ++--- site/news/spark-0-9-1-released.html | 2 +- site/news/spark-0-9-2-released.html | 2 +- site/news/spark-1-1-0-released.html | 2 +- site/news/spark-1-2-2-released.html | 2 +- site/news/spark-and-shark-in-the-news.html | 2 +- .../spark-summit-east-2015-videos-posted.html | 2 +- site/release-process.html | 8 ++-- site/releases/spark-release-0-8-0.html | 4 +- site/releases/spark-release-0-9-1.html | 20 +++++----- site/releases/spark-release-1-0-1.html | 8 ++-- site/releases/spark-release-1-0-2.html | 2 +- site/releases/spark-release-1-1-0.html | 6 +-- site/releases/spark-release-1-2-0.html | 2 +- site/releases/spark-release-1-3-0.html | 6 +-- site/releases/spark-release-1-3-1.html | 6 +-- site/releases/spark-release-1-4-0.html | 4 +- site/releases/spark-release-1-5-0.html | 30 +++++++-------- site/releases/spark-release-1-6-0.html | 20 +++++----- site/releases/spark-release-2-0-0.html | 36 +++++++++--------- site/releases/spark-release-2-1-0.html | 24 ++++++------ site/sitemap.xml | 12 +++--- site/third-party-projects.html | 2 
+- site/versioning-policy.html | 2 +- 33 files changed, 174 insertions(+), 183 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark-website/blob/ae58782b/developer-tools.md ---------------------------------------------------------------------- diff --git a/developer-tools.md b/developer-tools.md index 77d225f..e8853b8 100644 --- a/developer-tools.md +++ b/developer-tools.md @@ -115,8 +115,7 @@ When running tests for a pull request on Jenkins, you can add special phrases to your pull request to change testing behavior. This includes: - `[test-maven]` - signals to test the pull request using maven -- `[test-hadoop1.0]` - signals to test using Spark's Hadoop 1.0 profile (other options include -Hadoop 2.0, 2.2, and 2.3) +- `[test-hadoop2.7]` - signals to test using Spark's Hadoop 2.7 profile <h3>Organizing Imports</h3> @@ -143,8 +142,8 @@ automatically update the IntelliJ project. - As documented in <a href="http://spark.apache.org/docs/latest/building-spark.html">Building Spark</a>, some build configurations require specific profiles to be enabled. The same profiles that are enabled with `-P[profile name]` above may be enabled on the -Profiles screen in the Import wizard. For example, if developing for Hadoop 2.4 with YARN support, -enable profiles yarn and hadoop-2.4. These selections can be changed later by accessing the +Profiles screen in the Import wizard. For example, if developing for Hadoop 2.7 with YARN support, +enable profiles `yarn` and `hadoop-2.7`. These selections can be changed later by accessing the "Maven Projects" tool window from the View menu, and expanding the Profiles section. 
Other tips: http://git-wip-us.apache.org/repos/asf/spark-website/blob/ae58782b/js/downloads.js ---------------------------------------------------------------------- diff --git a/js/downloads.js b/js/downloads.js index 36a04c7..04a6b13 100644 --- a/js/downloads.js +++ b/js/downloads.js @@ -16,26 +16,18 @@ var hadoop2p3 = {pretty: "Pre-built for Hadoop 2.3", tag: "hadoop2.3"}; var hadoop2p4 = {pretty: "Pre-built for Hadoop 2.4", tag: "hadoop2.4"}; var hadoop2p6 = {pretty: "Pre-built for Hadoop 2.6", tag: "hadoop2.6"}; var hadoop2p7 = {pretty: "Pre-built for Hadoop 2.7 and later", tag: "hadoop2.7"}; -//var mapr3 = {pretty: "Pre-built for MapR 3.X", tag: "mapr3"}; -//var mapr4 = {pretty: "Pre-built for MapR 4.X", tag: "mapr4"}; - -// 0.7+ -//var packagesV1 = [hadoop1, cdh4, sources]; -// 0.8.1+ -//var packagesV2 = [hadoop2].concat(packagesV1); -// 1.0.1+ -//var packagesV3 = [mapr3, mapr4].concat(packagesV2); -// 1.1.0+ -//var packagesV4 = [hadoop2p4, hadoop2p3, mapr3, mapr4].concat(packagesV1); -// 1.3.1+ -//var packagesV5 = [hadoop2p6].concat(packagesV4); + // 1.4.0+ var packagesV6 = [hadoop2p6, hadoop2p4, hadoop2p3, hadoopFree, hadoop1, cdh4, sources]; // 2.0.0+ var packagesV7 = [hadoop2p7, hadoop2p6, hadoop2p4, hadoop2p3, hadoopFree, sources]; +// 2.2.0+ +var packagesV8 = [hadoop2p7, hadoop2p6, hadoopFree, sources] // addRelease("2.0.0-preview", new Date("05/24/2016"), sources.concat(packagesV7), true, false); +//addRelease("2.2.0", new Date("x/x/2017"), packagesV8, true); + addRelease("2.1.0", new Date("12/28/2016"), packagesV7, true); addRelease("2.0.2", new Date("11/14/2016"), packagesV7, true); addRelease("2.0.1", new Date("10/03/2016"), packagesV7, true); http://git-wip-us.apache.org/repos/asf/spark-website/blob/ae58782b/release-process.md ---------------------------------------------------------------------- diff --git a/release-process.md b/release-process.md index 0de2adc..28ecaad 100644 --- a/release-process.md +++ b/release-process.md @@ -42,7 +42,7 @@ 
places to change are: - **Maven build**. Ensure that the version in all the `pom.xml` files is `<SPARK-VERSION>-SNAPSHOT` (e.g. `1.1.1-SNAPSHOT`). This will be changed to `<SPARK-VERSION>` (e.g. 1.1.1) automatically by Maven when cutting the release. Note that there are a few exceptions that should just use -`<SPARK-VERSION>`, like `extras/java8-tests/pom.xml`. These modules are not published as artifacts. +`<SPARK-VERSION>`. These modules are not published as artifacts. - **Spark REPLs**. Look for the Spark ASCII art in `SparkILoopInit.scala` for the Scala shell and in `shell.py` for the Python REPL. - **Docs**. Search for VERSION in `docs/_config.yml` @@ -183,13 +183,13 @@ You can audit the ec2 set-up by launching a cluster and running this audit scrip The website repository is located at <a href="https://github.com/apache/spark-website">https://github.com/apache/spark-website</a>. -Ensure the docs were generated with the PRODUCTION=1 environment variable and with Java 7. +Ensure the docs were generated with the PRODUCTION=1 environment variable. ``` # Build the latest docs $ git checkout v1.1.1 $ cd docs -$ JAVA_HOME=$JAVA_7_HOME PRODUCTION=1 jekyll build +$ PRODUCTION=1 jekyll build # Copy the new documentation to apache $ git clone https://github.com/apache/spark-website http://git-wip-us.apache.org/repos/asf/spark-website/blob/ae58782b/site/committers.html ---------------------------------------------------------------------- diff --git a/site/committers.html b/site/committers.html index 0d692e3..d570328 100644 --- a/site/committers.html +++ b/site/committers.html @@ -484,24 +484,30 @@ follow-up can be well communicated to all Spark developers. (great!), but you risk introducing new or even worse bugs in maintenance releases (bad!). 
The decision point is when you have a bug fix and it’s not clear whether it is worth backporting.</p> -<p>I think the following facets are important to consider: -- Backports are an extremely valuable service to the community and should be considered for -any bug fix. -- Introducing a new bug in a maintenance release must be avoided at all costs. It over time would -erode confidence in our release process. -- Distributions or advanced users can always backport risky patches on their own, if they see fit.</p> - -<p>For me, the consequence of these is that we should backport in the following situations: -- Both the bug and the fix are well understood and isolated. Code being modified is well tested. -- The bug being addressed is high priority to the community. -- The backported fix does not vary widely from the master branch fix.</p> - -<p>We tend to avoid backports in the converse situations: -- The bug or fix are not well understood. For instance, it relates to interactions between complex +<p>I think the following facets are important to consider:</p> +<ul> + <li>Backports are an extremely valuable service to the community and should be considered for +any bug fix.</li> + <li>Introducing a new bug in a maintenance release must be avoided at all costs. It over time would +erode confidence in our release process.</li> + <li>Distributions or advanced users can always backport risky patches on their own, if they see fit.</li> +</ul> + +<p>For me, the consequence of these is that we should backport in the following situations:</p> +<ul> + <li>Both the bug and the fix are well understood and isolated. Code being modified is well tested.</li> + <li>The bug being addressed is high priority to the community.</li> + <li>The backported fix does not vary widely from the master branch fix.</li> +</ul> + +<p>We tend to avoid backports in the converse situations:</p> +<ul> + <li>The bug or fix are not well understood. 
For instance, it relates to interactions between complex components or third party libraries (e.g. Hadoop libraries). The code is not well tested outside -of the immediate bug being fixed. -- The bug is not clearly a high priority for the community. -- The backported fix is widely different from the master branch fix.</p> +of the immediate bug being fixed.</li> + <li>The bug is not clearly a high priority for the community.</li> + <li>The backported fix is widely different from the master branch fix.</li> +</ul> </div> </div> http://git-wip-us.apache.org/repos/asf/spark-website/blob/ae58782b/site/community.html ---------------------------------------------------------------------- diff --git a/site/community.html b/site/community.html index 154d353..9df9622 100644 --- a/site/community.html +++ b/site/community.html @@ -212,14 +212,14 @@ as it is an active forum for Spark users’ questions and answers.</p> <a href="http://stackoverflow.com/questions/tagged/apache-spark"><code>apache-spark</code></a> tag to see if your question has already been answered</li> <li>Search the nabble archive for -<a href="http://apache-spark-user-list.1001560.n3.nabble.com/">us...@spark.apache.org</a> </li> +<a href="http://apache-spark-user-list.1001560.n3.nabble.com/">us...@spark.apache.org</a></li> </ul> </li> - <li>Please follow the StackOverflow <a href="http://stackoverflow.com/help/how-to-ask">code of conduct</a> </li> + <li>Please follow the StackOverflow <a href="http://stackoverflow.com/help/how-to-ask">code of conduct</a></li> <li>Always use the <code>apache-spark</code> tag when asking questions</li> <li>Please also use a secondary tag to specify components so subject matter experts can more easily find them. Examples include: <code>pyspark</code>, <code>spark-dataframe</code>, <code>spark-streaming</code>, <code>spark-r</code>, <code>spark-mllib</code>, -<code>spark-ml</code>, <code>spark-graphx</code>, <code>spark-graphframes</code>, <code>spark-tensorframes</code>, etc. 
</li> +<code>spark-ml</code>, <code>spark-graphx</code>, <code>spark-graphframes</code>, <code>spark-tensorframes</code>, etc.</li> <li>Please do not cross-post between StackOverflow and the mailing lists</li> <li>No jobs, sales, or solicitation is permitted on StackOverflow</li> </ul> @@ -253,7 +253,7 @@ project, and scenarios, it is recommended you use the u...@spark.apache.org mail <li>Search StackOverflow at <a href="http://stackoverflow.com/questions/tagged/apache-spark"><code>apache-spark</code></a> to see if your question has already been answered</li> <li>Search the nabble archive for -<a href="http://apache-spark-user-list.1001560.n3.nabble.com/">us...@spark.apache.org</a> </li> +<a href="http://apache-spark-user-list.1001560.n3.nabble.com/">us...@spark.apache.org</a></li> </ul> </li> <li>Tagging the subject line of your email will help you get a faster response, e.g. http://git-wip-us.apache.org/repos/asf/spark-website/blob/ae58782b/site/contributing.html ---------------------------------------------------------------------- diff --git a/site/contributing.html b/site/contributing.html index 465ce6f..7d74c22 100644 --- a/site/contributing.html +++ b/site/contributing.html @@ -243,7 +243,7 @@ edit the Markdown source files in Spark’s <a href="https://github.com/apache/spark/tree/master/docs"><code>docs/</code></a> directory, whose <code>README</code> file shows how to build the documentation locally to test your changes. The process to propose a doc change is otherwise the same as the process for proposing code -changes below. </p> +changes below.</p> <p>To propose a change to the rest of the documentation (that is, docs that do <em>not</em> appear under <a href="https://spark.apache.org/docs/">https://spark.apache.org/docs/</a>), similarly, edit the Markdown in the @@ -346,7 +346,7 @@ or similar search tools. 
Often, the problem has been discussed before, with a resolution that doesn’t require a code change, or recording what kinds of changes will not be accepted as a resolution.</li> <li>Search JIRA for existing issues: -<a href="https://issues.apache.org/jira/browse/SPARK">https://issues.apache.org/jira/browse/SPARK</a> </li> +<a href="https://issues.apache.org/jira/browse/SPARK">https://issues.apache.org/jira/browse/SPARK</a></li> <li>Type <code>spark [search terms]</code> at the top right search box. If a logically similar issue already exists, then contribute to the discussion on the existing JIRA and pull request first, instead of creating a new one.</li> @@ -397,7 +397,7 @@ rather than receive iterations of review.</p> <li>Introduces complex new functionality, especially an API that needs to be supported</li> <li>Adds complexity that only helps a niche use case</li> <li>Adds user-space functionality that does not need to be maintained in Spark, but could be hosted -externally and indexed by <a href="http://spark-packages.org/">spark-packages.org</a> </li> +externally and indexed by <a href="http://spark-packages.org/">spark-packages.org</a></li> <li>Changes a public API or semantics (rarely allowed)</li> <li>Adds large dependencies</li> <li>Changes versions of existing dependencies</li> @@ -456,7 +456,7 @@ Example: <code>Fix typos in Foo scaladoc</code></li> and there is a workaround</li> <li>Minor: a niche use case is missing some support, but it does not affect usage or is easily worked around</li> - <li>Trivial: a nice-to-have change but unlikely to be any problem in practice otherwise </li> + <li>Trivial: a nice-to-have change but unlikely to be any problem in practice otherwise</li> </ol> </li> <li><strong>Component</strong></li> @@ -680,11 +680,13 @@ instead of Scala docs style.</p> <p>Always import packages using absolute paths (e.g. <code>scala.util.Random</code>) instead of relative ones (e.g. <code>util.Random</code>). 
In addition, sort imports in the following order -(use alphabetical order within each group): -- <code>java.*</code> and <code>javax.*</code> -- <code>scala.*</code> -- Third-party libraries (<code>org.*</code>, <code>com.*</code>, etc) -- Project classes (<code>org.apache.spark.*</code>)</p> +(use alphabetical order within each group):</p> +<ul> + <li><code>java.*</code> and <code>javax.*</code></li> + <li><code>scala.*</code></li> + <li>Third-party libraries (<code>org.*</code>, <code>com.*</code>, etc)</li> + <li>Project classes (<code>org.apache.spark.*</code>)</li> +</ul> <p>The <a href="https://plugins.jetbrains.com/plugin/7350">IntelliJ import organizer plugin</a> can organize imports for you. Use this configuration for the plugin (configured under http://git-wip-us.apache.org/repos/asf/spark-website/blob/ae58782b/site/developer-tools.html ---------------------------------------------------------------------- diff --git a/site/developer-tools.html b/site/developer-tools.html index 24b1ce1..ed84669 100644 --- a/site/developer-tools.html +++ b/site/developer-tools.html @@ -295,8 +295,7 @@ your pull request to change testing behavior. This includes:</p> <ul> <li><code>[test-maven]</code> - signals to test the pull request using maven</li> - <li><code>[test-hadoop1.0]</code> - signals to test using Spark’s Hadoop 1.0 profile (other options include -Hadoop 2.0, 2.2, and 2.3)</li> + <li><code>[test-hadoop2.7]</code> - signals to test using Spark’s Hadoop 2.7 profile</li> </ul> <h3>Organizing Imports</h3> @@ -325,8 +324,8 @@ automatically update the IntelliJ project.</li> <li>As documented in <a href="http://spark.apache.org/docs/latest/building-spark.html">Building Spark</a>, some build configurations require specific profiles to be enabled. The same profiles that are enabled with <code>-P[profile name]</code> above may be enabled on the -Profiles screen in the Import wizard. 
For example, if developing for Hadoop 2.4 with YARN support, -enable profiles yarn and hadoop-2.4. These selections can be changed later by accessing the +Profiles screen in the Import wizard. For example, if developing for Hadoop 2.7 with YARN support, +enable profiles <code>yarn</code> and <code>hadoop-2.7</code>. These selections can be changed later by accessing the “Maven Projects” tool window from the View menu, and expanding the Profiles section.</li> </ul> @@ -354,13 +353,13 @@ compiler options” field. It will work then although the option will come reimports. If you try to build any of the projects using quasiquotes (eg., sql) then you will need to make that jar a compiler plugin (just below “Additional compiler options”). Otherwise you will see errors like: -<code> -/Users/irashid/github/spark/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala + <pre><code>/Users/irashid/github/spark/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala Error:(147, 9) value q is not a member of StringContext Note: implicit class Evaluate2 is not applicable here because it comes after the application point and it lacks an explicit result type q""" ^ -</code></li> +</code></pre> + </li> </ul> <h4>Eclipse</h4> @@ -443,12 +442,12 @@ consider mirroring this file or including it on a custom AMI.</li> <li>Copy the expanded YourKit files to each node using copy-dir: <code>~/spark-ec2/copy-dir /root/yjp-12.0.5</code></li> <li>Configure the Spark JVMs to use the YourKit profiling agent by editing <code>~/spark/conf/spark-env.sh</code> and adding the lines -<code> -SPARK_DAEMON_JAVA_OPTS+=" -agentpath:/root/yjp-12.0.5/bin/linux-x86-64/libyjpagent.so=sampling" + <pre><code>SPARK_DAEMON_JAVA_OPTS+=" -agentpath:/root/yjp-12.0.5/bin/linux-x86-64/libyjpagent.so=sampling" export SPARK_DAEMON_JAVA_OPTS SPARK_JAVA_OPTS+=" -agentpath:/root/yjp-12.0.5/bin/linux-x86-64/libyjpagent.so=sampling" export 
SPARK_JAVA_OPTS -</code></li> +</code></pre> + </li> <li>Copy the updated configuration to each node: <code>~/spark-ec2/copy-dir ~/spark/conf/spark-env.sh</code></li> <li>Restart your Spark cluster: <code>~/spark/bin/stop-all.sh</code> and <code>~/spark/bin/start-all.sh</code></li> <li>By default, the YourKit profiler agents use ports 10001-10010. To connect the YourKit desktop @@ -473,7 +472,7 @@ cluster with the same name, your security group settings will be re-used.</li> <h4>In Spark unit tests</h4> <p>When running Spark tests through SBT, add <code>javaOptions in Test += "-agentpath:/path/to/yjp"</code> -to <code>SparkBuild.scala</code> to launch the tests with the YourKit profiler agent enabled. +to <code>SparkBuild.scala</code> to launch the tests with the YourKit profiler agent enabled.<br /> The platform-specific paths to the profiler agents are listed in the <a href="http://www.yourkit.com/docs/80/help/agent.jsp">YourKit documentation</a>.</p> http://git-wip-us.apache.org/repos/asf/spark-website/blob/ae58782b/site/documentation.html ---------------------------------------------------------------------- diff --git a/site/documentation.html b/site/documentation.html index 6e73003..fb4b6b5 100644 --- a/site/documentation.html +++ b/site/documentation.html @@ -266,12 +266,13 @@ </ul> <h4><a name="meetup-videos"></a>Meetup Talk Videos</h4> -<p>In addition to the videos listed below, you can also view <a href="http://www.meetup.com/spark-users/files/">all slides from Bay Area meetups here</a>. 
+<p>In addition to the videos listed below, you can also view <a href="http://www.meetup.com/spark-users/files/">all slides from Bay Area meetups here</a>.</p> <style type="text/css"> .video-meta-info { font-size: 0.95em; } -</style></p> +</style> + <ul> <li><a href="http://www.youtube.com/watch?v=NUQ-8to2XAk&list=PL-x35fyliRwiP3YteXbnhk0QGOtYLBT3a">Spark 1.0 and Beyond</a> (<a href="http://files.meetup.com/3138542/Spark%201.0%20Meetup.ppt">slides</a>) <span class="video-meta-info">by Patrick Wendell, at Cisco in San Jose, 2014-04-23</span></li> http://git-wip-us.apache.org/repos/asf/spark-website/blob/ae58782b/site/js/downloads.js ---------------------------------------------------------------------- diff --git a/site/js/downloads.js b/site/js/downloads.js index 36a04c7..04a6b13 100644 --- a/site/js/downloads.js +++ b/site/js/downloads.js @@ -16,26 +16,18 @@ var hadoop2p3 = {pretty: "Pre-built for Hadoop 2.3", tag: "hadoop2.3"}; var hadoop2p4 = {pretty: "Pre-built for Hadoop 2.4", tag: "hadoop2.4"}; var hadoop2p6 = {pretty: "Pre-built for Hadoop 2.6", tag: "hadoop2.6"}; var hadoop2p7 = {pretty: "Pre-built for Hadoop 2.7 and later", tag: "hadoop2.7"}; -//var mapr3 = {pretty: "Pre-built for MapR 3.X", tag: "mapr3"}; -//var mapr4 = {pretty: "Pre-built for MapR 4.X", tag: "mapr4"}; - -// 0.7+ -//var packagesV1 = [hadoop1, cdh4, sources]; -// 0.8.1+ -//var packagesV2 = [hadoop2].concat(packagesV1); -// 1.0.1+ -//var packagesV3 = [mapr3, mapr4].concat(packagesV2); -// 1.1.0+ -//var packagesV4 = [hadoop2p4, hadoop2p3, mapr3, mapr4].concat(packagesV1); -// 1.3.1+ -//var packagesV5 = [hadoop2p6].concat(packagesV4); + // 1.4.0+ var packagesV6 = [hadoop2p6, hadoop2p4, hadoop2p3, hadoopFree, hadoop1, cdh4, sources]; // 2.0.0+ var packagesV7 = [hadoop2p7, hadoop2p6, hadoop2p4, hadoop2p3, hadoopFree, sources]; +// 2.2.0+ +var packagesV8 = [hadoop2p7, hadoop2p6, hadoopFree, sources] // addRelease("2.0.0-preview", new Date("05/24/2016"), sources.concat(packagesV7), true, false); 
+//addRelease("2.2.0", new Date("x/x/2017"), packagesV8, true); + addRelease("2.1.0", new Date("12/28/2016"), packagesV7, true); addRelease("2.0.2", new Date("11/14/2016"), packagesV7, true); addRelease("2.0.1", new Date("10/03/2016"), packagesV7, true); http://git-wip-us.apache.org/repos/asf/spark-website/blob/ae58782b/site/news/index.html ---------------------------------------------------------------------- diff --git a/site/news/index.html b/site/news/index.html index 8b113a6..99a4521 100644 --- a/site/news/index.html +++ b/site/news/index.html @@ -451,7 +451,7 @@ The Summit will contain <a href="https://spark-summit.org/2015/schedule/">presen <h3 class="entry-title"><a href="/news/spark-summit-east-2015-videos-posted.html">Spark Summit East 2015 Videos Posted</a></h3> <div class="entry-date">April 20, 2015</div> </header> - <div class="entry-content"><p>The videos and slides for Spark Summit East 2015 are now all <a href="http://spark-summit.org/east/2015">available online</a>. Watch them to get the latest news from the Spark community as well as use cases and applications built on top. </p> + <div class="entry-content"><p>The videos and slides for Spark Summit East 2015 are now all <a href="http://spark-summit.org/east/2015">available online</a>. Watch them to get the latest news from the Spark community as well as use cases and applications built on top.</p> </div> </article> @@ -461,7 +461,7 @@ The Summit will contain <a href="https://spark-summit.org/2015/schedule/">presen <h3 class="entry-title"><a href="/news/spark-1-2-2-released.html">Spark 1.2.2 and 1.3.1 released</a></h3> <div class="entry-date">April 17, 2015</div> </header> - <div class="entry-content"><p>We are happy to announce the availability of <a href="/releases/spark-release-1-2-2.html" title="Spark Release 1.2.2">Spark 1.2.2</a> and <a href="/releases/spark-release-1-3-1.html" title="Spark Release 1.3.1">Spark 1.3.1</a>! 
These are both maintenance releases that collectively feature the work of more than 90 developers. </p> + <div class="entry-content"><p>We are happy to announce the availability of <a href="/releases/spark-release-1-2-2.html" title="Spark Release 1.2.2">Spark 1.2.2</a> and <a href="/releases/spark-release-1-3-1.html" title="Spark Release 1.3.1">Spark 1.3.1</a>! These are both maintenance releases that collectively feature the work of more than 90 developers.</p> </div> </article> @@ -573,7 +573,7 @@ The Summit will contain <a href="https://spark-summit.org/2015/schedule/">presen </header> <div class="entry-content"><p>We are happy to announce the availability of <a href="/releases/spark-release-0-9-2.html" title="Spark Release 0.9.2"> Spark 0.9.2</a>! Apache Spark 0.9.2 is a maintenance release with bug fixes. We recommend all 0.9.x users to upgrade to this stable release. -Contributions to this release came from 28 developers. </p> +Contributions to this release came from 28 developers.</p> </div> </article> @@ -644,7 +644,7 @@ about the latest happenings in Spark.</p> <div class="entry-content"><p>We are happy to announce the availability of <a href="/releases/spark-release-0-9-1.html" title="Spark Release 0.9.1"> Spark 0.9.1</a>! Apache Spark 0.9.1 is a maintenance release with bug fixes, performance improvements, better stability with YARN and improved parity of the Scala and Python API. We recommend all 0.9.0 users to upgrade to this stable release. -Contributions to this release came from 37 developers. 
</p> +Contributions to this release came from 37 developers.</p> </div> </article> @@ -894,7 +894,7 @@ Over 450 Spark developers and enthusiasts from 13 countries and more than 180 co <li><a href="http://data-informed.com/spark-an-open-source-engine-for-iterative-data-mining/">DataInformed</a> interviewed two Spark users and wrote about their applications in anomaly detection, predictive analytics and data mining.</li> </ul> -<p>In other news, there will be a full day of tutorials on Spark and Shark at the <a href="http://strataconf.com/strata2013">O’Reilly Strata conference</a> in February. They include a three-hour <a href="http://strataconf.com/strata2013/public/schedule/detail/27438">introduction to Spark, Shark and BDAS</a> Tuesday morning, and a three-hour <a href="http://strataconf.com/strata2013/public/schedule/detail/27440">hands-on exercise session</a>. </p> +<p>In other news, there will be a full day of tutorials on Spark and Shark at the <a href="http://strataconf.com/strata2013">O’Reilly Strata conference</a> in February. They include a three-hour <a href="http://strataconf.com/strata2013/public/schedule/detail/27438">introduction to Spark, Shark and BDAS</a> Tuesday morning, and a three-hour <a href="http://strataconf.com/strata2013/public/schedule/detail/27440">hands-on exercise session</a>.</p> </div> </article> http://git-wip-us.apache.org/repos/asf/spark-website/blob/ae58782b/site/news/spark-0-9-1-released.html ---------------------------------------------------------------------- diff --git a/site/news/spark-0-9-1-released.html b/site/news/spark-0-9-1-released.html index 653feac..cbd7707 100644 --- a/site/news/spark-0-9-1-released.html +++ b/site/news/spark-0-9-1-released.html @@ -198,7 +198,7 @@ <p>We are happy to announce the availability of <a href="/releases/spark-release-0-9-1.html" title="Spark Release 0.9.1"> Spark 0.9.1</a>! 
Apache Spark 0.9.1 is a maintenance release with bug fixes, performance improvements, better stability with YARN and improved parity of the Scala and Python API. We recommend all 0.9.0 users to upgrade to this stable release. -Contributions to this release came from 37 developers. </p> +Contributions to this release came from 37 developers.</p> <p>Visit the <a href="/releases/spark-release-0-9-1.html" title="Spark Release 0.9.1">release notes</a> to read about the new features, or <a href="/downloads.html">download</a> the release today.</p> http://git-wip-us.apache.org/repos/asf/spark-website/blob/ae58782b/site/news/spark-0-9-2-released.html ---------------------------------------------------------------------- diff --git a/site/news/spark-0-9-2-released.html b/site/news/spark-0-9-2-released.html index 1cb05e7..be9214e 100644 --- a/site/news/spark-0-9-2-released.html +++ b/site/news/spark-0-9-2-released.html @@ -197,7 +197,7 @@ <p>We are happy to announce the availability of <a href="/releases/spark-release-0-9-2.html" title="Spark Release 0.9.2"> Spark 0.9.2</a>! Apache Spark 0.9.2 is a maintenance release with bug fixes. We recommend all 0.9.x users to upgrade to this stable release. -Contributions to this release came from 28 developers. 
</p> +Contributions to this release came from 28 developers.</p> <p>Visit the <a href="/releases/spark-release-0-9-2.html" title="Spark Release 0.9.2">release notes</a> to read about the new features, or <a href="/downloads.html">download</a> the release today.</p> http://git-wip-us.apache.org/repos/asf/spark-website/blob/ae58782b/site/news/spark-1-1-0-released.html ---------------------------------------------------------------------- diff --git a/site/news/spark-1-1-0-released.html b/site/news/spark-1-1-0-released.html index c7dc50a..7722cc7 100644 --- a/site/news/spark-1-1-0-released.html +++ b/site/news/spark-1-1-0-released.html @@ -197,7 +197,7 @@ <p>We are happy to announce the availability of <a href="/releases/spark-release-1-1-0.html" title="Spark Release 1.1.0">Spark 1.1.0</a>! Spark 1.1.0 is the second release on the API-compatible 1.X line. It is Spark’s largest release ever, with contributions from 171 developers!</p> -<p>This release brings operational and performance improvements in Spark core including a new implementation of the Spark shuffle designed for very large scale workloads. Spark 1.1 adds significant extensions to the newest Spark modules, MLlib and Spark SQL. Spark SQL introduces a JDBC server, byte code generation for fast expression evaluation, a public types API, JSON support, and other features and optimizations. MLlib introduces a new statistics libary along with several new algorithms and optimizations. Spark 1.1 also builds out Spark’s Python support and adds new components to the Spark Streaming module. </p> +<p>This release brings operational and performance improvements in Spark core including a new implementation of the Spark shuffle designed for very large scale workloads. Spark 1.1 adds significant extensions to the newest Spark modules, MLlib and Spark SQL. Spark SQL introduces a JDBC server, byte code generation for fast expression evaluation, a public types API, JSON support, and other features and optimizations. 
MLlib introduces a new statistics libary along with several new algorithms and optimizations. Spark 1.1 also builds out Spark’s Python support and adds new components to the Spark Streaming module.</p> <p>Visit the <a href="/releases/spark-release-1-1-0.html" title="Spark Release 1.1.0">release notes</a> to read about the new features, or <a href="/downloads.html">download</a> the release today.</p> http://git-wip-us.apache.org/repos/asf/spark-website/blob/ae58782b/site/news/spark-1-2-2-released.html ---------------------------------------------------------------------- diff --git a/site/news/spark-1-2-2-released.html b/site/news/spark-1-2-2-released.html index 3d3bb32..a54bb20 100644 --- a/site/news/spark-1-2-2-released.html +++ b/site/news/spark-1-2-2-released.html @@ -195,7 +195,7 @@ <h2>Spark 1.2.2 and 1.3.1 released</h2> -<p>We are happy to announce the availability of <a href="/releases/spark-release-1-2-2.html" title="Spark Release 1.2.2">Spark 1.2.2</a> and <a href="/releases/spark-release-1-3-1.html" title="Spark Release 1.3.1">Spark 1.3.1</a>! These are both maintenance releases that collectively feature the work of more than 90 developers. </p> +<p>We are happy to announce the availability of <a href="/releases/spark-release-1-2-2.html" title="Spark Release 1.2.2">Spark 1.2.2</a> and <a href="/releases/spark-release-1-3-1.html" title="Spark Release 1.3.1">Spark 1.3.1</a>! 
These are both maintenance releases that collectively feature the work of more than 90 developers.</p> <p>To download either release, visit the <a href="/downloads.html">downloads</a> page.</p> http://git-wip-us.apache.org/repos/asf/spark-website/blob/ae58782b/site/news/spark-and-shark-in-the-news.html ---------------------------------------------------------------------- diff --git a/site/news/spark-and-shark-in-the-news.html b/site/news/spark-and-shark-in-the-news.html index 51a3843..deb7a6e 100644 --- a/site/news/spark-and-shark-in-the-news.html +++ b/site/news/spark-and-shark-in-the-news.html @@ -205,7 +205,7 @@ <li><a href="http://data-informed.com/spark-an-open-source-engine-for-iterative-data-mining/">DataInformed</a> interviewed two Spark users and wrote about their applications in anomaly detection, predictive analytics and data mining.</li> </ul> -<p>In other news, there will be a full day of tutorials on Spark and Shark at the <a href="http://strataconf.com/strata2013">O’Reilly Strata conference</a> in February. They include a three-hour <a href="http://strataconf.com/strata2013/public/schedule/detail/27438">introduction to Spark, Shark and BDAS</a> Tuesday morning, and a three-hour <a href="http://strataconf.com/strata2013/public/schedule/detail/27440">hands-on exercise session</a>. </p> +<p>In other news, there will be a full day of tutorials on Spark and Shark at the <a href="http://strataconf.com/strata2013">O’Reilly Strata conference</a> in February. 
They include a three-hour <a href="http://strataconf.com/strata2013/public/schedule/detail/27438">introduction to Spark, Shark and BDAS</a> Tuesday morning, and a three-hour <a href="http://strataconf.com/strata2013/public/schedule/detail/27440">hands-on exercise session</a>.</p> <p> http://git-wip-us.apache.org/repos/asf/spark-website/blob/ae58782b/site/news/spark-summit-east-2015-videos-posted.html ---------------------------------------------------------------------- diff --git a/site/news/spark-summit-east-2015-videos-posted.html b/site/news/spark-summit-east-2015-videos-posted.html index 962aa1e..a70d565 100644 --- a/site/news/spark-summit-east-2015-videos-posted.html +++ b/site/news/spark-summit-east-2015-videos-posted.html @@ -195,7 +195,7 @@ <h2>Spark Summit East 2015 Videos Posted</h2> -<p>The videos and slides for Spark Summit East 2015 are now all <a href="http://spark-summit.org/east/2015">available online</a>. Watch them to get the latest news from the Spark community as well as use cases and applications built on top. </p> +<p>The videos and slides for Spark Summit East 2015 are now all <a href="http://spark-summit.org/east/2015">available online</a>. 
Watch them to get the latest news from the Spark community as well as use cases and applications built on top.</p> <p>If you like what you see, consider joining us at the <a href="http://spark-summit.org/2015/agenda">2015 Spark Summit</a> in San Francisco.</p> http://git-wip-us.apache.org/repos/asf/spark-website/blob/ae58782b/site/release-process.html ---------------------------------------------------------------------- diff --git a/site/release-process.html b/site/release-process.html index 4e21109..eae0675 100644 --- a/site/release-process.html +++ b/site/release-process.html @@ -217,7 +217,7 @@ <ol> <li>updating the Spark website</li> <li>writing release notes</li> - <li>announcing the release </li> + <li>announcing the release</li> </ol> </li> </ol> @@ -239,7 +239,7 @@ places to change are:</p> <li><strong>Maven build</strong>. Ensure that the version in all the <code>pom.xml</code> files is <code><SPARK-VERSION>-SNAPSHOT</code> (e.g. <code>1.1.1-SNAPSHOT</code>). This will be changed to <code><SPARK-VERSION></code> (e.g. 1.1.1) automatically by Maven when cutting the release. Note that there are a few exceptions that should just use -<code><SPARK-VERSION></code>, like <code>extras/java8-tests/pom.xml</code>. These modules are not published as artifacts.</li> +<code><SPARK-VERSION></code>. These modules are not published as artifacts.</li> <li><strong>Spark REPLs</strong>. Look for the Spark ASCII art in <code>SparkILoopInit.scala</code> for the Scala shell and in <code>shell.py</code> for the Python REPL.</li> <li><strong>Docs</strong>. Search for VERSION in <code>docs/_config.yml</code></li> @@ -383,12 +383,12 @@ You can audit the ec2 set-up by launching a cluster and running this audit scrip <p>The website repository is located at <a href="https://github.com/apache/spark-website">https://github.com/apache/spark-website</a>. 
-Ensure the docs were generated with the PRODUCTION=1 environment variable and with Java 7.</p> +Ensure the docs were generated with the PRODUCTION=1 environment variable.</p> <pre><code># Build the latest docs $ git checkout v1.1.1 $ cd docs -$ JAVA_HOME=$JAVA_7_HOME PRODUCTION=1 jekyll build +$ PRODUCTION=1 jekyll build # Copy the new documentation to apache $ git clone https://github.com/apache/spark-website http://git-wip-us.apache.org/repos/asf/spark-website/blob/ae58782b/site/releases/spark-release-0-8-0.html ---------------------------------------------------------------------- diff --git a/site/releases/spark-release-0-8-0.html b/site/releases/spark-release-0-8-0.html index 342f97b..145ebc5 100644 --- a/site/releases/spark-release-0-8-0.html +++ b/site/releases/spark-release-0-8-0.html @@ -219,13 +219,13 @@ <p>Sparkâs internal job scheduler has been refactored and extended to include more sophisticated scheduling policies. In particular, a <a href="http://spark.incubator.apache.org/docs/0.8.0/job-scheduling.html#scheduling-within-an-application">fair scheduler</a> implementation now allows multiple users to share an instance of Spark, which helps users running shorter jobs to achieve good performance, even when longer-running jobs are running in parallel. Support for topology-aware scheduling has been extended, including the ability to take into account rack locality and support for multiple executors on a single machine.</p> <h3 id="easier-deployment-and-linking">Easier Deployment and Linking</h3> -<p>User programs can now link to Spark no matter which Hadoop version they need, without having to publish a version of <code>spark-core</code> specifically for that Hadoop version. An explanation of how to link against different Hadoop versions is provided <a href="http://spark.incubator.apache.org/docs/0.8.0/scala-programming-guide.html#linking-with-spark">here</a>. 
</p> +<p>User programs can now link to Spark no matter which Hadoop version they need, without having to publish a version of <code>spark-core</code> specifically for that Hadoop version. An explanation of how to link against different Hadoop versions is provided <a href="http://spark.incubator.apache.org/docs/0.8.0/scala-programming-guide.html#linking-with-spark">here</a>.</p> <h3 id="expanded-ec2-capabilities">Expanded EC2 Capabilities</h3> <p>Spark’s EC2 scripts now support launching in any availability zone. Support has also been added for EC2 instance types which use the newer “HVM” architecture. This includes the cluster compute (cc1/cc2) family of instance types. We’ve also added support for running newer versions of HDFS alongside Spark. Finally, we’ve added the ability to launch clusters with maintenance releases of Spark in addition to launching the newest release.</p> <h3 id="improved-documentation">Improved Documentation</h3> -<p>This release adds documentation about cluster hardware provisioning and inter-operation with common Hadoop distributions. Docs are also included to cover the MLlib machine learning functions and new cluster monitoring features. Existing documentation has been updated to reflect changes in building and deploying Spark. </p> +<p>This release adds documentation about cluster hardware provisioning and inter-operation with common Hadoop distributions. Docs are also included to cover the MLlib machine learning functions and new cluster monitoring features.
Existing documentation has been updated to reflect changes in building and deploying Spark.</p> <h3 id="other-improvements">Other Improvements</h3> <ul> http://git-wip-us.apache.org/repos/asf/spark-website/blob/ae58782b/site/releases/spark-release-0-9-1.html ---------------------------------------------------------------------- diff --git a/site/releases/spark-release-0-9-1.html b/site/releases/spark-release-0-9-1.html index 23924b5..a20f863 100644 --- a/site/releases/spark-release-0-9-1.html +++ b/site/releases/spark-release-0-9-1.html @@ -210,9 +210,9 @@ <li>Fixed hash collision bug in external spilling [<a href="https://issues.apache.org/jira/browse/SPARK-1113">SPARK-1113</a>]</li> <li>Fixed conflict with Sparkâs log4j for users relying on other logging backends [<a href="https://issues.apache.org/jira/browse/SPARK-1190">SPARK-1190</a>]</li> <li>Fixed Graphx missing from Spark assembly jar in maven builds</li> - <li>Fixed silent failures due to map output status exceeding Akka frame size [<a href="https://issues.apache.org/jira/browse/SPARK-1244">SPARK-1244</a>] </li> - <li>Removed Sparkâs unnecessary direct dependency on ASM [<a href="https://issues.apache.org/jira/browse/SPARK-782">SPARK-782</a>] </li> - <li>Removed metrics-ganglia from default build due to LGPL license conflict [<a href="https://issues.apache.org/jira/browse/SPARK-1167">SPARK-1167</a>] </li> + <li>Fixed silent failures due to map output status exceeding Akka frame size [<a href="https://issues.apache.org/jira/browse/SPARK-1244">SPARK-1244</a>]</li> + <li>Removed Sparkâs unnecessary direct dependency on ASM [<a href="https://issues.apache.org/jira/browse/SPARK-782">SPARK-782</a>]</li> + <li>Removed metrics-ganglia from default build due to LGPL license conflict [<a href="https://issues.apache.org/jira/browse/SPARK-1167">SPARK-1167</a>]</li> <li>Fixed bug in distribution tarball not containing spark assembly jar [<a href="https://issues.apache.org/jira/browse/SPARK-1184">SPARK-1184</a>]</li> 
<li>Fixed bug causing infinite NullPointerException failures due to a null in map output locations [<a href="https://issues.apache.org/jira/browse/SPARK-1124">SPARK-1124</a>]</li> <li>Fixed bugs in post-job cleanup of schedulerâs data structures</li> @@ -228,7 +228,7 @@ <li>Fixed bug making Spark application stall when YARN registration fails [<a href="https://issues.apache.org/jira/browse/SPARK-1032">SPARK-1032</a>]</li> <li>Race condition in getting HDFS delegation tokens in yarn-client mode [<a href="https://issues.apache.org/jira/browse/SPARK-1203">SPARK-1203</a>]</li> <li>Fixed bug in yarn-client mode not exiting properly [<a href="https://issues.apache.org/jira/browse/SPARK-1049">SPARK-1049</a>]</li> - <li>Fixed regression bug in ADD_JAR environment variable not correctly adding custom jars [<a href="https://issues.apache.org/jira/browse/SPARK-1089">SPARK-1089</a>] </li> + <li>Fixed regression bug in ADD_JAR environment variable not correctly adding custom jars [<a href="https://issues.apache.org/jira/browse/SPARK-1089">SPARK-1089</a>]</li> </ul> <h3 id="improvements-to-other-deployment-scenarios">Improvements to other deployment scenarios</h3> @@ -239,19 +239,19 @@ <h3 id="optimizations-to-mllib">Optimizations to MLLib</h3> <ul> - <li>Optimized memory usage of ALS [<a href="https://issues.apache.org/jira/browse/MLLIB-25">MLLIB-25</a>] </li> + <li>Optimized memory usage of ALS [<a href="https://issues.apache.org/jira/browse/MLLIB-25">MLLIB-25</a>]</li> <li>Optimized computation of YtY for implicit ALS [<a href="https://issues.apache.org/jira/browse/SPARK-1237">SPARK-1237</a>]</li> <li>Support for negative implicit input in ALS [<a href="https://issues.apache.org/jira/browse/MLLIB-22">MLLIB-22</a>]</li> <li>Setting of a random seed in ALS [<a href="https://issues.apache.org/jira/browse/SPARK-1238">SPARK-1238</a>]</li> - <li>Faster construction of features with intercept [<a href="https://issues.apache.org/jira/browse/SPARK-1260">SPARK-1260</a>] </li> + 
<li>Faster construction of features with intercept [<a href="https://issues.apache.org/jira/browse/SPARK-1260">SPARK-1260</a>]</li> <li>Check for intercept and weight in GLMâs addIntercept [<a href="https://issues.apache.org/jira/browse/SPARK-1327">SPARK-1327</a>]</li> </ul> <h3 id="bug-fixes-and-better-api-parity-for-pyspark">Bug fixes and better API parity for PySpark</h3> <ul> <li>Fixed bug in Python de-pickling [<a href="https://issues.apache.org/jira/browse/SPARK-1135">SPARK-1135</a>]</li> - <li>Fixed bug in serialization of strings longer than 64K [<a href="https://issues.apache.org/jira/browse/SPARK-1043">SPARK-1043</a>] </li> - <li>Fixed bug that made jobs hang when base file is not available [<a href="https://issues.apache.org/jira/browse/SPARK-1025">SPARK-1025</a>] </li> + <li>Fixed bug in serialization of strings longer than 64K [<a href="https://issues.apache.org/jira/browse/SPARK-1043">SPARK-1043</a>]</li> + <li>Fixed bug that made jobs hang when base file is not available [<a href="https://issues.apache.org/jira/browse/SPARK-1025">SPARK-1025</a>]</li> <li>Added Missing RDD operations to PySpark - top, zip, foldByKey, repartition, coalesce, getStorageLevel, setName and toDebugString</li> </ul> @@ -283,13 +283,13 @@ <li>Kay Ousterhout - Multiple bug fixes in scheduler’s handling of task failures</li> <li>Kousuke Saruta - Use of https to access github</li> <li>Mark Grover - Bug fix in distribution tar.gz</li> - <li>Matei Zaharia - Bug fixes in handling of task failures due to NPE, and cleaning up of scheduler data structures </li> + <li>Matei Zaharia - Bug fixes in handling of task failures due to NPE, and cleaning up of scheduler data structures</li> <li>Nan Zhu - Bug fixes in PySpark RDD.takeSample and adding of JARs using ADD_JAR - and improvements to docs</li> <li>Nick Lanham - Added ability to make distribution tarballs with Tachyon</li> <li>Patrick Wendell - Bug fixes in ASM shading, fixes for log4j initialization, removing Ganglia due to LGPL 
license, and other miscallenous bug fixes</li> <li>Prabin Banka - RDD.zip and other missing RDD operations in PySpark</li> <li>Prashant Sharma - RDD.foldByKey in PySpark, and other PySpark doc improvements</li> - <li>Qiuzhuang - Bug fix in standalone worker </li> + <li>Qiuzhuang - Bug fix in standalone worker</li> <li>Raymond Liu - Changed working directory in ZookeeperPersistenceEngine</li> <li>Reynold Xin - Improvements to docs and test infrastructure</li> <li>Sandy Ryza - Multiple important Yarn bug fixes and improvements</li> http://git-wip-us.apache.org/repos/asf/spark-website/blob/ae58782b/site/releases/spark-release-1-0-1.html ---------------------------------------------------------------------- diff --git a/site/releases/spark-release-1-0-1.html b/site/releases/spark-release-1-0-1.html index ffa0791..67e54d1 100644 --- a/site/releases/spark-release-1-0-1.html +++ b/site/releases/spark-release-1-0-1.html @@ -267,8 +267,8 @@ <li>Cheng Hao – SQL features</li> <li>Cheng Lian – SQL features</li> <li>Christian Tzolov – build improvmenet</li> - <li>Clément MATHIEU – doc updates </li> - <li>CodingCat – doc updates and bug fix </li> + <li>Clément MATHIEU – doc updates</li> + <li>CodingCat – doc updates and bug fix</li> <li>Colin McCabe – bug fix</li> <li>Daoyuan – SQL joins</li> <li>David Lemieux – bug fix</li> @@ -284,7 +284,7 @@ <li>Kan Zhang – PySpark SQL features</li> <li>Kay Ousterhout – documentation fix</li> <li>LY Lai – bug fix</li> - <li>Lars Albertsson – bug fix </li> + <li>Lars Albertsson – bug fix</li> <li>Lei Zhang – SQL fix and feature</li> <li>Mark Hamstra – bug fix</li> <li>Matei Zaharia – doc updates and bug fix</li> @@ -306,7 +306,7 @@ <li>Shixiong Zhu – code clean-up</li> <li>Szul, Piotr – bug fix</li> <li>Takuya UESHIN – bug fixes and SQL features</li> - <li>Thomas Graves – bug fix </li> + <li>Thomas Graves – bug fix</li> <li>Uri Laserson – bug fix</li> <li>Vadim Chekan – bug fix</li> <li>Varakhedi Sujeet – ec2 r3 support</li> 
http://git-wip-us.apache.org/repos/asf/spark-website/blob/ae58782b/site/releases/spark-release-1-0-2.html ---------------------------------------------------------------------- diff --git a/site/releases/spark-release-1-0-2.html b/site/releases/spark-release-1-0-2.html index d2f9206..18d025f 100644 --- a/site/releases/spark-release-1-0-2.html +++ b/site/releases/spark-release-1-0-2.html @@ -277,7 +277,7 @@ <li>johnnywalleye - Bug fixes in MLlib</li> <li>joyyoj - Bug fix in Streaming</li> <li>kballou - Doc fix</li> - <li>lianhuiwang - Doc fix </li> + <li>lianhuiwang - Doc fix</li> <li>witgo - Bug fix in sbt</li> </ul> http://git-wip-us.apache.org/repos/asf/spark-website/blob/ae58782b/site/releases/spark-release-1-1-0.html ---------------------------------------------------------------------- diff --git a/site/releases/spark-release-1-1-0.html b/site/releases/spark-release-1-1-0.html index 922bfa8..915e4a0 100644 --- a/site/releases/spark-release-1-1-0.html +++ b/site/releases/spark-release-1-1-0.html @@ -206,7 +206,7 @@ <p>Spark SQL adds a number of new features and performance improvements in this release. A <a href="http://spark.apache.org/docs/1.1.0/sql-programming-guide.html#running-the-thrift-jdbc-server">JDBC/ODBC server</a> allows users to connect to SparkSQL from many different applications and provides shared access to cached tables. A new module provides <a href="http://spark.apache.org/docs/1.1.0/sql-programming-guide.html#json-datasets">support for loading JSON data</a> directly into Sparkâs SchemaRDD format, including automatic schema inference. Spark SQL introduces <a href="http://spark.apache.org/docs/1.1.0/sql-programming-guide.html#other-configuration-options">dynamic bytecode generation</a> in this release, a technique which significantly speeds up execution for queries that perform complex expression evaluation. 
This release also adds support for registering Python, Scala, and Java lambda functions as UDFs, which can then be called directly in SQL. Spark 1.1 adds a <a href="http://spark.apache.org/docs/1.1.0/sql-programming-guide.html#programmatically-specifying-the-schema">public types API to allow users to create SchemaRDD’s from custom data sources</a>. Finally, many optimizations have been added to the native Parquet support as well as throughout the engine.</p> <h3 id="mllib">MLlib</h3> -<p>MLlib adds several new algorithms and optimizations in this release. 1.1 introduces a <a href="https://issues.apache.org/jira/browse/SPARK-2359">new library of statistical packages</a> which provides exploratory analytic functions. These include stratified sampling, correlations, chi-squared tests and support for creating random datasets. This release adds utilities for feature extraction (<a href="https://issues.apache.org/jira/browse/SPARK-2510">Word2Vec</a> and <a href="https://issues.apache.org/jira/browse/SPARK-2511">TF-IDF</a>) and feature transformation (<a href="https://issues.apache.org/jira/browse/SPARK-2272">normalization and standard scaling</a>). Also new are support for <a href="https://issues.apache.org/jira/browse/SPARK-1553">nonnegative matrix factorization</a> and <a href="https://issues.apache.org/jira/browse/SPARK-1782">SVD via Lanczos</a>. The decision tree algorithm has been <a href="https://issues.apache.org/jira/browse/SPARK-2478">added in Python and Java</a>. A tree aggregation primitive has been added to help optimize many existing algorithms. Performance improves across the board in MLlib 1.1, with improvements of around 2-3X for many algorithms and up to 5X for large scale decision tree problems. </p> +<p>MLlib adds several new algorithms and optimizations in this release. 1.1 introduces a <a href="https://issues.apache.org/jira/browse/SPARK-2359">new library of statistical packages</a> which provides exploratory analytic functions.
These include stratified sampling, correlations, chi-squared tests and support for creating random datasets. This release adds utilities for feature extraction (<a href="https://issues.apache.org/jira/browse/SPARK-2510">Word2Vec</a> and <a href="https://issues.apache.org/jira/browse/SPARK-2511">TF-IDF</a>) and feature transformation (<a href="https://issues.apache.org/jira/browse/SPARK-2272">normalization and standard scaling</a>). Also new are support for <a href="https://issues.apache.org/jira/browse/SPARK-1553">nonnegative matrix factorization</a> and <a href="https://issues.apache.org/jira/browse/SPARK-1782">SVD via Lanczos</a>. The decision tree algorithm has been <a href="https://issues.apache.org/jira/browse/SPARK-2478">added in Python and Java</a>. A tree aggregation primitive has been added to help optimize many existing algorithms. Performance improves across the board in MLlib 1.1, with improvements of around 2-3X for many algorithms and up to 5X for large scale decision tree problems.</p> <h3 id="graphx-and-spark-streaming">GraphX and Spark Streaming</h3> <p>Spark streaming adds a new data source <a href="https://issues.apache.org/jira/browse/SPARK-1981">Amazon Kinesis</a>. For the Apache Flume, a new mode is supported which <a href="https://issues.apache.org/jira/browse/SPARK-1729">pulls data from Flume</a>, simplifying deployment and providing high availability. The first of a set of <a href="https://issues.apache.org/jira/browse/SPARK-2438">streaming machine learning algorithms</a> is introduced with streaming linear regression. Finally, <a href="https://issues.apache.org/jira/browse/SPARK-1341">rate limiting</a> has been added for streaming inputs. GraphX adds <a href="https://issues.apache.org/jira/browse/SPARK-1991">custom storage levels for vertices and edges</a> along with <a href="https://issues.apache.org/jira/browse/SPARK-2748">improved numerical precision</a> across the board.
Finally, GraphX adds a new label propagation algorithm.</p> @@ -224,7 +224,7 @@ <ul> <li>The default value of <code>spark.io.compression.codec</code> is now <code>snappy</code> for improved memory usage. Old behavior can be restored by switching to <code>lzf</code>.</li> - <li>The default value of <code>spark.broadcast.factory</code> is now <code>org.apache.spark.broadcast.TorrentBroadcastFactory</code> for improved efficiency of broadcasts. Old behavior can be restored by switching to <code>org.apache.spark.broadcast.HttpBroadcastFactory</code>. </li> + <li>The default value of <code>spark.broadcast.factory</code> is now <code>org.apache.spark.broadcast.TorrentBroadcastFactory</code> for improved efficiency of broadcasts. Old behavior can be restored by switching to <code>org.apache.spark.broadcast.HttpBroadcastFactory</code>.</li> <li>PySpark now performs external spilling during aggregations. Old behavior can be restored by setting <code>spark.shuffle.spill</code> to <code>false</code>.</li> <li>PySpark uses a new heuristic for determining the parallelism of shuffle operations. 
Old behavior can be restored by setting <code>spark.default.parallelism</code> to the number of cores in the cluster.</li> </ul> @@ -284,7 +284,7 @@ <li>Daneil Darabos – bug fixes and UI enhancements</li> <li>Daoyuan Wang – SQL fixes</li> <li>David Lemieux – bug fix</li> - <li>Davies Liu – PySpark fixes and spilling </li> + <li>Davies Liu – PySpark fixes and spilling</li> <li>DB Tsai – online summaries in MLlib and other MLlib features</li> <li>Derek Ma – bug fix</li> <li>Doris Xin – MLlib stats library and several fixes</li> http://git-wip-us.apache.org/repos/asf/spark-website/blob/ae58782b/site/releases/spark-release-1-2-0.html ---------------------------------------------------------------------- diff --git a/site/releases/spark-release-1-2-0.html b/site/releases/spark-release-1-2-0.html index a752beb..764370a 100644 --- a/site/releases/spark-release-1-2-0.html +++ b/site/releases/spark-release-1-2-0.html @@ -203,7 +203,7 @@ <p>In 1.2 Spark core upgrades two major subsystems to improve the performance and stability of very large scale shuffles. The first is Sparkâs communication manager used during bulk transfers, which upgrades to a <a href="https://issues.apache.org/jira/browse/SPARK-2468">netty-based implementation</a>. The second is Sparkâs shuffle mechanism, which upgrades to the <a href="https://issues.apache.org/jira/browse/SPARK-3280">âsort basedâ shuffle initially released in Spark 1.1</a>. These both improve the performance and stability of very large scale shuffles. Spark also adds an <a href="https://issues.apache.org/jira/browse/SPARK-3174">elastic scaling mechanism</a> designed to improve cluster utilization during long running ETL-style jobs. This is currently supported on YARN and will make its way to other cluster managers in future versions. Finally, Spark 1.2 adds support for Scala 2.11. 
For instructions on building for Scala 2.11 see the <a href="/docs/1.2.0/building-spark.html#building-for-scala-211">build documentation</a>.</p> <h3 id="spark-streaming">Spark Streaming</h3> -<p>This release includes two major feature additions to Spark’s streaming library, a Python API and a write ahead log for full driver H/A. The <a href="https://issues.apache.org/jira/browse/SPARK-2377">Python API</a> covers almost all the DStream transformations and output operations. Input sources based on text files and text over sockets are currently supported. Support for Kafka and Flume input streams in Python will be added in the next release. Second, Spark streaming now features H/A driver support through a <a href="https://issues.apache.org/jira/browse/SPARK-3129">write ahead log (WAL)</a>. In Spark 1.1 and earlier, some buffered (received but not yet processed) data can be lost during driver restarts. To prevent this Spark 1.2 adds an optional WAL, which buffers received data into a fault-tolerant file system (e.g. HDFS). See the <a href="/docs/1.2.0/streaming-programming-guide.html">streaming programming guide</a> for more details. </p> +<p>This release includes two major feature additions to Spark’s streaming library, a Python API and a write ahead log for full driver H/A. The <a href="https://issues.apache.org/jira/browse/SPARK-2377">Python API</a> covers almost all the DStream transformations and output operations. Input sources based on text files and text over sockets are currently supported. Support for Kafka and Flume input streams in Python will be added in the next release. Second, Spark streaming now features H/A driver support through a <a href="https://issues.apache.org/jira/browse/SPARK-3129">write ahead log (WAL)</a>. In Spark 1.1 and earlier, some buffered (received but not yet processed) data can be lost during driver restarts. To prevent this Spark 1.2 adds an optional WAL, which buffers received data into a fault-tolerant file system (e.g. HDFS).
See the <a href="/docs/1.2.0/streaming-programming-guide.html">streaming programming guide</a> for more details.</p> <h3 id="mllib">MLLib</h3> <p>Spark 1.2 previews a new set of machine learning APIâs in a package called spark.ml that <a href="https://issues.apache.org/jira/browse/SPARK-3530">supports learning pipelines</a>, where multiple algorithms are run in sequence with varying parameters. This type of pipeline is common in practical machine learning deployments. The new ML package uses Sparkâs SchemaRDD to represent <a href="https://issues.apache.org/jira/browse/SPARK-3573">ML datasets</a>, providing direct interoperability with Spark SQL. In addition to the new API, Spark 1.2 extends decision trees with two tree ensemble methods: <a href="https://issues.apache.org/jira/browse/SPARK-1545">random forests</a> and <a href="https://issues.apache.org/jira/browse/SPARK-1547">gradient-boosted trees</a>, among the most successful tree-based models for classification and regression. Finally, MLlib’s Python implementation receives a major update in 1.2 to simplify the process of adding Python APIs, along with better Python API coverage.</p> http://git-wip-us.apache.org/repos/asf/spark-website/blob/ae58782b/site/releases/spark-release-1-3-0.html ---------------------------------------------------------------------- diff --git a/site/releases/spark-release-1-3-0.html b/site/releases/spark-release-1-3-0.html index ed881eb..f27ccad 100644 --- a/site/releases/spark-release-1-3-0.html +++ b/site/releases/spark-release-1-3-0.html @@ -200,7 +200,7 @@ <p>To download Spark 1.3 visit the <a href="/downloads.html">downloads</a> page.</p> <h3 id="spark-core">Spark Core</h3> -<p>Spark 1.3 sees a handful of usability improvements in the core engine. The core API now supports <a href="https://issues.apache.org/jira/browse/SPARK-5430">multi level aggregation trees</a> to help speed up expensive reduce operations. 
<a href="https://issues.apache.org/jira/browse/SPARK-5063">Improved error reporting</a> has been added for certain gotcha operations. Spark’s Jetty dependency is <a href="https://issues.apache.org/jira/browse/SPARK-3996">now shaded</a> to help avoid conflicts with user programs. Spark now supports <a href="https://issues.apache.org/jira/browse/SPARK-3883">SSL encryption</a> for some communication endpoints. Finaly, realtime <a href="https://issues.apache.org/jira/browse/SPARK-3428">GC metrics</a> and <a href="https://issues.apache.org/jira/browse/SPARK-4874">record counts</a> have been added to the UI. </p> +<p>Spark 1.3 sees a handful of usability improvements in the core engine. The core API now supports <a href="https://issues.apache.org/jira/browse/SPARK-5430">multi level aggregation trees</a> to help speed up expensive reduce operations. <a href="https://issues.apache.org/jira/browse/SPARK-5063">Improved error reporting</a> has been added for certain gotcha operations. Spark’s Jetty dependency is <a href="https://issues.apache.org/jira/browse/SPARK-3996">now shaded</a> to help avoid conflicts with user programs. Spark now supports <a href="https://issues.apache.org/jira/browse/SPARK-3883">SSL encryption</a> for some communication endpoints. Finaly, realtime <a href="https://issues.apache.org/jira/browse/SPARK-3428">GC metrics</a> and <a href="https://issues.apache.org/jira/browse/SPARK-4874">record counts</a> have been added to the UI.</p> <h3 id="dataframe-api">DataFrame API</h3> <p>Spark 1.3 adds a new <a href="/docs/1.3.0/sql-programming-guide.html#dataframes">DataFrames API</a> that provides powerful and convenient operators when working with structured datasets. The DataFrame is an evolution of the base RDD API that includes named fields along with schema information. Itâs easy to construct a DataFrame from sources such as Hive tables, JSON data, a JDBC database, or any implementation of Sparkâs new data source API. 
Data frames will become a common interchange format between Spark components and when importing and exporting data to other systems. Data frames are supported in Python, Scala, and Java.</p> @@ -212,7 +212,7 @@ <p>In this release Spark MLlib introduces several new algorithms: latent Dirichlet allocation (LDA) for <a href="https://issues.apache.org/jira/browse/SPARK-1405">topic modeling</a>, <a href="https://issues.apache.org/jira/browse/SPARK-2309">multinomial logistic regression</a> for multiclass classification, <a href="https://issues.apache.org/jira/browse/SPARK-5012">Gaussian mixture model (GMM)</a> and <a href="https://issues.apache.org/jira/browse/SPARK-4259">power iteration clustering</a> for clustering, <a href="https://issues.apache.org/jira/browse/SPARK-4001">FP-growth</a> for frequent pattern mining, and <a href="https://issues.apache.org/jira/browse/SPARK-4409">block matrix abstraction</a> for distributed linear algebra. Initial support has been added for <a href="https://issues.apache.org/jira/browse/SPARK-4587">model import/export</a> in exchangeable format, which will be expanded in future versions to cover more model types in Java/Python/Scala. The implementations of k-means and ALS receive <a href="https://issues.apache.org/jira/browse/SPARK-3424, https://issues.apache.org/jira/browse/SPARK-3541">updates</a> that lead to significant performance gain. PySpark now supports the <a href="https://issues.apache.org/jira/browse/SPARK-4586">ML pipeline API</a> added in Spark 1.2, and <a href="https://issues.apache.org/jira/browse/SPARK-5094">gradient boosted trees</a> and <a href="https://issues.apache.org/jira/browse/SPARK-5012">Gaussian mixture model</a>.
Finally, the ML pipeline API has been ported to support the new DataFrames abstraction.</p> <h3 id="spark-streaming">Spark Streaming</h3> -<p>Spark 1.3 introduces a new <a href="https://issues.apache.org/jira/browse/SPARK-4964"><em>direct</em> Kafka API</a> (<a href="http://spark.apache.org/docs/1.3.0/streaming-kafka-integration.html">docs</a>) which enables exactly-once delivery without the use of write ahead logs. It also adds a <a href="https://issues.apache.org/jira/browse/SPARK-5047">Python Kafka API</a> along with infrastructure for additional Python API’s in future releases. An online version of <a href="https://issues.apache.org/jira/browse/SPARK-4979">logistic regression</a> and the ability to read <a href="https://issues.apache.org/jira/browse/SPARK-4969">binary records</a> have also been added. For stateful operations, support has been added for loading of an <a href="https://issues.apache.org/jira/browse/SPARK-3660">initial state RDD</a>. Finally, the streaming programming guide has been updated to include information about SQL and DataFrame operations within streaming applications, and important clarifications to the fault-tolerance semantics. </p> +<p>Spark 1.3 introduces a new <a href="https://issues.apache.org/jira/browse/SPARK-4964"><em>direct</em> Kafka API</a> (<a href="http://spark.apache.org/docs/1.3.0/streaming-kafka-integration.html">docs</a>) which enables exactly-once delivery without the use of write ahead logs. It also adds a <a href="https://issues.apache.org/jira/browse/SPARK-5047">Python Kafka API</a> along with infrastructure for additional Python API’s in future releases. An online version of <a href="https://issues.apache.org/jira/browse/SPARK-4979">logistic regression</a> and the ability to read <a href="https://issues.apache.org/jira/browse/SPARK-4969">binary records</a> have also been added. 
For stateful operations, support has been added for loading of an <a href="https://issues.apache.org/jira/browse/SPARK-3660">initial state RDD</a>. Finally, the streaming programming guide has been updated to include information about SQL and DataFrame operations within streaming applications, and important clarifications to the fault-tolerance semantics.</p> <h3 id="graphx">GraphX</h3> <p>GraphX adds a handful of utility functions in this release, including conversion into a <a href="https://issues.apache.org/jira/browse/SPARK-4917">canonical edge graph</a>.</p> @@ -228,7 +228,7 @@ <ul> <li><a href="https://issues.apache.org/jira/browse/SPARK-6194">SPARK-6194</a>: A memory leak in PySPark’s <code>collect()</code>.</li> <li><a href="https://issues.apache.org/jira/browse/SPARK-6222">SPARK-6222</a>: An issue with failure recovery in Spark Streaming.</li> - <li><a href="https://issues.apache.org/jira/browse/SPARK-6315">SPARK-6315</a>: Spark SQL can’t read parquet data generated with Spark 1.1. 
</li> + <li><a href="https://issues.apache.org/jira/browse/SPARK-6315">SPARK-6315</a>: Spark SQL can’t read parquet data generated with Spark 1.1.</li> <li><a href="https://issues.apache.org/jira/browse/SPARK-6247">SPARK-6247</a>: Errors analyzing certain join types in Spark SQL.</li> </ul> http://git-wip-us.apache.org/repos/asf/spark-website/blob/ae58782b/site/releases/spark-release-1-3-1.html ---------------------------------------------------------------------- diff --git a/site/releases/spark-release-1-3-1.html b/site/releases/spark-release-1-3-1.html index 7c42e2f..54a2112 100644 --- a/site/releases/spark-release-1-3-1.html +++ b/site/releases/spark-release-1-3-1.html @@ -205,10 +205,10 @@ <h4 id="spark-sql">Spark SQL</h4> <ul> <li>Unable to use reserved words in DDL (<a href="http://issues.apache.org/jira/browse/SPARK-6250">SPARK-6250</a>)</li> - <li>Parquet no longer caches metadata (<a href="http://issues.apache.org/jira/browse/SPARK-6575">SPARK-6575</a>) </li> + <li>Parquet no longer caches metadata (<a href="http://issues.apache.org/jira/browse/SPARK-6575">SPARK-6575</a>)</li> <li>Bug when joining two Parquet tables (<a href="http://issues.apache.org/jira/browse/SPARK-6851">SPARK-6851</a>)</li> - <li>Unable to read parquet data generated by Spark 1.1.1 (<a href="http://issues.apache.org/jira/browse/SPARK-6315">SPARK-6315</a>) </li> - <li>Parquet data source may use wrong Hadoop FileSystem (<a href="http://issues.apache.org/jira/browse/SPARK-6330">SPARK-6330</a>) </li> + <li>Unable to read parquet data generated by Spark 1.1.1 (<a href="http://issues.apache.org/jira/browse/SPARK-6315">SPARK-6315</a>)</li> + <li>Parquet data source may use wrong Hadoop FileSystem (<a href="http://issues.apache.org/jira/browse/SPARK-6330">SPARK-6330</a>)</li> </ul> <h4 id="spark-streaming">Spark Streaming</h4> http://git-wip-us.apache.org/repos/asf/spark-website/blob/ae58782b/site/releases/spark-release-1-4-0.html 
---------------------------------------------------------------------- diff --git a/site/releases/spark-release-1-4-0.html b/site/releases/spark-release-1-4-0.html index bec699b..22ced18 100644 --- a/site/releases/spark-release-1-4-0.html +++ b/site/releases/spark-release-1-4-0.html @@ -259,7 +259,7 @@ Python coverage. MLlib also adds several new algorithms.</p> </ul> <h3 id="spark-streaming">Spark Streaming</h3> -<p>Spark streaming adds visual instrumentation graphs and significantly improved debugging information in the UI. It also enhances support for both Kafka and Kinesis. </p> +<p>Spark streaming adds visual instrumentation graphs and significantly improved debugging information in the UI. It also enhances support for both Kafka and Kinesis.</p> <ul> <li><a href="https://issues.apache.org/jira/browse/SPARK-7602">SPARK-7602</a>: Visualization and monitoring in the streaming UI including batch drill down (<a href="https://issues.apache.org/jira/browse/SPARK-6796">SPARK-6796</a>, <a href="https://issues.apache.org/jira/browse/SPARK-6862">SPARK-6862</a>)</li> @@ -285,7 +285,7 @@ Python coverage. MLlib also adds several new algorithms.</p> <h4 id="test-partners">Test Partners</h4> -<p>Thanks to The following organizations, who helped benchmark or integration test release candidates: <br /> Intel, Palantir, Cloudera, Mesosphere, Huawei, Shopify, Netflix, Yahoo, UC Berkeley and Databricks. 
</p> +<p>Thanks to The following organizations, who helped benchmark or integration test release candidates: <br /> Intel, Palantir, Cloudera, Mesosphere, Huawei, Shopify, Netflix, Yahoo, UC Berkeley and Databricks.</p> <h4 id="contributors">Contributors</h4> <ul> http://git-wip-us.apache.org/repos/asf/spark-website/blob/ae58782b/site/releases/spark-release-1-5-0.html ---------------------------------------------------------------------- diff --git a/site/releases/spark-release-1-5-0.html b/site/releases/spark-release-1-5-0.html index 19adb10..1046257 100644 --- a/site/releases/spark-release-1-5-0.html +++ b/site/releases/spark-release-1-5-0.html @@ -200,25 +200,25 @@ <p>You can consult JIRA for the <a href="https://issues.apache.org/jira/secure/ReleaseNote.jspa?projectId=12315420&version=12332078">detailed changes</a>. We have curated a list of high level changes here:</p> <ul id="markdown-toc"> - <li><a href="#apis-rdd-dataframe-and-sql">APIs: RDD, DataFrame and SQL</a></li> - <li><a href="#backend-execution-dataframe-and-sql">Backend Execution: DataFrame and SQL</a></li> - <li><a href="#integrations-data-sources-hive-hadoop-mesos-and-cluster-management">Integrations: Data Sources, Hive, Hadoop, Mesos and Cluster Management</a></li> - <li><a href="#r-language">R Language</a></li> - <li><a href="#machine-learning-and-advanced-analytics">Machine Learning and Advanced Analytics</a></li> - <li><a href="#spark-streaming">Spark Streaming</a></li> - <li><a href="#deprecations-removals-configs-and-behavior-changes">Deprecations, Removals, Configs, and Behavior Changes</a> <ul> - <li><a href="#spark-core">Spark Core</a></li> - <li><a href="#spark-sql--dataframes">Spark SQL & DataFrames</a></li> - <li><a href="#spark-streaming-1">Spark Streaming</a></li> - <li><a href="#mllib">MLlib</a></li> + <li><a href="#apis-rdd-dataframe-and-sql" id="markdown-toc-apis-rdd-dataframe-and-sql">APIs: RDD, DataFrame and SQL</a></li> + <li><a href="#backend-execution-dataframe-and-sql" 
id="markdown-toc-backend-execution-dataframe-and-sql">Backend Execution: DataFrame and SQL</a></li> + <li><a href="#integrations-data-sources-hive-hadoop-mesos-and-cluster-management" id="markdown-toc-integrations-data-sources-hive-hadoop-mesos-and-cluster-management">Integrations: Data Sources, Hive, Hadoop, Mesos and Cluster Management</a></li> + <li><a href="#r-language" id="markdown-toc-r-language">R Language</a></li> + <li><a href="#machine-learning-and-advanced-analytics" id="markdown-toc-machine-learning-and-advanced-analytics">Machine Learning and Advanced Analytics</a></li> + <li><a href="#spark-streaming" id="markdown-toc-spark-streaming">Spark Streaming</a></li> + <li><a href="#deprecations-removals-configs-and-behavior-changes" id="markdown-toc-deprecations-removals-configs-and-behavior-changes">Deprecations, Removals, Configs, and Behavior Changes</a> <ul> + <li><a href="#spark-core" id="markdown-toc-spark-core">Spark Core</a></li> + <li><a href="#spark-sql--dataframes" id="markdown-toc-spark-sql--dataframes">Spark SQL & DataFrames</a></li> + <li><a href="#spark-streaming-1" id="markdown-toc-spark-streaming-1">Spark Streaming</a></li> + <li><a href="#mllib" id="markdown-toc-mllib">MLlib</a></li> </ul> </li> - <li><a href="#known-issues">Known Issues</a> <ul> - <li><a href="#sqldataframe">SQL/DataFrame</a></li> - <li><a href="#streaming">Streaming</a></li> + <li><a href="#known-issues" id="markdown-toc-known-issues">Known Issues</a> <ul> + <li><a href="#sqldataframe" id="markdown-toc-sqldataframe">SQL/DataFrame</a></li> + <li><a href="#streaming" id="markdown-toc-streaming">Streaming</a></li> </ul> </li> - <li><a href="#credits">Credits</a></li> + <li><a href="#credits" id="markdown-toc-credits">Credits</a></li> </ul> <h3 id="apis-rdd-dataframe-and-sql">APIs: RDD, DataFrame and SQL</h3> http://git-wip-us.apache.org/repos/asf/spark-website/blob/ae58782b/site/releases/spark-release-1-6-0.html 
---------------------------------------------------------------------- diff --git a/site/releases/spark-release-1-6-0.html b/site/releases/spark-release-1-6-0.html index c04721c..9872253 100644 --- a/site/releases/spark-release-1-6-0.html +++ b/site/releases/spark-release-1-6-0.html @@ -200,13 +200,13 @@ <p>You can consult JIRA for the <a href="https://issues.apache.org/jira/secure/ReleaseNote.jspa?version=12333083&projectId=12315420">detailed changes</a>. We have curated a list of high level changes here:</p> <ul id="markdown-toc"> - <li><a href="#spark-coresql">Spark Core/SQL</a></li> - <li><a href="#spark-streaming">Spark Streaming</a></li> - <li><a href="#mllib">MLlib</a></li> - <li><a href="#deprecations">Deprecations</a></li> - <li><a href="#changes-of-behavior">Changes of behavior</a></li> - <li><a href="#known-issues">Known issues</a></li> - <li><a href="#credits">Credits</a></li> + <li><a href="#spark-coresql" id="markdown-toc-spark-coresql">Spark Core/SQL</a></li> + <li><a href="#spark-streaming" id="markdown-toc-spark-streaming">Spark Streaming</a></li> + <li><a href="#mllib" id="markdown-toc-mllib">MLlib</a></li> + <li><a href="#deprecations" id="markdown-toc-deprecations">Deprecations</a></li> + <li><a href="#changes-of-behavior" id="markdown-toc-changes-of-behavior">Changes of behavior</a></li> + <li><a href="#known-issues" id="markdown-toc-known-issues">Known issues</a></li> + <li><a href="#credits" id="markdown-toc-credits">Credits</a></li> </ul> <h3 id="spark-coresql">Spark Core/SQL</h3> @@ -229,7 +229,7 @@ <ul> <li><a href="https://issues.apache.org/jira/browse/SPARK-10000">SPARK-10000</a> <strong>Unified Memory Management</strong> - Shared memory for execution and caching instead of exclusive division of the regions.</li> <li><a href="https://issues.apache.org/jira/browse/SPARK-11787">SPARK-11787</a> <strong>Parquet Performance</strong> - Improve Parquet scan performance when using flat schemas.</li> - <li><a 
href="https://issues.apache.org/jira/browse/SPARK-9241">SPARK-9241 </a> <strong>Improved query planner for queries having distinct aggregations</strong> - Query plans of distinct aggregations are more robust when distinct columns have high cardinality. </li> + <li><a href="https://issues.apache.org/jira/browse/SPARK-9241">SPARK-9241 </a> <strong>Improved query planner for queries having distinct aggregations</strong> - Query plans of distinct aggregations are more robust when distinct columns have high cardinality.</li> <li><a href="https://issues.apache.org/jira/browse/SPARK-9858">SPARK-9858 </a> <strong>Adaptive query execution</strong> - Initial support for automatically selecting the number of reducers for joins and aggregations.</li> <li><a href="https://issues.apache.org/jira/browse/SPARK-10978">SPARK-10978</a> <strong>Avoiding double filters in Data Source API</strong> - When implementing a data source with filter pushdown, developers can now tell Spark SQL to avoid double evaluating a pushed-down filter.</li> <li><a href="https://issues.apache.org/jira/browse/SPARK-11111">SPARK-11111</a> <strong>Fast null-safe joins</strong> - Joins using null-safe equality (<code><=></code>) will now execute using SortMergeJoin instead of computing a cartisian product.</li> @@ -242,7 +242,7 @@ <h3 id="spark-streaming">Spark Streaming</h3> <ul> - <li><strong>API Updates</strong> + <li><strong>API Updates</strong> <ul> <li><a href="https://issues.apache.org/jira/browse/SPARK-2629">SPARK-2629 </a> <strong>New improved state management</strong> - <code>mapWithState</code> - a DStream transformation for stateful stream processing, supercedes <code>updateStateByKey</code> in functionality and performance.</li> <li><a href="https://issues.apache.org/jira/browse/SPARK-11198">SPARK-11198</a> <strong>Kinesis record deaggregation</strong> - Kinesis streams have been upgraded to use KCL 1.4.0 and supports transparent deaggregation of KPL-aggregated records.</li> @@ -253,7 +253,7 @@ 
<li><strong>UI Improvements</strong> <ul> <li>Made failures visible in the streaming tab, in the timelines, batch list, and batch details page.</li> - <li>Made output operations visible in the streaming tab as progress bars. </li> + <li>Made output operations visible in the streaming tab as progress bars.</li> </ul> </li> </ul> http://git-wip-us.apache.org/repos/asf/spark-website/blob/ae58782b/site/releases/spark-release-2-0-0.html ---------------------------------------------------------------------- diff --git a/site/releases/spark-release-2-0-0.html b/site/releases/spark-release-2-0-0.html index b7f4a21..01183a2 100644 --- a/site/releases/spark-release-2-0-0.html +++ b/site/releases/spark-release-2-0-0.html @@ -200,30 +200,30 @@ <p>To download Apache Spark 2.0.0, visit the <a href="http://spark.apache.org/downloads.html">downloads</a> page. You can consult JIRA for the <a href="https://issues.apache.org/jira/secure/ReleaseNote.jspa?projectId=12315420&version=12329449">detailed changes</a>. 
We have curated a list of high level changes here, grouped by major modules.</p> <ul id="markdown-toc"> - <li><a href="#api-stability">API Stability</a></li> - <li><a href="#core-and-spark-sql">Core and Spark SQL</a> <ul> - <li><a href="#programming-apis">Programming APIs</a></li> - <li><a href="#sql">SQL</a></li> - <li><a href="#new-features">New Features</a></li> - <li><a href="#performance-and-runtime">Performance and Runtime</a></li> + <li><a href="#api-stability" id="markdown-toc-api-stability">API Stability</a></li> + <li><a href="#core-and-spark-sql" id="markdown-toc-core-and-spark-sql">Core and Spark SQL</a> <ul> + <li><a href="#programming-apis" id="markdown-toc-programming-apis">Programming APIs</a></li> + <li><a href="#sql" id="markdown-toc-sql">SQL</a></li> + <li><a href="#new-features" id="markdown-toc-new-features">New Features</a></li> + <li><a href="#performance-and-runtime" id="markdown-toc-performance-and-runtime">Performance and Runtime</a></li> </ul> </li> - <li><a href="#mllib">MLlib</a> <ul> - <li><a href="#new-features-1">New features</a></li> - <li><a href="#speedscaling">Speed/scaling</a></li> + <li><a href="#mllib" id="markdown-toc-mllib">MLlib</a> <ul> + <li><a href="#new-features-1" id="markdown-toc-new-features-1">New features</a></li> + <li><a href="#speedscaling" id="markdown-toc-speedscaling">Speed/scaling</a></li> </ul> </li> - <li><a href="#sparkr">SparkR</a></li> - <li><a href="#streaming">Streaming</a></li> - <li><a href="#dependency-packaging-and-operations">Dependency, Packaging, and Operations</a></li> - <li><a href="#removals-behavior-changes-and-deprecations">Removals, Behavior Changes and Deprecations</a> <ul> - <li><a href="#removals">Removals</a></li> - <li><a href="#behavior-changes">Behavior Changes</a></li> - <li><a href="#deprecations">Deprecations</a></li> + <li><a href="#sparkr" id="markdown-toc-sparkr">SparkR</a></li> + <li><a href="#streaming" id="markdown-toc-streaming">Streaming</a></li> + <li><a 
href="#dependency-packaging-and-operations" id="markdown-toc-dependency-packaging-and-operations">Dependency, Packaging, and Operations</a></li> + <li><a href="#removals-behavior-changes-and-deprecations" id="markdown-toc-removals-behavior-changes-and-deprecations">Removals, Behavior Changes and Deprecations</a> <ul> + <li><a href="#removals" id="markdown-toc-removals">Removals</a></li> + <li><a href="#behavior-changes" id="markdown-toc-behavior-changes">Behavior Changes</a></li> + <li><a href="#deprecations" id="markdown-toc-deprecations">Deprecations</a></li> </ul> </li> - <li><a href="#known-issues">Known Issues</a></li> - <li><a href="#credits">Credits</a></li> + <li><a href="#known-issues" id="markdown-toc-known-issues">Known Issues</a></li> + <li><a href="#credits" id="markdown-toc-credits">Credits</a></li> </ul> <h3 id="api-stability">API Stability</h3> --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org