[2/2] spark git commit: [SPARK-16621][SQL] Generate stable SQLs in SQLBuilder
[SPARK-16621][SQL] Generate stable SQLs in SQLBuilder ## What changes were proposed in this pull request? Currently, the generated SQLs have not-stable IDs for generated attributes. The stable generated SQL will give more benefit for understanding or testing the queries. This PR provides stable SQL generation by the following. - Provide unique ids for generated subqueries, `gen_subquery_xxx`. - Provide unique and stable ids for generated attributes, `gen_attr_xxx`. **Before** ```scala scala> new org.apache.spark.sql.catalyst.SQLBuilder(sql("select 1")).toSQL res0: String = SELECT `gen_attr_0` AS `1` FROM (SELECT 1 AS `gen_attr_0`) AS gen_subquery_0 scala> new org.apache.spark.sql.catalyst.SQLBuilder(sql("select 1")).toSQL res1: String = SELECT `gen_attr_4` AS `1` FROM (SELECT 1 AS `gen_attr_4`) AS gen_subquery_0 ``` **After** ```scala scala> new org.apache.spark.sql.catalyst.SQLBuilder(sql("select 1")).toSQL res1: String = SELECT `gen_attr_0` AS `1` FROM (SELECT 1 AS `gen_attr_0`) AS gen_subquery_0 scala> new org.apache.spark.sql.catalyst.SQLBuilder(sql("select 1")).toSQL res2: String = SELECT `gen_attr_0` AS `1` FROM (SELECT 1 AS `gen_attr_0`) AS gen_subquery_0 ``` ## How was this patch tested? Pass the existing Jenkins tests. Author: Dongjoon Hyun  Closes #14257 from dongjoon-hyun/SPARK-16621. 
Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5b8e848b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5b8e848b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5b8e848b Branch: refs/heads/master Commit: 5b8e848bbfbc0c99a5faf758e40b188b0bbebb7b Parents: 738b4cc Author: Dongjoon Hyun Authored: Wed Jul 27 13:23:59 2016 +0800 Committer: Cheng Lian Committed: Wed Jul 27 13:23:59 2016 +0800 -- .../apache/spark/sql/catalyst/SQLBuilder.scala | 23 +++- sql/hive/src/test/resources/sqlgen/agg1.sql | 2 +- sql/hive/src/test/resources/sqlgen/agg2.sql | 2 +- sql/hive/src/test/resources/sqlgen/agg3.sql | 2 +- .../sqlgen/aggregate_functions_and_window.sql | 2 +- sql/hive/src/test/resources/sqlgen/case.sql | 2 +- .../test/resources/sqlgen/case_with_else.sql| 2 +- .../src/test/resources/sqlgen/case_with_key.sql | 2 +- .../resources/sqlgen/case_with_key_and_else.sql | 2 +- .../src/test/resources/sqlgen/cluster_by.sql| 2 +- .../sqlgen/data_source_json_parquet_t0.sql | 2 +- .../sqlgen/data_source_orc_parquet_t0.sql | 2 +- .../sqlgen/data_source_parquet_parquet_t0.sql | 2 +- .../resources/sqlgen/distinct_aggregation.sql | 2 +- .../src/test/resources/sqlgen/distribute_by.sql | 2 +- .../sqlgen/distribute_by_with_sort_by.sql | 2 +- sql/hive/src/test/resources/sqlgen/except.sql | 2 +- .../resources/sqlgen/filter_after_subquery.sql | 2 +- .../resources/sqlgen/generate_with_other_1.sql | 2 +- .../resources/sqlgen/generate_with_other_2.sql | 2 +- .../sqlgen/generator_in_lateral_view_1.sql | 2 +- .../sqlgen/generator_in_lateral_view_2.sql | 2 +- .../sqlgen/generator_non_referenced_table_1.sql | 2 +- .../sqlgen/generator_non_referenced_table_2.sql | 2 +- .../resources/sqlgen/generator_non_udtf_1.sql | 2 +- .../resources/sqlgen/generator_non_udtf_2.sql | 2 +- .../sqlgen/generator_referenced_table_1.sql | 2 +- .../sqlgen/generator_referenced_table_2.sql | 2 +- .../sqlgen/generator_with_ambiguous_names_1.sql | 2 
+- .../sqlgen/generator_with_ambiguous_names_2.sql | 2 +- .../sqlgen/generator_without_from_1.sql | 2 +- .../sqlgen/generator_without_from_2.sql | 2 +- .../test/resources/sqlgen/grouping_sets_1.sql | 2 +- .../test/resources/sqlgen/grouping_sets_2_1.sql | 2 +- .../test/resources/sqlgen/grouping_sets_2_2.sql | 2 +- .../test/resources/sqlgen/grouping_sets_2_3.sql | 2 +- .../test/resources/sqlgen/grouping_sets_2_4.sql | 2 +- .../test/resources/sqlgen/grouping_sets_2_5.sql | 2 +- sql/hive/src/test/resources/sqlgen/in.sql | 2 +- .../src/test/resources/sqlgen/intersect.sql | 2 +- .../src/test/resources/sqlgen/join_2_tables.sql | 2 +- .../resources/sqlgen/json_tuple_generator_1.sql | 2 +- .../resources/sqlgen/json_tuple_generator_2.sql | 2 +- .../test/resources/sqlgen/multi_distinct.sql| 2 +- .../nested_generator_in_lateral_view_1.sql | 2 +- .../nested_generator_in_lateral_view_2.sql | 2 +- sql/hive/src/test/resources/sqlgen/not_in.sql | 2 +- sql/hive/src/test/resources/sqlgen/not_like.sql | 2 +- .../resources/sqlgen/predicate_subquery.sql | 2 +- .../sqlgen/regular_expressions_and_window.sql | 2 +- .../test/resources/sqlgen/rollup_cube_1_1.sql | 2 +- .../test/resources/sqlgen/rollup_cube_1_2.sql | 2 +- .../test/resources/sqlgen/rollup_cube_2_1.sql | 2 +-
[1/2] spark git commit: [SPARK-16621][SQL] Generate stable SQLs in SQLBuilder
Repository: spark Updated Branches: refs/heads/master 738b4cc54 -> 5b8e848bb http://git-wip-us.apache.org/repos/asf/spark/blob/5b8e848b/sql/hive/src/test/resources/sqlgen/rollup_cube_4_2.sql -- diff --git a/sql/hive/src/test/resources/sqlgen/rollup_cube_4_2.sql b/sql/hive/src/test/resources/sqlgen/rollup_cube_4_2.sql index eebef6a..8bf1645 100644 --- a/sql/hive/src/test/resources/sqlgen/rollup_cube_4_2.sql +++ b/sql/hive/src/test/resources/sqlgen/rollup_cube_4_2.sql @@ -2,4 +2,4 @@ SELECT count(*) as cnt, key % 5 as k1, key - 5 as k2, grouping_id() FROM parquet_t1 GROUP BY key % 5, key - 5 WITH CUBE -SELECT `gen_attr` AS `cnt`, `gen_attr` AS `k1`, `gen_attr` AS `k2`, `gen_attr` AS `grouping_id()` FROM (SELECT count(1) AS `gen_attr`, (`gen_attr` % CAST(5 AS BIGINT)) AS `gen_attr`, (`gen_attr` - CAST(5 AS BIGINT)) AS `gen_attr`, grouping_id() AS `gen_attr` FROM (SELECT `key` AS `gen_attr`, `value` AS `gen_attr` FROM `default`.`parquet_t1`) AS gen_subquery_0 GROUP BY (`gen_attr` % CAST(5 AS BIGINT)), (`gen_attr` - CAST(5 AS BIGINT)) GROUPING SETS(((`gen_attr` % CAST(5 AS BIGINT)), (`gen_attr` - CAST(5 AS BIGINT))), ((`gen_attr` % CAST(5 AS BIGINT))), ((`gen_attr` - CAST(5 AS BIGINT))), ())) AS gen_subquery_1 +SELECT `gen_attr_3` AS `cnt`, `gen_attr_4` AS `k1`, `gen_attr_5` AS `k2`, `gen_attr_6` AS `grouping_id()` FROM (SELECT count(1) AS `gen_attr_3`, (`gen_attr_7` % CAST(5 AS BIGINT)) AS `gen_attr_4`, (`gen_attr_7` - CAST(5 AS BIGINT)) AS `gen_attr_5`, grouping_id() AS `gen_attr_6` FROM (SELECT `key` AS `gen_attr_7`, `value` AS `gen_attr_8` FROM `default`.`parquet_t1`) AS gen_subquery_0 GROUP BY (`gen_attr_7` % CAST(5 AS BIGINT)), (`gen_attr_7` - CAST(5 AS BIGINT)) GROUPING SETS(((`gen_attr_7` % CAST(5 AS BIGINT)), (`gen_attr_7` - CAST(5 AS BIGINT))), ((`gen_attr_7` % CAST(5 AS BIGINT))), ((`gen_attr_7` - CAST(5 AS BIGINT))), ())) AS gen_subquery_1 http://git-wip-us.apache.org/repos/asf/spark/blob/5b8e848b/sql/hive/src/test/resources/sqlgen/rollup_cube_5_1.sql -- 
diff --git a/sql/hive/src/test/resources/sqlgen/rollup_cube_5_1.sql b/sql/hive/src/test/resources/sqlgen/rollup_cube_5_1.sql index 9474233..17e78a0 100644 --- a/sql/hive/src/test/resources/sqlgen/rollup_cube_5_1.sql +++ b/sql/hive/src/test/resources/sqlgen/rollup_cube_5_1.sql @@ -3,4 +3,4 @@ SELECT count(*) AS cnt, key % 5 AS k1, key - 5 AS k2, grouping_id(key % 5, key - FROM (SELECT key, key%2, key - 5 FROM parquet_t1) t GROUP BY key%5, key-5 WITH ROLLUP -SELECT `gen_attr` AS `cnt`, `gen_attr` AS `k1`, `gen_attr` AS `k2`, `gen_attr` AS `k3` FROM (SELECT count(1) AS `gen_attr`, (`gen_attr` % CAST(5 AS BIGINT)) AS `gen_attr`, (`gen_attr` - CAST(5 AS BIGINT)) AS `gen_attr`, grouping_id() AS `gen_attr` FROM (SELECT `gen_attr`, (`gen_attr` % CAST(2 AS BIGINT)) AS `gen_attr`, (`gen_attr` - CAST(5 AS BIGINT)) AS `gen_attr` FROM (SELECT `key` AS `gen_attr`, `value` AS `gen_attr` FROM `default`.`parquet_t1`) AS gen_subquery_0) AS t GROUP BY (`gen_attr` % CAST(5 AS BIGINT)), (`gen_attr` - CAST(5 AS BIGINT)) GROUPING SETS(((`gen_attr` % CAST(5 AS BIGINT)), (`gen_attr` - CAST(5 AS BIGINT))), ((`gen_attr` % CAST(5 AS BIGINT))), ())) AS gen_subquery_1 +SELECT `gen_attr_3` AS `cnt`, `gen_attr_4` AS `k1`, `gen_attr_5` AS `k2`, `gen_attr_6` AS `k3` FROM (SELECT count(1) AS `gen_attr_3`, (`gen_attr_7` % CAST(5 AS BIGINT)) AS `gen_attr_4`, (`gen_attr_7` - CAST(5 AS BIGINT)) AS `gen_attr_5`, grouping_id() AS `gen_attr_6` FROM (SELECT `gen_attr_7`, (`gen_attr_7` % CAST(2 AS BIGINT)) AS `gen_attr_8`, (`gen_attr_7` - CAST(5 AS BIGINT)) AS `gen_attr_9` FROM (SELECT `key` AS `gen_attr_7`, `value` AS `gen_attr_12` FROM `default`.`parquet_t1`) AS gen_subquery_0) AS t GROUP BY (`gen_attr_7` % CAST(5 AS BIGINT)), (`gen_attr_7` - CAST(5 AS BIGINT)) GROUPING SETS(((`gen_attr_7` % CAST(5 AS BIGINT)), (`gen_attr_7` - CAST(5 AS BIGINT))), ((`gen_attr_7` % CAST(5 AS BIGINT))), ())) AS gen_subquery_1 
http://git-wip-us.apache.org/repos/asf/spark/blob/5b8e848b/sql/hive/src/test/resources/sqlgen/rollup_cube_5_2.sql -- diff --git a/sql/hive/src/test/resources/sqlgen/rollup_cube_5_2.sql b/sql/hive/src/test/resources/sqlgen/rollup_cube_5_2.sql index d36f43d..72506ef 100644 --- a/sql/hive/src/test/resources/sqlgen/rollup_cube_5_2.sql +++ b/sql/hive/src/test/resources/sqlgen/rollup_cube_5_2.sql @@ -3,4 +3,4 @@ SELECT count(*) AS cnt, key % 5 AS k1, key - 5 AS k2, grouping_id(key % 5, key - FROM (SELECT key, key % 2, key - 5 FROM parquet_t1) t GROUP BY key % 5, key - 5 WITH CUBE -SELECT
spark-website git commit: Updated Scala 2.11 note
Repository: spark-website Updated Branches: refs/heads/asf-site d7dcb6c8b -> 0915efb8d Updated Scala 2.11 note Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/0915efb8 Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/0915efb8 Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/0915efb8 Branch: refs/heads/asf-site Commit: 0915efb8d90b37b8d6417ff88971ce143444bf5c Parents: d7dcb6c Author: Reynold Xin Authored: Tue Jul 26 22:05:57 2016 -0700 Committer: Reynold Xin Committed: Tue Jul 26 22:05:57 2016 -0700 -- downloads.md| 5 +++-- site/downloads.html | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark-website/blob/0915efb8/downloads.md -- diff --git a/downloads.md b/downloads.md index 5354869..178120d 100644 --- a/downloads.md +++ b/downloads.md @@ -33,8 +33,9 @@ Our latest stable version is Apache Spark 2.0.0, released on July 27, 2016 5. Verify this release using the and [project release KEYS](https://www.apache.org/dist/spark/KEYS). -_Note: Scala 2.11 users should download the Spark source package and build -[with Scala 2.11 support](http://spark.apache.org/docs/latest/building-spark.html#building-for-scala-211)._ +_Note: Starting version 2.0, Spark is built with Scala 2.11 by default. +Scala 2.10 users should download the Spark source package and build +[with Scala 2.10 support](http://spark.apache.org/docs/latest/building-spark.html#building-for-scala-210)._
spark-website git commit: removed old versions (<1.3) and re-arranged download options.
Repository: spark-website Updated Branches: refs/heads/asf-site 33d86d7bf -> d7dcb6c8b removed old versions (<1.3) and re-arranged download options. Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/d7dcb6c8 Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/d7dcb6c8 Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/d7dcb6c8 Branch: refs/heads/asf-site Commit: d7dcb6c8b96bffa894714f2348973c107b433738 Parents: 33d86d7 Author: Reynold XinAuthored: Tue Jul 26 21:13:36 2016 -0700 Committer: Reynold Xin Committed: Tue Jul 26 21:13:36 2016 -0700 -- js/downloads.js | 67 +++ site/js/downloads.js | 67 +++ 2 files changed, 66 insertions(+), 68 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark-website/blob/d7dcb6c8/js/downloads.js -- diff --git a/js/downloads.js b/js/downloads.js index 1d125b6..88e4bb9 100644 --- a/js/downloads.js +++ b/js/downloads.js @@ -7,7 +7,7 @@ function addRelease(version, releaseDate, packages, downloadable, stable) { releases[version] = {released: releaseDate, packages: packages, downloadable: downloadable, stable: stable}; } -var sources = {pretty: "Source Code [can build several Hadoop versions]", tag: "sources"}; +var sources = {pretty: "Source Code", tag: "sources"}; var hadoopFree = {pretty: "Pre-build with user-provided Hadoop [can use with most Hadoop distributions]", tag: "without-hadoop"}; var hadoop1 = {pretty: "Pre-built for Hadoop 1.X", tag: "hadoop1"}; var cdh4 = {pretty: "Pre-built for CDH 4", tag: "cdh4"}; @@ -19,9 +19,8 @@ var hadoop2p7 = {pretty: "Pre-built for Hadoop 2.7 and later", tag: "hadoop2.7"} var mapr3 = {pretty: "Pre-built for MapR 3.X", tag: "mapr3"}; var mapr4 = {pretty: "Pre-built for MapR 4.X", tag: "mapr4"}; -var sources = [sources]; // 0.7+ -var packagesV1 = [hadoop1, cdh4]; +var packagesV1 = [hadoop1, cdh4, sources]; // 0.8.1+ var packagesV2 = [hadoop2].concat(packagesV1); // 1.0.1+ @@ -31,39 
+30,39 @@ var packagesV4 = [hadoop2p4, hadoop2p3, mapr3, mapr4].concat(packagesV1); // 1.3.1+ var packagesV5 = [hadoop2p6].concat(packagesV4); // 1.4.0+ -var packagesV6 = [hadoopFree, hadoop2p6, hadoop2p4, hadoop2p3].concat(packagesV1); +var packagesV6 = [hadoop2p6, hadoop2p4, hadoop2p3, hadoopFree].concat(packagesV1); // 2.0.0+ -var packagesV7 = [hadoopFree, hadoop2p7, hadoop2p6, hadoop2p4, hadoop2p3]; +var packagesV7 = [hadoop2p7, hadoop2p6, hadoop2p4, hadoop2p3, hadoopFree, sources]; // addRelease("2.0.0-preview", new Date("05/24/2016"), sources.concat(packagesV7), true, false); -addRelease("2.0.0", new Date("07/27/2016"), sources.concat(packagesV7), true, true); -addRelease("1.6.2", new Date("06/25/2016"), sources.concat(packagesV6), true, true); -addRelease("1.6.1", new Date("03/09/2016"), sources.concat(packagesV6), true, true); -addRelease("1.6.0", new Date("01/04/2016"), sources.concat(packagesV6), true, true); -addRelease("1.5.2", new Date("11/09/2015"), sources.concat(packagesV6), true, true); -addRelease("1.5.1", new Date("10/02/2015"), sources.concat(packagesV6), true, true); -addRelease("1.5.0", new Date("9/09/2015"), sources.concat(packagesV6), true, true); -addRelease("1.4.1", new Date("7/15/2015"), sources.concat(packagesV6), true, true); -addRelease("1.4.0", new Date("6/11/2015"), sources.concat(packagesV6), true, true); -addRelease("1.3.1", new Date("4/17/2015"), sources.concat(packagesV5), true, true); -addRelease("1.3.0", new Date("3/13/2015"), sources.concat(packagesV4), true, true); -addRelease("1.2.2", new Date("4/17/2015"), sources.concat(packagesV4), true, true); -addRelease("1.2.1", new Date("2/9/2015"), sources.concat(packagesV4), true, true); -addRelease("1.2.0", new Date("12/18/2014"), sources.concat(packagesV4), true, true); -addRelease("1.1.1", new Date("11/26/2014"), sources.concat(packagesV4), true, true); -addRelease("1.1.0", new Date("9/11/2014"), sources.concat(packagesV4), true, true); -addRelease("1.0.2", new Date("8/5/2014"), 
sources.concat(packagesV3), true, true); -addRelease("1.0.1", new Date("7/11/2014"), sources.concat(packagesV3), false, true); -addRelease("1.0.0", new Date("5/30/2014"), sources.concat(packagesV2), false, true); -addRelease("0.9.2", new Date("7/23/2014"), sources.concat(packagesV2), true, true); -addRelease("0.9.1", new Date("4/9/2014"), sources.concat(packagesV2), false, true); -addRelease("0.9.0-incubating", new Date("2/2/2014"), sources.concat(packagesV2), false, true); -addRelease("0.8.1-incubating", new Date("12/19/2013"), sources.concat(packagesV2), true, true);
[1/3] spark-website git commit: 2.0.0 release
Repository: spark-website Updated Branches: refs/heads/asf-site 214938a57 -> 33d86d7bf http://git-wip-us.apache.org/repos/asf/spark-website/blob/33d86d7b/site/releases/spark-release-1-0-2.html -- diff --git a/site/releases/spark-release-1-0-2.html b/site/releases/spark-release-1-0-2.html index bb8dc49..c0b60a3 100644 --- a/site/releases/spark-release-1-0-2.html +++ b/site/releases/spark-release-1-0-2.html @@ -106,7 +106,7 @@ Documentation - Latest Release (Spark 1.6.2) + Latest Release (Spark 2.0.0) Older Versions and Other Resources @@ -150,6 +150,9 @@ Latest News + Spark 2.0.0 released + (Jul 27, 2016) + Spark 1.6.2 released (Jun 25, 2016) @@ -159,9 +162,6 @@ Preview release of Spark 2.0 (May 26, 2016) - Spark Summit (June 6, 2016, San Francisco) agenda posted - (Apr 17, 2016) - Archive http://git-wip-us.apache.org/repos/asf/spark-website/blob/33d86d7b/site/releases/spark-release-1-1-0.html -- diff --git a/site/releases/spark-release-1-1-0.html b/site/releases/spark-release-1-1-0.html index 34ef676..a04ff02 100644 --- a/site/releases/spark-release-1-1-0.html +++ b/site/releases/spark-release-1-1-0.html @@ -106,7 +106,7 @@ Documentation - Latest Release (Spark 1.6.2) + Latest Release (Spark 2.0.0) Older Versions and Other Resources @@ -150,6 +150,9 @@ Latest News + Spark 2.0.0 released + (Jul 27, 2016) + Spark 1.6.2 released (Jun 25, 2016) @@ -159,9 +162,6 @@ Preview release of Spark 2.0 (May 26, 2016) - Spark Summit (June 6, 2016, San Francisco) agenda posted - (Apr 17, 2016) - Archive http://git-wip-us.apache.org/repos/asf/spark-website/blob/33d86d7b/site/releases/spark-release-1-1-1.html -- diff --git a/site/releases/spark-release-1-1-1.html b/site/releases/spark-release-1-1-1.html index d94d4e8..69292a2 100644 --- a/site/releases/spark-release-1-1-1.html +++ b/site/releases/spark-release-1-1-1.html @@ -106,7 +106,7 @@ Documentation - Latest Release (Spark 1.6.2) + Latest Release (Spark 2.0.0) Older Versions and Other Resources @@ -150,6 +150,9 @@ Latest News + 
Spark 2.0.0 released + (Jul 27, 2016) + Spark 1.6.2 released (Jun 25, 2016) @@ -159,9 +162,6 @@ Preview release of Spark 2.0 (May 26, 2016) - Spark Summit (June 6, 2016, San Francisco) agenda posted - (Apr 17, 2016) - Archive http://git-wip-us.apache.org/repos/asf/spark-website/blob/33d86d7b/site/releases/spark-release-1-2-0.html -- diff --git a/site/releases/spark-release-1-2-0.html b/site/releases/spark-release-1-2-0.html index 4bb1285..4394167 100644 --- a/site/releases/spark-release-1-2-0.html +++ b/site/releases/spark-release-1-2-0.html @@ -106,7 +106,7 @@ Documentation - Latest Release (Spark 1.6.2) + Latest Release (Spark 2.0.0) Older Versions and Other Resources @@ -150,6 +150,9 @@ Latest News + Spark 2.0.0 released + (Jul 27, 2016) + Spark 1.6.2 released (Jun 25, 2016) @@ -159,9 +162,6 @@ Preview release of Spark 2.0 (May 26, 2016) - Spark Summit (June 6, 2016, San Francisco) agenda posted - (Apr 17, 2016) - Archive http://git-wip-us.apache.org/repos/asf/spark-website/blob/33d86d7b/site/releases/spark-release-1-2-1.html -- diff --git a/site/releases/spark-release-1-2-1.html b/site/releases/spark-release-1-2-1.html index afb4f3f..2de8c55 100644 --- a/site/releases/spark-release-1-2-1.html +++ b/site/releases/spark-release-1-2-1.html @@ -106,7 +106,7 @@ Documentation - Latest Release (Spark 1.6.2) + Latest Release (Spark 2.0.0) Older Versions and Other Resources @@ -150,6 +150,9 @@ Latest News + Spark 2.0.0 released + (Jul 27, 2016) + Spark 1.6.2 released (Jun 25, 2016)
[3/3] spark-website git commit: 2.0.0 release
2.0.0 release Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/33d86d7b Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/33d86d7b Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/33d86d7b Branch: refs/heads/asf-site Commit: 33d86d7bf576f3f4074573f80ef049bdca516da9 Parents: 214938a Author: Reynold XinAuthored: Tue Jul 26 21:00:47 2016 -0700 Committer: Reynold Xin Committed: Tue Jul 26 21:00:47 2016 -0700 -- _layouts/global.html| 2 +- downloads.md| 16 +- js/downloads.js | 3 +- news/_posts/2016-07-27-spark-2-0-0-released.md | 14 ++ site/community.html | 8 +- site/docs/latest| 2 +- site/documentation.html | 8 +- site/downloads.html | 38 ++-- site/examples.html | 8 +- site/faq.html | 8 +- site/graphx/index.html | 8 +- site/index.html | 8 +- site/js/downloads.js| 3 +- site/mailing-lists.html | 8 +- site/mllib/index.html | 8 +- site/news/amp-camp-2013-registration-ope.html | 8 +- .../news/announcing-the-first-spark-summit.html | 8 +- .../news/fourth-spark-screencast-published.html | 8 +- site/news/index.html| 17 +- site/news/nsdi-paper.html | 8 +- site/news/one-month-to-spark-summit-2015.html | 8 +- .../proposals-open-for-spark-summit-east.html | 8 +- ...registration-open-for-spark-summit-east.html | 8 +- .../news/run-spark-and-shark-on-amazon-emr.html | 8 +- site/news/spark-0-6-1-and-0-5-2-released.html | 8 +- site/news/spark-0-6-2-released.html | 8 +- site/news/spark-0-7-0-released.html | 8 +- site/news/spark-0-7-2-released.html | 8 +- site/news/spark-0-7-3-released.html | 8 +- site/news/spark-0-8-0-released.html | 8 +- site/news/spark-0-8-1-released.html | 8 +- site/news/spark-0-9-0-released.html | 8 +- site/news/spark-0-9-1-released.html | 8 +- site/news/spark-0-9-2-released.html | 8 +- site/news/spark-1-0-0-released.html | 8 +- site/news/spark-1-0-1-released.html | 8 +- site/news/spark-1-0-2-released.html | 8 +- site/news/spark-1-1-0-released.html | 8 
+- site/news/spark-1-1-1-released.html | 8 +- site/news/spark-1-2-0-released.html | 8 +- site/news/spark-1-2-1-released.html | 8 +- site/news/spark-1-2-2-released.html | 8 +- site/news/spark-1-3-0-released.html | 8 +- site/news/spark-1-4-0-released.html | 8 +- site/news/spark-1-4-1-released.html | 8 +- site/news/spark-1-5-0-released.html | 8 +- site/news/spark-1-5-1-released.html | 8 +- site/news/spark-1-5-2-released.html | 8 +- site/news/spark-1-6-0-released.html | 8 +- site/news/spark-1-6-1-released.html | 8 +- site/news/spark-1-6-2-released.html | 8 +- site/news/spark-2-0-0-released.html | 211 +++ site/news/spark-2.0.0-preview.html | 8 +- .../spark-accepted-into-apache-incubator.html | 8 +- site/news/spark-and-shark-in-the-news.html | 8 +- site/news/spark-becomes-tlp.html| 8 +- site/news/spark-featured-in-wired.html | 8 +- .../spark-mailing-lists-moving-to-apache.html | 8 +- site/news/spark-meetups.html| 8 +- site/news/spark-screencasts-published.html | 8 +- site/news/spark-summit-2013-is-a-wrap.html | 8 +- site/news/spark-summit-2014-videos-posted.html | 8 +- site/news/spark-summit-2015-videos-posted.html | 8 +- site/news/spark-summit-agenda-posted.html | 8 +- .../spark-summit-east-2015-videos-posted.html | 8 +- .../spark-summit-east-2016-cfp-closing.html | 8 +- site/news/spark-summit-east-agenda-posted.html | 8 +- .../news/spark-summit-europe-agenda-posted.html | 8 +- site/news/spark-summit-europe.html | 8 +- .../spark-summit-june-2016-agenda-posted.html | 8 +- site/news/spark-tips-from-quantifind.html | 8 +- .../spark-user-survey-and-powered-by-page.html | 8 +- site/news/spark-version-0-6-0-released.html | 8 +- ...-wins-daytona-gray-sort-100tb-benchmark.html | 8 +-
[2/3] spark-website git commit: 2.0.0 release
http://git-wip-us.apache.org/repos/asf/spark-website/blob/33d86d7b/site/news/spark-1-6-1-released.html -- diff --git a/site/news/spark-1-6-1-released.html b/site/news/spark-1-6-1-released.html index 8c838a0..c0e1d15 100644 --- a/site/news/spark-1-6-1-released.html +++ b/site/news/spark-1-6-1-released.html @@ -106,7 +106,7 @@ Documentation - Latest Release (Spark 1.6.2) + Latest Release (Spark 2.0.0) Older Versions and Other Resources @@ -150,6 +150,9 @@ Latest News + Spark 2.0.0 released + (Jul 27, 2016) + Spark 1.6.2 released (Jun 25, 2016) @@ -159,9 +162,6 @@ Preview release of Spark 2.0 (May 26, 2016) - Spark Summit (June 6, 2016, San Francisco) agenda posted - (Apr 17, 2016) - Archive http://git-wip-us.apache.org/repos/asf/spark-website/blob/33d86d7b/site/news/spark-1-6-2-released.html -- diff --git a/site/news/spark-1-6-2-released.html b/site/news/spark-1-6-2-released.html index 6236814..5ad0532 100644 --- a/site/news/spark-1-6-2-released.html +++ b/site/news/spark-1-6-2-released.html @@ -106,7 +106,7 @@ Documentation - Latest Release (Spark 1.6.2) + Latest Release (Spark 2.0.0) Older Versions and Other Resources @@ -150,6 +150,9 @@ Latest News + Spark 2.0.0 released + (Jul 27, 2016) + Spark 1.6.2 released (Jun 25, 2016) @@ -159,9 +162,6 @@ Preview release of Spark 2.0 (May 26, 2016) - Spark Summit (June 6, 2016, San Francisco) agenda posted - (Apr 17, 2016) - Archive http://git-wip-us.apache.org/repos/asf/spark-website/blob/33d86d7b/site/news/spark-2-0-0-released.html -- diff --git a/site/news/spark-2-0-0-released.html b/site/news/spark-2-0-0-released.html new file mode 100644 index 000..bcce390 --- /dev/null +++ b/site/news/spark-2-0-0-released.html @@ -0,0 +1,211 @@ + + + + + + + + + Spark 2.0.0 released | Apache Spark + + + + + + + + + + + + + + + + + var _gaq = _gaq || []; + _gaq.push(['_setAccount', 'UA-32518208-2']); + _gaq.push(['_trackPageview']); + (function() { +var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true; 
+ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js'; +var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s); + })(); + + + function trackOutboundLink(link, category, action) { +try { + _gaq.push(['_trackEvent', category , action]); +} catch(err){} + +setTimeout(function() { + document.location.href = link.href; +}, 100); + } + + + + + + + + +https://code.jquery.com/jquery.js"> + + + + + + + + + + + + Lightning-fast cluster computing + + + + + + + + + + Toggle navigation + + + + + + + + + + Download + + + Libraries + + + SQL and DataFrames + Spark Streaming + MLlib (machine learning) + GraphX (graph) + + http://spark-packages.org;>Third-Party Packages + + + + + Documentation + + + Latest Release (Spark 2.0.0) + Older Versions and Other Resources + + + Examples + + + Community + + + Mailing Lists + Events and Meetups + Project History + https://cwiki.apache.org/confluence/display/SPARK/Powered+By+Spark;>Powered By + https://cwiki.apache.org/confluence/display/SPARK/Committers;>Project Committers + https://issues.apache.org/jira/browse/SPARK;>Issue Tracker + + + FAQ + + + +http://www.apache.org/; class="dropdown-toggle" data-toggle="dropdown"> + Apache Software Foundation + + http://www.apache.org/;>Apache Homepage + http://www.apache.org/licenses/;>License + http://www.apache.org/foundation/sponsorship.html;>Sponsorship + http://www.apache.org/foundation/thanks.html;>Thanks + http://www.apache.org/security/;>Security
spark-website git commit: Update release notes
Repository: spark-website Updated Branches: refs/heads/asf-site 33bab055d -> 214938a57 Update release notes Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/214938a5 Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/214938a5 Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/214938a5 Branch: refs/heads/asf-site Commit: 214938a57f8c96a792b04f39a867e2cec7fa0d91 Parents: 33bab05 Author: Reynold Xin Authored: Tue Jul 26 16:42:12 2016 -0700 Committer: Reynold Xin Committed: Tue Jul 26 16:42:12 2016 -0700 -- releases/_posts/2016-07-27-spark-release-2-0-0.md | 2 +- site/releases/spark-release-2-0-0.html| 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark-website/blob/214938a5/releases/_posts/2016-07-27-spark-release-2-0-0.md -- diff --git a/releases/_posts/2016-07-27-spark-release-2-0-0.md b/releases/_posts/2016-07-27-spark-release-2-0-0.md index 8d35967..1cc5cdd 100644 --- a/releases/_posts/2016-07-27-spark-release-2-0-0.md +++ b/releases/_posts/2016-07-27-spark-release-2-0-0.md @@ -11,7 +11,7 @@ meta: _wpas_done_all: '1' --- -Apache Spark 2.0.0 is the first release on the 2.x line. This release includes over 2500 patches from over 300 contributors. Spark 2.0.0 builds on what the community has learned in the past two years, with major updates in API usability, SQL 2003 support, performance improvements, structured streaming, R UDF support, as well as operational improvements. +Apache Spark 2.0.0 is the first release on the 2.x line. The major updates are API usability, SQL 2003 support, performance improvements, structured streaming, R UDF support, as well as operational improvements. In addition, this release includes over 2500 patches from over 300 contributors. To download Apache Spark 2.0.0, visit the [downloads](http://spark.apache.org/downloads.html) page. 
You can consult JIRA for the [detailed changes](https://issues.apache.org/jira/secure/ReleaseNote.jspa?projectId=12315420&version=12329449). We have curated a list of high level changes here, grouped by major modules. http://git-wip-us.apache.org/repos/asf/spark-website/blob/214938a5/site/releases/spark-release-2-0-0.html -- diff --git a/site/releases/spark-release-2-0-0.html b/site/releases/spark-release-2-0-0.html index 72dd661..22db510 100644 --- a/site/releases/spark-release-2-0-0.html +++ b/site/releases/spark-release-2-0-0.html @@ -186,7 +186,7 @@ Spark Release 2.0.0 -Apache Spark 2.0.0 is the first release on the 2.x line. This release includes over 2500 patches from over 300 contributors. Spark 2.0.0 builds on what the community has learned in the past two years, with major updates in API usability, SQL 2003 support, performance improvements, structured streaming, R UDF support, as well as operational improvements. +Apache Spark 2.0.0 is the first release on the 2.x line. The major updates are API usability, SQL 2003 support, performance improvements, structured streaming, R UDF support, as well as operational improvements. In addition, this release includes over 2500 patches from over 300 contributors. To download Apache Spark 2.0.0, visit the https://issues.apache.org/jira/secure/ReleaseNote.jspa?projectId=12315420&version=12329449 (detailed changes; original anchor markup lost in extraction) — visit the http://spark.apache.org/downloads.html downloads page. We have curated a list of high level changes here, grouped by major modules. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[2/2] spark-website git commit: Change title for Documentation -> "Other Resources" to Documentation -> Older Versions and Other Resources.
Change title for Documentation -> "Other Resources" to Documentation -> Older Versions and Other Resources. Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/33bab055 Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/33bab055 Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/33bab055 Branch: refs/heads/asf-site Commit: 33bab055d430315c9d61a18b21d9c48b5b8fedee Parents: f0578ab Author: Reynold XinAuthored: Tue Jul 26 15:32:17 2016 -0700 Committer: Reynold Xin Committed: Tue Jul 26 15:32:17 2016 -0700 -- _layouts/global.html| 2 +- site/community.html | 2 +- site/documentation.html | 2 +- site/downloads.html | 2 +- site/examples.html | 2 +- site/faq.html | 2 +- site/graphx/index.html | 2 +- site/index.html | 2 +- site/mailing-lists.html | 2 +- site/mllib/index.html | 2 +- site/news/amp-camp-2013-registration-ope.html | 2 +- site/news/announcing-the-first-spark-summit.html| 2 +- site/news/fourth-spark-screencast-published.html| 2 +- site/news/index.html| 2 +- site/news/nsdi-paper.html | 2 +- site/news/one-month-to-spark-summit-2015.html | 2 +- site/news/proposals-open-for-spark-summit-east.html | 2 +- site/news/registration-open-for-spark-summit-east.html | 2 +- site/news/run-spark-and-shark-on-amazon-emr.html| 2 +- site/news/spark-0-6-1-and-0-5-2-released.html | 2 +- site/news/spark-0-6-2-released.html | 2 +- site/news/spark-0-7-0-released.html | 2 +- site/news/spark-0-7-2-released.html | 2 +- site/news/spark-0-7-3-released.html | 2 +- site/news/spark-0-8-0-released.html | 2 +- site/news/spark-0-8-1-released.html | 2 +- site/news/spark-0-9-0-released.html | 2 +- site/news/spark-0-9-1-released.html | 2 +- site/news/spark-0-9-2-released.html | 2 +- site/news/spark-1-0-0-released.html | 2 +- site/news/spark-1-0-1-released.html | 2 +- site/news/spark-1-0-2-released.html | 2 +- site/news/spark-1-1-0-released.html | 2 +- site/news/spark-1-1-1-released.html | 2 +- 
site/news/spark-1-2-0-released.html | 2 +- site/news/spark-1-2-1-released.html | 2 +- site/news/spark-1-2-2-released.html | 2 +- site/news/spark-1-3-0-released.html | 2 +- site/news/spark-1-4-0-released.html | 2 +- site/news/spark-1-4-1-released.html | 2 +- site/news/spark-1-5-0-released.html | 2 +- site/news/spark-1-5-1-released.html | 2 +- site/news/spark-1-5-2-released.html | 2 +- site/news/spark-1-6-0-released.html | 2 +- site/news/spark-1-6-1-released.html | 2 +- site/news/spark-1-6-2-released.html | 2 +- site/news/spark-2.0.0-preview.html | 2 +- site/news/spark-accepted-into-apache-incubator.html | 2 +- site/news/spark-and-shark-in-the-news.html | 2 +- site/news/spark-becomes-tlp.html| 2 +- site/news/spark-featured-in-wired.html | 2 +- site/news/spark-mailing-lists-moving-to-apache.html | 2 +- site/news/spark-meetups.html| 2 +- site/news/spark-screencasts-published.html | 2 +- site/news/spark-summit-2013-is-a-wrap.html | 2 +- site/news/spark-summit-2014-videos-posted.html | 2 +- site/news/spark-summit-2015-videos-posted.html | 2 +- site/news/spark-summit-agenda-posted.html | 2 +- site/news/spark-summit-east-2015-videos-posted.html | 2 +- site/news/spark-summit-east-2016-cfp-closing.html | 2 +- site/news/spark-summit-east-agenda-posted.html | 2 +- site/news/spark-summit-europe-agenda-posted.html| 2 +- site/news/spark-summit-europe.html
[1/2] spark-website git commit: Change title for Documentation -> "Other Resources" to Documentation -> Older Versions and Other Resources.
Repository: spark-website Updated Branches: refs/heads/asf-site f0578ab3f -> 33bab055d http://git-wip-us.apache.org/repos/asf/spark-website/blob/33bab055/site/releases/spark-release-1-5-2.html -- diff --git a/site/releases/spark-release-1-5-2.html b/site/releases/spark-release-1-5-2.html index d4d7fc8..5915943 100644 --- a/site/releases/spark-release-1-5-2.html +++ b/site/releases/spark-release-1-5-2.html @@ -107,7 +107,7 @@ Latest Release (Spark 1.6.2) - Other Resources + Older Versions and Other Resources Examples http://git-wip-us.apache.org/repos/asf/spark-website/blob/33bab055/site/releases/spark-release-1-6-0.html -- diff --git a/site/releases/spark-release-1-6-0.html b/site/releases/spark-release-1-6-0.html index 06791cd..4c0cec6 100644 --- a/site/releases/spark-release-1-6-0.html +++ b/site/releases/spark-release-1-6-0.html @@ -107,7 +107,7 @@ Latest Release (Spark 1.6.2) - Other Resources + Older Versions and Other Resources Examples http://git-wip-us.apache.org/repos/asf/spark-website/blob/33bab055/site/releases/spark-release-1-6-1.html -- diff --git a/site/releases/spark-release-1-6-1.html b/site/releases/spark-release-1-6-1.html index 6791de9..c190bf6 100644 --- a/site/releases/spark-release-1-6-1.html +++ b/site/releases/spark-release-1-6-1.html @@ -107,7 +107,7 @@ Latest Release (Spark 1.6.2) - Other Resources + Older Versions and Other Resources Examples http://git-wip-us.apache.org/repos/asf/spark-website/blob/33bab055/site/releases/spark-release-1-6-2.html -- diff --git a/site/releases/spark-release-1-6-2.html b/site/releases/spark-release-1-6-2.html index a998477..10d67f1 100644 --- a/site/releases/spark-release-1-6-2.html +++ b/site/releases/spark-release-1-6-2.html @@ -107,7 +107,7 @@ Latest Release (Spark 1.6.2) - Other Resources + Older Versions and Other Resources Examples http://git-wip-us.apache.org/repos/asf/spark-website/blob/33bab055/site/releases/spark-release-2-0-0.html -- diff --git a/site/releases/spark-release-2-0-0.html 
b/site/releases/spark-release-2-0-0.html index cf6f86b..72dd661 100644 --- a/site/releases/spark-release-2-0-0.html +++ b/site/releases/spark-release-2-0-0.html @@ -107,7 +107,7 @@ Latest Release (Spark 1.6.2) - Other Resources + Older Versions and Other Resources Examples http://git-wip-us.apache.org/repos/asf/spark-website/blob/33bab055/site/research.html -- diff --git a/site/research.html b/site/research.html index c00e789..f2bb59d 100644 --- a/site/research.html +++ b/site/research.html @@ -107,7 +107,7 @@ Latest Release (Spark 1.6.2) - Other Resources + Older Versions and Other Resources Examples http://git-wip-us.apache.org/repos/asf/spark-website/blob/33bab055/site/screencasts/1-first-steps-with-spark.html -- diff --git a/site/screencasts/1-first-steps-with-spark.html b/site/screencasts/1-first-steps-with-spark.html index b6fccb0..5e290a6 100644 --- a/site/screencasts/1-first-steps-with-spark.html +++ b/site/screencasts/1-first-steps-with-spark.html @@ -107,7 +107,7 @@ Latest Release (Spark 1.6.2) - Other Resources + Older Versions and Other Resources Examples http://git-wip-us.apache.org/repos/asf/spark-website/blob/33bab055/site/screencasts/2-spark-documentation-overview.html -- diff --git a/site/screencasts/2-spark-documentation-overview.html b/site/screencasts/2-spark-documentation-overview.html index 4c2e00d..8e68b7a 100644 --- a/site/screencasts/2-spark-documentation-overview.html +++ b/site/screencasts/2-spark-documentation-overview.html @@ -107,7 +107,7 @@ Latest Release (Spark 1.6.2) - Other Resources + Older Versions and Other Resources Examples http://git-wip-us.apache.org/repos/asf/spark-website/blob/33bab055/site/screencasts/3-transformations-and-caching.html -- diff --git a/site/screencasts/3-transformations-and-caching.html
spark-website git commit: Add 2.0.0 to documentation page
Repository: spark-website Updated Branches: refs/heads/asf-site 7cd1fdf23 -> f0578ab3f Add 2.0.0 to documentation page Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/f0578ab3 Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/f0578ab3 Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/f0578ab3 Branch: refs/heads/asf-site Commit: f0578ab3f4ad07e8f4fab2207e62dd80175d6f09 Parents: 7cd1fdf Author: Reynold XinAuthored: Tue Jul 26 15:31:28 2016 -0700 Committer: Reynold Xin Committed: Tue Jul 26 15:31:28 2016 -0700 -- documentation.md| 5 - site/documentation.html | 5 - 2 files changed, 8 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark-website/blob/f0578ab3/documentation.md -- diff --git a/documentation.md b/documentation.md index bf7c08a..1f87446 100644 --- a/documentation.md +++ b/documentation.md @@ -12,7 +12,8 @@ navigation: Setup instructions, programming guides, and other documentation are available for each stable version of Spark below: - Spark 1.6.2 (latest release) + Spark 2.0.0 (latest release) + Spark 1.6.2 Spark 1.6.1 Spark 1.6.0 Spark 1.5.2 @@ -31,11 +32,13 @@ navigation: Spark 0.6.2 + The documentation linked to above covers getting started with Spark, as well the built-in components MLlib, Spark Streaming, and GraphX. 
http://git-wip-us.apache.org/repos/asf/spark-website/blob/f0578ab3/site/documentation.html -- diff --git a/site/documentation.html b/site/documentation.html index 4d5dbc7..56066c4 100644 --- a/site/documentation.html +++ b/site/documentation.html @@ -188,7 +188,8 @@ Setup instructions, programming guides, and other documentation are available for each stable version of Spark below: - Spark 1.6.2 (latest release) + Spark 2.0.0 (latest release) + Spark 1.6.2 Spark 1.6.1 Spark 1.6.0 Spark 1.5.2 @@ -207,11 +208,13 @@ Spark 0.6.2 + The documentation linked to above covers getting started with Spark, as well the built-in components MLlib, Spark Streaming, and GraphX. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark-website git commit: More comprehensive new features
Repository: spark-website Updated Branches: refs/heads/asf-site 175d31a25 -> 7cd1fdf23 More comprehensive new features Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/7cd1fdf2 Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/7cd1fdf2 Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/7cd1fdf2 Branch: refs/heads/asf-site Commit: 7cd1fdf235b270b2aa38f8bb68d2e451ff618e2e Parents: 175d31a Author: Reynold XinAuthored: Tue Jul 26 15:29:07 2016 -0700 Committer: Reynold Xin Committed: Tue Jul 26 15:29:07 2016 -0700 -- .../_posts/2016-07-27-spark-release-2-0-0.md| 40 +- site/releases/spark-release-2-0-0.html | 58 +--- 2 files changed, 66 insertions(+), 32 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark-website/blob/7cd1fdf2/releases/_posts/2016-07-27-spark-release-2-0-0.md -- diff --git a/releases/_posts/2016-07-27-spark-release-2-0-0.md b/releases/_posts/2016-07-27-spark-release-2-0-0.md index 9969ce8..8d35967 100644 --- a/releases/_posts/2016-07-27-spark-release-2-0-0.md +++ b/releases/_posts/2016-07-27-spark-release-2-0-0.md @@ -34,38 +34,46 @@ One of the largest changes in Spark 2.0 is the new updated APIs: - SparkSession: new entry point that replaces the old SQLContext and HiveContext for DataFrame and Dataset APIs. SQLContext and HiveContext are kept for backward compatibility. - A new, streamlined configuration API for SparkSession - Simpler, more performant accumulator API + - A new, improved Aggregator API for typed aggregation in Datasets SQL Spark 2.0 substantially improved SQL functionalities with SQL2003 support. Spark SQL can now run all 99 TPC-DS queries. 
More prominently, we have improved: + - A native SQL parser that supports both ANSI-SQL as well as Hive QL + - Native DDL command implementations - Subquery support, including - - Uncorrelated Scalar Subqueries - - Correlated Scalar Subqueries - - NOT IN predicate Subqueries (in WHERE/HAVING clauses) - - IN predicate subqueries (in WHERE/HAVING clauses) - - (NOT) EXISTS predicate subqueries (in WHERE/HAVING clauses) + - Uncorrelated Scalar Subqueries + - Correlated Scalar Subqueries + - NOT IN predicate Subqueries (in WHERE/HAVING clauses) + - IN predicate subqueries (in WHERE/HAVING clauses) + - (NOT) EXISTS predicate subqueries (in WHERE/HAVING clauses) - View canonicalization support In addition, when building without Hive support, Spark SQL should have almost all the functionality as when building with Hive support, with the exception of Hive connectivity, Hive UDFs, and script transforms. - Performance + New Features + + - Native CSV data source, based on Databricks' [spark-csv module](https://github.com/databricks/spark-csv) + - Off-heap memory management for both caching and runtime execution + - Hive style bucketing support + - Approximate summary statistics using sketches, including approximate quantile, Bloom filter, and count-min sketch. + + + Performance and Runtime - Substantial (2 - 10X) performance speedups for common operators in SQL and DataFrames via a new technique called whole stage code generation. - Improved Parquet scan throughput through vectorization - Improved ORC performance - Many improvements in the Catalyst query optimizer for common workloads - Improved window function performance via native implementations for all window functions + - Automatic file coalescing for native data sources ### MLlib -The DataFrame-based API is now the primary API. The RDD-based API is entering maintenance mode. See the MLlib guide for details. - - API changes -The largest API change is in linear algebra. 
The DataFrame-based API (spark.ml) now depends upon local linear algebra in spark.ml.linalg, rather than in spark.mllib.linalg. This removes the last dependencies of spark.ml.* on spark.mllib.*. (SPARK-13944) -See the MLlib migration guide for a full list of API changes. +The DataFrame-based API is now the primary API. The RDD-based API is entering maintenance mode. See the MLlib guide for details New features @@ -99,9 +107,14 @@ Spark 2.0 ships the initial experimental release for Structured Streaming, a hig For the DStream API, the most prominent update is the new experimental support for Kafka 0.10. -### Operational and Packaging Improvements +### Dependency and Packaging Improvements + +There are a variety of changes to Spark's operations and packaging process: -There are a variety of improvements to Spark's operations and packaging process. The most prominent change is that Spark 2.0 no longer
spark-website git commit: Better release notes for 2.0.0
Repository: spark-website Updated Branches: refs/heads/asf-site 0323eb078 -> a9ba7a4c6 Better release notes for 2.0.0 Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/a9ba7a4c Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/a9ba7a4c Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/a9ba7a4c Branch: refs/heads/asf-site Commit: a9ba7a4c65bf97c1762f300cbf5ba1459e59b765 Parents: 0323eb0 Author: Reynold XinAuthored: Tue Jul 26 15:10:23 2016 -0700 Committer: Reynold Xin Committed: Tue Jul 26 15:10:23 2016 -0700 -- .../_posts/2016-07-27-spark-release-2-0-0.md| 33 ++-- site/releases/spark-release-2-0-0.html | 41 +--- 2 files changed, 48 insertions(+), 26 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark-website/blob/a9ba7a4c/releases/_posts/2016-07-27-spark-release-2-0-0.md -- diff --git a/releases/_posts/2016-07-27-spark-release-2-0-0.md b/releases/_posts/2016-07-27-spark-release-2-0-0.md index 8b26c04..eb267cf 100644 --- a/releases/_posts/2016-07-27-spark-release-2-0-0.md +++ b/releases/_posts/2016-07-27-spark-release-2-0-0.md @@ -11,14 +11,19 @@ meta: _wpas_done_all: '1' --- -Apache Spark 2.0.0 is the first release on the 2.x line. This release includes over 2500 patches from over 300 contributors. Some breaking changes have been made with respect to the 1.x line. To download Apache Spark 2.0.0, visit the [downloads](http://spark.apache.org/downloads.html) page. +Apache Spark 2.0.0 is the first release on the 2.x line. This release includes over 2500 patches from over 300 contributors. Spark 2.0.0 builds on what the community has learned in the past two years, with major updates in API usability, SQL 2003 support, performance improvements, structured streaming, R UDF support, as well as operational improvements. -You can consult JIRA for the [detailed changes](https://issues.apache.org/jira/secure/ReleaseNote.jspa?projectId=12315420=12329449). 
We have curated a list of high level changes here, grouped by major modules. +To download Apache Spark 2.0.0, visit the [downloads](http://spark.apache.org/downloads.html) page. You can consult JIRA for the [detailed changes](https://issues.apache.org/jira/secure/ReleaseNote.jspa?projectId=12315420=12329449). We have curated a list of high level changes here, grouped by major modules. * This will become a table of contents (this text will be scraped). {:toc} +### API Stability + +Apache Spark 2.0.0 is the first release in the 2.X major line. Spark is guaranteeing stability of its non-experimental APIs for all 2.X releases. Although the APIs have stayed largely similar to 1.X, Spark 2.0.0 does have API breaking changes. They are documented at the end of this release notes. + + ### Core and Spark SQL Programming APIs @@ -94,7 +99,12 @@ Spark 2.0 ships the initial experimental release for Structured Streaming, a hig For the DStream API, the most prominent update is the new experimental support for Kafka 0.10. -### Removals, Deprecations and Breaking Changes +### Operational and Packaging Improvements + +There are a variety of improvements to Spark's operations and packaging process. The most prominent change is that Spark 2.0 no longer requires a fat assembly jar for production deployment. + + +### Removals, Behavior Changes and Deprecations Removals The following features have been removed in Spark 2.0: @@ -113,15 +123,9 @@ The following features have been removed in Spark 2.0: - Hash-based shuffle manager - History serving functionality from standalone Master - For Java and Scala, DataFrame no longer exists as a class. As a result, data sources would need to be updated. 
+- Spark EC2 script has been fully moved to an [external repository hosted by the UC Berkeley AMPLab](https://github.com/amplab/spark-ec2) - Deprecations -The following features have been deprecated in Spark 2.0, and might be removed in future versions of Spark 2.x: - -- Fine-grained mode in Apache Mesos -- Support for Java 7 -- Support for Python 2.6 - - Breaking Changes + Behavior Changes The following changes might require updating existing applications that depend on the old behavior or API. - The default build is now using Scala 2.11 rather than Scala 2.10. @@ -134,6 +138,13 @@ The following changes might require updating existing applications that depend o For a more complete list, please see [SPARK-11806](https://issues.apache.org/jira/browse/SPARK-11806) for deprecations and removals. + Deprecations +The following features have been deprecated in Spark 2.0, and might be removed in
svn commit: r14549 - /dev/spark/spark-2.0.0/ /release/spark/spark-2.0.0/
Author: rxin Date: Tue Jul 26 22:06:04 2016 New Revision: 14549 Log: Spark 2.0.0 Added: release/spark/spark-2.0.0/ - copied from r14548, dev/spark/spark-2.0.0/ Removed: dev/spark/spark-2.0.0/ - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r14548 - /dev/spark/spark-2.0.0/
Author: rxin Date: Tue Jul 26 21:58:58 2016 New Revision: 14548 Log: Add Spark 2.0.0 Added: dev/spark/spark-2.0.0/ dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.3.tgz (with props) dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.3.tgz.asc dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.3.tgz.md5 dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.3.tgz.sha dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.4-without-hive.tgz (with props) dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.4-without-hive.tgz.asc dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.4-without-hive.tgz.md5 dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.4-without-hive.tgz.sha dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.4.tgz (with props) dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.4.tgz.asc dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.4.tgz.md5 dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.4.tgz.sha dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.6.tgz (with props) dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.6.tgz.asc dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.6.tgz.md5 dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.6.tgz.sha dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.7.tgz (with props) dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.7.tgz.asc dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.7.tgz.md5 dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.7.tgz.sha dev/spark/spark-2.0.0/spark-2.0.0-bin-without-hadoop.tgz (with props) dev/spark/spark-2.0.0/spark-2.0.0-bin-without-hadoop.tgz.asc dev/spark/spark-2.0.0/spark-2.0.0-bin-without-hadoop.tgz.md5 dev/spark/spark-2.0.0/spark-2.0.0-bin-without-hadoop.tgz.sha dev/spark/spark-2.0.0/spark-2.0.0.tgz (with props) dev/spark/spark-2.0.0/spark-2.0.0.tgz.asc dev/spark/spark-2.0.0/spark-2.0.0.tgz.md5 dev/spark/spark-2.0.0/spark-2.0.0.tgz.sha Added: dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.3.tgz == Binary file - no diff available. 
Propchange: dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.3.tgz -- svn:mime-type = application/octet-stream Added: dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.3.tgz.asc == --- dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.3.tgz.asc (added) +++ dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.3.tgz.asc Tue Jul 26 21:58:58 2016 @@ -0,0 +1,11 @@ +-BEGIN PGP SIGNATURE- +Version: GnuPG v2.0.14 (GNU/Linux) + +iQEcBAABAgAGBQJXjpt2AAoJEHxsEF/8jtCJVMcH+QGkDahdQQn41OFPSQoCsElv +TdrnUWa2OakvlYPu85/efFB1HabGnKb0zowGgaWfMwCbrLq3KxBEIpPEV2+nW1wK +ymntYPFD5BEbuiSzMHKfQOevp8+tK1xzWBgMidexldHfZXFDlC5H8q1kqSaBoAUk +GEqWuixD8Lb8aVbDb2BqT18FPvsrgWJodxQcvjmOtGU1MddrSvWKDhCv0g+l0181 +cYxJLWJoQYbRAWEbMay/yEYwQ2zfz7/j4LhAxUP/y+y0JE08sbqIBN+ddjjjYGVz +CqpvY08I6R/SDnCoFYh6PF8w1DktrGui29lbNQSvZicomTEaUYuHxF2O8O4Zxq8= +=209q +-END PGP SIGNATURE- Added: dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.3.tgz.md5 == --- dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.3.tgz.md5 (added) +++ dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.3.tgz.md5 Tue Jul 26 21:58:58 2016 @@ -0,0 +1 @@ +spark-2.0.0-bin-hadoop2.3.tgz: 83 E6 FB 6F 78 1F CA 2C C5 05 C6 4C 26 06 E2 8D Added: dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.3.tgz.sha == --- dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.3.tgz.sha (added) +++ dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.3.tgz.sha Tue Jul 26 21:58:58 2016 @@ -0,0 +1,4 @@ +spark-2.0.0-bin-hadoop2.3.tgz: 4C5241B6 297B5EC3 CD300A0B 04001F02 530F7EE7 + 634D9258 54F42DE6 F4597269 36754062 A9C6FD10 + 5AE70069 AF90FDA0 768BCFD9 362FF9DD E12BC456 + 331EED2C Added: dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.4-without-hive.tgz == Binary file - no diff available. 
Propchange: dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.4-without-hive.tgz -- svn:mime-type = application/octet-stream Added: dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.4-without-hive.tgz.asc == --- dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.4-without-hive.tgz.asc (added) +++ dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.4-without-hive.tgz.asc Tue Jul 26 21:58:58 2016 @@ -0,0 +1,11 @@ +-BEGIN PGP SIGNATURE- +Version: GnuPG v2.0.14 (GNU/Linux) + +iQEcBAABAgAGBQJXjpt8AAoJEHxsEF/8jtCJcfAIAKhwDbrF3cclOEbjCev7Cd3L +0zc5aHR4qI6xtqeeMjj+2/pKHtRJFG9S3beSIjcUA1kkiO6g/mWrceK0MJQAAEKO
spark-website git commit: Add 2.0.0 release notes
Repository: spark-website Updated Branches: refs/heads/asf-site effcd547b -> 0323eb078 Add 2.0.0 release notes Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/0323eb07 Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/0323eb07 Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/0323eb07 Branch: refs/heads/asf-site Commit: 0323eb0787282068ce34de0b953a5c3a5c24e84c Parents: effcd54 Author: Reynold XinAuthored: Tue Jul 26 14:41:18 2016 -0700 Committer: Reynold Xin Committed: Tue Jul 26 14:41:18 2016 -0700 -- .../_posts/2016-07-27-spark-release-2-0-0.md| 145 site/releases/spark-release-2-0-0.html | 372 +++ 2 files changed, 517 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark-website/blob/0323eb07/releases/_posts/2016-07-27-spark-release-2-0-0.md -- diff --git a/releases/_posts/2016-07-27-spark-release-2-0-0.md b/releases/_posts/2016-07-27-spark-release-2-0-0.md new file mode 100644 index 000..8b26c04 --- /dev/null +++ b/releases/_posts/2016-07-27-spark-release-2-0-0.md @@ -0,0 +1,145 @@ +--- +layout: post +title: Spark Release 2.0.0 +categories: [] +tags: [] +status: publish +type: post +published: true +meta: + _edit_last: '4' + _wpas_done_all: '1' +--- + +Apache Spark 2.0.0 is the first release on the 2.x line. This release includes over 2500 patches from over 300 contributors. Some breaking changes have been made with respect to the 1.x line. To download Apache Spark 2.0.0, visit the [downloads](http://spark.apache.org/downloads.html) page. + +You can consult JIRA for the [detailed changes](https://issues.apache.org/jira/secure/ReleaseNote.jspa?projectId=12315420=12329449). We have curated a list of high level changes here, grouped by major modules. + +* This will become a table of contents (this text will be scraped). 
+{:toc} + + +### Core and Spark SQL + + Programming APIs + +One of the largest changes in Spark 2.0 is the new updated APIs: + + - Unifying DataFrame and Dataset: In Scala and Java, DataFrame and Dataset have been unified, i.e. DataFrame is just a type alias for Dataset of Row. In Python and R, given the lack of type safety, DataFrame is the main programming interface. + - SparkSession: new entry point that replaces the old SQLContext and HiveContext for DataFrame and Dataset APIs. SQLContext and HiveContext are kept for backward compatibility. + - A new, streamlined configuration API for SparkSession + - Simpler, more performant accumulator API + + + SQL + +Spark 2.0 substantially improved SQL functionalities with SQL2003 support. Spark SQL can now run all 99 TPC-DS queries. More prominently, we have improved: + + - Subquery support, including + - Uncorrelated Scalar Subqueries + - Correlated Scalar Subqueries + - NOT IN predicate Subqueries (in WHERE/HAVING clauses) + - IN predicate subqueries (in WHERE/HAVING clauses) + - (NOT) EXISTS predicate subqueries (in WHERE/HAVING clauses) + - View canonicalization support + +In addition, when building without Hive support, Spark SQL should have almost all the functionality as when building with Hive support, with the exception of Hive connectivity, Hive UDFs, and script transforms. + + + Performance + + - Substantial (2 - 10X) performance speedups for common operators in SQL and DataFrames via a new technique called whole stage code generation. + - Improved Parquet scan throughput through vectorization + - Improved ORC performance + - Many improvements in the Catalyst query optimizer for common workloads + - Improved window function performance via native implementations for all window functions + + +### MLlib +The DataFrame-based API is now the primary API. The RDD-based API is entering maintenance mode. See the MLlib guide for details. + + API changes +The largest API change is in linear algebra. 
The DataFrame-based API (spark.ml) now depends upon local linear algebra in spark.ml.linalg, rather than in spark.mllib.linalg. This removes the last dependencies of spark.ml.* on spark.mllib.*. (SPARK-13944) +See the MLlib migration guide for a full list of API changes. + + New features + +- ML persistence: The DataFrames-based API provides near-complete support for saving and loading ML models and Pipelines in Scala, Java, Python, and R. See this blog post for details. (SPARK-6725, SPARK-11939, SPARK-14311) +- MLlib in R: SparkR now offers MLlib APIs for generalized linear models, naive Bayes, k-means clustering, and survival regression. See this talk to learn more. +- Python: PySpark now offers many more MLlib algorithms, including LDA, Gaussian Mixture Model, Generalized
spark-website git commit: Remove test.html
Repository: spark-website Updated Branches: refs/heads/asf-site b98c7b9d1 -> effcd547b Remove test.html Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/effcd547 Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/effcd547 Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/effcd547 Branch: refs/heads/asf-site Commit: effcd547b94573f406f9f4afc602c742e9eb5710 Parents: b98c7b9 Author: Reynold XinAuthored: Tue Jul 26 14:41:02 2016 -0700 Committer: Reynold Xin Committed: Tue Jul 26 14:41:02 2016 -0700 -- site/test.html | 1 - 1 file changed, 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark-website/blob/effcd547/site/test.html -- diff --git a/site/test.html b/site/test.html deleted file mode 100644 index 7048861..000 --- a/site/test.html +++ /dev/null @@ -1 +0,0 @@ -Adding a test file \ No newline at end of file - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark-website git commit: Test
Repository: spark-website Updated Branches: refs/heads/asf-site aff7e088c -> b98c7b9d1 Test Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/b98c7b9d Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/b98c7b9d Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/b98c7b9d Branch: refs/heads/asf-site Commit: b98c7b9d1db84e8692787fb35eecbee0bde3467a Parents: aff7e08 Author: Reynold XinAuthored: Tue Jul 26 14:17:31 2016 -0700 Committer: Reynold Xin Committed: Tue Jul 26 14:17:31 2016 -0700 -- site/test.html | 1 + 1 file changed, 1 insertion(+) -- http://git-wip-us.apache.org/repos/asf/spark-website/blob/b98c7b9d/site/test.html -- diff --git a/site/test.html b/site/test.html new file mode 100644 index 000..7048861 --- /dev/null +++ b/site/test.html @@ -0,0 +1 @@ +Adding a test file \ No newline at end of file - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-15703][SCHEDULER][CORE][WEBUI] Make ListenerBus event queue size configurable
Repository: spark Updated Branches: refs/heads/master 0869b3a5f -> 0b71d9ae0 [SPARK-15703][SCHEDULER][CORE][WEBUI] Make ListenerBus event queue size configurable ## What changes were proposed in this pull request? This change adds a new configuration entry to specify the size of the spark listener bus event queue. The value for this config ("spark.scheduler.listenerbus.eventqueue.size") is set to a default to 1. Note: I haven't currently documented the configuration entry. We can decide whether it would be appropriate to make it a public configuration or keep it as an undocumented one. Refer JIRA for more details. ## How was this patch tested? Ran existing jobs and verified the event queue size with debug logs and from the Spark WebUI Environment tab. Author: Dhruve AsharCloses #14269 from dhruve/bug/SPARK-15703. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0b71d9ae Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0b71d9ae Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0b71d9ae Branch: refs/heads/master Commit: 0b71d9ae0804b0394e4abd02c7cebf52a9102216 Parents: 0869b3a Author: Dhruve Ashar Authored: Tue Jul 26 13:23:33 2016 -0500 Committer: Tom Graves Committed: Tue Jul 26 13:23:33 2016 -0500 -- .../scala/org/apache/spark/SparkContext.scala | 4 +-- .../apache/spark/internal/config/package.scala | 5 .../spark/scheduler/LiveListenerBus.scala | 23 +-- .../scheduler/EventLoggingListenerSuite.scala | 4 +-- .../spark/scheduler/SparkListenerSuite.scala| 30 +++- .../storage/BlockManagerReplicationSuite.scala | 9 -- .../spark/storage/BlockManagerSuite.scala | 6 ++-- .../spark/ui/storage/StorageTabSuite.scala | 11 +++ .../streaming/ReceivedBlockHandlerSuite.scala | 5 +++- 9 files changed, 60 insertions(+), 37 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/0b71d9ae/core/src/main/scala/org/apache/spark/SparkContext.scala -- diff --git 
a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 6d7f05d..d48e2b4 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -249,7 +249,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli def isStopped: Boolean = stopped.get() // An asynchronous listener bus for Spark events - private[spark] val listenerBus = new LiveListenerBus + private[spark] val listenerBus = new LiveListenerBus(this) // This function allows components created by SparkEnv to be mocked in unit tests: private[spark] def createSparkEnv( @@ -2148,7 +2148,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli } } -listenerBus.start(this) +listenerBus.start() _listenerBusStarted = true } http://git-wip-us.apache.org/repos/asf/spark/blob/0b71d9ae/core/src/main/scala/org/apache/spark/internal/config/package.scala -- diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index 05dd683..ebb21e9 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -103,4 +103,9 @@ package object config { .stringConf .checkValues(Set("hive", "in-memory")) .createWithDefault("in-memory") + + private[spark] val LISTENER_BUS_EVENT_QUEUE_SIZE = +ConfigBuilder("spark.scheduler.listenerbus.eventqueue.size") + .intConf + .createWithDefault(1) } http://git-wip-us.apache.org/repos/asf/spark/blob/0b71d9ae/core/src/main/scala/org/apache/spark/scheduler/LiveListenerBus.scala -- diff --git a/core/src/main/scala/org/apache/spark/scheduler/LiveListenerBus.scala b/core/src/main/scala/org/apache/spark/scheduler/LiveListenerBus.scala index 1c21313..bfa3c40 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/LiveListenerBus.scala 
+++ b/core/src/main/scala/org/apache/spark/scheduler/LiveListenerBus.scala @@ -22,7 +22,8 @@ import java.util.concurrent.atomic.AtomicBoolean import scala.util.DynamicVariable -import org.apache.spark.SparkContext +import org.apache.spark.{SparkContext, SparkException} +import org.apache.spark.internal.config._ import org.apache.spark.util.Utils /** @@ -32,18 +33,24 @@ import org.apache.spark.util.Utils
spark git commit: [SPARK-15271][MESOS] Allow force pulling executor docker images
Repository: spark Updated Branches: refs/heads/master a2abb583c -> 0869b3a5f [SPARK-15271][MESOS] Allow force pulling executor docker images ## What changes were proposed in this pull request? (Please fill in changes proposed in this fix) ## How was this patch tested? (Please explain how this patch was tested. E.g. unit tests, integration tests, manual tests) (If this patch involves UI changes, please attach a screenshot; otherwise, remove this) Mesos agents by default will not pull docker images which are cached locally already. In order to run Spark executors from mutable tags like `:latest` this commit introduces a Spark setting (`spark.mesos.executor.docker.forcePullImage`). Setting this flag to true will tell the Mesos agent to force pull the docker image (default is `false` which is consistent with the previous implementation and Mesos' default behaviour). Author: Philipp HoffmannCloses #14348 from philipphoffmann/force-pull-image. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0869b3a5 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0869b3a5 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0869b3a5 Branch: refs/heads/master Commit: 0869b3a5f028b64c2da511e70b02ab42f65fc949 Parents: a2abb58 Author: Philipp Hoffmann Authored: Tue Jul 26 16:09:10 2016 +0100 Committer: Sean Owen Committed: Tue Jul 26 16:09:10 2016 +0100 -- .../cluster/mesos/MesosClusterScheduler.scala | 14 ++--- .../MesosCoarseGrainedSchedulerBackend.scala| 7 ++- .../MesosFineGrainedSchedulerBackend.scala | 7 ++- .../mesos/MesosSchedulerBackendUtil.scala | 20 --- ...esosCoarseGrainedSchedulerBackendSuite.scala | 63 .../MesosFineGrainedSchedulerBackendSuite.scala | 2 + dev/deps/spark-deps-hadoop-2.2 | 2 +- dev/deps/spark-deps-hadoop-2.3 | 2 +- dev/deps/spark-deps-hadoop-2.4 | 2 +- dev/deps/spark-deps-hadoop-2.6 | 2 +- dev/deps/spark-deps-hadoop-2.7 | 2 +- docs/_config.yml| 2 +- docs/running-on-mesos.md| 12 
pom.xml | 2 +- 14 files changed, 110 insertions(+), 29 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/0869b3a5/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala -- diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala index 39b0f4d..1e9644d 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala @@ -537,16 +537,10 @@ private[spark] class MesosClusterScheduler( .addAllResources(memResourcesToUse.asJava) offer.resources = finalResources.asJava submission.schedulerProperties.get("spark.mesos.executor.docker.image").foreach { image => - val container = taskInfo.getContainerBuilder() - val volumes = submission.schedulerProperties -.get("spark.mesos.executor.docker.volumes") -.map(MesosSchedulerBackendUtil.parseVolumesSpec) - val portmaps = submission.schedulerProperties -.get("spark.mesos.executor.docker.portmaps") -.map(MesosSchedulerBackendUtil.parsePortMappingsSpec) - MesosSchedulerBackendUtil.addDockerInfo( -container, image, volumes = volumes, portmaps = portmaps) - taskInfo.setContainer(container.build()) + MesosSchedulerBackendUtil.setupContainerBuilderDockerInfo( +image, +submission.schedulerProperties.get, +taskInfo.getContainerBuilder()) } val queuedTasks = tasks.getOrElseUpdate(offer.offerId, new ArrayBuffer[TaskInfo]) queuedTasks += taskInfo.build() http://git-wip-us.apache.org/repos/asf/spark/blob/0869b3a5/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala -- diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala 
index 99e6d39..52993ca 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala +++
64DB3746CD44CB49
64DB3746CD44CB49.docm Description: application/vnd.ms-word.document.macroenabled.12 - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-16663][SQL] desc table should be consistent between data source and hive serde tables
Repository: spark Updated Branches: refs/heads/master 4c9695598 -> a2abb583c [SPARK-16663][SQL] desc table should be consistent between data source and hive serde tables ## What changes were proposed in this pull request? Currently there are 2 inconsistence: 1. for data source table, we only print partition names, for hive table, we also print partition schema. After this PR, we will always print schema 2. if column doesn't have comment, data source table will print empty string, hive table will print null. After this PR, we will always print null ## How was this patch tested? new test in `HiveDDLSuite` Author: Wenchen FanCloses #14302 from cloud-fan/minor3. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a2abb583 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a2abb583 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a2abb583 Branch: refs/heads/master Commit: a2abb583caaec9a2cecd5d65b05d172fc096c125 Parents: 4c96955 Author: Wenchen Fan Authored: Tue Jul 26 18:46:12 2016 +0800 Committer: Cheng Lian Committed: Tue Jul 26 18:46:12 2016 +0800 -- .../spark/sql/execution/command/tables.scala| 12 .../apache/spark/sql/sources/DDLTestSuite.scala | 30 ++-- .../sql/hive/MetastoreDataSourcesSuite.scala| 2 +- .../spark/sql/hive/execution/HiveDDLSuite.scala | 30 +++- .../sql/hive/execution/HiveQuerySuite.scala | 4 +-- 5 files changed, 47 insertions(+), 31 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a2abb583/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index c6daa95..8263380 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -439,11 +439,12 @@ case class 
DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isF private def describePartitionInfo(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit = { if (DDLUtils.isDatasourceTable(table)) { - val partCols = DDLUtils.getPartitionColumnsFromTableProperties(table) - if (partCols.nonEmpty) { + val userSpecifiedSchema = DDLUtils.getSchemaFromTableProperties(table) + val partColNames = DDLUtils.getPartitionColumnsFromTableProperties(table) + for (schema <- userSpecifiedSchema if partColNames.nonEmpty) { append(buffer, "# Partition Information", "", "") -append(buffer, s"# ${output.head.name}", "", "") -partCols.foreach(col => append(buffer, col, "", "")) +append(buffer, s"# ${output.head.name}", output(1).name, output(2).name) +describeSchema(StructType(partColNames.map(schema(_))), buffer) } } else { if (table.partitionColumns.nonEmpty) { @@ -525,8 +526,7 @@ case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isF private def describeSchema(schema: StructType, buffer: ArrayBuffer[Row]): Unit = { schema.foreach { column => - val comment = column.getComment().getOrElse("") - append(buffer, column.name, column.dataType.simpleString, comment) + append(buffer, column.name, column.dataType.simpleString, column.getComment().orNull) } } http://git-wip-us.apache.org/repos/asf/spark/blob/a2abb583/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala index d0ad319..e535d4d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala @@ -97,21 +97,21 @@ class DDLTestSuite extends DataSourceTest with SharedSQLContext { "describe ddlPeople", Seq( Row("intType", "int", "test comment test1"), -Row("stringType", "string", ""), -Row("dateType", "date", ""), 
-Row("timestampType", "timestamp", ""), -Row("doubleType", "double", ""), -Row("bigintType", "bigint", ""), -Row("tinyintType", "tinyint", ""), -Row("decimalType", "decimal(10,0)", ""), -Row("fixedDecimalType", "decimal(5,1)", ""), -Row("binaryType", "binary", ""), -Row("booleanType", "boolean", ""), -Row("smallIntType", "smallint",
spark git commit: [SPARK-16697][ML][MLLIB] improve LDA submitMiniBatch method to avoid redundant RDD computation
Repository: spark Updated Branches: refs/heads/master 3b2b785ec -> 4c9695598 [SPARK-16697][ML][MLLIB] improve LDA submitMiniBatch method to avoid redundant RDD computation ## What changes were proposed in this pull request? In `LDAOptimizer.submitMiniBatch`, do persist on `stats: RDD[(BDM[Double], List[BDV[Double]])]` and also move the place of unpersisting `expElogbetaBc` broadcast variable, to avoid the `expElogbetaBc` broadcast variable to be unpersisted too early, and update previous `expElogbetaBc.unpersist()` into `expElogbetaBc.destroy(false)` ## How was this patch tested? Existing test. Author: WeichenXuCloses #14335 from WeichenXu123/improve_LDA. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4c969559 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4c969559 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4c969559 Branch: refs/heads/master Commit: 4c9695598ee00f68aff4eb32d4629edf6facb29f Parents: 3b2b785 Author: WeichenXu Authored: Tue Jul 26 10:41:41 2016 +0100 Committer: Sean Owen Committed: Tue Jul 26 10:41:41 2016 +0100 -- .../scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4c969559/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala index e2c6aca..ae324f8 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala @@ -28,6 +28,7 @@ import org.apache.spark.graphx._ import org.apache.spark.mllib.impl.PeriodicGraphCheckpointer import org.apache.spark.mllib.linalg.{DenseVector, Matrices, SparseVector, Vector, Vectors} import org.apache.spark.rdd.RDD +import 
org.apache.spark.storage.StorageLevel /** * :: DeveloperApi :: @@ -472,12 +473,13 @@ final class OnlineLDAOptimizer extends LDAOptimizer { gammaPart = gammad :: gammaPart } Iterator((stat, gammaPart)) -} +}.persist(StorageLevel.MEMORY_AND_DISK) val statsSum: BDM[Double] = stats.map(_._1).treeAggregate(BDM.zeros[Double](k, vocabSize))( _ += _, _ += _) -expElogbetaBc.unpersist() val gammat: BDM[Double] = breeze.linalg.DenseMatrix.vertcat( stats.map(_._2).flatMap(list => list).collect().map(_.toDenseMatrix): _*) +stats.unpersist() +expElogbetaBc.destroy(false) val batchResult = statsSum :* expElogbeta.t // Note that this is an optimization to avoid batch.count - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-16675][SQL] Avoid per-record type dispatch in JDBC when writing
Repository: spark Updated Branches: refs/heads/master 03c27435a -> 3b2b785ec [SPARK-16675][SQL] Avoid per-record type dispatch in JDBC when writing ## What changes were proposed in this pull request? Currently, `JdbcUtils.savePartition` is doing type-based dispatch for each row to write appropriate values. So, appropriate setters for `PreparedStatement` can be created first according to the schema, and then apply them to each row. This approach is similar with `CatalystWriteSupport`. This PR simply make the setters to avoid this. ## How was this patch tested? Existing tests should cover this. Author: hyukjinkwonCloses #14323 from HyukjinKwon/SPARK-16675. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3b2b785e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3b2b785e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3b2b785e Branch: refs/heads/master Commit: 3b2b785ece4394ca332377647a6305ea493f411b Parents: 03c2743 Author: hyukjinkwon Authored: Tue Jul 26 17:14:58 2016 +0800 Committer: Wenchen Fan Committed: Tue Jul 26 17:14:58 2016 +0800 -- .../execution/datasources/jdbc/JDBCRDD.scala| 22 ++-- .../execution/datasources/jdbc/JdbcUtils.scala | 102 ++- 2 files changed, 88 insertions(+), 36 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3b2b785e/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala index 4c98430..e267e77 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala @@ -322,19 +322,19 @@ private[sql] class JDBCRDD( } } - // A `JDBCValueSetter` is responsible for converting and setting a value from 
`ResultSet` - // into a field for `MutableRow`. The last argument `Int` means the index for the - // value to be set in the row and also used for the value to retrieve from `ResultSet`. - private type JDBCValueSetter = (ResultSet, MutableRow, Int) => Unit + // A `JDBCValueGetter` is responsible for getting a value from `ResultSet` into a field + // for `MutableRow`. The last argument `Int` means the index for the value to be set in + // the row and also used for the value in `ResultSet`. + private type JDBCValueGetter = (ResultSet, MutableRow, Int) => Unit /** - * Creates `JDBCValueSetter`s according to [[StructType]], which can set + * Creates `JDBCValueGetter`s according to [[StructType]], which can set * each value from `ResultSet` to each field of [[MutableRow]] correctly. */ - def makeSetters(schema: StructType): Array[JDBCValueSetter] = -schema.fields.map(sf => makeSetter(sf.dataType, sf.metadata)) + def makeGetters(schema: StructType): Array[JDBCValueGetter] = +schema.fields.map(sf => makeGetter(sf.dataType, sf.metadata)) - private def makeSetter(dt: DataType, metadata: Metadata): JDBCValueSetter = dt match { + private def makeGetter(dt: DataType, metadata: Metadata): JDBCValueGetter = dt match { case BooleanType => (rs: ResultSet, row: MutableRow, pos: Int) => row.setBoolean(pos, rs.getBoolean(pos + 1)) @@ -489,15 +489,15 @@ private[sql] class JDBCRDD( stmt.setFetchSize(fetchSize) val rs = stmt.executeQuery() -val setters: Array[JDBCValueSetter] = makeSetters(schema) +val getters: Array[JDBCValueGetter] = makeGetters(schema) val mutableRow = new SpecificMutableRow(schema.fields.map(x => x.dataType)) def getNext(): InternalRow = { if (rs.next()) { inputMetrics.incRecordsRead(1) var i = 0 -while (i < setters.length) { - setters(i).apply(rs, mutableRow, i) +while (i < getters.length) { + getters(i).apply(rs, mutableRow, i) if (rs.wasNull) mutableRow.setNullAt(i) i = i + 1 } 
http://git-wip-us.apache.org/repos/asf/spark/blob/3b2b785e/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala index cb474cb..81d38e3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala +++
spark git commit: [TEST][STREAMING] Fix flaky Kafka rate controlling test
Repository: spark Updated Branches: refs/heads/branch-2.0 4391d4a3c -> 44234b1c4 [TEST][STREAMING] Fix flaky Kafka rate controlling test ## What changes were proposed in this pull request? The current test is incorrect, because - The expected number of messages does not take into account that the topic has 2 partitions, and rate is set per partition. - Also in some cases, the test ran out of data in Kafka while waiting for the right amount of data per batch. The PR - Reduces the number of partitions to 1 - Adds more data to Kafka - Runs with 0.5 second so that batches are created slowly ## How was this patch tested? Ran many times locally, going to run it many times in Jenkins (If this patch involves UI changes, please attach a screenshot; otherwise, remove this) Author: Tathagata DasCloses #14361 from tdas/kafka-rate-test-fix. (cherry picked from commit 03c27435aee4e319abe290771ba96e69469109ac) Signed-off-by: Tathagata Das Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/44234b1c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/44234b1c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/44234b1c Branch: refs/heads/branch-2.0 Commit: 44234b1c4266ac7be56892817d043fe6d9ea62f7 Parents: 4391d4a Author: Tathagata Das Authored: Tue Jul 26 00:41:46 2016 -0700 Committer: Tathagata Das Committed: Tue Jul 26 00:41:58 2016 -0700 -- .../spark/streaming/kafka010/DirectKafkaStreamSuite.scala | 9 - 1 file changed, 4 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/44234b1c/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/DirectKafkaStreamSuite.scala -- diff --git a/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/DirectKafkaStreamSuite.scala b/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/DirectKafkaStreamSuite.scala index c9e15bc..b1d90b8 100644 --- 
a/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/DirectKafkaStreamSuite.scala +++ b/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/DirectKafkaStreamSuite.scala @@ -544,15 +544,14 @@ class DirectKafkaStreamSuite test("using rate controller") { val topic = "backpressure" -val topicPartitions = Set(new TopicPartition(topic, 0), new TopicPartition(topic, 1)) -kafkaTestUtils.createTopic(topic, 2) +kafkaTestUtils.createTopic(topic, 1) val kafkaParams = getKafkaParams("auto.offset.reset" -> "earliest") val executorKafkaParams = new JHashMap[String, Object](kafkaParams) KafkaUtils.fixKafkaParams(executorKafkaParams) -val batchIntervalMilliseconds = 100 +val batchIntervalMilliseconds = 500 val estimator = new ConstantEstimator(100) -val messages = Map("foo" -> 200) +val messages = Map("foo" -> 5000) kafkaTestUtils.sendMessages(topic, messages) val sparkConf = new SparkConf() @@ -596,7 +595,7 @@ class DirectKafkaStreamSuite estimator.updateRate(rate) // Set a new rate. // Expect blocks of data equal to "rate", scaled by the interval length in secs. val expectedSize = Math.round(rate * batchIntervalMilliseconds * 0.001) - eventually(timeout(5.seconds), interval(batchIntervalMilliseconds.milliseconds)) { + eventually(timeout(5.seconds), interval(10 milliseconds)) { // Assert that rate estimator values are used to determine maxMessagesPerPartition. // Funky "-" in message makes the complete assertion message read better. assert(collectedData.asScala.exists(_.size == expectedSize), - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [TEST][STREAMING] Fix flaky Kafka rate controlling test
Repository: spark Updated Branches: refs/heads/master 6959061f0 -> 03c27435a [TEST][STREAMING] Fix flaky Kafka rate controlling test ## What changes were proposed in this pull request? The current test is incorrect, because - The expected number of messages does not take into account that the topic has 2 partitions, and rate is set per partition. - Also in some cases, the test ran out of data in Kafka while waiting for the right amount of data per batch. The PR - Reduces the number of partitions to 1 - Adds more data to Kafka - Runs with 0.5 second so that batches are created slowly ## How was this patch tested? Ran many times locally, going to run it many times in Jenkins (If this patch involves UI changes, please attach a screenshot; otherwise, remove this) Author: Tathagata DasCloses #14361 from tdas/kafka-rate-test-fix. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/03c27435 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/03c27435 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/03c27435 Branch: refs/heads/master Commit: 03c27435aee4e319abe290771ba96e69469109ac Parents: 6959061 Author: Tathagata Das Authored: Tue Jul 26 00:41:46 2016 -0700 Committer: Tathagata Das Committed: Tue Jul 26 00:41:46 2016 -0700 -- .../spark/streaming/kafka010/DirectKafkaStreamSuite.scala | 9 - 1 file changed, 4 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/03c27435/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/DirectKafkaStreamSuite.scala -- diff --git a/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/DirectKafkaStreamSuite.scala b/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/DirectKafkaStreamSuite.scala index c9e15bc..b1d90b8 100644 --- a/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/DirectKafkaStreamSuite.scala +++ 
b/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/DirectKafkaStreamSuite.scala @@ -544,15 +544,14 @@ class DirectKafkaStreamSuite test("using rate controller") { val topic = "backpressure" -val topicPartitions = Set(new TopicPartition(topic, 0), new TopicPartition(topic, 1)) -kafkaTestUtils.createTopic(topic, 2) +kafkaTestUtils.createTopic(topic, 1) val kafkaParams = getKafkaParams("auto.offset.reset" -> "earliest") val executorKafkaParams = new JHashMap[String, Object](kafkaParams) KafkaUtils.fixKafkaParams(executorKafkaParams) -val batchIntervalMilliseconds = 100 +val batchIntervalMilliseconds = 500 val estimator = new ConstantEstimator(100) -val messages = Map("foo" -> 200) +val messages = Map("foo" -> 5000) kafkaTestUtils.sendMessages(topic, messages) val sparkConf = new SparkConf() @@ -596,7 +595,7 @@ class DirectKafkaStreamSuite estimator.updateRate(rate) // Set a new rate. // Expect blocks of data equal to "rate", scaled by the interval length in secs. val expectedSize = Math.round(rate * batchIntervalMilliseconds * 0.001) - eventually(timeout(5.seconds), interval(batchIntervalMilliseconds.milliseconds)) { + eventually(timeout(5.seconds), interval(10 milliseconds)) { // Assert that rate estimator values are used to determine maxMessagesPerPartition. // Funky "-" in message makes the complete assertion message read better. assert(collectedData.asScala.exists(_.size == expectedSize), - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-16706][SQL] support java map in encoder
Repository: spark Updated Branches: refs/heads/master 7b06a8948 -> 6959061f0 [SPARK-16706][SQL] support java map in encoder ## What changes were proposed in this pull request? finish the TODO, create a new expression `ExternalMapToCatalyst` to iterate the map directly. ## How was this patch tested? new test in `JavaDatasetSuite` Author: Wenchen FanCloses #14344 from cloud-fan/java-map. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6959061f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6959061f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6959061f Branch: refs/heads/master Commit: 6959061f02b02afd4cef683b5eea0b7097eedee7 Parents: 7b06a89 Author: Wenchen Fan Authored: Tue Jul 26 15:33:05 2016 +0800 Committer: Cheng Lian Committed: Tue Jul 26 15:33:05 2016 +0800 -- .../spark/sql/catalyst/JavaTypeInference.scala | 12 +- .../spark/sql/catalyst/ScalaReflection.scala| 34 ++-- .../catalyst/expressions/objects/objects.scala | 158 ++- .../encoders/ExpressionEncoderSuite.scala | 6 + .../org/apache/spark/sql/JavaDatasetSuite.java | 58 ++- 5 files changed, 236 insertions(+), 32 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/6959061f/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala index b3a233a..e6f61b0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala @@ -395,10 +395,14 @@ object JavaTypeInference { toCatalystArray(inputObject, elementType(typeToken)) case _ if mapType.isAssignableFrom(typeToken) => - // TODO: for java map, if we get the keys and values by `keySet` and `values`, we can - // not guarantee they have same 
iteration order(which is different from scala map). - // A possible solution is creating a new `MapObjects` that can iterate a map directly. - throw new UnsupportedOperationException("map type is not supported currently") + val (keyType, valueType) = mapKeyValueType(typeToken) + ExternalMapToCatalyst( +inputObject, +ObjectType(keyType.getRawType), +serializerFor(_, keyType), +ObjectType(valueType.getRawType), +serializerFor(_, valueType) + ) case other => val properties = getJavaBeanProperties(other) http://git-wip-us.apache.org/repos/asf/spark/blob/6959061f/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala index 8affb03..76f87f6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala @@ -472,29 +472,17 @@ object ScalaReflection extends ScalaReflection { case t if t <:< localTypeOf[Map[_, _]] => val TypeRef(_, _, Seq(keyType, valueType)) = t - -val keys = - Invoke( -Invoke(inputObject, "keysIterator", - ObjectType(classOf[scala.collection.Iterator[_]])), -"toSeq", -ObjectType(classOf[scala.collection.Seq[_]])) -val convertedKeys = toCatalystArray(keys, keyType) - -val values = - Invoke( -Invoke(inputObject, "valuesIterator", - ObjectType(classOf[scala.collection.Iterator[_]])), -"toSeq", -ObjectType(classOf[scala.collection.Seq[_]])) -val convertedValues = toCatalystArray(values, valueType) - -val Schema(keyDataType, _) = schemaFor(keyType) -val Schema(valueDataType, valueNullable) = schemaFor(valueType) -NewInstance( - classOf[ArrayBasedMapData], - convertedKeys :: convertedValues :: Nil, - dataType = MapType(keyDataType, valueDataType, valueNullable)) +val keyClsName = getClassNameFromType(keyType) +val valueClsName = 
getClassNameFromType(valueType) +val keyPath = s"""- map key class: "$keyClsName +: walkedTypePath +val valuePath = s"""- map value class: "$valueClsName +: walkedTypePath + +