[1/2] spark git commit: [SPARK-16621][SQL] Generate stable SQLs in SQLBuilder
Repository: spark Updated Branches: refs/heads/branch-2.0 44234b1c4 -> be9965b07 http://git-wip-us.apache.org/repos/asf/spark/blob/be9965b0/sql/hive/src/test/resources/sqlgen/rollup_cube_4_2.sql -- diff --git a/sql/hive/src/test/resources/sqlgen/rollup_cube_4_2.sql b/sql/hive/src/test/resources/sqlgen/rollup_cube_4_2.sql index eebef6a..8bf1645 100644 --- a/sql/hive/src/test/resources/sqlgen/rollup_cube_4_2.sql +++ b/sql/hive/src/test/resources/sqlgen/rollup_cube_4_2.sql @@ -2,4 +2,4 @@ SELECT count(*) as cnt, key % 5 as k1, key - 5 as k2, grouping_id() FROM parquet_t1 GROUP BY key % 5, key - 5 WITH CUBE -SELECT `gen_attr` AS `cnt`, `gen_attr` AS `k1`, `gen_attr` AS `k2`, `gen_attr` AS `grouping_id()` FROM (SELECT count(1) AS `gen_attr`, (`gen_attr` % CAST(5 AS BIGINT)) AS `gen_attr`, (`gen_attr` - CAST(5 AS BIGINT)) AS `gen_attr`, grouping_id() AS `gen_attr` FROM (SELECT `key` AS `gen_attr`, `value` AS `gen_attr` FROM `default`.`parquet_t1`) AS gen_subquery_0 GROUP BY (`gen_attr` % CAST(5 AS BIGINT)), (`gen_attr` - CAST(5 AS BIGINT)) GROUPING SETS(((`gen_attr` % CAST(5 AS BIGINT)), (`gen_attr` - CAST(5 AS BIGINT))), ((`gen_attr` % CAST(5 AS BIGINT))), ((`gen_attr` - CAST(5 AS BIGINT))), ())) AS gen_subquery_1 +SELECT `gen_attr_3` AS `cnt`, `gen_attr_4` AS `k1`, `gen_attr_5` AS `k2`, `gen_attr_6` AS `grouping_id()` FROM (SELECT count(1) AS `gen_attr_3`, (`gen_attr_7` % CAST(5 AS BIGINT)) AS `gen_attr_4`, (`gen_attr_7` - CAST(5 AS BIGINT)) AS `gen_attr_5`, grouping_id() AS `gen_attr_6` FROM (SELECT `key` AS `gen_attr_7`, `value` AS `gen_attr_8` FROM `default`.`parquet_t1`) AS gen_subquery_0 GROUP BY (`gen_attr_7` % CAST(5 AS BIGINT)), (`gen_attr_7` - CAST(5 AS BIGINT)) GROUPING SETS(((`gen_attr_7` % CAST(5 AS BIGINT)), (`gen_attr_7` - CAST(5 AS BIGINT))), ((`gen_attr_7` % CAST(5 AS BIGINT))), ((`gen_attr_7` - CAST(5 AS BIGINT))), ())) AS gen_subquery_1 http://git-wip-us.apache.org/repos/asf/spark/blob/be9965b0/sql/hive/src/test/resources/sqlgen/rollup_cube_5_1.sql -- 
diff --git a/sql/hive/src/test/resources/sqlgen/rollup_cube_5_1.sql b/sql/hive/src/test/resources/sqlgen/rollup_cube_5_1.sql index 9474233..17e78a0 100644 --- a/sql/hive/src/test/resources/sqlgen/rollup_cube_5_1.sql +++ b/sql/hive/src/test/resources/sqlgen/rollup_cube_5_1.sql @@ -3,4 +3,4 @@ SELECT count(*) AS cnt, key % 5 AS k1, key - 5 AS k2, grouping_id(key % 5, key - FROM (SELECT key, key%2, key - 5 FROM parquet_t1) t GROUP BY key%5, key-5 WITH ROLLUP -SELECT `gen_attr` AS `cnt`, `gen_attr` AS `k1`, `gen_attr` AS `k2`, `gen_attr` AS `k3` FROM (SELECT count(1) AS `gen_attr`, (`gen_attr` % CAST(5 AS BIGINT)) AS `gen_attr`, (`gen_attr` - CAST(5 AS BIGINT)) AS `gen_attr`, grouping_id() AS `gen_attr` FROM (SELECT `gen_attr`, (`gen_attr` % CAST(2 AS BIGINT)) AS `gen_attr`, (`gen_attr` - CAST(5 AS BIGINT)) AS `gen_attr` FROM (SELECT `key` AS `gen_attr`, `value` AS `gen_attr` FROM `default`.`parquet_t1`) AS gen_subquery_0) AS t GROUP BY (`gen_attr` % CAST(5 AS BIGINT)), (`gen_attr` - CAST(5 AS BIGINT)) GROUPING SETS(((`gen_attr` % CAST(5 AS BIGINT)), (`gen_attr` - CAST(5 AS BIGINT))), ((`gen_attr` % CAST(5 AS BIGINT))), ())) AS gen_subquery_1 +SELECT `gen_attr_3` AS `cnt`, `gen_attr_4` AS `k1`, `gen_attr_5` AS `k2`, `gen_attr_6` AS `k3` FROM (SELECT count(1) AS `gen_attr_3`, (`gen_attr_7` % CAST(5 AS BIGINT)) AS `gen_attr_4`, (`gen_attr_7` - CAST(5 AS BIGINT)) AS `gen_attr_5`, grouping_id() AS `gen_attr_6` FROM (SELECT `gen_attr_7`, (`gen_attr_7` % CAST(2 AS BIGINT)) AS `gen_attr_8`, (`gen_attr_7` - CAST(5 AS BIGINT)) AS `gen_attr_9` FROM (SELECT `key` AS `gen_attr_7`, `value` AS `gen_attr_12` FROM `default`.`parquet_t1`) AS gen_subquery_0) AS t GROUP BY (`gen_attr_7` % CAST(5 AS BIGINT)), (`gen_attr_7` - CAST(5 AS BIGINT)) GROUPING SETS(((`gen_attr_7` % CAST(5 AS BIGINT)), (`gen_attr_7` - CAST(5 AS BIGINT))), ((`gen_attr_7` % CAST(5 AS BIGINT))), ())) AS gen_subquery_1 
http://git-wip-us.apache.org/repos/asf/spark/blob/be9965b0/sql/hive/src/test/resources/sqlgen/rollup_cube_5_2.sql -- diff --git a/sql/hive/src/test/resources/sqlgen/rollup_cube_5_2.sql b/sql/hive/src/test/resources/sqlgen/rollup_cube_5_2.sql index d36f43d..72506ef 100644 --- a/sql/hive/src/test/resources/sqlgen/rollup_cube_5_2.sql +++ b/sql/hive/src/test/resources/sqlgen/rollup_cube_5_2.sql @@ -3,4 +3,4 @@ SELECT count(*) AS cnt, key % 5 AS k1, key - 5 AS k2, grouping_id(key % 5, key - FROM (SELECT key, key % 2, key - 5 FROM parquet_t1) t GROUP BY key % 5, key - 5 WITH CUBE -SELECT `gen_a
[2/2] spark git commit: [SPARK-16621][SQL] Generate stable SQLs in SQLBuilder
[SPARK-16621][SQL] Generate stable SQLs in SQLBuilder Currently, the generated SQLs have unstable IDs for generated attributes. Stable generated SQL is more helpful for understanding and testing the queries. This PR provides stable SQL generation through the following changes. - Provide unique ids for generated subqueries, `gen_subquery_xxx`. - Provide unique and stable ids for generated attributes, `gen_attr_xxx`. **Before** ```scala scala> new org.apache.spark.sql.catalyst.SQLBuilder(sql("select 1")).toSQL res0: String = SELECT `gen_attr_0` AS `1` FROM (SELECT 1 AS `gen_attr_0`) AS gen_subquery_0 scala> new org.apache.spark.sql.catalyst.SQLBuilder(sql("select 1")).toSQL res1: String = SELECT `gen_attr_4` AS `1` FROM (SELECT 1 AS `gen_attr_4`) AS gen_subquery_0 ``` **After** ```scala scala> new org.apache.spark.sql.catalyst.SQLBuilder(sql("select 1")).toSQL res1: String = SELECT `gen_attr_0` AS `1` FROM (SELECT 1 AS `gen_attr_0`) AS gen_subquery_0 scala> new org.apache.spark.sql.catalyst.SQLBuilder(sql("select 1")).toSQL res2: String = SELECT `gen_attr_0` AS `1` FROM (SELECT 1 AS `gen_attr_0`) AS gen_subquery_0 ``` Pass the existing Jenkins tests. Author: Dongjoon Hyun Closes #14257 from dongjoon-hyun/SPARK-16621. 
(cherry picked from commit 5b8e848bbfbc0c99a5faf758e40b188b0bbebb7b) Signed-off-by: Reynold Xin Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/be9965b0 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/be9965b0 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/be9965b0 Branch: refs/heads/branch-2.0 Commit: be9965b077cded3d30a2d35342f3440f4708c357 Parents: 44234b1 Author: Dongjoon Hyun Authored: Wed Jul 27 13:23:59 2016 +0800 Committer: Reynold Xin Committed: Tue Jul 26 23:51:51 2016 -0700 -- .../apache/spark/sql/catalyst/SQLBuilder.scala | 23 +++- sql/hive/src/test/resources/sqlgen/agg1.sql | 2 +- sql/hive/src/test/resources/sqlgen/agg2.sql | 2 +- sql/hive/src/test/resources/sqlgen/agg3.sql | 2 +- .../sqlgen/aggregate_functions_and_window.sql | 2 +- sql/hive/src/test/resources/sqlgen/case.sql | 2 +- .../test/resources/sqlgen/case_with_else.sql| 2 +- .../src/test/resources/sqlgen/case_with_key.sql | 2 +- .../resources/sqlgen/case_with_key_and_else.sql | 2 +- .../src/test/resources/sqlgen/cluster_by.sql| 2 +- .../sqlgen/data_source_json_parquet_t0.sql | 2 +- .../sqlgen/data_source_orc_parquet_t0.sql | 2 +- .../sqlgen/data_source_parquet_parquet_t0.sql | 2 +- .../resources/sqlgen/distinct_aggregation.sql | 2 +- .../src/test/resources/sqlgen/distribute_by.sql | 2 +- .../sqlgen/distribute_by_with_sort_by.sql | 2 +- sql/hive/src/test/resources/sqlgen/except.sql | 2 +- .../resources/sqlgen/filter_after_subquery.sql | 2 +- .../resources/sqlgen/generate_with_other_1.sql | 2 +- .../resources/sqlgen/generate_with_other_2.sql | 2 +- .../sqlgen/generator_in_lateral_view_1.sql | 2 +- .../sqlgen/generator_in_lateral_view_2.sql | 2 +- .../sqlgen/generator_non_referenced_table_1.sql | 2 +- .../sqlgen/generator_non_referenced_table_2.sql | 2 +- .../resources/sqlgen/generator_non_udtf_1.sql | 2 +- .../resources/sqlgen/generator_non_udtf_2.sql | 2 +- .../sqlgen/generator_referenced_table_1.sql | 2 +- 
.../sqlgen/generator_referenced_table_2.sql | 2 +- .../sqlgen/generator_with_ambiguous_names_1.sql | 2 +- .../sqlgen/generator_with_ambiguous_names_2.sql | 2 +- .../sqlgen/generator_without_from_1.sql | 2 +- .../sqlgen/generator_without_from_2.sql | 2 +- .../test/resources/sqlgen/grouping_sets_1.sql | 2 +- .../test/resources/sqlgen/grouping_sets_2_1.sql | 2 +- .../test/resources/sqlgen/grouping_sets_2_2.sql | 2 +- .../test/resources/sqlgen/grouping_sets_2_3.sql | 2 +- .../test/resources/sqlgen/grouping_sets_2_4.sql | 2 +- .../test/resources/sqlgen/grouping_sets_2_5.sql | 2 +- sql/hive/src/test/resources/sqlgen/in.sql | 2 +- .../src/test/resources/sqlgen/intersect.sql | 2 +- .../src/test/resources/sqlgen/join_2_tables.sql | 2 +- .../resources/sqlgen/json_tuple_generator_1.sql | 2 +- .../resources/sqlgen/json_tuple_generator_2.sql | 2 +- .../test/resources/sqlgen/multi_distinct.sql| 2 +- .../nested_generator_in_lateral_view_1.sql | 2 +- .../nested_generator_in_lateral_view_2.sql | 2 +- sql/hive/src/test/resources/sqlgen/not_in.sql | 2 +- sql/hive/src/test/resources/sqlgen/not_like.sql | 2 +- .../resources/sqlgen/predicate_subquery.sql | 2 +- .../sqlgen/regular_expressions_and_window.sql | 2 +- .../test/resources/sqlgen/rollup_cube_1_1.sql | 2 +- .../test/resources/sqlgen/rollup_cube_1_2.sql | 2 +- .../test/resources/sqlgen/rollup_cube_2_1.sql | 2 +- .../test/resources/sqlgen/rollup_cube_2_2.sql | 2
[2/2] spark-website git commit: Move 2.0.0 release date to July 26 since I managed to push it out tonight.
Move 2.0.0 release date to July 26 since I managed to push it out tonight. Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/46fb65a4 Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/46fb65a4 Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/46fb65a4 Branch: refs/heads/asf-site Commit: 46fb65a409296036cd5ffcb153d2a24e9f229323 Parents: 0915efb Author: Reynold Xin Authored: Tue Jul 26 23:20:31 2016 -0700 Committer: Reynold Xin Committed: Tue Jul 26 23:20:31 2016 -0700 -- downloads.md| 2 +- js/downloads.js | 2 +- news/_posts/2016-07-26-spark-2-0-0-released.md | 14 ++ news/_posts/2016-07-27-spark-2-0-0-released.md | 14 -- .../_posts/2016-07-26-spark-release-2-0-0.md| 170 +++ .../_posts/2016-07-27-spark-release-2-0-0.md| 170 --- site/community.html | 2 +- site/documentation.html | 2 +- site/downloads.html | 4 +- site/examples.html | 2 +- site/faq.html | 2 +- site/graphx/index.html | 2 +- site/index.html | 2 +- site/js/downloads.js| 2 +- site/mailing-lists.html | 2 +- site/mllib/index.html | 2 +- site/news/amp-camp-2013-registration-ope.html | 2 +- .../news/announcing-the-first-spark-summit.html | 2 +- .../news/fourth-spark-screencast-published.html | 2 +- site/news/index.html| 4 +- site/news/nsdi-paper.html | 2 +- site/news/one-month-to-spark-summit-2015.html | 2 +- .../proposals-open-for-spark-summit-east.html | 2 +- ...registration-open-for-spark-summit-east.html | 2 +- .../news/run-spark-and-shark-on-amazon-emr.html | 2 +- site/news/spark-0-6-1-and-0-5-2-released.html | 2 +- site/news/spark-0-6-2-released.html | 2 +- site/news/spark-0-7-0-released.html | 2 +- site/news/spark-0-7-2-released.html | 2 +- site/news/spark-0-7-3-released.html | 2 +- site/news/spark-0-8-0-released.html | 2 +- site/news/spark-0-8-1-released.html | 2 +- site/news/spark-0-9-0-released.html | 2 +- site/news/spark-0-9-1-released.html | 2 +- site/news/spark-0-9-2-released.html | 2 +- 
site/news/spark-1-0-0-released.html | 2 +- site/news/spark-1-0-1-released.html | 2 +- site/news/spark-1-0-2-released.html | 2 +- site/news/spark-1-1-0-released.html | 2 +- site/news/spark-1-1-1-released.html | 2 +- site/news/spark-1-2-0-released.html | 2 +- site/news/spark-1-2-1-released.html | 2 +- site/news/spark-1-2-2-released.html | 2 +- site/news/spark-1-3-0-released.html | 2 +- site/news/spark-1-4-0-released.html | 2 +- site/news/spark-1-4-1-released.html | 2 +- site/news/spark-1-5-0-released.html | 2 +- site/news/spark-1-5-1-released.html | 2 +- site/news/spark-1-5-2-released.html | 2 +- site/news/spark-1-6-0-released.html | 2 +- site/news/spark-1-6-1-released.html | 2 +- site/news/spark-1-6-2-released.html | 2 +- site/news/spark-2-0-0-released.html | 2 +- site/news/spark-2.0.0-preview.html | 2 +- .../spark-accepted-into-apache-incubator.html | 2 +- site/news/spark-and-shark-in-the-news.html | 2 +- site/news/spark-becomes-tlp.html| 2 +- site/news/spark-featured-in-wired.html | 2 +- .../spark-mailing-lists-moving-to-apache.html | 2 +- site/news/spark-meetups.html| 2 +- site/news/spark-screencasts-published.html | 2 +- site/news/spark-summit-2013-is-a-wrap.html | 2 +- site/news/spark-summit-2014-videos-posted.html | 2 +- site/news/spark-summit-2015-videos-posted.html | 2 +- site/news/spark-summit-agenda-posted.html | 2 +- .../spark-summit-east-2015-videos-posted.html | 2 +- .../spark-summit-east-2016-cfp-closing.html | 2 +- site/news/spark-summit-east-agenda-posted.html | 2 +- .../news/spark-summit-europe-agenda-posted.html | 2 +- site/news/spark-summit-europe.html | 2 +- .../spark-summit-june-2016-agenda-posted.html | 2 +- site/news/spark-tips-from-quantifind.html | 2 +- .../spark-user-survey-and-powered-by-page.html | 2 +- site/news/spark-version-0-6-0-released.html | 2 +- ...-wins-d
[1/2] spark-website git commit: Move 2.0.0 release date to July 26 since I managed to push it out tonight.
Repository: spark-website Updated Branches: refs/heads/asf-site 0915efb8d -> 46fb65a40 http://git-wip-us.apache.org/repos/asf/spark-website/blob/46fb65a4/site/news/spark-summit-europe-agenda-posted.html -- diff --git a/site/news/spark-summit-europe-agenda-posted.html b/site/news/spark-summit-europe-agenda-posted.html index d64bcb9..5ce9ba0 100644 --- a/site/news/spark-summit-europe-agenda-posted.html +++ b/site/news/spark-summit-europe-agenda-posted.html @@ -151,7 +151,7 @@ Spark 2.0.0 released - (Jul 27, 2016) + (Jul 26, 2016) Spark 1.6.2 released (Jun 25, 2016) http://git-wip-us.apache.org/repos/asf/spark-website/blob/46fb65a4/site/news/spark-summit-europe.html -- diff --git a/site/news/spark-summit-europe.html b/site/news/spark-summit-europe.html index 15abf9d..b9d774e 100644 --- a/site/news/spark-summit-europe.html +++ b/site/news/spark-summit-europe.html @@ -151,7 +151,7 @@ Spark 2.0.0 released - (Jul 27, 2016) + (Jul 26, 2016) Spark 1.6.2 released (Jun 25, 2016) http://git-wip-us.apache.org/repos/asf/spark-website/blob/46fb65a4/site/news/spark-summit-june-2016-agenda-posted.html -- diff --git a/site/news/spark-summit-june-2016-agenda-posted.html b/site/news/spark-summit-june-2016-agenda-posted.html index 2a83ac0..70efee0 100644 --- a/site/news/spark-summit-june-2016-agenda-posted.html +++ b/site/news/spark-summit-june-2016-agenda-posted.html @@ -151,7 +151,7 @@ Spark 2.0.0 released - (Jul 27, 2016) + (Jul 26, 2016) Spark 1.6.2 released (Jun 25, 2016) http://git-wip-us.apache.org/repos/asf/spark-website/blob/46fb65a4/site/news/spark-tips-from-quantifind.html -- diff --git a/site/news/spark-tips-from-quantifind.html b/site/news/spark-tips-from-quantifind.html index 2f4e4ea..ec5a6d5 100644 --- a/site/news/spark-tips-from-quantifind.html +++ b/site/news/spark-tips-from-quantifind.html @@ -151,7 +151,7 @@ Spark 2.0.0 released - (Jul 27, 2016) + (Jul 26, 2016) Spark 1.6.2 released (Jun 25, 2016) 
http://git-wip-us.apache.org/repos/asf/spark-website/blob/46fb65a4/site/news/spark-user-survey-and-powered-by-page.html -- diff --git a/site/news/spark-user-survey-and-powered-by-page.html b/site/news/spark-user-survey-and-powered-by-page.html index 8548a51..b3780f5 100644 --- a/site/news/spark-user-survey-and-powered-by-page.html +++ b/site/news/spark-user-survey-and-powered-by-page.html @@ -151,7 +151,7 @@ Spark 2.0.0 released - (Jul 27, 2016) + (Jul 26, 2016) Spark 1.6.2 released (Jun 25, 2016) http://git-wip-us.apache.org/repos/asf/spark-website/blob/46fb65a4/site/news/spark-version-0-6-0-released.html -- diff --git a/site/news/spark-version-0-6-0-released.html b/site/news/spark-version-0-6-0-released.html index e5148d7..344b465 100644 --- a/site/news/spark-version-0-6-0-released.html +++ b/site/news/spark-version-0-6-0-released.html @@ -151,7 +151,7 @@ Spark 2.0.0 released - (Jul 27, 2016) + (Jul 26, 2016) Spark 1.6.2 released (Jun 25, 2016) http://git-wip-us.apache.org/repos/asf/spark-website/blob/46fb65a4/site/news/spark-wins-daytona-gray-sort-100tb-benchmark.html -- diff --git a/site/news/spark-wins-daytona-gray-sort-100tb-benchmark.html b/site/news/spark-wins-daytona-gray-sort-100tb-benchmark.html index dd0a900..0723321 100644 --- a/site/news/spark-wins-daytona-gray-sort-100tb-benchmark.html +++ b/site/news/spark-wins-daytona-gray-sort-100tb-benchmark.html @@ -151,7 +151,7 @@ Spark 2.0.0 released - (Jul 27, 2016) + (Jul 26, 2016) Spark 1.6.2 released (Jun 25, 2016) http://git-wip-us.apache.org/repos/asf/spark-website/blob/46fb65a4/site/news/strata-exercises-now-available-online.html -- diff --git a/site/news/strata-exercises-now-available-online.html b/site/news/strata-exercises-now-available-online.html index 74d6bc2..b5915cd 100644 --- a/site/news/strata-exercises-now-available-online.html +++ b/site/news/strata-exercises-now-available-online.html @@ -151,7 +151,7 @@
[2/2] spark git commit: [SPARK-16621][SQL] Generate stable SQLs in SQLBuilder
[SPARK-16621][SQL] Generate stable SQLs in SQLBuilder ## What changes were proposed in this pull request? Currently, the generated SQLs have unstable IDs for generated attributes. Stable generated SQL is more helpful for understanding and testing the queries. This PR provides stable SQL generation through the following changes. - Provide unique ids for generated subqueries, `gen_subquery_xxx`. - Provide unique and stable ids for generated attributes, `gen_attr_xxx`. **Before** ```scala scala> new org.apache.spark.sql.catalyst.SQLBuilder(sql("select 1")).toSQL res0: String = SELECT `gen_attr_0` AS `1` FROM (SELECT 1 AS `gen_attr_0`) AS gen_subquery_0 scala> new org.apache.spark.sql.catalyst.SQLBuilder(sql("select 1")).toSQL res1: String = SELECT `gen_attr_4` AS `1` FROM (SELECT 1 AS `gen_attr_4`) AS gen_subquery_0 ``` **After** ```scala scala> new org.apache.spark.sql.catalyst.SQLBuilder(sql("select 1")).toSQL res1: String = SELECT `gen_attr_0` AS `1` FROM (SELECT 1 AS `gen_attr_0`) AS gen_subquery_0 scala> new org.apache.spark.sql.catalyst.SQLBuilder(sql("select 1")).toSQL res2: String = SELECT `gen_attr_0` AS `1` FROM (SELECT 1 AS `gen_attr_0`) AS gen_subquery_0 ``` ## How was this patch tested? Pass the existing Jenkins tests. Author: Dongjoon Hyun Closes #14257 from dongjoon-hyun/SPARK-16621. 
Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5b8e848b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5b8e848b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5b8e848b Branch: refs/heads/master Commit: 5b8e848bbfbc0c99a5faf758e40b188b0bbebb7b Parents: 738b4cc Author: Dongjoon Hyun Authored: Wed Jul 27 13:23:59 2016 +0800 Committer: Cheng Lian Committed: Wed Jul 27 13:23:59 2016 +0800 -- .../apache/spark/sql/catalyst/SQLBuilder.scala | 23 +++- sql/hive/src/test/resources/sqlgen/agg1.sql | 2 +- sql/hive/src/test/resources/sqlgen/agg2.sql | 2 +- sql/hive/src/test/resources/sqlgen/agg3.sql | 2 +- .../sqlgen/aggregate_functions_and_window.sql | 2 +- sql/hive/src/test/resources/sqlgen/case.sql | 2 +- .../test/resources/sqlgen/case_with_else.sql| 2 +- .../src/test/resources/sqlgen/case_with_key.sql | 2 +- .../resources/sqlgen/case_with_key_and_else.sql | 2 +- .../src/test/resources/sqlgen/cluster_by.sql| 2 +- .../sqlgen/data_source_json_parquet_t0.sql | 2 +- .../sqlgen/data_source_orc_parquet_t0.sql | 2 +- .../sqlgen/data_source_parquet_parquet_t0.sql | 2 +- .../resources/sqlgen/distinct_aggregation.sql | 2 +- .../src/test/resources/sqlgen/distribute_by.sql | 2 +- .../sqlgen/distribute_by_with_sort_by.sql | 2 +- sql/hive/src/test/resources/sqlgen/except.sql | 2 +- .../resources/sqlgen/filter_after_subquery.sql | 2 +- .../resources/sqlgen/generate_with_other_1.sql | 2 +- .../resources/sqlgen/generate_with_other_2.sql | 2 +- .../sqlgen/generator_in_lateral_view_1.sql | 2 +- .../sqlgen/generator_in_lateral_view_2.sql | 2 +- .../sqlgen/generator_non_referenced_table_1.sql | 2 +- .../sqlgen/generator_non_referenced_table_2.sql | 2 +- .../resources/sqlgen/generator_non_udtf_1.sql | 2 +- .../resources/sqlgen/generator_non_udtf_2.sql | 2 +- .../sqlgen/generator_referenced_table_1.sql | 2 +- .../sqlgen/generator_referenced_table_2.sql | 2 +- .../sqlgen/generator_with_ambiguous_names_1.sql | 2 
+- .../sqlgen/generator_with_ambiguous_names_2.sql | 2 +- .../sqlgen/generator_without_from_1.sql | 2 +- .../sqlgen/generator_without_from_2.sql | 2 +- .../test/resources/sqlgen/grouping_sets_1.sql | 2 +- .../test/resources/sqlgen/grouping_sets_2_1.sql | 2 +- .../test/resources/sqlgen/grouping_sets_2_2.sql | 2 +- .../test/resources/sqlgen/grouping_sets_2_3.sql | 2 +- .../test/resources/sqlgen/grouping_sets_2_4.sql | 2 +- .../test/resources/sqlgen/grouping_sets_2_5.sql | 2 +- sql/hive/src/test/resources/sqlgen/in.sql | 2 +- .../src/test/resources/sqlgen/intersect.sql | 2 +- .../src/test/resources/sqlgen/join_2_tables.sql | 2 +- .../resources/sqlgen/json_tuple_generator_1.sql | 2 +- .../resources/sqlgen/json_tuple_generator_2.sql | 2 +- .../test/resources/sqlgen/multi_distinct.sql| 2 +- .../nested_generator_in_lateral_view_1.sql | 2 +- .../nested_generator_in_lateral_view_2.sql | 2 +- sql/hive/src/test/resources/sqlgen/not_in.sql | 2 +- sql/hive/src/test/resources/sqlgen/not_like.sql | 2 +- .../resources/sqlgen/predicate_subquery.sql | 2 +- .../sqlgen/regular_expressions_and_window.sql | 2 +- .../test/resources/sqlgen/rollup_cube_1_1.sql | 2 +- .../test/resources/sqlgen/rollup_cube_1_2.sql | 2 +- .../test/resources/sqlgen/rollup_cube_2_1.sql | 2 +- .../test/resources/sqlgen/rollup_cube_2_2.sql | 2 +- .../test/resou
[1/2] spark git commit: [SPARK-16621][SQL] Generate stable SQLs in SQLBuilder
Repository: spark Updated Branches: refs/heads/master 738b4cc54 -> 5b8e848bb http://git-wip-us.apache.org/repos/asf/spark/blob/5b8e848b/sql/hive/src/test/resources/sqlgen/rollup_cube_4_2.sql -- diff --git a/sql/hive/src/test/resources/sqlgen/rollup_cube_4_2.sql b/sql/hive/src/test/resources/sqlgen/rollup_cube_4_2.sql index eebef6a..8bf1645 100644 --- a/sql/hive/src/test/resources/sqlgen/rollup_cube_4_2.sql +++ b/sql/hive/src/test/resources/sqlgen/rollup_cube_4_2.sql @@ -2,4 +2,4 @@ SELECT count(*) as cnt, key % 5 as k1, key - 5 as k2, grouping_id() FROM parquet_t1 GROUP BY key % 5, key - 5 WITH CUBE -SELECT `gen_attr` AS `cnt`, `gen_attr` AS `k1`, `gen_attr` AS `k2`, `gen_attr` AS `grouping_id()` FROM (SELECT count(1) AS `gen_attr`, (`gen_attr` % CAST(5 AS BIGINT)) AS `gen_attr`, (`gen_attr` - CAST(5 AS BIGINT)) AS `gen_attr`, grouping_id() AS `gen_attr` FROM (SELECT `key` AS `gen_attr`, `value` AS `gen_attr` FROM `default`.`parquet_t1`) AS gen_subquery_0 GROUP BY (`gen_attr` % CAST(5 AS BIGINT)), (`gen_attr` - CAST(5 AS BIGINT)) GROUPING SETS(((`gen_attr` % CAST(5 AS BIGINT)), (`gen_attr` - CAST(5 AS BIGINT))), ((`gen_attr` % CAST(5 AS BIGINT))), ((`gen_attr` - CAST(5 AS BIGINT))), ())) AS gen_subquery_1 +SELECT `gen_attr_3` AS `cnt`, `gen_attr_4` AS `k1`, `gen_attr_5` AS `k2`, `gen_attr_6` AS `grouping_id()` FROM (SELECT count(1) AS `gen_attr_3`, (`gen_attr_7` % CAST(5 AS BIGINT)) AS `gen_attr_4`, (`gen_attr_7` - CAST(5 AS BIGINT)) AS `gen_attr_5`, grouping_id() AS `gen_attr_6` FROM (SELECT `key` AS `gen_attr_7`, `value` AS `gen_attr_8` FROM `default`.`parquet_t1`) AS gen_subquery_0 GROUP BY (`gen_attr_7` % CAST(5 AS BIGINT)), (`gen_attr_7` - CAST(5 AS BIGINT)) GROUPING SETS(((`gen_attr_7` % CAST(5 AS BIGINT)), (`gen_attr_7` - CAST(5 AS BIGINT))), ((`gen_attr_7` % CAST(5 AS BIGINT))), ((`gen_attr_7` - CAST(5 AS BIGINT))), ())) AS gen_subquery_1 http://git-wip-us.apache.org/repos/asf/spark/blob/5b8e848b/sql/hive/src/test/resources/sqlgen/rollup_cube_5_1.sql -- 
diff --git a/sql/hive/src/test/resources/sqlgen/rollup_cube_5_1.sql b/sql/hive/src/test/resources/sqlgen/rollup_cube_5_1.sql index 9474233..17e78a0 100644 --- a/sql/hive/src/test/resources/sqlgen/rollup_cube_5_1.sql +++ b/sql/hive/src/test/resources/sqlgen/rollup_cube_5_1.sql @@ -3,4 +3,4 @@ SELECT count(*) AS cnt, key % 5 AS k1, key - 5 AS k2, grouping_id(key % 5, key - FROM (SELECT key, key%2, key - 5 FROM parquet_t1) t GROUP BY key%5, key-5 WITH ROLLUP -SELECT `gen_attr` AS `cnt`, `gen_attr` AS `k1`, `gen_attr` AS `k2`, `gen_attr` AS `k3` FROM (SELECT count(1) AS `gen_attr`, (`gen_attr` % CAST(5 AS BIGINT)) AS `gen_attr`, (`gen_attr` - CAST(5 AS BIGINT)) AS `gen_attr`, grouping_id() AS `gen_attr` FROM (SELECT `gen_attr`, (`gen_attr` % CAST(2 AS BIGINT)) AS `gen_attr`, (`gen_attr` - CAST(5 AS BIGINT)) AS `gen_attr` FROM (SELECT `key` AS `gen_attr`, `value` AS `gen_attr` FROM `default`.`parquet_t1`) AS gen_subquery_0) AS t GROUP BY (`gen_attr` % CAST(5 AS BIGINT)), (`gen_attr` - CAST(5 AS BIGINT)) GROUPING SETS(((`gen_attr` % CAST(5 AS BIGINT)), (`gen_attr` - CAST(5 AS BIGINT))), ((`gen_attr` % CAST(5 AS BIGINT))), ())) AS gen_subquery_1 +SELECT `gen_attr_3` AS `cnt`, `gen_attr_4` AS `k1`, `gen_attr_5` AS `k2`, `gen_attr_6` AS `k3` FROM (SELECT count(1) AS `gen_attr_3`, (`gen_attr_7` % CAST(5 AS BIGINT)) AS `gen_attr_4`, (`gen_attr_7` - CAST(5 AS BIGINT)) AS `gen_attr_5`, grouping_id() AS `gen_attr_6` FROM (SELECT `gen_attr_7`, (`gen_attr_7` % CAST(2 AS BIGINT)) AS `gen_attr_8`, (`gen_attr_7` - CAST(5 AS BIGINT)) AS `gen_attr_9` FROM (SELECT `key` AS `gen_attr_7`, `value` AS `gen_attr_12` FROM `default`.`parquet_t1`) AS gen_subquery_0) AS t GROUP BY (`gen_attr_7` % CAST(5 AS BIGINT)), (`gen_attr_7` - CAST(5 AS BIGINT)) GROUPING SETS(((`gen_attr_7` % CAST(5 AS BIGINT)), (`gen_attr_7` - CAST(5 AS BIGINT))), ((`gen_attr_7` % CAST(5 AS BIGINT))), ())) AS gen_subquery_1 
http://git-wip-us.apache.org/repos/asf/spark/blob/5b8e848b/sql/hive/src/test/resources/sqlgen/rollup_cube_5_2.sql -- diff --git a/sql/hive/src/test/resources/sqlgen/rollup_cube_5_2.sql b/sql/hive/src/test/resources/sqlgen/rollup_cube_5_2.sql index d36f43d..72506ef 100644 --- a/sql/hive/src/test/resources/sqlgen/rollup_cube_5_2.sql +++ b/sql/hive/src/test/resources/sqlgen/rollup_cube_5_2.sql @@ -3,4 +3,4 @@ SELECT count(*) AS cnt, key % 5 AS k1, key - 5 AS k2, grouping_id(key % 5, key - FROM (SELECT key, key % 2, key - 5 FROM parquet_t1) t GROUP BY key % 5, key - 5 WITH CUBE -SELECT `gen_attr`
spark-website git commit: Updated Scala 2.11 note
Repository: spark-website Updated Branches: refs/heads/asf-site d7dcb6c8b -> 0915efb8d Updated Scala 2.11 note Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/0915efb8 Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/0915efb8 Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/0915efb8 Branch: refs/heads/asf-site Commit: 0915efb8d90b37b8d6417ff88971ce143444bf5c Parents: d7dcb6c Author: Reynold Xin Authored: Tue Jul 26 22:05:57 2016 -0700 Committer: Reynold Xin Committed: Tue Jul 26 22:05:57 2016 -0700 -- downloads.md| 5 +++-- site/downloads.html | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark-website/blob/0915efb8/downloads.md -- diff --git a/downloads.md b/downloads.md index 5354869..178120d 100644 --- a/downloads.md +++ b/downloads.md @@ -33,8 +33,9 @@ Our latest stable version is Apache Spark 2.0.0, released on July 27, 2016 5. Verify this release using the and [project release KEYS](https://www.apache.org/dist/spark/KEYS). -_Note: Scala 2.11 users should download the Spark source package and build -[with Scala 2.11 support](http://spark.apache.org/docs/latest/building-spark.html#building-for-scala-211)._ +_Note: Starting version 2.0, Spark is built with Scala 2.11 by default. +Scala 2.10 users should download the Spark source package and build +[with Scala 2.10 support](http://spark.apache.org/docs/latest/building-spark.html#building-for-scala-210)._
spark-website git commit: removed old versions (<1.3) and re-arranged download options.
Repository: spark-website Updated Branches: refs/heads/asf-site 33d86d7bf -> d7dcb6c8b removed old versions (<1.3) and re-arranged download options. Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/d7dcb6c8 Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/d7dcb6c8 Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/d7dcb6c8 Branch: refs/heads/asf-site Commit: d7dcb6c8b96bffa894714f2348973c107b433738 Parents: 33d86d7 Author: Reynold Xin Authored: Tue Jul 26 21:13:36 2016 -0700 Committer: Reynold Xin Committed: Tue Jul 26 21:13:36 2016 -0700 -- js/downloads.js | 67 +++ site/js/downloads.js | 67 +++ 2 files changed, 66 insertions(+), 68 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark-website/blob/d7dcb6c8/js/downloads.js -- diff --git a/js/downloads.js b/js/downloads.js index 1d125b6..88e4bb9 100644 --- a/js/downloads.js +++ b/js/downloads.js @@ -7,7 +7,7 @@ function addRelease(version, releaseDate, packages, downloadable, stable) { releases[version] = {released: releaseDate, packages: packages, downloadable: downloadable, stable: stable}; } -var sources = {pretty: "Source Code [can build several Hadoop versions]", tag: "sources"}; +var sources = {pretty: "Source Code", tag: "sources"}; var hadoopFree = {pretty: "Pre-build with user-provided Hadoop [can use with most Hadoop distributions]", tag: "without-hadoop"}; var hadoop1 = {pretty: "Pre-built for Hadoop 1.X", tag: "hadoop1"}; var cdh4 = {pretty: "Pre-built for CDH 4", tag: "cdh4"}; @@ -19,9 +19,8 @@ var hadoop2p7 = {pretty: "Pre-built for Hadoop 2.7 and later", tag: "hadoop2.7"} var mapr3 = {pretty: "Pre-built for MapR 3.X", tag: "mapr3"}; var mapr4 = {pretty: "Pre-built for MapR 4.X", tag: "mapr4"}; -var sources = [sources]; // 0.7+ -var packagesV1 = [hadoop1, cdh4]; +var packagesV1 = [hadoop1, cdh4, sources]; // 0.8.1+ var packagesV2 = [hadoop2].concat(packagesV1); // 1.0.1+ @@ -31,39 
+30,39 @@ var packagesV4 = [hadoop2p4, hadoop2p3, mapr3, mapr4].concat(packagesV1); // 1.3.1+ var packagesV5 = [hadoop2p6].concat(packagesV4); // 1.4.0+ -var packagesV6 = [hadoopFree, hadoop2p6, hadoop2p4, hadoop2p3].concat(packagesV1); +var packagesV6 = [hadoop2p6, hadoop2p4, hadoop2p3, hadoopFree].concat(packagesV1); // 2.0.0+ -var packagesV7 = [hadoopFree, hadoop2p7, hadoop2p6, hadoop2p4, hadoop2p3]; +var packagesV7 = [hadoop2p7, hadoop2p6, hadoop2p4, hadoop2p3, hadoopFree, sources]; // addRelease("2.0.0-preview", new Date("05/24/2016"), sources.concat(packagesV7), true, false); -addRelease("2.0.0", new Date("07/27/2016"), sources.concat(packagesV7), true, true); -addRelease("1.6.2", new Date("06/25/2016"), sources.concat(packagesV6), true, true); -addRelease("1.6.1", new Date("03/09/2016"), sources.concat(packagesV6), true, true); -addRelease("1.6.0", new Date("01/04/2016"), sources.concat(packagesV6), true, true); -addRelease("1.5.2", new Date("11/09/2015"), sources.concat(packagesV6), true, true); -addRelease("1.5.1", new Date("10/02/2015"), sources.concat(packagesV6), true, true); -addRelease("1.5.0", new Date("9/09/2015"), sources.concat(packagesV6), true, true); -addRelease("1.4.1", new Date("7/15/2015"), sources.concat(packagesV6), true, true); -addRelease("1.4.0", new Date("6/11/2015"), sources.concat(packagesV6), true, true); -addRelease("1.3.1", new Date("4/17/2015"), sources.concat(packagesV5), true, true); -addRelease("1.3.0", new Date("3/13/2015"), sources.concat(packagesV4), true, true); -addRelease("1.2.2", new Date("4/17/2015"), sources.concat(packagesV4), true, true); -addRelease("1.2.1", new Date("2/9/2015"), sources.concat(packagesV4), true, true); -addRelease("1.2.0", new Date("12/18/2014"), sources.concat(packagesV4), true, true); -addRelease("1.1.1", new Date("11/26/2014"), sources.concat(packagesV4), true, true); -addRelease("1.1.0", new Date("9/11/2014"), sources.concat(packagesV4), true, true); -addRelease("1.0.2", new Date("8/5/2014"), 
sources.concat(packagesV3), true, true); -addRelease("1.0.1", new Date("7/11/2014"), sources.concat(packagesV3), false, true); -addRelease("1.0.0", new Date("5/30/2014"), sources.concat(packagesV2), false, true); -addRelease("0.9.2", new Date("7/23/2014"), sources.concat(packagesV2), true, true); -addRelease("0.9.1", new Date("4/9/2014"), sources.concat(packagesV2), false, true); -addRelease("0.9.0-incubating", new Date("2/2/2014"), sources.concat(packagesV2), false, true); -addRelease("0.8.1-incubating", new Date("12/19/2013"), sources.concat(packagesV2), true, true); -addRelease("0.8.0-incubating", new Date("9/25/2013"),
[1/3] spark-website git commit: 2.0.0 release
Repository: spark-website Updated Branches: refs/heads/asf-site 214938a57 -> 33d86d7bf http://git-wip-us.apache.org/repos/asf/spark-website/blob/33d86d7b/site/releases/spark-release-1-0-2.html -- diff --git a/site/releases/spark-release-1-0-2.html b/site/releases/spark-release-1-0-2.html index bb8dc49..c0b60a3 100644 --- a/site/releases/spark-release-1-0-2.html +++ b/site/releases/spark-release-1-0-2.html @@ -106,7 +106,7 @@ Documentation - Latest Release (Spark 1.6.2) + Latest Release (Spark 2.0.0) Older Versions and Other Resources @@ -150,6 +150,9 @@ Latest News + Spark 2.0.0 released + (Jul 27, 2016) + Spark 1.6.2 released (Jun 25, 2016) @@ -159,9 +162,6 @@ Preview release of Spark 2.0 (May 26, 2016) - Spark Summit (June 6, 2016, San Francisco) agenda posted - (Apr 17, 2016) - Archive http://git-wip-us.apache.org/repos/asf/spark-website/blob/33d86d7b/site/releases/spark-release-1-1-0.html -- diff --git a/site/releases/spark-release-1-1-0.html b/site/releases/spark-release-1-1-0.html index 34ef676..a04ff02 100644 --- a/site/releases/spark-release-1-1-0.html +++ b/site/releases/spark-release-1-1-0.html @@ -106,7 +106,7 @@ Documentation - Latest Release (Spark 1.6.2) + Latest Release (Spark 2.0.0) Older Versions and Other Resources @@ -150,6 +150,9 @@ Latest News + Spark 2.0.0 released + (Jul 27, 2016) + Spark 1.6.2 released (Jun 25, 2016) @@ -159,9 +162,6 @@ Preview release of Spark 2.0 (May 26, 2016) - Spark Summit (June 6, 2016, San Francisco) agenda posted - (Apr 17, 2016) - Archive http://git-wip-us.apache.org/repos/asf/spark-website/blob/33d86d7b/site/releases/spark-release-1-1-1.html -- diff --git a/site/releases/spark-release-1-1-1.html b/site/releases/spark-release-1-1-1.html index d94d4e8..69292a2 100644 --- a/site/releases/spark-release-1-1-1.html +++ b/site/releases/spark-release-1-1-1.html @@ -106,7 +106,7 @@ Documentation - Latest Release (Spark 1.6.2) + Latest Release (Spark 2.0.0) Older Versions and Other Resources @@ -150,6 +150,9 @@ Latest News + 
Spark 2.0.0 released + (Jul 27, 2016) + Spark 1.6.2 released (Jun 25, 2016) @@ -159,9 +162,6 @@ Preview release of Spark 2.0 (May 26, 2016) - Spark Summit (June 6, 2016, San Francisco) agenda posted - (Apr 17, 2016) - Archive http://git-wip-us.apache.org/repos/asf/spark-website/blob/33d86d7b/site/releases/spark-release-1-2-0.html -- diff --git a/site/releases/spark-release-1-2-0.html b/site/releases/spark-release-1-2-0.html index 4bb1285..4394167 100644 --- a/site/releases/spark-release-1-2-0.html +++ b/site/releases/spark-release-1-2-0.html @@ -106,7 +106,7 @@ Documentation - Latest Release (Spark 1.6.2) + Latest Release (Spark 2.0.0) Older Versions and Other Resources @@ -150,6 +150,9 @@ Latest News + Spark 2.0.0 released + (Jul 27, 2016) + Spark 1.6.2 released (Jun 25, 2016) @@ -159,9 +162,6 @@ Preview release of Spark 2.0 (May 26, 2016) - Spark Summit (June 6, 2016, San Francisco) agenda posted - (Apr 17, 2016) - Archive http://git-wip-us.apache.org/repos/asf/spark-website/blob/33d86d7b/site/releases/spark-release-1-2-1.html -- diff --git a/site/releases/spark-release-1-2-1.html b/site/releases/spark-release-1-2-1.html index afb4f3f..2de8c55 100644 --- a/site/releases/spark-release-1-2-1.html +++ b/site/releases/spark-release-1-2-1.html @@ -106,7 +106,7 @@ Documentation - Latest Release (Spark 1.6.2) + Latest Release (Spark 2.0.0) Older Versions and Other Resources @@ -150,6 +150,9 @@ Latest News + Spark 2.0.0 released + (Jul 27, 2016) + Spark 1.6.2 released (Jun 25, 2016)
[3/3] spark-website git commit: 2.0.0 release
2.0.0 release Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/33d86d7b Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/33d86d7b Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/33d86d7b Branch: refs/heads/asf-site Commit: 33d86d7bf576f3f4074573f80ef049bdca516da9 Parents: 214938a Author: Reynold Xin Authored: Tue Jul 26 21:00:47 2016 -0700 Committer: Reynold Xin Committed: Tue Jul 26 21:00:47 2016 -0700 -- _layouts/global.html| 2 +- downloads.md| 16 +- js/downloads.js | 3 +- news/_posts/2016-07-27-spark-2-0-0-released.md | 14 ++ site/community.html | 8 +- site/docs/latest| 2 +- site/documentation.html | 8 +- site/downloads.html | 38 ++-- site/examples.html | 8 +- site/faq.html | 8 +- site/graphx/index.html | 8 +- site/index.html | 8 +- site/js/downloads.js| 3 +- site/mailing-lists.html | 8 +- site/mllib/index.html | 8 +- site/news/amp-camp-2013-registration-ope.html | 8 +- .../news/announcing-the-first-spark-summit.html | 8 +- .../news/fourth-spark-screencast-published.html | 8 +- site/news/index.html| 17 +- site/news/nsdi-paper.html | 8 +- site/news/one-month-to-spark-summit-2015.html | 8 +- .../proposals-open-for-spark-summit-east.html | 8 +- ...registration-open-for-spark-summit-east.html | 8 +- .../news/run-spark-and-shark-on-amazon-emr.html | 8 +- site/news/spark-0-6-1-and-0-5-2-released.html | 8 +- site/news/spark-0-6-2-released.html | 8 +- site/news/spark-0-7-0-released.html | 8 +- site/news/spark-0-7-2-released.html | 8 +- site/news/spark-0-7-3-released.html | 8 +- site/news/spark-0-8-0-released.html | 8 +- site/news/spark-0-8-1-released.html | 8 +- site/news/spark-0-9-0-released.html | 8 +- site/news/spark-0-9-1-released.html | 8 +- site/news/spark-0-9-2-released.html | 8 +- site/news/spark-1-0-0-released.html | 8 +- site/news/spark-1-0-1-released.html | 8 +- site/news/spark-1-0-2-released.html | 8 +- site/news/spark-1-1-0-released.html | 8 
+- site/news/spark-1-1-1-released.html | 8 +- site/news/spark-1-2-0-released.html | 8 +- site/news/spark-1-2-1-released.html | 8 +- site/news/spark-1-2-2-released.html | 8 +- site/news/spark-1-3-0-released.html | 8 +- site/news/spark-1-4-0-released.html | 8 +- site/news/spark-1-4-1-released.html | 8 +- site/news/spark-1-5-0-released.html | 8 +- site/news/spark-1-5-1-released.html | 8 +- site/news/spark-1-5-2-released.html | 8 +- site/news/spark-1-6-0-released.html | 8 +- site/news/spark-1-6-1-released.html | 8 +- site/news/spark-1-6-2-released.html | 8 +- site/news/spark-2-0-0-released.html | 211 +++ site/news/spark-2.0.0-preview.html | 8 +- .../spark-accepted-into-apache-incubator.html | 8 +- site/news/spark-and-shark-in-the-news.html | 8 +- site/news/spark-becomes-tlp.html| 8 +- site/news/spark-featured-in-wired.html | 8 +- .../spark-mailing-lists-moving-to-apache.html | 8 +- site/news/spark-meetups.html| 8 +- site/news/spark-screencasts-published.html | 8 +- site/news/spark-summit-2013-is-a-wrap.html | 8 +- site/news/spark-summit-2014-videos-posted.html | 8 +- site/news/spark-summit-2015-videos-posted.html | 8 +- site/news/spark-summit-agenda-posted.html | 8 +- .../spark-summit-east-2015-videos-posted.html | 8 +- .../spark-summit-east-2016-cfp-closing.html | 8 +- site/news/spark-summit-east-agenda-posted.html | 8 +- .../news/spark-summit-europe-agenda-posted.html | 8 +- site/news/spark-summit-europe.html | 8 +- .../spark-summit-june-2016-agenda-posted.html | 8 +- site/news/spark-tips-from-quantifind.html | 8 +- .../spark-user-survey-and-powered-by-page.html | 8 +- site/news/spark-version-0-6-0-released.html | 8 +- ...-wins-daytona-gray-sort-100tb-benchmark.html | 8 +- .../strata-exercises-now-available-online.html | 8 +- .../news/submit-talks-to-spa
[2/3] spark-website git commit: 2.0.0 release
http://git-wip-us.apache.org/repos/asf/spark-website/blob/33d86d7b/site/news/spark-1-6-1-released.html -- diff --git a/site/news/spark-1-6-1-released.html b/site/news/spark-1-6-1-released.html index 8c838a0..c0e1d15 100644 --- a/site/news/spark-1-6-1-released.html +++ b/site/news/spark-1-6-1-released.html @@ -106,7 +106,7 @@ Documentation - Latest Release (Spark 1.6.2) + Latest Release (Spark 2.0.0) Older Versions and Other Resources @@ -150,6 +150,9 @@ Latest News + Spark 2.0.0 released + (Jul 27, 2016) + Spark 1.6.2 released (Jun 25, 2016) @@ -159,9 +162,6 @@ Preview release of Spark 2.0 (May 26, 2016) - Spark Summit (June 6, 2016, San Francisco) agenda posted - (Apr 17, 2016) - Archive http://git-wip-us.apache.org/repos/asf/spark-website/blob/33d86d7b/site/news/spark-1-6-2-released.html -- diff --git a/site/news/spark-1-6-2-released.html b/site/news/spark-1-6-2-released.html index 6236814..5ad0532 100644 --- a/site/news/spark-1-6-2-released.html +++ b/site/news/spark-1-6-2-released.html @@ -106,7 +106,7 @@ Documentation - Latest Release (Spark 1.6.2) + Latest Release (Spark 2.0.0) Older Versions and Other Resources @@ -150,6 +150,9 @@ Latest News + Spark 2.0.0 released + (Jul 27, 2016) + Spark 1.6.2 released (Jun 25, 2016) @@ -159,9 +162,6 @@ Preview release of Spark 2.0 (May 26, 2016) - Spark Summit (June 6, 2016, San Francisco) agenda posted - (Apr 17, 2016) - Archive http://git-wip-us.apache.org/repos/asf/spark-website/blob/33d86d7b/site/news/spark-2-0-0-released.html -- diff --git a/site/news/spark-2-0-0-released.html b/site/news/spark-2-0-0-released.html new file mode 100644 index 000..bcce390 --- /dev/null +++ b/site/news/spark-2-0-0-released.html @@ -0,0 +1,211 @@ + + + + + + + + + Spark 2.0.0 released | Apache Spark + + + + + + + + + + + + + + + + + var _gaq = _gaq || []; + _gaq.push(['_setAccount', 'UA-32518208-2']); + _gaq.push(['_trackPageview']); + (function() { +var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true; 
+ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js'; +var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s); + })(); + + + function trackOutboundLink(link, category, action) { +try { + _gaq.push(['_trackEvent', category , action]); +} catch(err){} + +setTimeout(function() { + document.location.href = link.href; +}, 100); + } + + + + + + + + +https://code.jquery.com/jquery.js";> + + + + + + + + + + + + Lightning-fast cluster computing + + + + + + + + + + Toggle navigation + + + + + + + + + + Download + + + Libraries + + + SQL and DataFrames + Spark Streaming + MLlib (machine learning) + GraphX (graph) + + http://spark-packages.org";>Third-Party Packages + + + + + Documentation + + + Latest Release (Spark 2.0.0) + Older Versions and Other Resources + + + Examples + + + Community + + + Mailing Lists + Events and Meetups + Project History + https://cwiki.apache.org/confluence/display/SPARK/Powered+By+Spark";>Powered By + https://cwiki.apache.org/confluence/display/SPARK/Committers";>Project Committers + https://issues.apache.org/jira/browse/SPARK";>Issue Tracker + + + FAQ + + + +http://www.apache.org/"; class="dropdown-toggle" data-toggle="dropdown"> + Apache Software Foundation + + http://www.apache.org/";>Apache Homepage + http://www.apache.org/licenses/";>License + http://www.apache.org/foundation/sponsorship.html";>Sponsorship + http://www.apache.org/foundation/thanks.html";>Thanks + http://www.apache.org/security/";>Sec
spark git commit: [SPARK-16524][SQL] Add RowBatch and RowBasedHashMapGenerator
Repository: spark Updated Branches: refs/heads/master 0b71d9ae0 -> 738b4cc54 [SPARK-16524][SQL] Add RowBatch and RowBasedHashMapGenerator ## What changes were proposed in this pull request? This PR is the first step for the following feature: For hash aggregation in Spark SQL, we use a fast aggregation hashmap to act as a "cache" in order to boost aggregation performance. Previously, the hashmap is backed by a `ColumnarBatch`. This has performance issues when we have wide schema for the aggregation table (large number of key fields or value fields). In this JIRA, we support another implementation of fast hashmap, which is backed by a `RowBasedKeyValueBatch`. We then automatically pick between the two implementations based on certain knobs. In this first-step PR, implementations for `RowBasedKeyValueBatch` and `RowBasedHashMapGenerator` are added. ## How was this patch tested? Unit tests: `RowBasedKeyValueBatchSuite` Author: Qifan Pu Closes #14349 from ooq/SPARK-16524. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/738b4cc5 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/738b4cc5 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/738b4cc5 Branch: refs/heads/master Commit: 738b4cc548ca48c010b682b8bc19a2f7e1947cfe Parents: 0b71d9a Author: Qifan Pu Authored: Tue Jul 26 18:08:07 2016 -0700 Committer: Reynold Xin Committed: Tue Jul 26 18:08:07 2016 -0700 -- .../FixedLengthRowBasedKeyValueBatch.java | 174 .../expressions/RowBasedKeyValueBatch.java | 182 .../VariableLengthRowBasedKeyValueBatch.java| 185 .../expressions/RowBasedKeyValueBatchSuite.java | 425 +++ .../execution/aggregate/HashMapGenerator.scala | 176 .../aggregate/RowBasedHashMapGenerator.scala| 205 + .../aggregate/VectorizedHashMapGenerator.scala | 135 +- 7 files changed, 1356 insertions(+), 126 deletions(-) -- 
http://git-wip-us.apache.org/repos/asf/spark/blob/738b4cc5/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/FixedLengthRowBasedKeyValueBatch.java -- diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/FixedLengthRowBasedKeyValueBatch.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/FixedLengthRowBasedKeyValueBatch.java new file mode 100644 index 000..b6130d1 --- /dev/null +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/FixedLengthRowBasedKeyValueBatch.java @@ -0,0 +1,174 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.catalyst.expressions; + +import org.apache.spark.memory.TaskMemoryManager; +import org.apache.spark.sql.types.*; +import org.apache.spark.unsafe.Platform; + +/** + * An implementation of `RowBasedKeyValueBatch` in which all key-value records have same length. 
+ * + * The format for each record looks like this: + * [UnsafeRow for key of length klen] [UnsafeRow for Value of length vlen] + * [8 bytes pointer to next] + * Thus, record length = klen + vlen + 8 + */ +public final class FixedLengthRowBasedKeyValueBatch extends RowBasedKeyValueBatch { + private final int klen; + private final int vlen; + private final int recordLength; + + private final long getKeyOffsetForFixedLengthRecords(int rowId) { +return recordStartOffset + rowId * (long) recordLength; + } + + /** + * Append a key value pair. + * It copies data into the backing MemoryBlock. + * Returns an UnsafeRow pointing to the value if succeeds, otherwise returns null. + */ + @Override + public final UnsafeRow appendRow(Object kbase, long koff, int klen, + Object vbase, long voff, int vlen) { +// if run out of max supported rows or page size, return null +if (numRows >= capacity || page == null || page.size() - pageCursor < recordLength) { + return null; +} + +long offset = page.getBaseOffset() + pageCursor;
spark-website git commit: Update release notes
Repository: spark-website Updated Branches: refs/heads/asf-site 33bab055d -> 214938a57 Update release notes Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/214938a5 Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/214938a5 Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/214938a5 Branch: refs/heads/asf-site Commit: 214938a57f8c96a792b04f39a867e2cec7fa0d91 Parents: 33bab05 Author: Reynold Xin Authored: Tue Jul 26 16:42:12 2016 -0700 Committer: Reynold Xin Committed: Tue Jul 26 16:42:12 2016 -0700 -- releases/_posts/2016-07-27-spark-release-2-0-0.md | 2 +- site/releases/spark-release-2-0-0.html| 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark-website/blob/214938a5/releases/_posts/2016-07-27-spark-release-2-0-0.md -- diff --git a/releases/_posts/2016-07-27-spark-release-2-0-0.md b/releases/_posts/2016-07-27-spark-release-2-0-0.md index 8d35967..1cc5cdd 100644 --- a/releases/_posts/2016-07-27-spark-release-2-0-0.md +++ b/releases/_posts/2016-07-27-spark-release-2-0-0.md @@ -11,7 +11,7 @@ meta: _wpas_done_all: '1' --- -Apache Spark 2.0.0 is the first release on the 2.x line. This release includes over 2500 patches from over 300 contributors. Spark 2.0.0 builds on what the community has learned in the past two years, with major updates in API usability, SQL 2003 support, performance improvements, structured streaming, R UDF support, as well as operational improvements. +Apache Spark 2.0.0 is the first release on the 2.x line. The major updates are API usability, SQL 2003 support, performance improvements, structured streaming, R UDF support, as well as operational improvements. In addition, this release includes over 2500 patches from over 300 contributors. To download Apache Spark 2.0.0, visit the [downloads](http://spark.apache.org/downloads.html) page. 
You can consult JIRA for the [detailed changes](https://issues.apache.org/jira/secure/ReleaseNote.jspa?projectId=12315420&version=12329449). We have curated a list of high level changes here, grouped by major modules. http://git-wip-us.apache.org/repos/asf/spark-website/blob/214938a5/site/releases/spark-release-2-0-0.html -- diff --git a/site/releases/spark-release-2-0-0.html b/site/releases/spark-release-2-0-0.html index 72dd661..22db510 100644 --- a/site/releases/spark-release-2-0-0.html +++ b/site/releases/spark-release-2-0-0.html @@ -186,7 +186,7 @@ Spark Release 2.0.0 -Apache Spark 2.0.0 is the first release on the 2.x line. This release includes over 2500 patches from over 300 contributors. Spark 2.0.0 builds on what the community has learned in the past two years, with major updates in API usability, SQL 2003 support, performance improvements, structured streaming, R UDF support, as well as operational improvements. +Apache Spark 2.0.0 is the first release on the 2.x line. The major updates are API usability, SQL 2003 support, performance improvements, structured streaming, R UDF support, as well as operational improvements. In addition, this release includes over 2500 patches from over 300 contributors. To download Apache Spark 2.0.0, visit the <a href="http://spark.apache.org/downloads.html">downloads</a> page. You can consult JIRA for the <a href="https://issues.apache.org/jira/secure/ReleaseNote.jspa?projectId=12315420&version=12329449">detailed changes</a>. We have curated a list of high level changes here, grouped by major modules. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[2/2] spark-website git commit: Change title for Documentation -> "Other Resources" to Documentation -> Older Versions and Other Resources.
Change title for Documentation -> "Other Resources" to Documentation -> Older Versions and Other Resources. Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/33bab055 Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/33bab055 Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/33bab055 Branch: refs/heads/asf-site Commit: 33bab055d430315c9d61a18b21d9c48b5b8fedee Parents: f0578ab Author: Reynold Xin Authored: Tue Jul 26 15:32:17 2016 -0700 Committer: Reynold Xin Committed: Tue Jul 26 15:32:17 2016 -0700 -- _layouts/global.html| 2 +- site/community.html | 2 +- site/documentation.html | 2 +- site/downloads.html | 2 +- site/examples.html | 2 +- site/faq.html | 2 +- site/graphx/index.html | 2 +- site/index.html | 2 +- site/mailing-lists.html | 2 +- site/mllib/index.html | 2 +- site/news/amp-camp-2013-registration-ope.html | 2 +- site/news/announcing-the-first-spark-summit.html| 2 +- site/news/fourth-spark-screencast-published.html| 2 +- site/news/index.html| 2 +- site/news/nsdi-paper.html | 2 +- site/news/one-month-to-spark-summit-2015.html | 2 +- site/news/proposals-open-for-spark-summit-east.html | 2 +- site/news/registration-open-for-spark-summit-east.html | 2 +- site/news/run-spark-and-shark-on-amazon-emr.html| 2 +- site/news/spark-0-6-1-and-0-5-2-released.html | 2 +- site/news/spark-0-6-2-released.html | 2 +- site/news/spark-0-7-0-released.html | 2 +- site/news/spark-0-7-2-released.html | 2 +- site/news/spark-0-7-3-released.html | 2 +- site/news/spark-0-8-0-released.html | 2 +- site/news/spark-0-8-1-released.html | 2 +- site/news/spark-0-9-0-released.html | 2 +- site/news/spark-0-9-1-released.html | 2 +- site/news/spark-0-9-2-released.html | 2 +- site/news/spark-1-0-0-released.html | 2 +- site/news/spark-1-0-1-released.html | 2 +- site/news/spark-1-0-2-released.html | 2 +- site/news/spark-1-1-0-released.html | 2 +- site/news/spark-1-1-1-released.html | 2 +- 
site/news/spark-1-2-0-released.html | 2 +- site/news/spark-1-2-1-released.html | 2 +- site/news/spark-1-2-2-released.html | 2 +- site/news/spark-1-3-0-released.html | 2 +- site/news/spark-1-4-0-released.html | 2 +- site/news/spark-1-4-1-released.html | 2 +- site/news/spark-1-5-0-released.html | 2 +- site/news/spark-1-5-1-released.html | 2 +- site/news/spark-1-5-2-released.html | 2 +- site/news/spark-1-6-0-released.html | 2 +- site/news/spark-1-6-1-released.html | 2 +- site/news/spark-1-6-2-released.html | 2 +- site/news/spark-2.0.0-preview.html | 2 +- site/news/spark-accepted-into-apache-incubator.html | 2 +- site/news/spark-and-shark-in-the-news.html | 2 +- site/news/spark-becomes-tlp.html| 2 +- site/news/spark-featured-in-wired.html | 2 +- site/news/spark-mailing-lists-moving-to-apache.html | 2 +- site/news/spark-meetups.html| 2 +- site/news/spark-screencasts-published.html | 2 +- site/news/spark-summit-2013-is-a-wrap.html | 2 +- site/news/spark-summit-2014-videos-posted.html | 2 +- site/news/spark-summit-2015-videos-posted.html | 2 +- site/news/spark-summit-agenda-posted.html | 2 +- site/news/spark-summit-east-2015-videos-posted.html | 2 +- site/news/spark-summit-east-2016-cfp-closing.html | 2 +- site/news/spark-summit-east-agenda-posted.html | 2 +- site/news/spark-summit-europe-agenda-posted.html| 2 +- site/news/spark-summit-europe.html | 2 +- site/news/spark-su
[1/2] spark-website git commit: Change title for Documentation -> "Other Resources" to Documentation -> Older Versions and Other Resources.
Repository: spark-website Updated Branches: refs/heads/asf-site f0578ab3f -> 33bab055d http://git-wip-us.apache.org/repos/asf/spark-website/blob/33bab055/site/releases/spark-release-1-5-2.html -- diff --git a/site/releases/spark-release-1-5-2.html b/site/releases/spark-release-1-5-2.html index d4d7fc8..5915943 100644 --- a/site/releases/spark-release-1-5-2.html +++ b/site/releases/spark-release-1-5-2.html @@ -107,7 +107,7 @@ Latest Release (Spark 1.6.2) - Other Resources + Older Versions and Other Resources Examples http://git-wip-us.apache.org/repos/asf/spark-website/blob/33bab055/site/releases/spark-release-1-6-0.html -- diff --git a/site/releases/spark-release-1-6-0.html b/site/releases/spark-release-1-6-0.html index 06791cd..4c0cec6 100644 --- a/site/releases/spark-release-1-6-0.html +++ b/site/releases/spark-release-1-6-0.html @@ -107,7 +107,7 @@ Latest Release (Spark 1.6.2) - Other Resources + Older Versions and Other Resources Examples http://git-wip-us.apache.org/repos/asf/spark-website/blob/33bab055/site/releases/spark-release-1-6-1.html -- diff --git a/site/releases/spark-release-1-6-1.html b/site/releases/spark-release-1-6-1.html index 6791de9..c190bf6 100644 --- a/site/releases/spark-release-1-6-1.html +++ b/site/releases/spark-release-1-6-1.html @@ -107,7 +107,7 @@ Latest Release (Spark 1.6.2) - Other Resources + Older Versions and Other Resources Examples http://git-wip-us.apache.org/repos/asf/spark-website/blob/33bab055/site/releases/spark-release-1-6-2.html -- diff --git a/site/releases/spark-release-1-6-2.html b/site/releases/spark-release-1-6-2.html index a998477..10d67f1 100644 --- a/site/releases/spark-release-1-6-2.html +++ b/site/releases/spark-release-1-6-2.html @@ -107,7 +107,7 @@ Latest Release (Spark 1.6.2) - Other Resources + Older Versions and Other Resources Examples http://git-wip-us.apache.org/repos/asf/spark-website/blob/33bab055/site/releases/spark-release-2-0-0.html -- diff --git a/site/releases/spark-release-2-0-0.html 
b/site/releases/spark-release-2-0-0.html index cf6f86b..72dd661 100644 --- a/site/releases/spark-release-2-0-0.html +++ b/site/releases/spark-release-2-0-0.html @@ -107,7 +107,7 @@ Latest Release (Spark 1.6.2) - Other Resources + Older Versions and Other Resources Examples http://git-wip-us.apache.org/repos/asf/spark-website/blob/33bab055/site/research.html -- diff --git a/site/research.html b/site/research.html index c00e789..f2bb59d 100644 --- a/site/research.html +++ b/site/research.html @@ -107,7 +107,7 @@ Latest Release (Spark 1.6.2) - Other Resources + Older Versions and Other Resources Examples http://git-wip-us.apache.org/repos/asf/spark-website/blob/33bab055/site/screencasts/1-first-steps-with-spark.html -- diff --git a/site/screencasts/1-first-steps-with-spark.html b/site/screencasts/1-first-steps-with-spark.html index b6fccb0..5e290a6 100644 --- a/site/screencasts/1-first-steps-with-spark.html +++ b/site/screencasts/1-first-steps-with-spark.html @@ -107,7 +107,7 @@ Latest Release (Spark 1.6.2) - Other Resources + Older Versions and Other Resources Examples http://git-wip-us.apache.org/repos/asf/spark-website/blob/33bab055/site/screencasts/2-spark-documentation-overview.html -- diff --git a/site/screencasts/2-spark-documentation-overview.html b/site/screencasts/2-spark-documentation-overview.html index 4c2e00d..8e68b7a 100644 --- a/site/screencasts/2-spark-documentation-overview.html +++ b/site/screencasts/2-spark-documentation-overview.html @@ -107,7 +107,7 @@ Latest Release (Spark 1.6.2) - Other Resources + Older Versions and Other Resources Examples http://git-wip-us.apache.org/repos/asf/spark-website/blob/33bab055/site/screencasts/3-transformations-and-caching.html -- diff --git a/site/screencasts/3-transformations-and-caching.html b/s
spark-website git commit: Add 2.0.0 to documentation page
Repository: spark-website Updated Branches: refs/heads/asf-site 7cd1fdf23 -> f0578ab3f Add 2.0.0 to documentation page Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/f0578ab3 Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/f0578ab3 Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/f0578ab3 Branch: refs/heads/asf-site Commit: f0578ab3f4ad07e8f4fab2207e62dd80175d6f09 Parents: 7cd1fdf Author: Reynold Xin Authored: Tue Jul 26 15:31:28 2016 -0700 Committer: Reynold Xin Committed: Tue Jul 26 15:31:28 2016 -0700 -- documentation.md| 5 - site/documentation.html | 5 - 2 files changed, 8 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark-website/blob/f0578ab3/documentation.md -- diff --git a/documentation.md b/documentation.md index bf7c08a..1f87446 100644 --- a/documentation.md +++ b/documentation.md @@ -12,7 +12,8 @@ navigation: Setup instructions, programming guides, and other documentation are available for each stable version of Spark below: - Spark 1.6.2 (latest release) + Spark 2.0.0 (latest release) + Spark 1.6.2 Spark 1.6.1 Spark 1.6.0 Spark 1.5.2 @@ -31,11 +32,13 @@ navigation: Spark 0.6.2 + The documentation linked to above covers getting started with Spark, as well the built-in components MLlib, Spark Streaming, and GraphX. 
http://git-wip-us.apache.org/repos/asf/spark-website/blob/f0578ab3/site/documentation.html -- diff --git a/site/documentation.html b/site/documentation.html index 4d5dbc7..56066c4 100644 --- a/site/documentation.html +++ b/site/documentation.html @@ -188,7 +188,8 @@ Setup instructions, programming guides, and other documentation are available for each stable version of Spark below: - Spark 1.6.2 (latest release) + Spark 2.0.0 (latest release) + Spark 1.6.2 Spark 1.6.1 Spark 1.6.0 Spark 1.5.2 @@ -207,11 +208,13 @@ Spark 0.6.2 + The documentation linked to above covers getting started with Spark, as well the built-in components MLlib, Spark Streaming, and GraphX. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark-website git commit: More comprehensive new features
Repository: spark-website Updated Branches: refs/heads/asf-site 175d31a25 -> 7cd1fdf23 More comprehensive new features Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/7cd1fdf2 Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/7cd1fdf2 Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/7cd1fdf2 Branch: refs/heads/asf-site Commit: 7cd1fdf235b270b2aa38f8bb68d2e451ff618e2e Parents: 175d31a Author: Reynold Xin Authored: Tue Jul 26 15:29:07 2016 -0700 Committer: Reynold Xin Committed: Tue Jul 26 15:29:07 2016 -0700 -- .../_posts/2016-07-27-spark-release-2-0-0.md| 40 +- site/releases/spark-release-2-0-0.html | 58 +--- 2 files changed, 66 insertions(+), 32 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark-website/blob/7cd1fdf2/releases/_posts/2016-07-27-spark-release-2-0-0.md -- diff --git a/releases/_posts/2016-07-27-spark-release-2-0-0.md b/releases/_posts/2016-07-27-spark-release-2-0-0.md index 9969ce8..8d35967 100644 --- a/releases/_posts/2016-07-27-spark-release-2-0-0.md +++ b/releases/_posts/2016-07-27-spark-release-2-0-0.md @@ -34,38 +34,46 @@ One of the largest changes in Spark 2.0 is the new updated APIs: - SparkSession: new entry point that replaces the old SQLContext and HiveContext for DataFrame and Dataset APIs. SQLContext and HiveContext are kept for backward compatibility. - A new, streamlined configuration API for SparkSession - Simpler, more performant accumulator API + - A new, improved Aggregator API for typed aggregation in Datasets SQL Spark 2.0 substantially improved SQL functionalities with SQL2003 support. Spark SQL can now run all 99 TPC-DS queries. 
More prominently, we have improved: + - A native SQL parser that supports both ANSI-SQL as well as Hive QL + - Native DDL command implementations - Subquery support, including - - Uncorrelated Scalar Subqueries - - Correlated Scalar Subqueries - - NOT IN predicate Subqueries (in WHERE/HAVING clauses) - - IN predicate subqueries (in WHERE/HAVING clauses) - - (NOT) EXISTS predicate subqueries (in WHERE/HAVING clauses) + - Uncorrelated Scalar Subqueries + - Correlated Scalar Subqueries + - NOT IN predicate Subqueries (in WHERE/HAVING clauses) + - IN predicate subqueries (in WHERE/HAVING clauses) + - (NOT) EXISTS predicate subqueries (in WHERE/HAVING clauses) - View canonicalization support In addition, when building without Hive support, Spark SQL should have almost all the functionality as when building with Hive support, with the exception of Hive connectivity, Hive UDFs, and script transforms. - Performance + New Features + + - Native CSV data source, based on Databricks' [spark-csv module](https://github.com/databricks/spark-csv) + - Off-heap memory management for both caching and runtime execution + - Hive style bucketing support + - Approximate summary statistics using sketches, including approximate quantile, Bloom filter, and count-min sketch. + + + Performance and Runtime - Substantial (2 - 10X) performance speedups for common operators in SQL and DataFrames via a new technique called whole stage code generation. - Improved Parquet scan throughput through vectorization - Improved ORC performance - Many improvements in the Catalyst query optimizer for common workloads - Improved window function performance via native implementations for all window functions + - Automatic file coalescing for native data sources ### MLlib -The DataFrame-based API is now the primary API. The RDD-based API is entering maintenance mode. See the MLlib guide for details. - - API changes -The largest API change is in linear algebra. 
The DataFrame-based API (spark.ml) now depends upon local linear algebra in spark.ml.linalg, rather than in spark.mllib.linalg. This removes the last dependencies of spark.ml.* on spark.mllib.*. (SPARK-13944) -See the MLlib migration guide for a full list of API changes. +The DataFrame-based API is now the primary API. The RDD-based API is entering maintenance mode. See the MLlib guide for details New features @@ -99,9 +107,14 @@ Spark 2.0 ships the initial experimental release for Structured Streaming, a hig For the DStream API, the most prominent update is the new experimental support for Kafka 0.10. -### Operational and Packaging Improvements +### Dependency and Packaging Improvements + +There are a variety of changes to Spark's operations and packaging process: -There are a variety of improvements to Spark's operations and packaging process. The most prominent change is that Spark 2.0 no longer requires a fat assembly jar for production dep
spark-website git commit: Add a link
Repository: spark-website Updated Branches: refs/heads/asf-site a9ba7a4c6 -> 175d31a25 Add a link Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/175d31a2 Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/175d31a2 Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/175d31a2 Branch: refs/heads/asf-site Commit: 175d31a253b26e5af63dfb28235b3ff0a3d74bc9 Parents: a9ba7a4 Author: Reynold Xin Authored: Tue Jul 26 15:11:27 2016 -0700 Committer: Reynold Xin Committed: Tue Jul 26 15:11:27 2016 -0700 -- releases/_posts/2016-07-27-spark-release-2-0-0.md | 2 +- site/releases/spark-release-2-0-0.html| 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark-website/blob/175d31a2/releases/_posts/2016-07-27-spark-release-2-0-0.md -- diff --git a/releases/_posts/2016-07-27-spark-release-2-0-0.md b/releases/_posts/2016-07-27-spark-release-2-0-0.md index eb267cf..9969ce8 100644 --- a/releases/_posts/2016-07-27-spark-release-2-0-0.md +++ b/releases/_posts/2016-07-27-spark-release-2-0-0.md @@ -21,7 +21,7 @@ To download Apache Spark 2.0.0, visit the [downloads](http://spark.apache.org/do ### API Stability -Apache Spark 2.0.0 is the first release in the 2.X major line. Spark is guaranteeing stability of its non-experimental APIs for all 2.X releases. Although the APIs have stayed largely similar to 1.X, Spark 2.0.0 does have API breaking changes. They are documented at the end of this release notes. +Apache Spark 2.0.0 is the first release in the 2.X major line. Spark is guaranteeing stability of its non-experimental APIs for all 2.X releases. Although the APIs have stayed largely similar to 1.X, Spark 2.0.0 does have API breaking changes. They are documented in the [Removals, Behavior Changes and Deprecations](#removals-behavior-changes-and-deprecations) section. 
### Core and Spark SQL http://git-wip-us.apache.org/repos/asf/spark-website/blob/175d31a2/site/releases/spark-release-2-0-0.html -- diff --git a/site/releases/spark-release-2-0-0.html b/site/releases/spark-release-2-0-0.html index 94ce3bb..ffa8255 100644 --- a/site/releases/spark-release-2-0-0.html +++ b/site/releases/spark-release-2-0-0.html @@ -219,7 +219,7 @@ API Stability -Apache Spark 2.0.0 is the first release in the 2.X major line. Spark is guaranteeing stability of its non-experimental APIs for all 2.X releases. Although the APIs have stayed largely similar to 1.X, Spark 2.0.0 does have API breaking changes. They are documented at the end of this release notes. +Apache Spark 2.0.0 is the first release in the 2.X major line. Spark is guaranteeing stability of its non-experimental APIs for all 2.X releases. Although the APIs have stayed largely similar to 1.X, Spark 2.0.0 does have API breaking changes. They are documented in the Removals, Behavior Changes and Deprecations section. Core and Spark SQL - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark-website git commit: Better release notes for 2.0.0
Repository: spark-website Updated Branches: refs/heads/asf-site 0323eb078 -> a9ba7a4c6 Better release notes for 2.0.0 Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/a9ba7a4c Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/a9ba7a4c Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/a9ba7a4c Branch: refs/heads/asf-site Commit: a9ba7a4c65bf97c1762f300cbf5ba1459e59b765 Parents: 0323eb0 Author: Reynold Xin Authored: Tue Jul 26 15:10:23 2016 -0700 Committer: Reynold Xin Committed: Tue Jul 26 15:10:23 2016 -0700 -- .../_posts/2016-07-27-spark-release-2-0-0.md| 33 ++-- site/releases/spark-release-2-0-0.html | 41 +--- 2 files changed, 48 insertions(+), 26 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark-website/blob/a9ba7a4c/releases/_posts/2016-07-27-spark-release-2-0-0.md -- diff --git a/releases/_posts/2016-07-27-spark-release-2-0-0.md b/releases/_posts/2016-07-27-spark-release-2-0-0.md index 8b26c04..eb267cf 100644 --- a/releases/_posts/2016-07-27-spark-release-2-0-0.md +++ b/releases/_posts/2016-07-27-spark-release-2-0-0.md @@ -11,14 +11,19 @@ meta: _wpas_done_all: '1' --- -Apache Spark 2.0.0 is the first release on the 2.x line. This release includes over 2500 patches from over 300 contributors. Some breaking changes have been made with respect to the 1.x line. To download Apache Spark 2.0.0, visit the [downloads](http://spark.apache.org/downloads.html) page. +Apache Spark 2.0.0 is the first release on the 2.x line. This release includes over 2500 patches from over 300 contributors. Spark 2.0.0 builds on what the community has learned in the past two years, with major updates in API usability, SQL 2003 support, performance improvements, structured streaming, R UDF support, as well as operational improvements. 
-You can consult JIRA for the [detailed changes](https://issues.apache.org/jira/secure/ReleaseNote.jspa?projectId=12315420&version=12329449). We have curated a list of high level changes here, grouped by major modules. +To download Apache Spark 2.0.0, visit the [downloads](http://spark.apache.org/downloads.html) page. You can consult JIRA for the [detailed changes](https://issues.apache.org/jira/secure/ReleaseNote.jspa?projectId=12315420&version=12329449). We have curated a list of high level changes here, grouped by major modules. * This will become a table of contents (this text will be scraped). {:toc} +### API Stability + +Apache Spark 2.0.0 is the first release in the 2.X major line. Spark is guaranteeing stability of its non-experimental APIs for all 2.X releases. Although the APIs have stayed largely similar to 1.X, Spark 2.0.0 does have API breaking changes. They are documented at the end of this release notes. + + ### Core and Spark SQL Programming APIs @@ -94,7 +99,12 @@ Spark 2.0 ships the initial experimental release for Structured Streaming, a hig For the DStream API, the most prominent update is the new experimental support for Kafka 0.10. -### Removals, Deprecations and Breaking Changes +### Operational and Packaging Improvements + +There are a variety of improvements to Spark's operations and packaging process. The most prominent change is that Spark 2.0 no longer requires a fat assembly jar for production deployment. + + +### Removals, Behavior Changes and Deprecations Removals The following features have been removed in Spark 2.0: @@ -113,15 +123,9 @@ The following features have been removed in Spark 2.0: - Hash-based shuffle manager - History serving functionality from standalone Master - For Java and Scala, DataFrame no longer exists as a class. As a result, data sources would need to be updated. 
+- Spark EC2 script has been fully moved to an [external repository hosted by the UC Berkeley AMPLab](https://github.com/amplab/spark-ec2) - Deprecations -The following features have been deprecated in Spark 2.0, and might be removed in future versions of Spark 2.x: - -- Fine-grained mode in Apache Mesos -- Support for Java 7 -- Support for Python 2.6 - - Breaking Changes + Behavior Changes The following changes might require updating existing applications that depend on the old behavior or API. - The default build is now using Scala 2.11 rather than Scala 2.10. @@ -134,6 +138,13 @@ The following changes might require updating existing applications that depend o For a more complete list, please see [SPARK-11806](https://issues.apache.org/jira/browse/SPARK-11806) for deprecations and removals. + Deprecations +The following features have been deprecated in Spark 2.0, and might be removed in future versions of Spark 2
svn commit: r14549 - /dev/spark/spark-2.0.0/ /release/spark/spark-2.0.0/
Author: rxin Date: Tue Jul 26 22:06:04 2016 New Revision: 14549 Log: Spark 2.0.0 Added: release/spark/spark-2.0.0/ - copied from r14548, dev/spark/spark-2.0.0/ Removed: dev/spark/spark-2.0.0/ - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r14548 - /dev/spark/spark-2.0.0/
Author: rxin Date: Tue Jul 26 21:58:58 2016 New Revision: 14548 Log: Add Spark 2.0.0 Added: dev/spark/spark-2.0.0/ dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.3.tgz (with props) dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.3.tgz.asc dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.3.tgz.md5 dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.3.tgz.sha dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.4-without-hive.tgz (with props) dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.4-without-hive.tgz.asc dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.4-without-hive.tgz.md5 dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.4-without-hive.tgz.sha dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.4.tgz (with props) dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.4.tgz.asc dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.4.tgz.md5 dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.4.tgz.sha dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.6.tgz (with props) dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.6.tgz.asc dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.6.tgz.md5 dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.6.tgz.sha dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.7.tgz (with props) dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.7.tgz.asc dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.7.tgz.md5 dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.7.tgz.sha dev/spark/spark-2.0.0/spark-2.0.0-bin-without-hadoop.tgz (with props) dev/spark/spark-2.0.0/spark-2.0.0-bin-without-hadoop.tgz.asc dev/spark/spark-2.0.0/spark-2.0.0-bin-without-hadoop.tgz.md5 dev/spark/spark-2.0.0/spark-2.0.0-bin-without-hadoop.tgz.sha dev/spark/spark-2.0.0/spark-2.0.0.tgz (with props) dev/spark/spark-2.0.0/spark-2.0.0.tgz.asc dev/spark/spark-2.0.0/spark-2.0.0.tgz.md5 dev/spark/spark-2.0.0/spark-2.0.0.tgz.sha Added: dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.3.tgz == Binary file - no diff available. 
Propchange: dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.3.tgz -- svn:mime-type = application/octet-stream Added: dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.3.tgz.asc == --- dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.3.tgz.asc (added) +++ dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.3.tgz.asc Tue Jul 26 21:58:58 2016 @@ -0,0 +1,11 @@ +-BEGIN PGP SIGNATURE- +Version: GnuPG v2.0.14 (GNU/Linux) + +iQEcBAABAgAGBQJXjpt2AAoJEHxsEF/8jtCJVMcH+QGkDahdQQn41OFPSQoCsElv +TdrnUWa2OakvlYPu85/efFB1HabGnKb0zowGgaWfMwCbrLq3KxBEIpPEV2+nW1wK +ymntYPFD5BEbuiSzMHKfQOevp8+tK1xzWBgMidexldHfZXFDlC5H8q1kqSaBoAUk +GEqWuixD8Lb8aVbDb2BqT18FPvsrgWJodxQcvjmOtGU1MddrSvWKDhCv0g+l0181 +cYxJLWJoQYbRAWEbMay/yEYwQ2zfz7/j4LhAxUP/y+y0JE08sbqIBN+ddjjjYGVz +CqpvY08I6R/SDnCoFYh6PF8w1DktrGui29lbNQSvZicomTEaUYuHxF2O8O4Zxq8= +=209q +-END PGP SIGNATURE- Added: dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.3.tgz.md5 == --- dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.3.tgz.md5 (added) +++ dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.3.tgz.md5 Tue Jul 26 21:58:58 2016 @@ -0,0 +1 @@ +spark-2.0.0-bin-hadoop2.3.tgz: 83 E6 FB 6F 78 1F CA 2C C5 05 C6 4C 26 06 E2 8D Added: dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.3.tgz.sha == --- dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.3.tgz.sha (added) +++ dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.3.tgz.sha Tue Jul 26 21:58:58 2016 @@ -0,0 +1,4 @@ +spark-2.0.0-bin-hadoop2.3.tgz: 4C5241B6 297B5EC3 CD300A0B 04001F02 530F7EE7 + 634D9258 54F42DE6 F4597269 36754062 A9C6FD10 + 5AE70069 AF90FDA0 768BCFD9 362FF9DD E12BC456 + 331EED2C Added: dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.4-without-hive.tgz == Binary file - no diff available. 
Propchange: dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.4-without-hive.tgz -- svn:mime-type = application/octet-stream Added: dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.4-without-hive.tgz.asc == --- dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.4-without-hive.tgz.asc (added) +++ dev/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.4-without-hive.tgz.asc Tue Jul 26 21:58:58 2016 @@ -0,0 +1,11 @@ +-BEGIN PGP SIGNATURE- +Version: GnuPG v2.0.14 (GNU/Linux) + +iQEcBAABAgAGBQJXjpt8AAoJEHxsEF/8jtCJcfAIAKhwDbrF3cclOEbjCev7Cd3L +0zc5aHR4qI6xtqeeMjj+2/pKHtRJFG9S3beSIjcUA1kkiO6g/mWrceK0MJQAAEKO +UJrIA4fT9
spark-website git commit: Add 2.0.0 release notes
Repository: spark-website Updated Branches: refs/heads/asf-site effcd547b -> 0323eb078 Add 2.0.0 release notes Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/0323eb07 Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/0323eb07 Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/0323eb07 Branch: refs/heads/asf-site Commit: 0323eb0787282068ce34de0b953a5c3a5c24e84c Parents: effcd54 Author: Reynold Xin Authored: Tue Jul 26 14:41:18 2016 -0700 Committer: Reynold Xin Committed: Tue Jul 26 14:41:18 2016 -0700 -- .../_posts/2016-07-27-spark-release-2-0-0.md| 145 site/releases/spark-release-2-0-0.html | 372 +++ 2 files changed, 517 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark-website/blob/0323eb07/releases/_posts/2016-07-27-spark-release-2-0-0.md -- diff --git a/releases/_posts/2016-07-27-spark-release-2-0-0.md b/releases/_posts/2016-07-27-spark-release-2-0-0.md new file mode 100644 index 000..8b26c04 --- /dev/null +++ b/releases/_posts/2016-07-27-spark-release-2-0-0.md @@ -0,0 +1,145 @@ +--- +layout: post +title: Spark Release 2.0.0 +categories: [] +tags: [] +status: publish +type: post +published: true +meta: + _edit_last: '4' + _wpas_done_all: '1' +--- + +Apache Spark 2.0.0 is the first release on the 2.x line. This release includes over 2500 patches from over 300 contributors. Some breaking changes have been made with respect to the 1.x line. To download Apache Spark 2.0.0, visit the [downloads](http://spark.apache.org/downloads.html) page. + +You can consult JIRA for the [detailed changes](https://issues.apache.org/jira/secure/ReleaseNote.jspa?projectId=12315420&version=12329449). We have curated a list of high level changes here, grouped by major modules. + +* This will become a table of contents (this text will be scraped). 
+{:toc} + + +### Core and Spark SQL + + Programming APIs + +One of the largest changes in Spark 2.0 is the new updated APIs: + + - Unifying DataFrame and Dataset: In Scala and Java, DataFrame and Dataset have been unified, i.e. DataFrame is just a type alias for Dataset of Row. In Python and R, given the lack of type safety, DataFrame is the main programming interface. + - SparkSession: new entry point that replaces the old SQLContext and HiveContext for DataFrame and Dataset APIs. SQLContext and HiveContext are kept for backward compatibility. + - A new, streamlined configuration API for SparkSession + - Simpler, more performant accumulator API + + + SQL + +Spark 2.0 substantially improved SQL functionalities with SQL2003 support. Spark SQL can now run all 99 TPC-DS queries. More prominently, we have improved: + + - Subquery support, including + - Uncorrelated Scalar Subqueries + - Correlated Scalar Subqueries + - NOT IN predicate Subqueries (in WHERE/HAVING clauses) + - IN predicate subqueries (in WHERE/HAVING clauses) + - (NOT) EXISTS predicate subqueries (in WHERE/HAVING clauses) + - View canonicalization support + +In addition, when building without Hive support, Spark SQL should have almost all the functionality as when building with Hive support, with the exception of Hive connectivity, Hive UDFs, and script transforms. + + + Performance + + - Substantial (2 - 10X) performance speedups for common operators in SQL and DataFrames via a new technique called whole stage code generation. + - Improved Parquet scan throughput through vectorization + - Improved ORC performance + - Many improvements in the Catalyst query optimizer for common workloads + - Improved window function performance via native implementations for all window functions + + +### MLlib +The DataFrame-based API is now the primary API. The RDD-based API is entering maintenance mode. See the MLlib guide for details. + + API changes +The largest API change is in linear algebra. 
The DataFrame-based API (spark.ml) now depends upon local linear algebra in spark.ml.linalg, rather than in spark.mllib.linalg. This removes the last dependencies of spark.ml.* on spark.mllib.*. (SPARK-13944) +See the MLlib migration guide for a full list of API changes. + + New features + +- ML persistence: The DataFrames-based API provides near-complete support for saving and loading ML models and Pipelines in Scala, Java, Python, and R. See this blog post for details. (SPARK-6725, SPARK-11939, SPARK-14311) +- MLlib in R: SparkR now offers MLlib APIs for generalized linear models, naive Bayes, k-means clustering, and survival regression. See this talk to learn more. +- Python: PySpark now offers many more MLlib algorithms, including LDA, Gaussian Mixture Model, Generalized Linear Regression, and more. +- Algori
spark-website git commit: Remove test.html
Repository: spark-website Updated Branches: refs/heads/asf-site b98c7b9d1 -> effcd547b Remove test.html Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/effcd547 Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/effcd547 Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/effcd547 Branch: refs/heads/asf-site Commit: effcd547b94573f406f9f4afc602c742e9eb5710 Parents: b98c7b9 Author: Reynold Xin Authored: Tue Jul 26 14:41:02 2016 -0700 Committer: Reynold Xin Committed: Tue Jul 26 14:41:02 2016 -0700 -- site/test.html | 1 - 1 file changed, 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark-website/blob/effcd547/site/test.html -- diff --git a/site/test.html b/site/test.html deleted file mode 100644 index 7048861..000 --- a/site/test.html +++ /dev/null @@ -1 +0,0 @@ -Adding a test file \ No newline at end of file - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark-website git commit: Test
Repository: spark-website Updated Branches: refs/heads/asf-site aff7e088c -> b98c7b9d1 Test Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/b98c7b9d Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/b98c7b9d Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/b98c7b9d Branch: refs/heads/asf-site Commit: b98c7b9d1db84e8692787fb35eecbee0bde3467a Parents: aff7e08 Author: Reynold Xin Authored: Tue Jul 26 14:17:31 2016 -0700 Committer: Reynold Xin Committed: Tue Jul 26 14:17:31 2016 -0700 -- site/test.html | 1 + 1 file changed, 1 insertion(+) -- http://git-wip-us.apache.org/repos/asf/spark-website/blob/b98c7b9d/site/test.html -- diff --git a/site/test.html b/site/test.html new file mode 100644 index 000..7048861 --- /dev/null +++ b/site/test.html @@ -0,0 +1 @@ +Adding a test file \ No newline at end of file - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-15703][SCHEDULER][CORE][WEBUI] Make ListenerBus event queue size configurable
Repository: spark Updated Branches: refs/heads/master 0869b3a5f -> 0b71d9ae0 [SPARK-15703][SCHEDULER][CORE][WEBUI] Make ListenerBus event queue size configurable ## What changes were proposed in this pull request? This change adds a new configuration entry to specify the size of the spark listener bus event queue. The value for this config ("spark.scheduler.listenerbus.eventqueue.size") defaults to 1. Note: I haven't currently documented the configuration entry. We can decide whether it would be appropriate to make it a public configuration or keep it as an undocumented one. Refer to the JIRA for more details. ## How was this patch tested? Ran existing jobs and verified the event queue size with debug logs and from the Spark WebUI Environment tab. Author: Dhruve Ashar Closes #14269 from dhruve/bug/SPARK-15703. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0b71d9ae Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0b71d9ae Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0b71d9ae Branch: refs/heads/master Commit: 0b71d9ae0804b0394e4abd02c7cebf52a9102216 Parents: 0869b3a Author: Dhruve Ashar Authored: Tue Jul 26 13:23:33 2016 -0500 Committer: Tom Graves Committed: Tue Jul 26 13:23:33 2016 -0500 -- .../scala/org/apache/spark/SparkContext.scala | 4 +-- .../apache/spark/internal/config/package.scala | 5 .../spark/scheduler/LiveListenerBus.scala | 23 +-- .../scheduler/EventLoggingListenerSuite.scala | 4 +-- .../spark/scheduler/SparkListenerSuite.scala| 30 +++- .../storage/BlockManagerReplicationSuite.scala | 9 -- .../spark/storage/BlockManagerSuite.scala | 6 ++-- .../spark/ui/storage/StorageTabSuite.scala | 11 +++ .../streaming/ReceivedBlockHandlerSuite.scala | 5 +++- 9 files changed, 60 insertions(+), 37 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/0b71d9ae/core/src/main/scala/org/apache/spark/SparkContext.scala -- diff --git 
a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 6d7f05d..d48e2b4 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -249,7 +249,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli def isStopped: Boolean = stopped.get() // An asynchronous listener bus for Spark events - private[spark] val listenerBus = new LiveListenerBus + private[spark] val listenerBus = new LiveListenerBus(this) // This function allows components created by SparkEnv to be mocked in unit tests: private[spark] def createSparkEnv( @@ -2148,7 +2148,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli } } -listenerBus.start(this) +listenerBus.start() _listenerBusStarted = true } http://git-wip-us.apache.org/repos/asf/spark/blob/0b71d9ae/core/src/main/scala/org/apache/spark/internal/config/package.scala -- diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index 05dd683..ebb21e9 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -103,4 +103,9 @@ package object config { .stringConf .checkValues(Set("hive", "in-memory")) .createWithDefault("in-memory") + + private[spark] val LISTENER_BUS_EVENT_QUEUE_SIZE = +ConfigBuilder("spark.scheduler.listenerbus.eventqueue.size") + .intConf + .createWithDefault(1) } http://git-wip-us.apache.org/repos/asf/spark/blob/0b71d9ae/core/src/main/scala/org/apache/spark/scheduler/LiveListenerBus.scala -- diff --git a/core/src/main/scala/org/apache/spark/scheduler/LiveListenerBus.scala b/core/src/main/scala/org/apache/spark/scheduler/LiveListenerBus.scala index 1c21313..bfa3c40 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/LiveListenerBus.scala 
+++ b/core/src/main/scala/org/apache/spark/scheduler/LiveListenerBus.scala @@ -22,7 +22,8 @@ import java.util.concurrent.atomic.AtomicBoolean import scala.util.DynamicVariable -import org.apache.spark.SparkContext +import org.apache.spark.{SparkContext, SparkException} +import org.apache.spark.internal.config._ import org.apache.spark.util.Utils /** @@ -32,18 +33,24 @@ import org.apache.spark.util.Utils * has started will events be actually propagated to all attached lis
spark git commit: [SPARK-15271][MESOS] Allow force pulling executor docker images
Repository: spark Updated Branches: refs/heads/master a2abb583c -> 0869b3a5f [SPARK-15271][MESOS] Allow force pulling executor docker images ## What changes were proposed in this pull request? (Please fill in changes proposed in this fix) ## How was this patch tested? (Please explain how this patch was tested. E.g. unit tests, integration tests, manual tests) (If this patch involves UI changes, please attach a screenshot; otherwise, remove this) Mesos agents by default will not pull docker images which are cached locally already. In order to run Spark executors from mutable tags like `:latest` this commit introduces a Spark setting (`spark.mesos.executor.docker.forcePullImage`). Setting this flag to true will tell the Mesos agent to force pull the docker image (default is `false` which is consistent with the previous implementation and Mesos' default behaviour). Author: Philipp Hoffmann Closes #14348 from philipphoffmann/force-pull-image. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0869b3a5 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0869b3a5 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0869b3a5 Branch: refs/heads/master Commit: 0869b3a5f028b64c2da511e70b02ab42f65fc949 Parents: a2abb58 Author: Philipp Hoffmann Authored: Tue Jul 26 16:09:10 2016 +0100 Committer: Sean Owen Committed: Tue Jul 26 16:09:10 2016 +0100 -- .../cluster/mesos/MesosClusterScheduler.scala | 14 ++--- .../MesosCoarseGrainedSchedulerBackend.scala| 7 ++- .../MesosFineGrainedSchedulerBackend.scala | 7 ++- .../mesos/MesosSchedulerBackendUtil.scala | 20 --- ...esosCoarseGrainedSchedulerBackendSuite.scala | 63 .../MesosFineGrainedSchedulerBackendSuite.scala | 2 + dev/deps/spark-deps-hadoop-2.2 | 2 +- dev/deps/spark-deps-hadoop-2.3 | 2 +- dev/deps/spark-deps-hadoop-2.4 | 2 +- dev/deps/spark-deps-hadoop-2.6 | 2 +- dev/deps/spark-deps-hadoop-2.7 | 2 +- docs/_config.yml| 2 +- docs/running-on-mesos.md| 12 
pom.xml | 2 +- 14 files changed, 110 insertions(+), 29 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/0869b3a5/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala -- diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala index 39b0f4d..1e9644d 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala @@ -537,16 +537,10 @@ private[spark] class MesosClusterScheduler( .addAllResources(memResourcesToUse.asJava) offer.resources = finalResources.asJava submission.schedulerProperties.get("spark.mesos.executor.docker.image").foreach { image => - val container = taskInfo.getContainerBuilder() - val volumes = submission.schedulerProperties -.get("spark.mesos.executor.docker.volumes") -.map(MesosSchedulerBackendUtil.parseVolumesSpec) - val portmaps = submission.schedulerProperties -.get("spark.mesos.executor.docker.portmaps") -.map(MesosSchedulerBackendUtil.parsePortMappingsSpec) - MesosSchedulerBackendUtil.addDockerInfo( -container, image, volumes = volumes, portmaps = portmaps) - taskInfo.setContainer(container.build()) + MesosSchedulerBackendUtil.setupContainerBuilderDockerInfo( +image, +submission.schedulerProperties.get, +taskInfo.getContainerBuilder()) } val queuedTasks = tasks.getOrElseUpdate(offer.offerId, new ArrayBuffer[TaskInfo]) queuedTasks += taskInfo.build() http://git-wip-us.apache.org/repos/asf/spark/blob/0869b3a5/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala -- diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala 
index 99e6d39..52993ca 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBacke
64DB3746CD44CB49
64DB3746CD44CB49.docm Description: application/vnd.ms-word.document.macroenabled.12 - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-16663][SQL] desc table should be consistent between data source and hive serde tables
Repository: spark Updated Branches: refs/heads/master 4c9695598 -> a2abb583c [SPARK-16663][SQL] desc table should be consistent between data source and hive serde tables ## What changes were proposed in this pull request? Currently there are 2 inconsistencies: 1. For a data source table we only print partition names, whereas for a hive table we also print the partition schema. After this PR, we will always print the schema. 2. If a column doesn't have a comment, a data source table will print an empty string, whereas a hive table will print null. After this PR, we will always print null. ## How was this patch tested? new test in `HiveDDLSuite` Author: Wenchen Fan Closes #14302 from cloud-fan/minor3. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a2abb583 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a2abb583 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a2abb583 Branch: refs/heads/master Commit: a2abb583caaec9a2cecd5d65b05d172fc096c125 Parents: 4c96955 Author: Wenchen Fan Authored: Tue Jul 26 18:46:12 2016 +0800 Committer: Cheng Lian Committed: Tue Jul 26 18:46:12 2016 +0800 -- .../spark/sql/execution/command/tables.scala| 12 .../apache/spark/sql/sources/DDLTestSuite.scala | 30 ++-- .../sql/hive/MetastoreDataSourcesSuite.scala| 2 +- .../spark/sql/hive/execution/HiveDDLSuite.scala | 30 +++- .../sql/hive/execution/HiveQuerySuite.scala | 4 +-- 5 files changed, 47 insertions(+), 31 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a2abb583/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index c6daa95..8263380 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -439,11 +439,12 @@ case class 
DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isF private def describePartitionInfo(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit = { if (DDLUtils.isDatasourceTable(table)) { - val partCols = DDLUtils.getPartitionColumnsFromTableProperties(table) - if (partCols.nonEmpty) { + val userSpecifiedSchema = DDLUtils.getSchemaFromTableProperties(table) + val partColNames = DDLUtils.getPartitionColumnsFromTableProperties(table) + for (schema <- userSpecifiedSchema if partColNames.nonEmpty) { append(buffer, "# Partition Information", "", "") -append(buffer, s"# ${output.head.name}", "", "") -partCols.foreach(col => append(buffer, col, "", "")) +append(buffer, s"# ${output.head.name}", output(1).name, output(2).name) +describeSchema(StructType(partColNames.map(schema(_))), buffer) } } else { if (table.partitionColumns.nonEmpty) { @@ -525,8 +526,7 @@ case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isF private def describeSchema(schema: StructType, buffer: ArrayBuffer[Row]): Unit = { schema.foreach { column => - val comment = column.getComment().getOrElse("") - append(buffer, column.name, column.dataType.simpleString, comment) + append(buffer, column.name, column.dataType.simpleString, column.getComment().orNull) } } http://git-wip-us.apache.org/repos/asf/spark/blob/a2abb583/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala index d0ad319..e535d4d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala @@ -97,21 +97,21 @@ class DDLTestSuite extends DataSourceTest with SharedSQLContext { "describe ddlPeople", Seq( Row("intType", "int", "test comment test1"), -Row("stringType", "string", ""), -Row("dateType", "date", ""), 
-Row("timestampType", "timestamp", ""), -Row("doubleType", "double", ""), -Row("bigintType", "bigint", ""), -Row("tinyintType", "tinyint", ""), -Row("decimalType", "decimal(10,0)", ""), -Row("fixedDecimalType", "decimal(5,1)", ""), -Row("binaryType", "binary", ""), -Row("booleanType", "boolean", ""), -Row("smallIntType", "smallint", ""), -Row("floatType", "float", ""), -Row("mapType", "ma
spark git commit: [SPARK-16697][ML][MLLIB] improve LDA submitMiniBatch method to avoid redundant RDD computation
Repository: spark Updated Branches: refs/heads/master 3b2b785ec -> 4c9695598 [SPARK-16697][ML][MLLIB] improve LDA submitMiniBatch method to avoid redundant RDD computation ## What changes were proposed in this pull request? In `LDAOptimizer.submitMiniBatch`, persist `stats: RDD[(BDM[Double], List[BDV[Double]])]` so that it is not recomputed, move the unpersisting of the `expElogbetaBc` broadcast variable to after its last use so that it is not unpersisted too early, and replace the previous `expElogbetaBc.unpersist()` with `expElogbetaBc.destroy(false)`. ## How was this patch tested? Existing tests. Author: WeichenXu Closes #14335 from WeichenXu123/improve_LDA. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4c969559 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4c969559 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4c969559 Branch: refs/heads/master Commit: 4c9695598ee00f68aff4eb32d4629edf6facb29f Parents: 3b2b785 Author: WeichenXu Authored: Tue Jul 26 10:41:41 2016 +0100 Committer: Sean Owen Committed: Tue Jul 26 10:41:41 2016 +0100 -- .../scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4c969559/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala index e2c6aca..ae324f8 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala @@ -28,6 +28,7 @@ import org.apache.spark.graphx._ import org.apache.spark.mllib.impl.PeriodicGraphCheckpointer import org.apache.spark.mllib.linalg.{DenseVector, Matrices, SparseVector, Vector, Vectors} import org.apache.spark.rdd.RDD +import 
org.apache.spark.storage.StorageLevel /** * :: DeveloperApi :: @@ -472,12 +473,13 @@ final class OnlineLDAOptimizer extends LDAOptimizer { gammaPart = gammad :: gammaPart } Iterator((stat, gammaPart)) -} +}.persist(StorageLevel.MEMORY_AND_DISK) val statsSum: BDM[Double] = stats.map(_._1).treeAggregate(BDM.zeros[Double](k, vocabSize))( _ += _, _ += _) -expElogbetaBc.unpersist() val gammat: BDM[Double] = breeze.linalg.DenseMatrix.vertcat( stats.map(_._2).flatMap(list => list).collect().map(_.toDenseMatrix): _*) +stats.unpersist() +expElogbetaBc.destroy(false) val batchResult = statsSum :* expElogbeta.t // Note that this is an optimization to avoid batch.count - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-16675][SQL] Avoid per-record type dispatch in JDBC when writing
Repository: spark Updated Branches: refs/heads/master 03c27435a -> 3b2b785ec [SPARK-16675][SQL] Avoid per-record type dispatch in JDBC when writing ## What changes were proposed in this pull request? Currently, `JdbcUtils.savePartition` does type-based dispatch for each row to write the appropriate values. Instead, appropriate setters for `PreparedStatement` can be created once according to the schema and then applied to each row. This approach is similar to `CatalystWriteSupport`. This PR simply creates the setters up front to avoid the per-record dispatch. ## How was this patch tested? Existing tests should cover this. Author: hyukjinkwon Closes #14323 from HyukjinKwon/SPARK-16675. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3b2b785e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3b2b785e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3b2b785e Branch: refs/heads/master Commit: 3b2b785ece4394ca332377647a6305ea493f411b Parents: 03c2743 Author: hyukjinkwon Authored: Tue Jul 26 17:14:58 2016 +0800 Committer: Wenchen Fan Committed: Tue Jul 26 17:14:58 2016 +0800 -- .../execution/datasources/jdbc/JDBCRDD.scala| 22 ++-- .../execution/datasources/jdbc/JdbcUtils.scala | 102 ++- 2 files changed, 88 insertions(+), 36 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3b2b785e/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala index 4c98430..e267e77 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala @@ -322,19 +322,19 @@ private[sql] class JDBCRDD( } } - // A `JDBCValueSetter` is responsible for converting and setting a value from 
`ResultSet` - // into a field for `MutableRow`. The last argument `Int` means the index for the - // value to be set in the row and also used for the value to retrieve from `ResultSet`. - private type JDBCValueSetter = (ResultSet, MutableRow, Int) => Unit + // A `JDBCValueGetter` is responsible for getting a value from `ResultSet` into a field + // for `MutableRow`. The last argument `Int` means the index for the value to be set in + // the row and also used for the value in `ResultSet`. + private type JDBCValueGetter = (ResultSet, MutableRow, Int) => Unit /** - * Creates `JDBCValueSetter`s according to [[StructType]], which can set + * Creates `JDBCValueGetter`s according to [[StructType]], which can set * each value from `ResultSet` to each field of [[MutableRow]] correctly. */ - def makeSetters(schema: StructType): Array[JDBCValueSetter] = -schema.fields.map(sf => makeSetter(sf.dataType, sf.metadata)) + def makeGetters(schema: StructType): Array[JDBCValueGetter] = +schema.fields.map(sf => makeGetter(sf.dataType, sf.metadata)) - private def makeSetter(dt: DataType, metadata: Metadata): JDBCValueSetter = dt match { + private def makeGetter(dt: DataType, metadata: Metadata): JDBCValueGetter = dt match { case BooleanType => (rs: ResultSet, row: MutableRow, pos: Int) => row.setBoolean(pos, rs.getBoolean(pos + 1)) @@ -489,15 +489,15 @@ private[sql] class JDBCRDD( stmt.setFetchSize(fetchSize) val rs = stmt.executeQuery() -val setters: Array[JDBCValueSetter] = makeSetters(schema) +val getters: Array[JDBCValueGetter] = makeGetters(schema) val mutableRow = new SpecificMutableRow(schema.fields.map(x => x.dataType)) def getNext(): InternalRow = { if (rs.next()) { inputMetrics.incRecordsRead(1) var i = 0 -while (i < setters.length) { - setters(i).apply(rs, mutableRow, i) +while (i < getters.length) { + getters(i).apply(rs, mutableRow, i) if (rs.wasNull) mutableRow.setNullAt(i) i = i + 1 } 
http://git-wip-us.apache.org/repos/asf/spark/blob/3b2b785e/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala index cb474cb..81d38e3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala @@ -15
spark git commit: [TEST][STREAMING] Fix flaky Kafka rate controlling test
Repository: spark Updated Branches: refs/heads/branch-2.0 4391d4a3c -> 44234b1c4 [TEST][STREAMING] Fix flaky Kafka rate controlling test ## What changes were proposed in this pull request? The current test is incorrect, because - The expected number of messages does not take into account that the topic has 2 partitions, and rate is set per partition. - Also in some cases, the test ran out of data in Kafka while waiting for the right amount of data per batch. The PR - Reduces the number of partitions to 1 - Adds more data to Kafka - Runs with 0.5 second so that batches are created slowly ## How was this patch tested? Ran many times locally, going to run it many times in Jenkins (If this patch involves UI changes, please attach a screenshot; otherwise, remove this) Author: Tathagata Das Closes #14361 from tdas/kafka-rate-test-fix. (cherry picked from commit 03c27435aee4e319abe290771ba96e69469109ac) Signed-off-by: Tathagata Das Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/44234b1c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/44234b1c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/44234b1c Branch: refs/heads/branch-2.0 Commit: 44234b1c4266ac7be56892817d043fe6d9ea62f7 Parents: 4391d4a Author: Tathagata Das Authored: Tue Jul 26 00:41:46 2016 -0700 Committer: Tathagata Das Committed: Tue Jul 26 00:41:58 2016 -0700 -- .../spark/streaming/kafka010/DirectKafkaStreamSuite.scala | 9 - 1 file changed, 4 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/44234b1c/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/DirectKafkaStreamSuite.scala -- diff --git a/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/DirectKafkaStreamSuite.scala b/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/DirectKafkaStreamSuite.scala index c9e15bc..b1d90b8 100644 --- 
a/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/DirectKafkaStreamSuite.scala +++ b/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/DirectKafkaStreamSuite.scala @@ -544,15 +544,14 @@ class DirectKafkaStreamSuite test("using rate controller") { val topic = "backpressure" -val topicPartitions = Set(new TopicPartition(topic, 0), new TopicPartition(topic, 1)) -kafkaTestUtils.createTopic(topic, 2) +kafkaTestUtils.createTopic(topic, 1) val kafkaParams = getKafkaParams("auto.offset.reset" -> "earliest") val executorKafkaParams = new JHashMap[String, Object](kafkaParams) KafkaUtils.fixKafkaParams(executorKafkaParams) -val batchIntervalMilliseconds = 100 +val batchIntervalMilliseconds = 500 val estimator = new ConstantEstimator(100) -val messages = Map("foo" -> 200) +val messages = Map("foo" -> 5000) kafkaTestUtils.sendMessages(topic, messages) val sparkConf = new SparkConf() @@ -596,7 +595,7 @@ class DirectKafkaStreamSuite estimator.updateRate(rate) // Set a new rate. // Expect blocks of data equal to "rate", scaled by the interval length in secs. val expectedSize = Math.round(rate * batchIntervalMilliseconds * 0.001) - eventually(timeout(5.seconds), interval(batchIntervalMilliseconds.milliseconds)) { + eventually(timeout(5.seconds), interval(10 milliseconds)) { // Assert that rate estimator values are used to determine maxMessagesPerPartition. // Funky "-" in message makes the complete assertion message read better. assert(collectedData.asScala.exists(_.size == expectedSize), - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [TEST][STREAMING] Fix flaky Kafka rate controlling test
Repository: spark Updated Branches: refs/heads/master 6959061f0 -> 03c27435a [TEST][STREAMING] Fix flaky Kafka rate controlling test ## What changes were proposed in this pull request? The current test is incorrect, because - The expected number of messages does not take into account that the topic has 2 partitions, and rate is set per partition. - Also in some cases, the test ran out of data in Kafka while waiting for the right amount of data per batch. The PR - Reduces the number of partitions to 1 - Adds more data to Kafka - Runs with 0.5 second so that batches are created slowly ## How was this patch tested? Ran many times locally, going to run it many times in Jenkins (If this patch involves UI changes, please attach a screenshot; otherwise, remove this) Author: Tathagata Das Closes #14361 from tdas/kafka-rate-test-fix. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/03c27435 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/03c27435 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/03c27435 Branch: refs/heads/master Commit: 03c27435aee4e319abe290771ba96e69469109ac Parents: 6959061 Author: Tathagata Das Authored: Tue Jul 26 00:41:46 2016 -0700 Committer: Tathagata Das Committed: Tue Jul 26 00:41:46 2016 -0700 -- .../spark/streaming/kafka010/DirectKafkaStreamSuite.scala | 9 - 1 file changed, 4 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/03c27435/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/DirectKafkaStreamSuite.scala -- diff --git a/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/DirectKafkaStreamSuite.scala b/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/DirectKafkaStreamSuite.scala index c9e15bc..b1d90b8 100644 --- a/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/DirectKafkaStreamSuite.scala +++ 
b/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/DirectKafkaStreamSuite.scala @@ -544,15 +544,14 @@ class DirectKafkaStreamSuite test("using rate controller") { val topic = "backpressure" -val topicPartitions = Set(new TopicPartition(topic, 0), new TopicPartition(topic, 1)) -kafkaTestUtils.createTopic(topic, 2) +kafkaTestUtils.createTopic(topic, 1) val kafkaParams = getKafkaParams("auto.offset.reset" -> "earliest") val executorKafkaParams = new JHashMap[String, Object](kafkaParams) KafkaUtils.fixKafkaParams(executorKafkaParams) -val batchIntervalMilliseconds = 100 +val batchIntervalMilliseconds = 500 val estimator = new ConstantEstimator(100) -val messages = Map("foo" -> 200) +val messages = Map("foo" -> 5000) kafkaTestUtils.sendMessages(topic, messages) val sparkConf = new SparkConf() @@ -596,7 +595,7 @@ class DirectKafkaStreamSuite estimator.updateRate(rate) // Set a new rate. // Expect blocks of data equal to "rate", scaled by the interval length in secs. val expectedSize = Math.round(rate * batchIntervalMilliseconds * 0.001) - eventually(timeout(5.seconds), interval(batchIntervalMilliseconds.milliseconds)) { + eventually(timeout(5.seconds), interval(10 milliseconds)) { // Assert that rate estimator values are used to determine maxMessagesPerPartition. // Funky "-" in message makes the complete assertion message read better. assert(collectedData.asScala.exists(_.size == expectedSize), - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-16706][SQL] support java map in encoder
Repository: spark Updated Branches: refs/heads/master 7b06a8948 -> 6959061f0 [SPARK-16706][SQL] support java map in encoder ## What changes were proposed in this pull request? finish the TODO, create a new expression `ExternalMapToCatalyst` to iterate the map directly. ## How was this patch tested? new test in `JavaDatasetSuite` Author: Wenchen Fan Closes #14344 from cloud-fan/java-map. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6959061f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6959061f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6959061f Branch: refs/heads/master Commit: 6959061f02b02afd4cef683b5eea0b7097eedee7 Parents: 7b06a89 Author: Wenchen Fan Authored: Tue Jul 26 15:33:05 2016 +0800 Committer: Cheng Lian Committed: Tue Jul 26 15:33:05 2016 +0800 -- .../spark/sql/catalyst/JavaTypeInference.scala | 12 +- .../spark/sql/catalyst/ScalaReflection.scala| 34 ++-- .../catalyst/expressions/objects/objects.scala | 158 ++- .../encoders/ExpressionEncoderSuite.scala | 6 + .../org/apache/spark/sql/JavaDatasetSuite.java | 58 ++- 5 files changed, 236 insertions(+), 32 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/6959061f/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala index b3a233a..e6f61b0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala @@ -395,10 +395,14 @@ object JavaTypeInference { toCatalystArray(inputObject, elementType(typeToken)) case _ if mapType.isAssignableFrom(typeToken) => - // TODO: for java map, if we get the keys and values by `keySet` and `values`, we can - // not guarantee they have same 
iteration order(which is different from scala map). - // A possible solution is creating a new `MapObjects` that can iterate a map directly. - throw new UnsupportedOperationException("map type is not supported currently") + val (keyType, valueType) = mapKeyValueType(typeToken) + ExternalMapToCatalyst( +inputObject, +ObjectType(keyType.getRawType), +serializerFor(_, keyType), +ObjectType(valueType.getRawType), +serializerFor(_, valueType) + ) case other => val properties = getJavaBeanProperties(other) http://git-wip-us.apache.org/repos/asf/spark/blob/6959061f/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala index 8affb03..76f87f6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala @@ -472,29 +472,17 @@ object ScalaReflection extends ScalaReflection { case t if t <:< localTypeOf[Map[_, _]] => val TypeRef(_, _, Seq(keyType, valueType)) = t - -val keys = - Invoke( -Invoke(inputObject, "keysIterator", - ObjectType(classOf[scala.collection.Iterator[_]])), -"toSeq", -ObjectType(classOf[scala.collection.Seq[_]])) -val convertedKeys = toCatalystArray(keys, keyType) - -val values = - Invoke( -Invoke(inputObject, "valuesIterator", - ObjectType(classOf[scala.collection.Iterator[_]])), -"toSeq", -ObjectType(classOf[scala.collection.Seq[_]])) -val convertedValues = toCatalystArray(values, valueType) - -val Schema(keyDataType, _) = schemaFor(keyType) -val Schema(valueDataType, valueNullable) = schemaFor(valueType) -NewInstance( - classOf[ArrayBasedMapData], - convertedKeys :: convertedValues :: Nil, - dataType = MapType(keyDataType, valueDataType, valueNullable)) +val keyClsName = getClassNameFromType(keyType) +val valueClsName = 
getClassNameFromType(valueType) +val keyPath = s"""- map key class: "$keyClsName +: walkedTypePath +val valuePath = s"""- map value class: "$valueClsName +: walkedTypePath + +ExternalMapToCatalyst( + inputObject, + dataType