spark git commit: [MINOR] Remove unused arg in als.py
Repository: spark Updated Branches: refs/heads/master 69c773052 -> e5fbb182c [MINOR] Remove unused arg in als.py ## What changes were proposed in this pull request? The second arg in method `update()` is never used. So I delete it. ## How was this patch tested? local run with `./bin/spark-submit examples/src/main/python/als.py` Author: Zheng RuiFengCloses #14247 from zhengruifeng/als_refine. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e5fbb182 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e5fbb182 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e5fbb182 Branch: refs/heads/master Commit: e5fbb182c04be8524045fc90541497f506b42f4a Parents: 69c7730 Author: Zheng RuiFeng Authored: Mon Jul 18 22:57:13 2016 -0700 Committer: Reynold Xin Committed: Mon Jul 18 22:57:13 2016 -0700 -- examples/src/main/python/als.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/e5fbb182/examples/src/main/python/als.py -- diff --git a/examples/src/main/python/als.py b/examples/src/main/python/als.py index 80290e7..6d32418 100755 --- a/examples/src/main/python/als.py +++ b/examples/src/main/python/als.py @@ -39,7 +39,7 @@ def rmse(R, ms, us): return np.sqrt(np.sum(np.power(diff, 2)) / (M * U)) -def update(i, vec, mat, ratings): +def update(i, mat, ratings): uu = mat.shape[0] ff = mat.shape[1] @@ -88,7 +88,7 @@ if __name__ == "__main__": for i in range(ITERATIONS): ms = sc.parallelize(range(M), partitions) \ - .map(lambda x: update(x, msb.value[x, :], usb.value, Rb.value)) \ + .map(lambda x: update(x, usb.value, Rb.value)) \ .collect() # collect() returns a list, so array ends up being # a 3-d array, we take the first 2 dims for the matrix @@ -96,7 +96,7 @@ if __name__ == "__main__": msb = sc.broadcast(ms) us = sc.parallelize(range(U), partitions) \ - .map(lambda x: update(x, usb.value[x, :], msb.value, Rb.value.T)) \ + .map(lambda x: 
update(x, msb.value, Rb.value.T)) \ .collect() us = matrix(np.array(us)[:, :, 0]) usb = sc.broadcast(us) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-16615][SQL] Expose sqlContext in SparkSession
Repository: spark Updated Branches: refs/heads/master c4524f519 -> 69c773052 [SPARK-16615][SQL] Expose sqlContext in SparkSession ## What changes were proposed in this pull request? This patch removes the private[spark] qualifier for SparkSession.sqlContext, as discussed in http://apache-spark-developers-list.1001551.n3.nabble.com/Re-transtition-SQLContext-to-SparkSession-td18342.html ## How was this patch tested? N/A - this is a visibility change. Author: Reynold XinCloses #14252 from rxin/SPARK-16615. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/69c77305 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/69c77305 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/69c77305 Branch: refs/heads/master Commit: 69c773052acc627eb033614797de9b913dfa35c1 Parents: c4524f5 Author: Reynold Xin Authored: Mon Jul 18 18:03:35 2016 -0700 Committer: Reynold Xin Committed: Mon Jul 18 18:03:35 2016 -0700 -- sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/69c77305/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala index 1a40b7e..2ade36d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -115,9 +115,11 @@ class SparkSession private( /** * A wrapped version of this session in the form of a [[SQLContext]], for backward compatibility. + * + * @since 2.0.0 */ @transient - private[spark] val sqlContext: SQLContext = new SQLContext(this) + val sqlContext: SQLContext = new SQLContext(this) /** * Runtime configuration interface for Spark. 
- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-16615][SQL] Expose sqlContext in SparkSession
Repository: spark Updated Branches: refs/heads/branch-2.0 1dd152656 -> 24ea87519 [SPARK-16615][SQL] Expose sqlContext in SparkSession ## What changes were proposed in this pull request? This patch removes the private[spark] qualifier for SparkSession.sqlContext, as discussed in http://apache-spark-developers-list.1001551.n3.nabble.com/Re-transtition-SQLContext-to-SparkSession-td18342.html ## How was this patch tested? N/A - this is a visibility change. Author: Reynold XinCloses #14252 from rxin/SPARK-16615. (cherry picked from commit 69c773052acc627eb033614797de9b913dfa35c1) Signed-off-by: Reynold Xin Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/24ea8751 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/24ea8751 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/24ea8751 Branch: refs/heads/branch-2.0 Commit: 24ea875198ffcef4a4c3ba28aba128d6d7d9a395 Parents: 1dd1526 Author: Reynold Xin Authored: Mon Jul 18 18:03:35 2016 -0700 Committer: Reynold Xin Committed: Mon Jul 18 18:03:42 2016 -0700 -- sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/24ea8751/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala index df0950d..946d8cb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -115,9 +115,11 @@ class SparkSession private( /** * A wrapped version of this session in the form of a [[SQLContext]], for backward compatibility. 
+ * + * @since 2.0.0 */ @transient - private[spark] val sqlContext: SQLContext = new SQLContext(this) + val sqlContext: SQLContext = new SQLContext(this) /** * Runtime configuration interface for Spark. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [HOTFIX] Fix Scala 2.10 compilation
Repository: spark Updated Branches: refs/heads/branch-2.0 aac860802 -> 1dd152656 [HOTFIX] Fix Scala 2.10 compilation (cherry picked from commit c4524f5193e1b3ce1c56c5aed126f4121ce26d23) Signed-off-by: Reynold Xin Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1dd15265 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1dd15265 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1dd15265 Branch: refs/heads/branch-2.0 Commit: 1dd152656a56b83c6daabda22148c495357ea3e3 Parents: aac8608 Author: Reynold Xin Authored: Mon Jul 18 17:56:36 2016 -0700 Committer: Reynold Xin Committed: Mon Jul 18 17:57:10 2016 -0700 -- .../org/apache/spark/sql/catalyst/LogicalPlanToSQLSuite.scala| 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1dd15265/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/LogicalPlanToSQLSuite.scala -- diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/LogicalPlanToSQLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/LogicalPlanToSQLSuite.scala index 698c7c3..1f5078d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/LogicalPlanToSQLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/LogicalPlanToSQLSuite.scala @@ -40,8 +40,8 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with SQLTestUtils { import testImplicits._ // Used for generating new query answer files by saving - private val regenerateGoldenFiles = -Option(System.getenv("SPARK_GENERATE_GOLDEN_FILES")).contains("1") + private val regenerateGoldenFiles: Boolean = +Option(System.getenv("SPARK_GENERATE_GOLDEN_FILES")) == Some("1") private val goldenSQLPath = "src/test/resources/sqlgen/" protected override def beforeAll(): Unit = { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [HOTFIX] Fix Scala 2.10 compilation
Repository: spark Updated Branches: refs/heads/master ea78edb80 -> c4524f519 [HOTFIX] Fix Scala 2.10 compilation Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c4524f51 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c4524f51 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c4524f51 Branch: refs/heads/master Commit: c4524f5193e1b3ce1c56c5aed126f4121ce26d23 Parents: ea78edb Author: Reynold Xin Authored: Mon Jul 18 17:56:36 2016 -0700 Committer: Reynold Xin Committed: Mon Jul 18 17:56:36 2016 -0700 -- .../org/apache/spark/sql/catalyst/LogicalPlanToSQLSuite.scala| 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c4524f51/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/LogicalPlanToSQLSuite.scala -- diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/LogicalPlanToSQLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/LogicalPlanToSQLSuite.scala index 698c7c3..1f5078d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/LogicalPlanToSQLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/LogicalPlanToSQLSuite.scala @@ -40,8 +40,8 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with SQLTestUtils { import testImplicits._ // Used for generating new query answer files by saving - private val regenerateGoldenFiles = -Option(System.getenv("SPARK_GENERATE_GOLDEN_FILES")).contains("1") + private val regenerateGoldenFiles: Boolean = +Option(System.getenv("SPARK_GENERATE_GOLDEN_FILES")) == Some("1") private val goldenSQLPath = "src/test/resources/sqlgen/" protected override def beforeAll(): Unit = { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[1/2] spark git commit: [SPARK-16590][SQL] Improve LogicalPlanToSQLSuite to check generated SQL directly
Repository: spark Updated Branches: refs/heads/master 75f0efe74 -> ea78edb80 http://git-wip-us.apache.org/repos/asf/spark/blob/ea78edb8/sql/hive/src/test/resources/sqlgen/script_transformation_row_format_one.sql -- diff --git a/sql/hive/src/test/resources/sqlgen/script_transformation_row_format_one.sql b/sql/hive/src/test/resources/sqlgen/script_transformation_row_format_one.sql new file mode 100644 index 000..dd62289 --- /dev/null +++ b/sql/hive/src/test/resources/sqlgen/script_transformation_row_format_one.sql @@ -0,0 +1,6 @@ +-- This file is automatically generated by LogicalPlanToSQLSuite. +SELECT TRANSFORM (key) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' +USING 'cat' AS (tKey) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' +FROM parquet_t1 + +SELECT `gen_attr` AS `tKey` FROM (SELECT TRANSFORM (`gen_attr`) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' USING 'cat' AS (`gen_attr` string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' FROM (SELECT `key` AS `gen_attr`, `value` AS `gen_attr` FROM `default`.`parquet_t1`) AS gen_subquery_0) AS gen_subquery_1 http://git-wip-us.apache.org/repos/asf/spark/blob/ea78edb8/sql/hive/src/test/resources/sqlgen/script_transformation_row_format_serde.sql -- diff --git a/sql/hive/src/test/resources/sqlgen/script_transformation_row_format_serde.sql b/sql/hive/src/test/resources/sqlgen/script_transformation_row_format_serde.sql new file mode 100644 index 000..2ad3698 --- /dev/null +++ b/sql/hive/src/test/resources/sqlgen/script_transformation_row_format_serde.sql @@ -0,0 +1,10 @@ +-- This file is automatically generated by LogicalPlanToSQLSuite. 
+SELECT TRANSFORM (key, value) +ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +WITH SERDEPROPERTIES('field.delim' = '|') +USING 'cat' AS (tKey, tValue) +ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +WITH SERDEPROPERTIES('field.delim' = '|') +FROM parquet_t1 + +SELECT `gen_attr` AS `tKey`, `gen_attr` AS `tValue` FROM (SELECT TRANSFORM (`gen_attr`, `gen_attr`) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' WITH SERDEPROPERTIES('field.delim' = '|') USING 'cat' AS (`gen_attr` string, `gen_attr` string) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' WITH SERDEPROPERTIES('field.delim' = '|') FROM (SELECT `key` AS `gen_attr`, `value` AS `gen_attr` FROM `default`.`parquet_t1`) AS gen_subquery_0) AS gen_subquery_1 http://git-wip-us.apache.org/repos/asf/spark/blob/ea78edb8/sql/hive/src/test/resources/sqlgen/script_transformation_row_format_without_serde.sql -- diff --git a/sql/hive/src/test/resources/sqlgen/script_transformation_row_format_without_serde.sql b/sql/hive/src/test/resources/sqlgen/script_transformation_row_format_without_serde.sql new file mode 100644 index 000..a90b42d --- /dev/null +++ b/sql/hive/src/test/resources/sqlgen/script_transformation_row_format_without_serde.sql @@ -0,0 +1,8 @@ +-- This file is automatically generated by LogicalPlanToSQLSuite. 
+SELECT TRANSFORM (key, value) +ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +USING 'cat' AS (tKey, tValue) +ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +FROM parquet_t1 + +SELECT `gen_attr` AS `tKey`, `gen_attr` AS `tValue` FROM (SELECT TRANSFORM (`gen_attr`, `gen_attr`) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' USING 'cat' AS (`gen_attr` string, `gen_attr` string) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' FROM (SELECT `key` AS `gen_attr`, `value` AS `gen_attr` FROM `default`.`parquet_t1`) AS gen_subquery_0) AS gen_subquery_1 http://git-wip-us.apache.org/repos/asf/spark/blob/ea78edb8/sql/hive/src/test/resources/sqlgen/select_distinct.sql -- diff --git a/sql/hive/src/test/resources/sqlgen/select_distinct.sql b/sql/hive/src/test/resources/sqlgen/select_distinct.sql new file mode 100644 index 000..3bc8e55 --- /dev/null +++ b/sql/hive/src/test/resources/sqlgen/select_distinct.sql @@ -0,0 +1,4 @@ +-- This file is automatically generated by LogicalPlanToSQLSuite. +SELECT DISTINCT id FROM parquet_t0 + +SELECT `gen_attr` AS `id` FROM (SELECT DISTINCT `gen_attr` FROM (SELECT `id` AS `gen_attr` FROM `default`.`parquet_t0`) AS gen_subquery_0) AS parquet_t0
[2/2] spark git commit: [SPARK-16590][SQL] Improve LogicalPlanToSQLSuite to check generated SQL directly
[SPARK-16590][SQL] Improve LogicalPlanToSQLSuite to check generated SQL directly ## What changes were proposed in this pull request? This PR improves `LogicalPlanToSQLSuite` to check the generated SQL directly by **structure**. So far, `LogicalPlanToSQLSuite` relies on `checkHiveQl` to ensure the **successful SQL generation** and **answer equality**. However, it does not guarantee the generated SQL is the same or will not be changed unnoticeably. ## How was this patch tested? Pass the Jenkins. This is only a testsuite change. Author: Dongjoon HyunCloses #14235 from dongjoon-hyun/SPARK-16590. (cherry picked from commit ea78edb80bf46e925d53e2aec29666c4eeb66188) Signed-off-by: Reynold Xin Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/aac86080 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/aac86080 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/aac86080 Branch: refs/heads/branch-2.0 Commit: aac860802efbae2c61387dbcb8989b3b226a57ff Parents: 7889585 Author: Dongjoon Hyun Authored: Mon Jul 18 17:17:37 2016 -0700 Committer: Reynold Xin Committed: Mon Jul 18 17:17:44 2016 -0700 -- sql/hive/src/test/resources/sqlgen/agg1.sql | 4 + sql/hive/src/test/resources/sqlgen/agg2.sql | 4 + sql/hive/src/test/resources/sqlgen/agg3.sql | 4 + .../sqlgen/aggregate_functions_and_window.sql | 4 + sql/hive/src/test/resources/sqlgen/case.sql | 4 + .../test/resources/sqlgen/case_with_else.sql| 4 + .../src/test/resources/sqlgen/case_with_key.sql | 4 + .../resources/sqlgen/case_with_key_and_else.sql | 4 + .../src/test/resources/sqlgen/cluster_by.sql| 4 + .../sqlgen/data_source_json_parquet_t0.sql | 4 + .../sqlgen/data_source_orc_parquet_t0.sql | 4 + .../sqlgen/data_source_parquet_parquet_t0.sql | 4 + .../resources/sqlgen/distinct_aggregation.sql | 4 + .../src/test/resources/sqlgen/distribute_by.sql | 4 + .../sqlgen/distribute_by_with_sort_by.sql | 4 + sql/hive/src/test/resources/sqlgen/except.sql | 4 + 
.../resources/sqlgen/filter_after_subquery.sql | 4 + .../resources/sqlgen/generate_with_other_1.sql | 8 + .../resources/sqlgen/generate_with_other_2.sql | 10 + .../sqlgen/generator_in_lateral_view_1.sql | 4 + .../sqlgen/generator_in_lateral_view_2.sql | 4 + .../sqlgen/generator_non_referenced_table_1.sql | 4 + .../sqlgen/generator_non_referenced_table_2.sql | 4 + .../resources/sqlgen/generator_non_udtf_1.sql | 4 + .../resources/sqlgen/generator_non_udtf_2.sql | 4 + .../sqlgen/generator_referenced_table_1.sql | 4 + .../sqlgen/generator_referenced_table_2.sql | 4 + .../sqlgen/generator_with_ambiguous_names_1.sql | 6 + .../sqlgen/generator_with_ambiguous_names_2.sql | 6 + .../sqlgen/generator_without_from_1.sql | 4 + .../sqlgen/generator_without_from_2.sql | 4 + .../test/resources/sqlgen/grouping_sets_1.sql | 6 + .../test/resources/sqlgen/grouping_sets_2_1.sql | 4 + .../test/resources/sqlgen/grouping_sets_2_2.sql | 4 + .../test/resources/sqlgen/grouping_sets_2_3.sql | 4 + .../test/resources/sqlgen/grouping_sets_2_4.sql | 4 + .../test/resources/sqlgen/grouping_sets_2_5.sql | 5 + sql/hive/src/test/resources/sqlgen/in.sql | 4 + .../src/test/resources/sqlgen/intersect.sql | 4 + .../src/test/resources/sqlgen/join_2_tables.sql | 7 + .../resources/sqlgen/json_tuple_generator_1.sql | 6 + .../resources/sqlgen/json_tuple_generator_2.sql | 6 + .../test/resources/sqlgen/multi_distinct.sql| 4 + .../nested_generator_in_lateral_view_1.sql | 7 + .../nested_generator_in_lateral_view_2.sql | 7 + sql/hive/src/test/resources/sqlgen/not_in.sql | 4 + sql/hive/src/test/resources/sqlgen/not_like.sql | 4 + .../sqlgen/regular_expressions_and_window.sql | 4 + .../test/resources/sqlgen/rollup_cube_1_1.sql | 4 + .../test/resources/sqlgen/rollup_cube_1_2.sql | 4 + .../test/resources/sqlgen/rollup_cube_2_1.sql | 4 + .../test/resources/sqlgen/rollup_cube_2_2.sql | 4 + .../test/resources/sqlgen/rollup_cube_3_1.sql | 4 + .../test/resources/sqlgen/rollup_cube_3_2.sql | 4 + 
.../test/resources/sqlgen/rollup_cube_4_1.sql | 5 + .../test/resources/sqlgen/rollup_cube_4_2.sql | 5 + .../test/resources/sqlgen/rollup_cube_5_1.sql | 6 + .../test/resources/sqlgen/rollup_cube_5_2.sql | 6 + .../test/resources/sqlgen/rollup_cube_6_1.sql | 4 + .../test/resources/sqlgen/rollup_cube_6_2.sql | 4 + .../test/resources/sqlgen/rollup_cube_6_3.sql | 4 + .../test/resources/sqlgen/rollup_cube_6_4.sql | 4 + .../test/resources/sqlgen/rollup_cube_6_5.sql | 4 + .../test/resources/sqlgen/rollup_cube_6_6.sql | 4
[1/2] spark git commit: [SPARK-16590][SQL] Improve LogicalPlanToSQLSuite to check generated SQL directly
Repository: spark Updated Branches: refs/heads/branch-2.0 7889585cc -> aac860802 http://git-wip-us.apache.org/repos/asf/spark/blob/aac86080/sql/hive/src/test/resources/sqlgen/script_transformation_row_format_one.sql -- diff --git a/sql/hive/src/test/resources/sqlgen/script_transformation_row_format_one.sql b/sql/hive/src/test/resources/sqlgen/script_transformation_row_format_one.sql new file mode 100644 index 000..dd62289 --- /dev/null +++ b/sql/hive/src/test/resources/sqlgen/script_transformation_row_format_one.sql @@ -0,0 +1,6 @@ +-- This file is automatically generated by LogicalPlanToSQLSuite. +SELECT TRANSFORM (key) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' +USING 'cat' AS (tKey) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' +FROM parquet_t1 + +SELECT `gen_attr` AS `tKey` FROM (SELECT TRANSFORM (`gen_attr`) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' USING 'cat' AS (`gen_attr` string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' FROM (SELECT `key` AS `gen_attr`, `value` AS `gen_attr` FROM `default`.`parquet_t1`) AS gen_subquery_0) AS gen_subquery_1 http://git-wip-us.apache.org/repos/asf/spark/blob/aac86080/sql/hive/src/test/resources/sqlgen/script_transformation_row_format_serde.sql -- diff --git a/sql/hive/src/test/resources/sqlgen/script_transformation_row_format_serde.sql b/sql/hive/src/test/resources/sqlgen/script_transformation_row_format_serde.sql new file mode 100644 index 000..2ad3698 --- /dev/null +++ b/sql/hive/src/test/resources/sqlgen/script_transformation_row_format_serde.sql @@ -0,0 +1,10 @@ +-- This file is automatically generated by LogicalPlanToSQLSuite. 
+SELECT TRANSFORM (key, value) +ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +WITH SERDEPROPERTIES('field.delim' = '|') +USING 'cat' AS (tKey, tValue) +ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +WITH SERDEPROPERTIES('field.delim' = '|') +FROM parquet_t1 + +SELECT `gen_attr` AS `tKey`, `gen_attr` AS `tValue` FROM (SELECT TRANSFORM (`gen_attr`, `gen_attr`) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' WITH SERDEPROPERTIES('field.delim' = '|') USING 'cat' AS (`gen_attr` string, `gen_attr` string) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' WITH SERDEPROPERTIES('field.delim' = '|') FROM (SELECT `key` AS `gen_attr`, `value` AS `gen_attr` FROM `default`.`parquet_t1`) AS gen_subquery_0) AS gen_subquery_1 http://git-wip-us.apache.org/repos/asf/spark/blob/aac86080/sql/hive/src/test/resources/sqlgen/script_transformation_row_format_without_serde.sql -- diff --git a/sql/hive/src/test/resources/sqlgen/script_transformation_row_format_without_serde.sql b/sql/hive/src/test/resources/sqlgen/script_transformation_row_format_without_serde.sql new file mode 100644 index 000..a90b42d --- /dev/null +++ b/sql/hive/src/test/resources/sqlgen/script_transformation_row_format_without_serde.sql @@ -0,0 +1,8 @@ +-- This file is automatically generated by LogicalPlanToSQLSuite. 
+SELECT TRANSFORM (key, value) +ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +USING 'cat' AS (tKey, tValue) +ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +FROM parquet_t1 + +SELECT `gen_attr` AS `tKey`, `gen_attr` AS `tValue` FROM (SELECT TRANSFORM (`gen_attr`, `gen_attr`) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' USING 'cat' AS (`gen_attr` string, `gen_attr` string) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' FROM (SELECT `key` AS `gen_attr`, `value` AS `gen_attr` FROM `default`.`parquet_t1`) AS gen_subquery_0) AS gen_subquery_1 http://git-wip-us.apache.org/repos/asf/spark/blob/aac86080/sql/hive/src/test/resources/sqlgen/select_distinct.sql -- diff --git a/sql/hive/src/test/resources/sqlgen/select_distinct.sql b/sql/hive/src/test/resources/sqlgen/select_distinct.sql new file mode 100644 index 000..3bc8e55 --- /dev/null +++ b/sql/hive/src/test/resources/sqlgen/select_distinct.sql @@ -0,0 +1,4 @@ +-- This file is automatically generated by LogicalPlanToSQLSuite. +SELECT DISTINCT id FROM parquet_t0 + +SELECT `gen_attr` AS `id` FROM (SELECT DISTINCT `gen_attr` FROM (SELECT `id` AS `gen_attr` FROM `default`.`parquet_t0`) AS gen_subquery_0) AS parquet_t0
[2/2] spark git commit: [SPARK-16590][SQL] Improve LogicalPlanToSQLSuite to check generated SQL directly
[SPARK-16590][SQL] Improve LogicalPlanToSQLSuite to check generated SQL directly ## What changes were proposed in this pull request? This PR improves `LogicalPlanToSQLSuite` to check the generated SQL directly by **structure**. So far, `LogicalPlanToSQLSuite` relies on `checkHiveQl` to ensure the **successful SQL generation** and **answer equality**. However, it does not guarantee the generated SQL is the same or will not be changed unnoticeably. ## How was this patch tested? Pass the Jenkins. This is only a testsuite change. Author: Dongjoon HyunCloses #14235 from dongjoon-hyun/SPARK-16590. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ea78edb8 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ea78edb8 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ea78edb8 Branch: refs/heads/master Commit: ea78edb80bf46e925d53e2aec29666c4eeb66188 Parents: 75f0efe Author: Dongjoon Hyun Authored: Mon Jul 18 17:17:37 2016 -0700 Committer: Reynold Xin Committed: Mon Jul 18 17:17:37 2016 -0700 -- sql/hive/src/test/resources/sqlgen/agg1.sql | 4 + sql/hive/src/test/resources/sqlgen/agg2.sql | 4 + sql/hive/src/test/resources/sqlgen/agg3.sql | 4 + .../sqlgen/aggregate_functions_and_window.sql | 4 + sql/hive/src/test/resources/sqlgen/case.sql | 4 + .../test/resources/sqlgen/case_with_else.sql| 4 + .../src/test/resources/sqlgen/case_with_key.sql | 4 + .../resources/sqlgen/case_with_key_and_else.sql | 4 + .../src/test/resources/sqlgen/cluster_by.sql| 4 + .../sqlgen/data_source_json_parquet_t0.sql | 4 + .../sqlgen/data_source_orc_parquet_t0.sql | 4 + .../sqlgen/data_source_parquet_parquet_t0.sql | 4 + .../resources/sqlgen/distinct_aggregation.sql | 4 + .../src/test/resources/sqlgen/distribute_by.sql | 4 + .../sqlgen/distribute_by_with_sort_by.sql | 4 + sql/hive/src/test/resources/sqlgen/except.sql | 4 + .../resources/sqlgen/filter_after_subquery.sql | 4 + .../resources/sqlgen/generate_with_other_1.sql | 8 
+ .../resources/sqlgen/generate_with_other_2.sql | 10 + .../sqlgen/generator_in_lateral_view_1.sql | 4 + .../sqlgen/generator_in_lateral_view_2.sql | 4 + .../sqlgen/generator_non_referenced_table_1.sql | 4 + .../sqlgen/generator_non_referenced_table_2.sql | 4 + .../resources/sqlgen/generator_non_udtf_1.sql | 4 + .../resources/sqlgen/generator_non_udtf_2.sql | 4 + .../sqlgen/generator_referenced_table_1.sql | 4 + .../sqlgen/generator_referenced_table_2.sql | 4 + .../sqlgen/generator_with_ambiguous_names_1.sql | 6 + .../sqlgen/generator_with_ambiguous_names_2.sql | 6 + .../sqlgen/generator_without_from_1.sql | 4 + .../sqlgen/generator_without_from_2.sql | 4 + .../test/resources/sqlgen/grouping_sets_1.sql | 6 + .../test/resources/sqlgen/grouping_sets_2_1.sql | 4 + .../test/resources/sqlgen/grouping_sets_2_2.sql | 4 + .../test/resources/sqlgen/grouping_sets_2_3.sql | 4 + .../test/resources/sqlgen/grouping_sets_2_4.sql | 4 + .../test/resources/sqlgen/grouping_sets_2_5.sql | 5 + sql/hive/src/test/resources/sqlgen/in.sql | 4 + .../src/test/resources/sqlgen/intersect.sql | 4 + .../src/test/resources/sqlgen/join_2_tables.sql | 7 + .../resources/sqlgen/json_tuple_generator_1.sql | 6 + .../resources/sqlgen/json_tuple_generator_2.sql | 6 + .../test/resources/sqlgen/multi_distinct.sql| 4 + .../nested_generator_in_lateral_view_1.sql | 7 + .../nested_generator_in_lateral_view_2.sql | 7 + sql/hive/src/test/resources/sqlgen/not_in.sql | 4 + sql/hive/src/test/resources/sqlgen/not_like.sql | 4 + .../sqlgen/regular_expressions_and_window.sql | 4 + .../test/resources/sqlgen/rollup_cube_1_1.sql | 4 + .../test/resources/sqlgen/rollup_cube_1_2.sql | 4 + .../test/resources/sqlgen/rollup_cube_2_1.sql | 4 + .../test/resources/sqlgen/rollup_cube_2_2.sql | 4 + .../test/resources/sqlgen/rollup_cube_3_1.sql | 4 + .../test/resources/sqlgen/rollup_cube_3_2.sql | 4 + .../test/resources/sqlgen/rollup_cube_4_1.sql | 5 + .../test/resources/sqlgen/rollup_cube_4_2.sql | 5 + 
.../test/resources/sqlgen/rollup_cube_5_1.sql | 6 + .../test/resources/sqlgen/rollup_cube_5_2.sql | 6 + .../test/resources/sqlgen/rollup_cube_6_1.sql | 4 + .../test/resources/sqlgen/rollup_cube_6_2.sql | 4 + .../test/resources/sqlgen/rollup_cube_6_3.sql | 4 + .../test/resources/sqlgen/rollup_cube_6_4.sql | 4 + .../test/resources/sqlgen/rollup_cube_6_5.sql | 4 + .../test/resources/sqlgen/rollup_cube_6_6.sql | 4 + .../test/resources/sqlgen/rollup_cube_7_1.sql | 4 + .../test/resources/sqlgen/rollup_cube_7_2.sql | 4 +
spark git commit: [SPARKR][DOCS] minor code sample update in R programming guide
Repository: spark Updated Branches: refs/heads/branch-2.0 33d92f7f3 -> 7889585cc [SPARKR][DOCS] minor code sample update in R programming guide ## What changes were proposed in this pull request? Fix code style from ad hoc review of RC4 doc ## How was this patch tested? manual shivaram Author: Felix CheungCloses #14250 from felixcheung/rdocs2rc4. (cherry picked from commit 75f0efe74d0c9a7acb525339c5184b99fee4dafc) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7889585c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7889585c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7889585c Branch: refs/heads/branch-2.0 Commit: 7889585ccb9b99eb9dc3a80b8381ae8d2329e26c Parents: 33d92f7 Author: Felix Cheung Authored: Mon Jul 18 16:01:57 2016 -0700 Committer: Shivaram Venkataraman Committed: Mon Jul 18 16:02:33 2016 -0700 -- docs/sparkr.md | 4 ++-- examples/src/main/r/RSparkSQLExample.R | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7889585c/docs/sparkr.md -- diff --git a/docs/sparkr.md b/docs/sparkr.md index a5235b2..dfa5278 100644 --- a/docs/sparkr.md +++ b/docs/sparkr.md @@ -54,7 +54,7 @@ if (nchar(Sys.getenv("SPARK_HOME")) < 1) { Sys.setenv(SPARK_HOME = "/home/spark") } library(SparkR, lib.loc = c(file.path(Sys.getenv("SPARK_HOME"), "R", "lib"))) -sc <- sparkR.session(master = "local[*]", sparkConfig = list(spark.driver.memory="2g")) +sparkR.session(master = "local[*]", sparkConfig = list(spark.driver.memory = "2g")) {% endhighlight %} @@ -115,7 +115,7 @@ specifying `--packages` with `spark-submit` or `sparkR` commands, or if initiali {% highlight r %} -sc <- sparkR.session(sparkPackages = "com.databricks:spark-avro_2.11:3.0.0") +sparkR.session(sparkPackages = "com.databricks:spark-avro_2.11:3.0.0") {% endhighlight %} 
http://git-wip-us.apache.org/repos/asf/spark/blob/7889585c/examples/src/main/r/RSparkSQLExample.R -- diff --git a/examples/src/main/r/RSparkSQLExample.R b/examples/src/main/r/RSparkSQLExample.R index f20875c..33e88e1 100644 --- a/examples/src/main/r/RSparkSQLExample.R +++ b/examples/src/main/r/RSparkSQLExample.R @@ -144,7 +144,7 @@ write.df(df1, "data/test_table/key=1", "parquet", "overwrite") write.df(df2, "data/test_table/key=2", "parquet", "overwrite") # Read the partitioned table -df3 <- read.df("data/test_table", "parquet", mergeSchema="true") +df3 <- read.df("data/test_table", "parquet", mergeSchema = "true") printSchema(df3) # The final schema consists of all 3 columns in the Parquet files together - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARKR][DOCS] minor code sample update in R programming guide
Repository: spark Updated Branches: refs/heads/master 96e9afaae -> 75f0efe74 [SPARKR][DOCS] minor code sample update in R programming guide ## What changes were proposed in this pull request? Fix code style from ad hoc review of RC4 doc ## How was this patch tested? manual shivaram Author: Felix CheungCloses #14250 from felixcheung/rdocs2rc4. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/75f0efe7 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/75f0efe7 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/75f0efe7 Branch: refs/heads/master Commit: 75f0efe74d0c9a7acb525339c5184b99fee4dafc Parents: 96e9afa Author: Felix Cheung Authored: Mon Jul 18 16:01:57 2016 -0700 Committer: Shivaram Venkataraman Committed: Mon Jul 18 16:01:57 2016 -0700 -- docs/sparkr.md | 4 ++-- examples/src/main/r/RSparkSQLExample.R | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/75f0efe7/docs/sparkr.md -- diff --git a/docs/sparkr.md b/docs/sparkr.md index a5235b2..dfa5278 100644 --- a/docs/sparkr.md +++ b/docs/sparkr.md @@ -54,7 +54,7 @@ if (nchar(Sys.getenv("SPARK_HOME")) < 1) { Sys.setenv(SPARK_HOME = "/home/spark") } library(SparkR, lib.loc = c(file.path(Sys.getenv("SPARK_HOME"), "R", "lib"))) -sc <- sparkR.session(master = "local[*]", sparkConfig = list(spark.driver.memory="2g")) +sparkR.session(master = "local[*]", sparkConfig = list(spark.driver.memory = "2g")) {% endhighlight %} @@ -115,7 +115,7 @@ specifying `--packages` with `spark-submit` or `sparkR` commands, or if initiali {% highlight r %} -sc <- sparkR.session(sparkPackages = "com.databricks:spark-avro_2.11:3.0.0") +sparkR.session(sparkPackages = "com.databricks:spark-avro_2.11:3.0.0") {% endhighlight %} http://git-wip-us.apache.org/repos/asf/spark/blob/75f0efe7/examples/src/main/r/RSparkSQLExample.R -- diff --git a/examples/src/main/r/RSparkSQLExample.R 
b/examples/src/main/r/RSparkSQLExample.R index f20875c..33e88e1 100644 --- a/examples/src/main/r/RSparkSQLExample.R +++ b/examples/src/main/r/RSparkSQLExample.R @@ -144,7 +144,7 @@ write.df(df1, "data/test_table/key=1", "parquet", "overwrite") write.df(df2, "data/test_table/key=2", "parquet", "overwrite") # Read the partitioned table -df3 <- read.df("data/test_table", "parquet", mergeSchema="true") +df3 <- read.df("data/test_table", "parquet", mergeSchema = "true") printSchema(df3) # The final schema consists of all 3 columns in the Parquet files together - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-16515][SQL] set default record reader and writer for script transformation
Repository: spark Updated Branches: refs/heads/branch-2.0 085f3cc85 -> 33d92f7f3 [SPARK-16515][SQL] set default record reader and writer for script transformation ## What changes were proposed in this pull request? In ScriptInputOutputSchema, we read default RecordReader and RecordWriter from conf. Since Spark 2.0 has deleted those config keys from hive conf, we have to set default reader/writer class name by ourselves. Otherwise we will get None for LazySimpleSerde, the data written would not be able to read by script. The test case added worked fine with previous version of Spark, but would fail now. ## How was this patch tested? added a test case in SQLQuerySuite. Closes #14169 Author: Daoyuan WangAuthor: Yin Huai Closes #14249 from yhuai/scriptTransformation. (cherry picked from commit 96e9afaae93318250334211cc80ed0fee3d055b9) Signed-off-by: Yin Huai Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/33d92f7f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/33d92f7f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/33d92f7f Branch: refs/heads/branch-2.0 Commit: 33d92f7f39136bd399e1f7cabd264e7eeca9b958 Parents: 085f3cc Author: Daoyuan Wang Authored: Mon Jul 18 13:58:12 2016 -0700 Committer: Yin Huai Committed: Mon Jul 18 13:58:56 2016 -0700 -- .../spark/sql/execution/SparkSqlParser.scala| 16 +- sql/hive/src/test/resources/test_script.sh | 23 .../sql/hive/execution/SQLQuerySuite.scala | 11 ++ 3 files changed, 45 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/33d92f7f/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 42ec210..3573a86 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -1315,7 +1315,10 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { // Decode and input/output format. type Format = (Seq[(String, String)], Option[String], Seq[(String, String)], Option[String]) -def format(fmt: RowFormatContext, configKey: String): Format = fmt match { +def format( +fmt: RowFormatContext, +configKey: String, +defaultConfigValue: String): Format = fmt match { case c: RowFormatDelimitedContext => // TODO we should use the visitRowFormatDelimited function here. However HiveScriptIOSchema // expects a seq of pairs in which the old parsers' token names are used as keys. @@ -1338,7 +1341,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { // SPARK-10310: Special cases LazySimpleSerDe val recordHandler = if (name == "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe") { - Try(conf.getConfString(configKey)).toOption + Option(conf.getConfString(configKey, defaultConfigValue)) } else { None } @@ -1349,15 +1352,18 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { val name = conf.getConfString("hive.script.serde", "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe") val props = Seq("field.delim" -> "\t") -val recordHandler = Try(conf.getConfString(configKey)).toOption +val recordHandler = Option(conf.getConfString(configKey, defaultConfigValue)) (Nil, Option(name), props, recordHandler) } val (inFormat, inSerdeClass, inSerdeProps, reader) = - format(inRowFormat, "hive.script.recordreader") + format( +inRowFormat, "hive.script.recordreader", "org.apache.hadoop.hive.ql.exec.TextRecordReader") val (outFormat, outSerdeClass, outSerdeProps, writer) = - format(outRowFormat, "hive.script.recordwriter") + format( +outRowFormat, "hive.script.recordwriter", +"org.apache.hadoop.hive.ql.exec.TextRecordWriter") ScriptInputOutputSchema( inFormat, outFormat, 
http://git-wip-us.apache.org/repos/asf/spark/blob/33d92f7f/sql/hive/src/test/resources/test_script.sh -- diff --git a/sql/hive/src/test/resources/test_script.sh b/sql/hive/src/test/resources/test_script.sh new file mode 100755 index 000..ab998c4 --- /dev/null +++ b/sql/hive/src/test/resources/test_script.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +# +#
spark git commit: [SPARK-16515][SQL] set default record reader and writer for script transformation
Repository: spark Updated Branches: refs/heads/master 2877f1a52 -> 96e9afaae [SPARK-16515][SQL] set default record reader and writer for script transformation ## What changes were proposed in this pull request? In ScriptInputOutputSchema, we read default RecordReader and RecordWriter from conf. Since Spark 2.0 has deleted those config keys from hive conf, we have to set default reader/writer class name by ourselves. Otherwise we will get None for LazySimpleSerde, the data written would not be able to read by script. The test case added worked fine with previous version of Spark, but would fail now. ## How was this patch tested? added a test case in SQLQuerySuite. Closes #14169 Author: Daoyuan WangAuthor: Yin Huai Closes #14249 from yhuai/scriptTransformation. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/96e9afaa Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/96e9afaa Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/96e9afaa Branch: refs/heads/master Commit: 96e9afaae93318250334211cc80ed0fee3d055b9 Parents: 2877f1a Author: Daoyuan Wang Authored: Mon Jul 18 13:58:12 2016 -0700 Committer: Yin Huai Committed: Mon Jul 18 13:58:12 2016 -0700 -- .../spark/sql/execution/SparkSqlParser.scala| 16 +- sql/hive/src/test/resources/test_script.sh | 23 .../sql/hive/execution/SQLQuerySuite.scala | 11 ++ 3 files changed, 45 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/96e9afaa/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index c5f4d58..fa4ccf4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -1325,7 +1325,10 @@ class 
SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { // Decode and input/output format. type Format = (Seq[(String, String)], Option[String], Seq[(String, String)], Option[String]) -def format(fmt: RowFormatContext, configKey: String): Format = fmt match { +def format( +fmt: RowFormatContext, +configKey: String, +defaultConfigValue: String): Format = fmt match { case c: RowFormatDelimitedContext => // TODO we should use the visitRowFormatDelimited function here. However HiveScriptIOSchema // expects a seq of pairs in which the old parsers' token names are used as keys. @@ -1348,7 +1351,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { // SPARK-10310: Special cases LazySimpleSerDe val recordHandler = if (name == "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe") { - Try(conf.getConfString(configKey)).toOption + Option(conf.getConfString(configKey, defaultConfigValue)) } else { None } @@ -1359,15 +1362,18 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { val name = conf.getConfString("hive.script.serde", "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe") val props = Seq("field.delim" -> "\t") -val recordHandler = Try(conf.getConfString(configKey)).toOption +val recordHandler = Option(conf.getConfString(configKey, defaultConfigValue)) (Nil, Option(name), props, recordHandler) } val (inFormat, inSerdeClass, inSerdeProps, reader) = - format(inRowFormat, "hive.script.recordreader") + format( +inRowFormat, "hive.script.recordreader", "org.apache.hadoop.hive.ql.exec.TextRecordReader") val (outFormat, outSerdeClass, outSerdeProps, writer) = - format(outRowFormat, "hive.script.recordwriter") + format( +outRowFormat, "hive.script.recordwriter", +"org.apache.hadoop.hive.ql.exec.TextRecordWriter") ScriptInputOutputSchema( inFormat, outFormat, http://git-wip-us.apache.org/repos/asf/spark/blob/96e9afaa/sql/hive/src/test/resources/test_script.sh -- diff --git a/sql/hive/src/test/resources/test_script.sh 
b/sql/hive/src/test/resources/test_script.sh new file mode 100755 index 000..ab998c4 --- /dev/null +++ b/sql/hive/src/test/resources/test_script.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file
spark git commit: [SPARK-16351][SQL] Avoid per-record type dispatch in JSON when writing
Repository: spark Updated Branches: refs/heads/master 8ea3f4eae -> 2877f1a52 [SPARK-16351][SQL] Avoid per-record type dispatch in JSON when writing ## What changes were proposed in this pull request? Currently, `JacksonGenerator.apply` is doing type-based dispatch for each row to write appropriate values. It might not have to be done like this because the schema is already kept. So, appropriate writers can be created first according to the schema once, and then apply them to each row. This approach is similar with `CatalystWriteSupport`. This PR corrects `JacksonGenerator` so that it creates all writers for the schema once and then applies them to each row rather than type dispatching for every row. Benchmark was proceeded with the codes below: ```scala test("Benchmark for JSON writer") { val N = 500 << 8 val row = """{"struct":{"field1": true, "field2": 92233720368547758070}, "structWithArrayFields":{"field1":[4, 5, 6], "field2":["str1", "str2"]}, "arrayOfString":["str1", "str2"], "arrayOfInteger":[1, 2147483647, -2147483648], "arrayOfLong":[21474836470, 9223372036854775807, -9223372036854775808], "arrayOfBigInteger":[922337203685477580700, -922337203685477580800], "arrayOfDouble":[1.2, 1.7976931348623157E308, 4.9E-324, 2.2250738585072014E-308], "arrayOfBoolean":[true, false, true], "arrayOfNull":[null, null, null, null], "arrayOfStruct":[{"field1": true, "field2": "str1"}, {"field1": false}, {"field3": null}], "arrayOfArray1":[[1, 2, 3], ["str1", "str2"]], "arrayOfArray2":[[1, 2, 3], [1.1, 2.1, 3.1]] }""" val df = spark.sqlContext.read.json(spark.sparkContext.parallelize(List.fill(N)(row))) val benchmark = new Benchmark("JSON writer", N) benchmark.addCase("writing JSON file", 10) { _ => withTempPath { path => df.write.format("json").save(path.getCanonicalPath) } } benchmark.run() } ``` This produced the results below - **Before** ``` JSON writer: Best/Avg Time(ms)Rate(M/s) Per Row(ns) Relative writing JSON file 1675 / 1767 0.1 13087.5 1.0X ``` - **After** ``` 
JSON writer: Best/Avg Time(ms)Rate(M/s) Per Row(ns) Relative writing JSON file 1597 / 1686 0.1 12477.1 1.0X ``` In addition, I ran this benchmark 10 times for each and calculated the average elapsed time as below: | **Before** | **After**| |---|| |17478ms |16669ms | It seems roughly ~5% is improved. ## How was this patch tested? Existing tests should cover this. Author: hyukjinkwonCloses #14028 from HyukjinKwon/SPARK-16351. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2877f1a5 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2877f1a5 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2877f1a5 Branch: refs/heads/master Commit: 2877f1a5224c38c1fa0b85ef633ff935fae9dd83 Parents: 8ea3f4e Author: hyukjinkwon Authored: Mon Jul 18 09:49:14 2016 -0700 Committer: Yin Huai Committed: Mon Jul 18 09:49:14 2016 -0700 -- .../scala/org/apache/spark/sql/Dataset.scala| 4 +- .../datasources/json/JacksonGenerator.scala | 218 ++- .../datasources/json/JsonFileFormat.scala | 5 +- .../execution/datasources/json/JsonSuite.scala | 3 - 4 files changed, 163 insertions(+), 67 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/2877f1a5/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index ed4ccdb..b28ecb7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -2489,12 +2489,12 @@ class Dataset[T] private[sql]( val rdd: RDD[String] = queryExecution.toRdd.mapPartitions { iter => val writer = new CharArrayWriter() // create the Generator without separator inserted between 2 records - val gen = new JsonFactory().createGenerator(writer).setRootValueSeparator(null) + val gen = new JacksonGenerator(rowSchema, writer) new Iterator[String] { override def hasNext: Boolean = 
iter.hasNext override def next(): String = { - JacksonGenerator(rowSchema, gen)(iter.next())
spark git commit: [SPARK-16055][SPARKR] warning added while using sparkPackages with spark-submit
Repository: spark Updated Branches: refs/heads/master a529fc944 -> 8ea3f4eae [SPARK-16055][SPARKR] warning added while using sparkPackages with spark-submit ## What changes were proposed in this pull request? https://issues.apache.org/jira/browse/SPARK-16055 sparkPackages - argument is passed and we detect that we are in the R script mode, we should print some warning like --packages flag should be used with spark-submit ## How was this patch tested? In my system locally Author: krishnakalyan3 Closes #14179 from krishnakalyan3/spark-pkg. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8ea3f4ea Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8ea3f4ea Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8ea3f4ea Branch: refs/heads/master Commit: 8ea3f4eaec65ee4277f9943063fcc9488d3fa924 Parents: a529fc9 Author: krishnakalyan3 Authored: Mon Jul 18 09:46:23 2016 -0700 Committer: Shivaram Venkataraman Committed: Mon Jul 18 09:46:23 2016 -0700 -- R/pkg/R/sparkR.R | 4 1 file changed, 4 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/8ea3f4ea/R/pkg/R/sparkR.R -- diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index 62659b0..ff5297f 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -155,6 +155,10 @@ sparkR.sparkContext <- function( existingPort <- Sys.getenv("EXISTING_SPARKR_BACKEND_PORT", "") if (existingPort != "") { +if (length(packages) != 0) { + warning(paste("sparkPackages has no effect when using spark-submit or sparkR shell", +" please use the --packages commandline instead", sep = ",")) +} backendPort <- existingPort } else { path <- tempfile(pattern = "backend_port") - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-16055][SPARKR] warning added while using sparkPackages with spark-submit
Repository: spark Updated Branches: refs/heads/branch-2.0 2365d6352 -> 085f3cc85 [SPARK-16055][SPARKR] warning added while using sparkPackages with spark-submit ## What changes were proposed in this pull request? https://issues.apache.org/jira/browse/SPARK-16055 sparkPackages - argument is passed and we detect that we are in the R script mode, we should print some warning like --packages flag should be used with spark-submit ## How was this patch tested? In my system locally Author: krishnakalyan3 Closes #14179 from krishnakalyan3/spark-pkg. (cherry picked from commit 8ea3f4eaec65ee4277f9943063fcc9488d3fa924) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/085f3cc8 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/085f3cc8 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/085f3cc8 Branch: refs/heads/branch-2.0 Commit: 085f3cc855c091f0e5ea2699ddc9790e0b4cd83a Parents: 2365d63 Author: krishnakalyan3 Authored: Mon Jul 18 09:46:23 2016 -0700 Committer: Shivaram Venkataraman Committed: Mon Jul 18 09:46:47 2016 -0700 -- R/pkg/R/sparkR.R | 4 1 file changed, 4 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/085f3cc8/R/pkg/R/sparkR.R -- diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index 62659b0..ff5297f 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -155,6 +155,10 @@ sparkR.sparkContext <- function( existingPort <- Sys.getenv("EXISTING_SPARKR_BACKEND_PORT", "") if (existingPort != "") { +if (length(packages) != 0) { + warning(paste("sparkPackages has no effect when using spark-submit or sparkR shell", +" please use the --packages commandline instead", sep = ",")) +} backendPort <- existingPort } else { path <- tempfile(pattern = "backend_port") - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [MINOR][TYPO] fix fininsh typo
Repository: spark Updated Branches: refs/heads/branch-2.0 808d69aaa -> 2365d6352 [MINOR][TYPO] fix fininsh typo ## What changes were proposed in this pull request? fininsh => finish ## How was this patch tested? N/A Author: WeichenXu Closes #14238 from WeichenXu123/fix_fininsh_typo. (cherry picked from commit a529fc944209e7255ec5858b33490212884d6c60) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2365d635 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2365d635 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2365d635 Branch: refs/heads/branch-2.0 Commit: 2365d635262c42f2c60633d75abb3eddd251b40f Parents: 808d69a Author: WeichenXu Authored: Mon Jul 18 09:11:53 2016 +0100 Committer: Sean Owen Committed: Mon Jul 18 09:12:03 2016 +0100 -- .../org/apache/spark/ml/classification/LogisticRegression.scala| 2 +- .../org/apache/spark/ml/regression/AFTSurvivalRegression.scala | 2 +- .../scala/org/apache/spark/ml/regression/LinearRegression.scala| 2 +- .../src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/2365d635/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 1fed5fd..91eee0e 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -425,7 +425,7 @@ class LogisticRegression @Since("1.2.0") ( } if (!state.actuallyConverged) { - logWarning("LogisticRegression training fininshed but the result " + + logWarning("LogisticRegression training finished but the result " + s"is not converged because: 
${state.convergedReason.get.reason}") } http://git-wip-us.apache.org/repos/asf/spark/blob/2365d635/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala index 366448f..700a92c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala @@ -246,7 +246,7 @@ class AFTSurvivalRegression @Since("1.6.0") (@Since("1.6.0") override val uid: S } if (!state.actuallyConverged) { -logWarning("AFTSurvivalRegression training fininshed but the result " + +logWarning("AFTSurvivalRegression training finished but the result " + s"is not converged because: ${state.convergedReason.get.reason}") } http://git-wip-us.apache.org/repos/asf/spark/blob/2365d635/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala index c57e9eb..401f2c6 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala @@ -326,7 +326,7 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String } if (!state.actuallyConverged) { -logWarning("LinearRegression training fininshed but the result " + +logWarning("LinearRegression training finished but the result " + s"is not converged because: ${state.convergedReason.get.reason}") } http://git-wip-us.apache.org/repos/asf/spark/blob/2365d635/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala 
b/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala index c61b2db..fd09f35 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala @@ -214,7 +214,7 @@ object LBFGS extends Logging { lossHistory += state.value
spark git commit: [MINOR][TYPO] fix fininsh typo
Repository: spark Updated Branches: refs/heads/master 480c87064 -> a529fc944 [MINOR][TYPO] fix fininsh typo ## What changes were proposed in this pull request? fininsh => finish ## How was this patch tested? N/A Author: WeichenXu Closes #14238 from WeichenXu123/fix_fininsh_typo. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a529fc94 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a529fc94 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a529fc94 Branch: refs/heads/master Commit: a529fc944209e7255ec5858b33490212884d6c60 Parents: 480c870 Author: WeichenXu Authored: Mon Jul 18 09:11:53 2016 +0100 Committer: Sean Owen Committed: Mon Jul 18 09:11:53 2016 +0100 -- .../org/apache/spark/ml/classification/LogisticRegression.scala| 2 +- .../org/apache/spark/ml/regression/AFTSurvivalRegression.scala | 2 +- .../scala/org/apache/spark/ml/regression/LinearRegression.scala| 2 +- .../src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a529fc94/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 1fed5fd..91eee0e 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -425,7 +425,7 @@ class LogisticRegression @Since("1.2.0") ( } if (!state.actuallyConverged) { - logWarning("LogisticRegression training fininshed but the result " + + logWarning("LogisticRegression training finished but the result " + s"is not converged because: ${state.convergedReason.get.reason}") } 
http://git-wip-us.apache.org/repos/asf/spark/blob/a529fc94/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala index 366448f..700a92c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala @@ -246,7 +246,7 @@ class AFTSurvivalRegression @Since("1.6.0") (@Since("1.6.0") override val uid: S } if (!state.actuallyConverged) { -logWarning("AFTSurvivalRegression training fininshed but the result " + +logWarning("AFTSurvivalRegression training finished but the result " + s"is not converged because: ${state.convergedReason.get.reason}") } http://git-wip-us.apache.org/repos/asf/spark/blob/a529fc94/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala index c57e9eb..401f2c6 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala @@ -326,7 +326,7 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String } if (!state.actuallyConverged) { -logWarning("LinearRegression training fininshed but the result " + +logWarning("LinearRegression training finished but the result " + s"is not converged because: ${state.convergedReason.get.reason}") } http://git-wip-us.apache.org/repos/asf/spark/blob/a529fc94/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala index 
c61b2db..fd09f35 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala @@ -214,7 +214,7 @@ object LBFGS extends Logging { lossHistory += state.value if (!state.actuallyConverged) { - logWarning("LBFGS training fininshed but the result " + + logWarning("LBFGS