[spark] branch branch-3.0 updated: [SPARK-31558][UI] Code clean up in spark-sql-viz.js
This is an automated email from the ASF dual-hosted git repository. gengliang pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.0 by this push: new 27a9c1d [SPARK-31558][UI] Code clean up in spark-sql-viz.js 27a9c1d is described below commit 27a9c1da6f9ced760c33ff624c2eadc5f6a85dd9 Author: Gengliang Wang AuthorDate: Sat Apr 25 13:43:52 2020 -0700 [SPARK-31558][UI] Code clean up in spark-sql-viz.js ### What changes were proposed in this pull request? 1. Remove console.log(), which seems unnecessary in the releases. 2. Replace the double equals to triple equals 3. Reuse jquery selector. ### Why are the changes needed? For better code quality. ### Does this PR introduce any user-facing change? No ### How was this patch tested? Existing tests + manual test. Closes #28333 from gengliangwang/removeLog. Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang (cherry picked from commit f59ebdef5b9c03dab99835527554ba8bebe38548) Signed-off-by: Gengliang Wang --- .../org/apache/spark/sql/execution/ui/static/spark-sql-viz.js | 10 -- 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.js b/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.js index bded921..724cec1 100644 --- a/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.js +++ b/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.js @@ -61,7 +61,7 @@ function planVizContainer() { return d3.select("#plan-viz-graph"); } * node, it will display the details of this SparkPlan node in the right. 
*/ function setupTooltipForSparkPlanNode(nodeId) { - var nodeTooltip = d3.select("#plan-meta-data-" + nodeId).text() + var nodeTooltip = d3.select("#plan-meta-data-" + nodeId).text(); d3.select("svg g .node_" + nodeId) .on('mouseover', function(d) { var domNode = d3.select(this).node(); @@ -127,10 +127,8 @@ function preprocessGraphLayout(g) { */ function resizeSvg(svg) { var allClusters = svg.selectAll("g rect")[0]; - console.log(allClusters); var startX = -PlanVizConstants.svgMarginX + toFloat(d3.min(allClusters, function(e) { - console.log(e); return getAbsolutePosition(d3.select(e)).x; })); var startY = -PlanVizConstants.svgMarginY + @@ -183,7 +181,7 @@ function getAbsolutePosition(d3selection) { // Climb upwards to find how our parents are translated obj = d3.select(obj.node().parentNode); // Stop when we've reached the graph container itself -if (obj.node() == planVizContainer().node()) { +if (obj.node() === planVizContainer().node()) { break; } } @@ -215,8 +213,8 @@ function postprocessForAdditionalMetrics() { checkboxNode.click(function() { onClickAdditionalMetricsCheckbox($(this)); }); - var isChecked = window.localStorage.getItem("stageId-and-taskId-checked") == "true"; - $("#stageId-and-taskId-checkbox").prop("checked", isChecked); + var isChecked = window.localStorage.getItem("stageId-and-taskId-checked") === "true"; + checkboxNode.prop("checked", isChecked); onClickAdditionalMetricsCheckbox(checkboxNode); } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-31558][UI] Code clean up in spark-sql-viz.js
This is an automated email from the ASF dual-hosted git repository. gengliang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new f59ebde [SPARK-31558][UI] Code clean up in spark-sql-viz.js f59ebde is described below commit f59ebdef5b9c03dab99835527554ba8bebe38548 Author: Gengliang Wang AuthorDate: Sat Apr 25 13:43:52 2020 -0700 [SPARK-31558][UI] Code clean up in spark-sql-viz.js ### What changes were proposed in this pull request? 1. Remove console.log(), which seems unnecessary in the releases. 2. Replace the double equals to triple equals 3. Reuse jquery selector. ### Why are the changes needed? For better code quality. ### Does this PR introduce any user-facing change? No ### How was this patch tested? Existing tests + manual test. Closes #28333 from gengliangwang/removeLog. Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang --- .../org/apache/spark/sql/execution/ui/static/spark-sql-viz.js | 10 -- 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.js b/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.js index bb393d9..301183f 100644 --- a/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.js +++ b/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.js @@ -61,7 +61,7 @@ function planVizContainer() { return d3.select("#plan-viz-graph"); } * node, it will display the details of this SparkPlan node in the right. 
*/ function setupTooltipForSparkPlanNode(nodeId) { - var nodeTooltip = d3.select("#plan-meta-data-" + nodeId).text() + var nodeTooltip = d3.select("#plan-meta-data-" + nodeId).text(); d3.select("svg g .node_" + nodeId) .each(function(d) { var domNode = d3.select(this).node(); @@ -122,10 +122,8 @@ function preprocessGraphLayout(g) { */ function resizeSvg(svg) { var allClusters = svg.selectAll("g rect")[0]; - console.log(allClusters); var startX = -PlanVizConstants.svgMarginX + toFloat(d3.min(allClusters, function(e) { - console.log(e); return getAbsolutePosition(d3.select(e)).x; })); var startY = -PlanVizConstants.svgMarginY + @@ -178,7 +176,7 @@ function getAbsolutePosition(d3selection) { // Climb upwards to find how our parents are translated obj = d3.select(obj.node().parentNode); // Stop when we've reached the graph container itself -if (obj.node() == planVizContainer().node()) { +if (obj.node() === planVizContainer().node()) { break; } } @@ -210,8 +208,8 @@ function postprocessForAdditionalMetrics() { checkboxNode.click(function() { onClickAdditionalMetricsCheckbox($(this)); }); - var isChecked = window.localStorage.getItem("stageId-and-taskId-checked") == "true"; - $("#stageId-and-taskId-checkbox").prop("checked", isChecked); + var isChecked = window.localStorage.getItem("stageId-and-taskId-checked") === "true"; + checkboxNode.prop("checked", isChecked); onClickAdditionalMetricsCheckbox(checkboxNode); } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.0 updated: [SPARK-31564][TESTS] Fix flaky AllExecutionsPageSuite for checking 1970
This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.0 by this push: new 6c6283b [SPARK-31564][TESTS] Fix flaky AllExecutionsPageSuite for checking 1970 6c6283b is described below commit 6c6283b1e68b174b212f3352b1496ab9e92450ad Author: Kent Yao AuthorDate: Sat Apr 25 10:27:05 2020 -0700 [SPARK-31564][TESTS] Fix flaky AllExecutionsPageSuite for checking 1970 ### What changes were proposed in this pull request? Fix flakiness by checking `1970/01/01` instead of `1970`. The test was added by SPARK-27125 for 3.0.0. ### Why are the changes needed? the `org.apache.spark.sql.execution.ui.AllExecutionsPageSuite.SPARK-27019:correctly display SQL page when event reordering happens` test is flaky for just checking the `html` content not containing 1970. I will add a ticket to check and fix that. In the specific failure https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/121799/testReport, it failed because the `html` ``` ... ... ``` contained `1970`. ### Does this PR introduce any user-facing change? no ### How was this patch tested? passing jenkins Closes #28344 from yaooqinn/SPARK-31564. 
Authored-by: Kent Yao Signed-off-by: Dongjoon Hyun (cherry picked from commit 7959808e962584704cba1ed26a4bcece724424d5) Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/sql/execution/ui/AllExecutionsPageSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/AllExecutionsPageSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/AllExecutionsPageSuite.scala index 298afa8..24b8a97 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/AllExecutionsPageSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/AllExecutionsPageSuite.scala @@ -57,7 +57,7 @@ class AllExecutionsPageSuite extends SharedSparkSession with BeforeAndAfter { val html = renderSQLPage(request, tab, statusStore).toString().toLowerCase(Locale.ROOT) assert(html.contains("failed queries")) -assert(!html.contains("1970")) +assert(!html.contains("1970/01/01")) } test("sorting should be successful") { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (7d8216a -> 7959808)
This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from 7d8216a [SPARK-31563][SQL] Fix failure of InSet.sql for collections of Catalyst's internal types add 7959808 [SPARK-31564][TESTS] Fix flaky AllExecutionsPageSuite for checking 1970 No new revisions were added by this update. Summary of changes: .../org/apache/spark/sql/execution/ui/AllExecutionsPageSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.0 updated: [SPARK-31564][TESTS] Fix flaky AllExecutionsPageSuite for checking 1970
This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.0 by this push: new 6c6283b [SPARK-31564][TESTS] Fix flaky AllExecutionsPageSuite for checking 1970 6c6283b is described below commit 6c6283b1e68b174b212f3352b1496ab9e92450ad Author: Kent Yao AuthorDate: Sat Apr 25 10:27:05 2020 -0700 [SPARK-31564][TESTS] Fix flaky AllExecutionsPageSuite for checking 1970 ### What changes were proposed in this pull request? Fix flakiness by checking `1970/01/01` instead of `1970`. The test was added by SPARK-27125 for 3.0.0. ### Why are the changes needed? the `org.apache.spark.sql.execution.ui.AllExecutionsPageSuite.SPARK-27019:correctly display SQL page when event reordering happens` test is flaky for just checking the `html` content not containing 1970. I will add a ticket to check and fix that. In the specific failure https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/121799/testReport, it failed because the `html` ``` ... ... ``` contained `1970`. ### Does this PR introduce any user-facing change? no ### How was this patch tested? passing jenkins Closes #28344 from yaooqinn/SPARK-31564. 
Authored-by: Kent Yao Signed-off-by: Dongjoon Hyun (cherry picked from commit 7959808e962584704cba1ed26a4bcece724424d5) Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/sql/execution/ui/AllExecutionsPageSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/AllExecutionsPageSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/AllExecutionsPageSuite.scala index 298afa8..24b8a97 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/AllExecutionsPageSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/AllExecutionsPageSuite.scala @@ -57,7 +57,7 @@ class AllExecutionsPageSuite extends SharedSparkSession with BeforeAndAfter { val html = renderSQLPage(request, tab, statusStore).toString().toLowerCase(Locale.ROOT) assert(html.contains("failed queries")) -assert(!html.contains("1970")) +assert(!html.contains("1970/01/01")) } test("sorting should be successful") { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (7d8216a -> 7959808)
This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from 7d8216a [SPARK-31563][SQL] Fix failure of InSet.sql for collections of Catalyst's internal types add 7959808 [SPARK-31564][TESTS] Fix flaky AllExecutionsPageSuite for checking 1970 No new revisions were added by this update. Summary of changes: .../org/apache/spark/sql/execution/ui/AllExecutionsPageSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.0 updated (dbcf855 -> eb54e6c)
This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a change to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git. from dbcf855 [SPARK-31563][SQL] Fix failure of InSet.sql for collections of Catalyst's internal types add eb54e6c [SPARK-31528][SQL][3.0] Remove millennium, century, decade from trunc/date_trunc functions No new revisions were added by this update. Summary of changes: .../catalyst/expressions/datetimeExpressions.scala | 38 ++ .../spark/sql/catalyst/util/DateTimeUtils.scala| 26 ++- .../expressions/DateExpressionsSuite.scala | 84 +++--- .../sql/catalyst/util/DateTimeUtilsSuite.scala | 6 -- .../scala/org/apache/spark/sql/functions.scala | 4 +- .../org/apache/spark/sql/DateFunctionsSuite.scala | 20 ++ 6 files changed, 79 insertions(+), 99 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-2.4 updated: [SPARK-31563][SQL] Fix failure of InSet.sql for collections of Catalyst's internal types
This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch branch-2.4 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-2.4 by this push: new 5e6bcca [SPARK-31563][SQL] Fix failure of InSet.sql for collections of Catalyst's internal types 5e6bcca is described below commit 5e6bccaeb46dfe6eaf5a24d1d2cc5dd2d62418c9 Author: Max Gekk AuthorDate: Sat Apr 25 09:29:51 2020 -0700 [SPARK-31563][SQL] Fix failure of InSet.sql for collections of Catalyst's internal types In the PR, I propose to fix the `InSet.sql` method for the cases when input collection contains values of internal Catalyst's types, for instance `UTF8String`. Elements of the input set `hset` are converted to Scala types, and wrapped by `Literal` to properly form SQL view of the input collection. The changes fixed the bug in `InSet.sql` that makes wrong assumption about types of collection elements. See more details in SPARK-31563. Highly likely, not. Added a test to `ColumnExpressionSuite` Closes #28343 from MaxGekk/fix-InSet-sql. 
Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun (cherry picked from commit 7d8216a6642f40af0d1b623129b1d5f4c86bec68) Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/sql/catalyst/expressions/predicates.scala| 5 - .../test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala | 8 +++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index 365fcad..a016ed6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.expressions import scala.collection.immutable.TreeSet +import org.apache.spark.sql.catalyst.CatalystTypeConverters.convertToScala import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, ExprCode, FalseLiteral, GenerateSafeProjection, GenerateUnsafeProjection, Predicate => BasePredicate} @@ -393,7 +394,9 @@ case class InSet(child: Expression, hset: Set[Any]) extends UnaryExpression with override def sql: String = { val valueSQL = child.sql -val listSQL = hset.toSeq.map(Literal(_).sql).mkString(", ") +val listSQL = hset.toSeq + .map(elem => Literal(convertToScala(elem, child.dataType)).sql) + .mkString(", ") s"($valueSQL IN ($listSQL))" } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala index 2917c56..a41fce9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala @@ -25,11 +25,12 @@ import 
org.apache.hadoop.io.{LongWritable, Text} import org.apache.hadoop.mapreduce.lib.input.{TextInputFormat => NewTextInputFormat} import org.scalatest.Matchers._ -import org.apache.spark.sql.catalyst.expressions.NamedExpression +import org.apache.spark.sql.catalyst.expressions.{In, InSet, Literal, NamedExpression} import org.apache.spark.sql.execution.ProjectExec import org.apache.spark.sql.functions._ import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.UTF8String class ColumnExpressionSuite extends QueryTest with SharedSQLContext { import testImplicits._ @@ -819,4 +820,9 @@ class ColumnExpressionSuite extends QueryTest with SharedSQLContext { df.select(typedLit(("a", 2, 1.0))), Row(Row("a", 2, 1.0)) :: Nil) } + + test("SPARK-31563: sql of InSet for UTF8String collection") { +val inSet = InSet(Literal("a"), Set("a", "b").map(UTF8String.fromString)) +assert(inSet.sql === "('a' IN ('a', 'b'))") + } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (ab8cada -> 7d8216a)
This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from ab8cada [SPARK-31521][CORE] Correct the fetch size when merging blocks into a merged block add 7d8216a [SPARK-31563][SQL] Fix failure of InSet.sql for collections of Catalyst's internal types No new revisions were added by this update. Summary of changes: .../org/apache/spark/sql/catalyst/expressions/predicates.scala| 5 - .../test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala | 8 +++- 2 files changed, 11 insertions(+), 2 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (ab8cada -> 7d8216a)
This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from ab8cada [SPARK-31521][CORE] Correct the fetch size when merging blocks into a merged block add 7d8216a [SPARK-31563][SQL] Fix failure of InSet.sql for collections of Catalyst's internal types No new revisions were added by this update. Summary of changes: .../org/apache/spark/sql/catalyst/expressions/predicates.scala| 5 - .../test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala | 8 +++- 2 files changed, 11 insertions(+), 2 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.0 updated: [SPARK-31563][SQL] Fix failure of InSet.sql for collections of Catalyst's internal types
This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.0 by this push: new dbcf855 [SPARK-31563][SQL] Fix failure of InSet.sql for collections of Catalyst's internal types dbcf855 is described below commit dbcf855d8448b7bf8ba9d8c4a08f8bef14be2805 Author: Max Gekk AuthorDate: Sat Apr 25 09:29:51 2020 -0700 [SPARK-31563][SQL] Fix failure of InSet.sql for collections of Catalyst's internal types ### What changes were proposed in this pull request? In the PR, I propose to fix the `InSet.sql` method for the cases when input collection contains values of internal Catalyst's types, for instance `UTF8String`. Elements of the input set `hset` are converted to Scala types, and wrapped by `Literal` to properly form SQL view of the input collection. ### Why are the changes needed? The changes fixed the bug in `InSet.sql` that makes wrong assumption about types of collection elements. See more details in SPARK-31563. ### Does this PR introduce any user-facing change? Highly likely, not. ### How was this patch tested? Added a test to `ColumnExpressionSuite` Closes #28343 from MaxGekk/fix-InSet-sql. 
Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun (cherry picked from commit 7d8216a6642f40af0d1b623129b1d5f4c86bec68) Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/sql/catalyst/expressions/predicates.scala| 5 - .../test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala | 8 +++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index bd190c3..ac492cf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.expressions import scala.collection.immutable.TreeSet +import org.apache.spark.sql.catalyst.CatalystTypeConverters.convertToScala import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.expressions.BindReferences.bindReference @@ -519,7 +520,9 @@ case class InSet(child: Expression, hset: Set[Any]) extends UnaryExpression with override def sql: String = { val valueSQL = child.sql -val listSQL = hset.toSeq.map(Literal(_).sql).mkString(", ") +val listSQL = hset.toSeq + .map(elem => Literal(convertToScala(elem, child.dataType)).sql) + .mkString(", ") s"($valueSQL IN ($listSQL))" } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala index a9ee25b..b72d92b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala @@ -26,12 +26,13 @@ import org.apache.hadoop.io.{LongWritable, Text} import org.apache.hadoop.mapreduce.lib.input.{TextInputFormat => NewTextInputFormat} import 
org.scalatest.Matchers._ -import org.apache.spark.sql.catalyst.expressions.{In, InSet, NamedExpression} +import org.apache.spark.sql.catalyst.expressions.{In, InSet, Literal, NamedExpression} import org.apache.spark.sql.execution.ProjectExec import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.UTF8String class ColumnExpressionSuite extends QueryTest with SharedSparkSession { import testImplicits._ @@ -869,4 +870,9 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { df.select(typedLit(("a", 2, 1.0))), Row(Row("a", 2, 1.0)) :: Nil) } + + test("SPARK-31563: sql of InSet for UTF8String collection") { +val inSet = InSet(Literal("a"), Set("a", "b").map(UTF8String.fromString)) +assert(inSet.sql === "('a' IN ('a', 'b'))") + } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (ab8cada -> 7d8216a)
This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from ab8cada [SPARK-31521][CORE] Correct the fetch size when merging blocks into a merged block add 7d8216a [SPARK-31563][SQL] Fix failure of InSet.sql for collections of Catalyst's internal types No new revisions were added by this update. Summary of changes: .../org/apache/spark/sql/catalyst/expressions/predicates.scala| 5 - .../test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala | 8 +++- 2 files changed, 11 insertions(+), 2 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.0 updated: [SPARK-31563][SQL] Fix failure of InSet.sql for collections of Catalyst's internal types
This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.0 by this push: new dbcf855 [SPARK-31563][SQL] Fix failure of InSet.sql for collections of Catalyst's internal types dbcf855 is described below commit dbcf855d8448b7bf8ba9d8c4a08f8bef14be2805 Author: Max Gekk AuthorDate: Sat Apr 25 09:29:51 2020 -0700 [SPARK-31563][SQL] Fix failure of InSet.sql for collections of Catalyst's internal types ### What changes were proposed in this pull request? In the PR, I propose to fix the `InSet.sql` method for the cases when input collection contains values of internal Catalyst's types, for instance `UTF8String`. Elements of the input set `hset` are converted to Scala types, and wrapped by `Literal` to properly form SQL view of the input collection. ### Why are the changes needed? The changes fixed the bug in `InSet.sql` that makes wrong assumption about types of collection elements. See more details in SPARK-31563. ### Does this PR introduce any user-facing change? Highly likely, not. ### How was this patch tested? Added a test to `ColumnExpressionSuite` Closes #28343 from MaxGekk/fix-InSet-sql. 
Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun (cherry picked from commit 7d8216a6642f40af0d1b623129b1d5f4c86bec68) Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/sql/catalyst/expressions/predicates.scala| 5 - .../test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala | 8 +++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index bd190c3..ac492cf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.expressions import scala.collection.immutable.TreeSet +import org.apache.spark.sql.catalyst.CatalystTypeConverters.convertToScala import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.expressions.BindReferences.bindReference @@ -519,7 +520,9 @@ case class InSet(child: Expression, hset: Set[Any]) extends UnaryExpression with override def sql: String = { val valueSQL = child.sql -val listSQL = hset.toSeq.map(Literal(_).sql).mkString(", ") +val listSQL = hset.toSeq + .map(elem => Literal(convertToScala(elem, child.dataType)).sql) + .mkString(", ") s"($valueSQL IN ($listSQL))" } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala index a9ee25b..b72d92b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala @@ -26,12 +26,13 @@ import org.apache.hadoop.io.{LongWritable, Text} import org.apache.hadoop.mapreduce.lib.input.{TextInputFormat => NewTextInputFormat} import 
org.scalatest.Matchers._ -import org.apache.spark.sql.catalyst.expressions.{In, InSet, NamedExpression} +import org.apache.spark.sql.catalyst.expressions.{In, InSet, Literal, NamedExpression} import org.apache.spark.sql.execution.ProjectExec import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.UTF8String class ColumnExpressionSuite extends QueryTest with SharedSparkSession { import testImplicits._ @@ -869,4 +870,9 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { df.select(typedLit(("a", 2, 1.0))), Row(Row("a", 2, 1.0)) :: Nil) } + + test("SPARK-31563: sql of InSet for UTF8String collection") { +val inSet = InSet(Literal("a"), Set("a", "b").map(UTF8String.fromString)) +assert(inSet.sql === "('a' IN ('a', 'b'))") + } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org