[spark] branch branch-3.0 updated: [SPARK-31558][UI] Code clean up in spark-sql-viz.js

2020-04-25 Thread gengliang
This is an automated email from the ASF dual-hosted git repository.

gengliang pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
 new 27a9c1d  [SPARK-31558][UI] Code clean up in spark-sql-viz.js
27a9c1d is described below

commit 27a9c1da6f9ced760c33ff624c2eadc5f6a85dd9
Author: Gengliang Wang 
AuthorDate: Sat Apr 25 13:43:52 2020 -0700

[SPARK-31558][UI] Code clean up in spark-sql-viz.js

### What changes were proposed in this pull request?

1. Remove console.log() calls, which are unnecessary in releases.
2. Replace double equals (==) with triple equals (===).
3. Reuse the jQuery selector.
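A minimal sketch of points 2 and 3, hand-written for this digest (only the checkbox id comes from the patch below):

```
// Point 2: `===` compares value and type with no implicit coercion,
// avoiding surprises such as 0 == "0" evaluating to true.
var stored = window.localStorage.getItem("stageId-and-taskId-checked");
var isChecked = stored === "true";

// Point 3: query the DOM once and reuse the cached jQuery object
// instead of re-evaluating $("#stageId-and-taskId-checkbox") each time.
var checkboxNode = $("#stageId-and-taskId-checkbox");
checkboxNode.prop("checked", isChecked);
```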

### Why are the changes needed?

For better code quality.

### Does this PR introduce any user-facing change?

No

### How was this patch tested?

Existing tests + manual test.

Closes #28333 from gengliangwang/removeLog.

Authored-by: Gengliang Wang 
Signed-off-by: Gengliang Wang 
(cherry picked from commit f59ebdef5b9c03dab99835527554ba8bebe38548)
Signed-off-by: Gengliang Wang 
---
 .../org/apache/spark/sql/execution/ui/static/spark-sql-viz.js  | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.js b/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.js
index bded921..724cec1 100644
--- a/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.js
+++ b/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.js
@@ -61,7 +61,7 @@ function planVizContainer() { return d3.select("#plan-viz-graph"); }
  * node, it will display the details of this SparkPlan node in the right.
  */
 function setupTooltipForSparkPlanNode(nodeId) {
-  var nodeTooltip = d3.select("#plan-meta-data-" + nodeId).text()
+  var nodeTooltip = d3.select("#plan-meta-data-" + nodeId).text();
   d3.select("svg g .node_" + nodeId)
     .on('mouseover', function(d) {
       var domNode = d3.select(this).node();
@@ -127,10 +127,8 @@ function preprocessGraphLayout(g) {
  */
 function resizeSvg(svg) {
   var allClusters = svg.selectAll("g rect")[0];
-  console.log(allClusters);
   var startX = -PlanVizConstants.svgMarginX +
     toFloat(d3.min(allClusters, function(e) {
-      console.log(e);
       return getAbsolutePosition(d3.select(e)).x;
     }));
   var startY = -PlanVizConstants.svgMarginY +
@@ -183,7 +181,7 @@ function getAbsolutePosition(d3selection) {
     // Climb upwards to find how our parents are translated
     obj = d3.select(obj.node().parentNode);
     // Stop when we've reached the graph container itself
-    if (obj.node() == planVizContainer().node()) {
+    if (obj.node() === planVizContainer().node()) {
       break;
     }
   }
@@ -215,8 +213,8 @@ function postprocessForAdditionalMetrics() {
   checkboxNode.click(function() {
     onClickAdditionalMetricsCheckbox($(this));
   });
-  var isChecked = window.localStorage.getItem("stageId-and-taskId-checked") == "true";
-  $("#stageId-and-taskId-checkbox").prop("checked", isChecked);
+  var isChecked = window.localStorage.getItem("stageId-and-taskId-checked") === "true";
+  checkboxNode.prop("checked", isChecked);
   onClickAdditionalMetricsCheckbox(checkboxNode);
 }
 


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



[spark] branch master updated: [SPARK-31558][UI] Code clean up in spark-sql-viz.js

2020-04-25 Thread gengliang
This is an automated email from the ASF dual-hosted git repository.

gengliang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
 new f59ebde  [SPARK-31558][UI] Code clean up in spark-sql-viz.js
f59ebde is described below

commit f59ebdef5b9c03dab99835527554ba8bebe38548
Author: Gengliang Wang 
AuthorDate: Sat Apr 25 13:43:52 2020 -0700

[SPARK-31558][UI] Code clean up in spark-sql-viz.js

### What changes were proposed in this pull request?

1. Remove console.log() calls, which are unnecessary in releases.
2. Replace double equals (==) with triple equals (===).
3. Reuse the jQuery selector.

### Why are the changes needed?

For better code quality.

### Does this PR introduce any user-facing change?

No

### How was this patch tested?

Existing tests + manual test.

Closes #28333 from gengliangwang/removeLog.

Authored-by: Gengliang Wang 
Signed-off-by: Gengliang Wang 
---
 .../org/apache/spark/sql/execution/ui/static/spark-sql-viz.js  | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.js b/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.js
index bb393d9..301183f 100644
--- a/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.js
+++ b/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.js
@@ -61,7 +61,7 @@ function planVizContainer() { return d3.select("#plan-viz-graph"); }
  * node, it will display the details of this SparkPlan node in the right.
  */
 function setupTooltipForSparkPlanNode(nodeId) {
-  var nodeTooltip = d3.select("#plan-meta-data-" + nodeId).text()
+  var nodeTooltip = d3.select("#plan-meta-data-" + nodeId).text();
   d3.select("svg g .node_" + nodeId)
     .each(function(d) {
       var domNode = d3.select(this).node();
@@ -122,10 +122,8 @@ function preprocessGraphLayout(g) {
  */
 function resizeSvg(svg) {
   var allClusters = svg.selectAll("g rect")[0];
-  console.log(allClusters);
   var startX = -PlanVizConstants.svgMarginX +
     toFloat(d3.min(allClusters, function(e) {
-      console.log(e);
       return getAbsolutePosition(d3.select(e)).x;
     }));
   var startY = -PlanVizConstants.svgMarginY +
@@ -178,7 +176,7 @@ function getAbsolutePosition(d3selection) {
     // Climb upwards to find how our parents are translated
     obj = d3.select(obj.node().parentNode);
     // Stop when we've reached the graph container itself
-    if (obj.node() == planVizContainer().node()) {
+    if (obj.node() === planVizContainer().node()) {
       break;
     }
   }
@@ -210,8 +208,8 @@ function postprocessForAdditionalMetrics() {
   checkboxNode.click(function() {
     onClickAdditionalMetricsCheckbox($(this));
   });
-  var isChecked = window.localStorage.getItem("stageId-and-taskId-checked") == "true";
-  $("#stageId-and-taskId-checkbox").prop("checked", isChecked);
+  var isChecked = window.localStorage.getItem("stageId-and-taskId-checked") === "true";
+  checkboxNode.prop("checked", isChecked);
   onClickAdditionalMetricsCheckbox(checkboxNode);
 }
 


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



[spark] branch branch-3.0 updated: [SPARK-31564][TESTS] Fix flaky AllExecutionsPageSuite for checking 1970

2020-04-25 Thread dongjoon
This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
 new 6c6283b  [SPARK-31564][TESTS] Fix flaky AllExecutionsPageSuite for checking 1970
6c6283b is described below

commit 6c6283b1e68b174b212f3352b1496ab9e92450ad
Author: Kent Yao 
AuthorDate: Sat Apr 25 10:27:05 2020 -0700

[SPARK-31564][TESTS] Fix flaky AllExecutionsPageSuite for checking 1970

### What changes were proposed in this pull request?

Fix flakiness by checking `1970/01/01` instead of `1970`.
The test was added by SPARK-27125 for 3.0.0.

### Why are the changes needed?

The `org.apache.spark.sql.execution.ui.AllExecutionsPageSuite.SPARK-27019: correctly display SQL page when event reordering happens` test is flaky because it only checks that the `html` content does not contain 1970. I will add a ticket to check and fix that.
In the specific failure https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/121799/testReport, it failed because the `html`

```
...

...

```

contained `1970`.
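
For illustration, a hedged sketch of why matching the bare substring `1970` is fragile (the cell below is hypothetical, not taken from the failed run):

```
// An epoch-millis sort key for an April 2020 date that happens to contain
// the digits "1970", although no 1970 date is displayed:
val html = """<td sorttable_customkey="1587719700000">2020/04/24</td>"""
assert(html.contains("1970"))         // so the old assert(!html.contains("1970")) is flaky
assert(!html.contains("1970/01/01"))  // the stricter check adopted by this fix passes
```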

### Does this PR introduce any user-facing change?

no

### How was this patch tested?

passing jenkins

Closes #28344 from yaooqinn/SPARK-31564.

Authored-by: Kent Yao 
Signed-off-by: Dongjoon Hyun 
(cherry picked from commit 7959808e962584704cba1ed26a4bcece724424d5)
Signed-off-by: Dongjoon Hyun 
---
 .../org/apache/spark/sql/execution/ui/AllExecutionsPageSuite.scala  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/AllExecutionsPageSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/AllExecutionsPageSuite.scala
index 298afa8..24b8a97 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/AllExecutionsPageSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/AllExecutionsPageSuite.scala
@@ -57,7 +57,7 @@ class AllExecutionsPageSuite extends SharedSparkSession with BeforeAndAfter {
 
     val html = renderSQLPage(request, tab, statusStore).toString().toLowerCase(Locale.ROOT)
     assert(html.contains("failed queries"))
-    assert(!html.contains("1970"))
+    assert(!html.contains("1970/01/01"))
   }
 
   test("sorting should be successful") {


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



[spark] branch master updated (7d8216a -> 7959808)

2020-04-25 Thread dongjoon
This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.


from 7d8216a  [SPARK-31563][SQL] Fix failure of InSet.sql for collections of Catalyst's internal types
 add 7959808  [SPARK-31564][TESTS] Fix flaky AllExecutionsPageSuite for checking 1970

No new revisions were added by this update.

Summary of changes:
 .../org/apache/spark/sql/execution/ui/AllExecutionsPageSuite.scala  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org




[spark] branch branch-3.0 updated (dbcf855 -> eb54e6c)

2020-04-25 Thread dongjoon
This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a change to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git.


from dbcf855  [SPARK-31563][SQL] Fix failure of InSet.sql for collections of Catalyst's internal types
 add eb54e6c  [SPARK-31528][SQL][3.0] Remove millennium, century, decade from trunc/date_trunc functions

No new revisions were added by this update.

Summary of changes:
 .../catalyst/expressions/datetimeExpressions.scala | 38 ++
 .../spark/sql/catalyst/util/DateTimeUtils.scala    | 26 ++-
 .../expressions/DateExpressionsSuite.scala | 84 +++---
 .../sql/catalyst/util/DateTimeUtilsSuite.scala |  6 --
 .../scala/org/apache/spark/sql/functions.scala |  4 +-
 .../org/apache/spark/sql/DateFunctionsSuite.scala  | 20 ++
 6 files changed, 79 insertions(+), 99 deletions(-)


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



[spark] branch branch-2.4 updated: [SPARK-31563][SQL] Fix failure of InSet.sql for collections of Catalyst's internal types

2020-04-25 Thread dongjoon
This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-2.4 by this push:
 new 5e6bcca  [SPARK-31563][SQL] Fix failure of InSet.sql for collections of Catalyst's internal types
5e6bcca is described below

commit 5e6bccaeb46dfe6eaf5a24d1d2cc5dd2d62418c9
Author: Max Gekk 
AuthorDate: Sat Apr 25 09:29:51 2020 -0700

[SPARK-31563][SQL] Fix failure of InSet.sql for collections of Catalyst's internal types

### What changes were proposed in this pull request?

In the PR, I propose to fix the `InSet.sql` method for the cases when the input collection contains values of Catalyst's internal types, for instance `UTF8String`. Elements of the input set `hset` are converted to Scala types and wrapped in `Literal` to properly form the SQL view of the input collection.
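
A hedged, REPL-style sketch of that conversion step, assuming `Literal.apply` only accepts external Scala types (so applying it directly to a `UTF8String` misbehaves):

```
import org.apache.spark.sql.catalyst.CatalystTypeConverters.convertToScala
import org.apache.spark.sql.catalyst.expressions.{InSet, Literal}
import org.apache.spark.sql.types.StringType
import org.apache.spark.unsafe.types.UTF8String

// hset as InSet sees it at runtime: Catalyst-internal UTF8String values.
val hset: Set[Any] = Set("a", "b").map(UTF8String.fromString)

// Old code assumed external types: hset.toSeq.map(Literal(_).sql).
// The fix converts each element back to a Scala type first:
val listSQL = hset.toSeq
  .map(elem => Literal(convertToScala(elem, StringType)).sql)  // the patch uses child.dataType
  .mkString(", ")
// listSQL == "'a', 'b'", so InSet(Literal("a"), hset).sql renders as
// "('a' IN ('a', 'b'))", which the new ColumnExpressionSuite test asserts.
```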

### Why are the changes needed?

The changes fix a bug in `InSet.sql` that made a wrong assumption about the types of collection elements. See more details in SPARK-31563.

### Does this PR introduce any user-facing change?

Highly likely, not.

### How was this patch tested?

Added a test to `ColumnExpressionSuite`.

Closes #28343 from MaxGekk/fix-InSet-sql.

Authored-by: Max Gekk 
Signed-off-by: Dongjoon Hyun 
(cherry picked from commit 7d8216a6642f40af0d1b623129b1d5f4c86bec68)
Signed-off-by: Dongjoon Hyun 
---
 .../org/apache/spark/sql/catalyst/expressions/predicates.scala    | 5 ++++-
 .../test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala   | 8 +++++++-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala
index 365fcad..a016ed6 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.expressions
 
 import scala.collection.immutable.TreeSet
 
+import org.apache.spark.sql.catalyst.CatalystTypeConverters.convertToScala
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
 import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, ExprCode, FalseLiteral, GenerateSafeProjection, GenerateUnsafeProjection, Predicate => BasePredicate}
@@ -393,7 +394,9 @@ case class InSet(child: Expression, hset: Set[Any]) extends UnaryExpression with
 
   override def sql: String = {
     val valueSQL = child.sql
-    val listSQL = hset.toSeq.map(Literal(_).sql).mkString(", ")
+    val listSQL = hset.toSeq
+      .map(elem => Literal(convertToScala(elem, child.dataType)).sql)
+      .mkString(", ")
     s"($valueSQL IN ($listSQL))"
   }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
index 2917c56..a41fce9 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
@@ -25,11 +25,12 @@ import org.apache.hadoop.io.{LongWritable, Text}
 import org.apache.hadoop.mapreduce.lib.input.{TextInputFormat => NewTextInputFormat}
 import org.scalatest.Matchers._
 
-import org.apache.spark.sql.catalyst.expressions.NamedExpression
+import org.apache.spark.sql.catalyst.expressions.{In, InSet, Literal, NamedExpression}
 import org.apache.spark.sql.execution.ProjectExec
 import org.apache.spark.sql.functions._
 import org.apache.spark.sql.test.SharedSQLContext
 import org.apache.spark.sql.types._
+import org.apache.spark.unsafe.types.UTF8String
 
 class ColumnExpressionSuite extends QueryTest with SharedSQLContext {
   import testImplicits._
@@ -819,4 +820,9 @@ class ColumnExpressionSuite extends QueryTest with SharedSQLContext {
       df.select(typedLit(("a", 2, 1.0))),
       Row(Row("a", 2, 1.0)) :: Nil)
   }
+
+  test("SPARK-31563: sql of InSet for UTF8String collection") {
+    val inSet = InSet(Literal("a"), Set("a", "b").map(UTF8String.fromString))
+    assert(inSet.sql === "('a' IN ('a', 'b'))")
+  }
 }


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



[spark] branch master updated (ab8cada -> 7d8216a)

2020-04-25 Thread dongjoon
This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.


from ab8cada  [SPARK-31521][CORE] Correct the fetch size when merging blocks into a merged block
 add 7d8216a  [SPARK-31563][SQL] Fix failure of InSet.sql for collections of Catalyst's internal types

No new revisions were added by this update.

Summary of changes:
 .../org/apache/spark/sql/catalyst/expressions/predicates.scala    | 5 ++++-
 .../test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala   | 8 +++++++-
 2 files changed, 11 insertions(+), 2 deletions(-)


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



[spark] branch branch-3.0 updated: [SPARK-31563][SQL] Fix failure of InSet.sql for collections of Catalyst's internal types

2020-04-25 Thread dongjoon
This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
 new dbcf855  [SPARK-31563][SQL] Fix failure of InSet.sql for collections of Catalyst's internal types
dbcf855 is described below

commit dbcf855d8448b7bf8ba9d8c4a08f8bef14be2805
Author: Max Gekk 
AuthorDate: Sat Apr 25 09:29:51 2020 -0700

[SPARK-31563][SQL] Fix failure of InSet.sql for collections of Catalyst's internal types

### What changes were proposed in this pull request?
In the PR, I propose to fix the `InSet.sql` method for the cases when the input collection contains values of Catalyst's internal types, for instance `UTF8String`. Elements of the input set `hset` are converted to Scala types and wrapped in `Literal` to properly form the SQL view of the input collection.

### Why are the changes needed?
The changes fix a bug in `InSet.sql` that made a wrong assumption about the types of collection elements. See more details in SPARK-31563.

### Does this PR introduce any user-facing change?
Highly likely, not.

### How was this patch tested?
Added a test to `ColumnExpressionSuite`

Closes #28343 from MaxGekk/fix-InSet-sql.

Authored-by: Max Gekk 
Signed-off-by: Dongjoon Hyun 
(cherry picked from commit 7d8216a6642f40af0d1b623129b1d5f4c86bec68)
Signed-off-by: Dongjoon Hyun 
---
 .../org/apache/spark/sql/catalyst/expressions/predicates.scala    | 5 ++++-
 .../test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala   | 8 +++++++-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala
index bd190c3..ac492cf 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.expressions
 
 import scala.collection.immutable.TreeSet
 
+import org.apache.spark.sql.catalyst.CatalystTypeConverters.convertToScala
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
 import org.apache.spark.sql.catalyst.expressions.BindReferences.bindReference
@@ -519,7 +520,9 @@ case class InSet(child: Expression, hset: Set[Any]) extends UnaryExpression with
 
   override def sql: String = {
     val valueSQL = child.sql
-    val listSQL = hset.toSeq.map(Literal(_).sql).mkString(", ")
+    val listSQL = hset.toSeq
+      .map(elem => Literal(convertToScala(elem, child.dataType)).sql)
+      .mkString(", ")
     s"($valueSQL IN ($listSQL))"
   }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
index a9ee25b..b72d92b 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
@@ -26,12 +26,13 @@ import org.apache.hadoop.io.{LongWritable, Text}
 import org.apache.hadoop.mapreduce.lib.input.{TextInputFormat => NewTextInputFormat}
 import org.scalatest.Matchers._
 
-import org.apache.spark.sql.catalyst.expressions.{In, InSet, NamedExpression}
+import org.apache.spark.sql.catalyst.expressions.{In, InSet, Literal, NamedExpression}
 import org.apache.spark.sql.execution.ProjectExec
 import org.apache.spark.sql.functions._
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.SharedSparkSession
 import org.apache.spark.sql.types._
+import org.apache.spark.unsafe.types.UTF8String
 
 class ColumnExpressionSuite extends QueryTest with SharedSparkSession {
   import testImplicits._
@@ -869,4 +870,9 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession {
       df.select(typedLit(("a", 2, 1.0))),
       Row(Row("a", 2, 1.0)) :: Nil)
   }
+
+  test("SPARK-31563: sql of InSet for UTF8String collection") {
+    val inSet = InSet(Literal("a"), Set("a", "b").map(UTF8String.fromString))
+    assert(inSet.sql === "('a' IN ('a', 'b'))")
+  }
 }


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org


