spark git commit: [SPARK-25368][SQL] Incorrect predicate pushdown returns wrong result

2018-09-09 Thread lixiao
Repository: spark
Updated Branches:
  refs/heads/branch-2.3 5b8b6b4e9 -> 5ad644a4c


[SPARK-25368][SQL] Incorrect predicate pushdown returns wrong result

How to reproduce:
```scala
val df1 = spark.createDataFrame(Seq(
   (1, 1)
)).toDF("a", "b").withColumn("c", lit(null).cast("int"))
val df2 = df1.union(df1).withColumn("d", 
spark_partition_id).filter($"c".isNotNull)
df2.show

+---+---+----+---+
|  a|  b|   c|  d|
+---+---+----+---+
|  1|  1|null|  0|
|  1|  1|null|  1|
+---+---+----+---+
```
`filter($"c".isNotNull)` was transformed to `(null <=> c#10)` before 
https://github.com/apache/spark/pull/19201, but it is transformed to `(c#10 = 
null)` since https://github.com/apache/spark/pull/20155. This PR reverts it to 
`(null <=> c#10)` to fix this issue.

unit tests

Closes #22368 from wangyum/SPARK-25368.

Authored-by: Yuming Wang 
Signed-off-by: gatorsmile 
(cherry picked from commit 77c996403d5c761f0dfea64c5b1cb7480ba1d3ac)
Signed-off-by: gatorsmile 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5ad644a4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5ad644a4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5ad644a4

Branch: refs/heads/branch-2.3
Commit: 5ad644a4cefc20e4f198d614c59b8b0f75a228ba
Parents: 5b8b6b4
Author: Yuming Wang 
Authored: Sun Sep 9 09:07:31 2018 -0700
Committer: gatorsmile 
Committed: Sun Sep 9 09:09:09 2018 -0700

--
 .../sql/catalyst/plans/logical/LogicalPlan.scala  |  2 +-
 .../InferFiltersFromConstraintsSuite.scala|  2 +-
 .../org/apache/spark/sql/DataFrameSuite.scala | 18 ++
 3 files changed, 20 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/5ad644a4/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
index c8ccd9b..d96a9f0 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
@@ -248,7 +248,7 @@ abstract class UnaryNode extends LogicalPlan {
 var allConstraints = child.constraints.asInstanceOf[Set[Expression]]
 projectList.foreach {
   case a @ Alias(l: Literal, _) =>
-allConstraints += EqualTo(a.toAttribute, l)
+allConstraints += EqualNullSafe(a.toAttribute, l)
   case a @ Alias(e, _) =>
 // For every alias in `projectList`, replace the reference in 
constraints by its attribute.
 allConstraints ++= allConstraints.map(_ transform {

http://git-wip-us.apache.org/repos/asf/spark/blob/5ad644a4/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromConstraintsSuite.scala
--
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromConstraintsSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromConstraintsSuite.scala
index 178c4b8..9c7730c 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromConstraintsSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromConstraintsSuite.scala
@@ -182,7 +182,7 @@ class InferFiltersFromConstraintsSuite extends PlanTest {
 
   test("constraints should be inferred from aliased literals") {
 val originalLeft = testRelation.subquery('left).as("left")
-val optimizedLeft = testRelation.subquery('left).where(IsNotNull('a) && 'a 
=== 2).as("left")
+val optimizedLeft = testRelation.subquery('left).where(IsNotNull('a) && 'a 
<=> 2).as("left")
 
 val right = Project(Seq(Literal(2).as("two")), 
testRelation.subquery('right)).as("right")
 val condition = Some("left.a".attr === "right.two".attr)

http://git-wip-us.apache.org/repos/asf/spark/blob/5ad644a4/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
--
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index 3640f6a..5808be8 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -2306,4 +2306,22 @@ class DataFrameSuite extends QueryTest with 
SharedSQLContext {
 val df2 = spark.range(3).selectExpr("id")
 assert(df1.join(df2, Seq("id"), 

spark git commit: [SPARK-25368][SQL] Incorrect predicate pushdown returns wrong result

2018-09-09 Thread lixiao
Repository: spark
Updated Branches:
  refs/heads/branch-2.4 6b7ea78ae -> c1c1bda3c


[SPARK-25368][SQL] Incorrect predicate pushdown returns wrong result

## What changes were proposed in this pull request?
How to reproduce:
```scala
val df1 = spark.createDataFrame(Seq(
   (1, 1)
)).toDF("a", "b").withColumn("c", lit(null).cast("int"))
val df2 = df1.union(df1).withColumn("d", 
spark_partition_id).filter($"c".isNotNull)
df2.show

+---+---+----+---+
|  a|  b|   c|  d|
+---+---+----+---+
|  1|  1|null|  0|
|  1|  1|null|  1|
+---+---+----+---+
```
`filter($"c".isNotNull)` was transformed to `(null <=> c#10)` before 
https://github.com/apache/spark/pull/19201, but it is transformed to `(c#10 = 
null)` since https://github.com/apache/spark/pull/20155. This PR reverts it to 
`(null <=> c#10)` to fix this issue.

## How was this patch tested?

unit tests

Closes #22368 from wangyum/SPARK-25368.

Authored-by: Yuming Wang 
Signed-off-by: gatorsmile 
(cherry picked from commit 77c996403d5c761f0dfea64c5b1cb7480ba1d3ac)
Signed-off-by: gatorsmile 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c1c1bda3
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c1c1bda3
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c1c1bda3

Branch: refs/heads/branch-2.4
Commit: c1c1bda3cecd82a926526e5e5ee24d9909cb7e49
Parents: 6b7ea78
Author: Yuming Wang 
Authored: Sun Sep 9 09:07:31 2018 -0700
Committer: gatorsmile 
Committed: Sun Sep 9 09:07:42 2018 -0700

--
 .../sql/catalyst/plans/logical/LogicalPlan.scala   |  2 +-
 .../InferFiltersFromConstraintsSuite.scala |  2 +-
 .../org/apache/spark/sql/DataFrameSuite.scala  | 17 +
 3 files changed, 19 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/c1c1bda3/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
index 0e4456a..5f13662 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
@@ -159,7 +159,7 @@ abstract class UnaryNode extends LogicalPlan {
 var allConstraints = child.constraints.asInstanceOf[Set[Expression]]
 projectList.foreach {
   case a @ Alias(l: Literal, _) =>
-allConstraints += EqualTo(a.toAttribute, l)
+allConstraints += EqualNullSafe(a.toAttribute, l)
   case a @ Alias(e, _) =>
 // For every alias in `projectList`, replace the reference in 
constraints by its attribute.
 allConstraints ++= allConstraints.map(_ transform {

http://git-wip-us.apache.org/repos/asf/spark/blob/c1c1bda3/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromConstraintsSuite.scala
--
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromConstraintsSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromConstraintsSuite.scala
index e4671f0..a40ba2d 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromConstraintsSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromConstraintsSuite.scala
@@ -196,7 +196,7 @@ class InferFiltersFromConstraintsSuite extends PlanTest {
 
   test("constraints should be inferred from aliased literals") {
 val originalLeft = testRelation.subquery('left).as("left")
-val optimizedLeft = testRelation.subquery('left).where(IsNotNull('a) && 'a 
=== 2).as("left")
+val optimizedLeft = testRelation.subquery('left).where(IsNotNull('a) && 'a 
<=> 2).as("left")
 
 val right = Project(Seq(Literal(2).as("two")), 
testRelation.subquery('right)).as("right")
 val condition = Some("left.a".attr === "right.two".attr)

http://git-wip-us.apache.org/repos/asf/spark/blob/c1c1bda3/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
--
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index 45b17b3..435b887 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -2552,4 +2552,21 @@ class DataFrameSuite extends QueryTest with 
SharedSQLContext {
 }
   }
 
+  

spark git commit: [SPARK-25368][SQL] Incorrect predicate pushdown returns wrong result

2018-09-09 Thread lixiao
Repository: spark
Updated Branches:
  refs/heads/master 88a930dfa -> 77c996403


[SPARK-25368][SQL] Incorrect predicate pushdown returns wrong result

## What changes were proposed in this pull request?
How to reproduce:
```scala
val df1 = spark.createDataFrame(Seq(
   (1, 1)
)).toDF("a", "b").withColumn("c", lit(null).cast("int"))
val df2 = df1.union(df1).withColumn("d", 
spark_partition_id).filter($"c".isNotNull)
df2.show

+---+---+----+---+
|  a|  b|   c|  d|
+---+---+----+---+
|  1|  1|null|  0|
|  1|  1|null|  1|
+---+---+----+---+
```
`filter($"c".isNotNull)` was transformed to `(null <=> c#10)` before 
https://github.com/apache/spark/pull/19201, but it is transformed to `(c#10 = 
null)` since https://github.com/apache/spark/pull/20155. This PR reverts it to 
`(null <=> c#10)` to fix this issue.

## How was this patch tested?

unit tests

Closes #22368 from wangyum/SPARK-25368.

Authored-by: Yuming Wang 
Signed-off-by: gatorsmile 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/77c99640
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/77c99640
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/77c99640

Branch: refs/heads/master
Commit: 77c996403d5c761f0dfea64c5b1cb7480ba1d3ac
Parents: 88a930d
Author: Yuming Wang 
Authored: Sun Sep 9 09:07:31 2018 -0700
Committer: gatorsmile 
Committed: Sun Sep 9 09:07:31 2018 -0700

--
 .../sql/catalyst/plans/logical/LogicalPlan.scala   |  2 +-
 .../InferFiltersFromConstraintsSuite.scala |  2 +-
 .../org/apache/spark/sql/DataFrameSuite.scala  | 17 +
 3 files changed, 19 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/77c99640/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
index 0e4456a..5f13662 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
@@ -159,7 +159,7 @@ abstract class UnaryNode extends LogicalPlan {
 var allConstraints = child.constraints.asInstanceOf[Set[Expression]]
 projectList.foreach {
   case a @ Alias(l: Literal, _) =>
-allConstraints += EqualTo(a.toAttribute, l)
+allConstraints += EqualNullSafe(a.toAttribute, l)
   case a @ Alias(e, _) =>
 // For every alias in `projectList`, replace the reference in 
constraints by its attribute.
 allConstraints ++= allConstraints.map(_ transform {

http://git-wip-us.apache.org/repos/asf/spark/blob/77c99640/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromConstraintsSuite.scala
--
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromConstraintsSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromConstraintsSuite.scala
index e4671f0..a40ba2d 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromConstraintsSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromConstraintsSuite.scala
@@ -196,7 +196,7 @@ class InferFiltersFromConstraintsSuite extends PlanTest {
 
   test("constraints should be inferred from aliased literals") {
 val originalLeft = testRelation.subquery('left).as("left")
-val optimizedLeft = testRelation.subquery('left).where(IsNotNull('a) && 'a 
=== 2).as("left")
+val optimizedLeft = testRelation.subquery('left).where(IsNotNull('a) && 'a 
<=> 2).as("left")
 
 val right = Project(Seq(Literal(2).as("two")), 
testRelation.subquery('right)).as("right")
 val condition = Some("left.a".attr === "right.two".attr)

http://git-wip-us.apache.org/repos/asf/spark/blob/77c99640/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
--
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index 45b17b3..435b887 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -2552,4 +2552,21 @@ class DataFrameSuite extends QueryTest with 
SharedSQLContext {
 }
   }
 
+  test("SPARK-25368 Incorrect predicate pushdown returns wrong result") {
+def check(newCol: Column,