Repository: spark
Updated Branches:
  refs/heads/branch-2.0 c9c45d97b -> e30fe1c6a


[SPARK-19766][SQL][BRANCH-2.0] Constant alias columns in INNER JOIN should not 
be folded by FoldablePropagation rule

This PR fix for branch-2.0

Refer #17099

gatorsmile

Author: Stan Zhai <zhaishi...@haizhi.com>

Closes #17131 from stanzhai/fix-inner-join-2.0.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e30fe1c6
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e30fe1c6
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e30fe1c6

Branch: refs/heads/branch-2.0
Commit: e30fe1c6aa91f74cf3bed74f3be3eb69a6eaf1b4
Parents: c9c45d9
Author: Stan Zhai <zhaishi...@haizhi.com>
Authored: Thu Mar 2 04:24:43 2017 -0800
Committer: Herman van Hovell <hvanhov...@databricks.com>
Committed: Thu Mar 2 04:24:43 2017 -0800

----------------------------------------------------------------------
 .../sql/catalyst/optimizer/Optimizer.scala      |  2 +-
 .../optimizer/FoldablePropagationSuite.scala    | 14 ++++
 .../resources/sql-tests/inputs/inner-join.sql   | 17 +++++
 .../sql-tests/results/inner-join.sql.out        | 67 ++++++++++++++++++++
 4 files changed, 99 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/e30fe1c6/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index 3a71463..f3cf1f7 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -669,7 +669,7 @@ object FoldablePropagation extends Rule[LogicalPlan] {
         // join is not always picked from its children, but can also be null.
         // TODO(cloud-fan): It seems more reasonable to use new attributes as 
the output attributes
         // of outer join.
-        case j @ Join(_, _, Inner, _) =>
+        case j @ Join(_, _, Inner, _) if !stop =>
           j.transformExpressions(replaceFoldable)
 
         // We can fold the projections an expand holds. However expand changes 
the output columns

http://git-wip-us.apache.org/repos/asf/spark/blob/e30fe1c6/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala
index bbef212..9d18827 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala
@@ -131,6 +131,20 @@ class FoldablePropagationSuite extends PlanTest {
     comparePlans(optimized, correctAnswer)
   }
 
+  test("Propagate in inner join") {
+    val ta = testRelation.select('a, Literal(1).as('tag))
+      .union(testRelation.select('a, Literal(2).as('tag)))
+      .subquery('ta)
+    val tb = testRelation.select('a, Literal(1).as('tag))
+      .union(testRelation.select('a, Literal(2).as('tag)))
+      .subquery('tb)
+    val query = ta.join(tb, Inner,
+      Some("ta.a".attr === "tb.a".attr && "ta.tag".attr === "tb.tag".attr))
+    val optimized = Optimize.execute(query.analyze)
+    val correctAnswer = query.analyze
+    comparePlans(optimized, correctAnswer)
+  }
+
   test("Propagate in expand") {
     val c1 = Literal(1).as('a)
     val c2 = Literal(2).as('b)

http://git-wip-us.apache.org/repos/asf/spark/blob/e30fe1c6/sql/core/src/test/resources/sql-tests/inputs/inner-join.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/inputs/inner-join.sql 
b/sql/core/src/test/resources/sql-tests/inputs/inner-join.sql
new file mode 100644
index 0000000..38739cb
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/inner-join.sql
@@ -0,0 +1,17 @@
+CREATE TEMPORARY VIEW t1 AS SELECT * FROM VALUES (1) AS GROUPING(a);
+CREATE TEMPORARY VIEW t2 AS SELECT * FROM VALUES (1) AS GROUPING(a);
+CREATE TEMPORARY VIEW t3 AS SELECT * FROM VALUES (1), (1) AS GROUPING(a);
+CREATE TEMPORARY VIEW t4 AS SELECT * FROM VALUES (1), (1) AS GROUPING(a);
+
+CREATE TEMPORARY VIEW ta AS
+SELECT a, 'a' AS tag FROM t1
+UNION ALL
+SELECT a, 'b' AS tag FROM t2;
+
+CREATE TEMPORARY VIEW tb AS
+SELECT a, 'a' AS tag FROM t3
+UNION ALL
+SELECT a, 'b' AS tag FROM t4;
+
+-- SPARK-19766 Constant alias columns in INNER JOIN should not be folded by 
FoldablePropagation rule
+SELECT tb.* FROM ta INNER JOIN tb ON ta.a = tb.a AND ta.tag = tb.tag;

http://git-wip-us.apache.org/repos/asf/spark/blob/e30fe1c6/sql/core/src/test/resources/sql-tests/results/inner-join.sql.out
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/results/inner-join.sql.out 
b/sql/core/src/test/resources/sql-tests/results/inner-join.sql.out
new file mode 100644
index 0000000..8d56ebe
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/inner-join.sql.out
@@ -0,0 +1,67 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 7
+
+
+-- !query 0
+CREATE TEMPORARY VIEW t1 AS SELECT * FROM VALUES (1) AS GROUPING(a)
+-- !query 0 schema
+struct<>
+-- !query 0 output
+
+
+
+-- !query 1
+CREATE TEMPORARY VIEW t2 AS SELECT * FROM VALUES (1) AS GROUPING(a)
+-- !query 1 schema
+struct<>
+-- !query 1 output
+
+
+
+-- !query 2
+CREATE TEMPORARY VIEW t3 AS SELECT * FROM VALUES (1), (1) AS GROUPING(a)
+-- !query 2 schema
+struct<>
+-- !query 2 output
+
+
+
+-- !query 3
+CREATE TEMPORARY VIEW t4 AS SELECT * FROM VALUES (1), (1) AS GROUPING(a)
+-- !query 3 schema
+struct<>
+-- !query 3 output
+
+
+
+-- !query 4
+CREATE TEMPORARY VIEW ta AS
+SELECT a, 'a' AS tag FROM t1
+UNION ALL
+SELECT a, 'b' AS tag FROM t2
+-- !query 4 schema
+struct<>
+-- !query 4 output
+
+
+
+-- !query 5
+CREATE TEMPORARY VIEW tb AS
+SELECT a, 'a' AS tag FROM t3
+UNION ALL
+SELECT a, 'b' AS tag FROM t4
+-- !query 5 schema
+struct<>
+-- !query 5 output
+
+
+
+-- !query 6
+SELECT tb.* FROM ta INNER JOIN tb ON ta.a = tb.a AND ta.tag = tb.tag
+-- !query 6 schema
+struct<a:int,tag:string>
+-- !query 6 output
+1      a
+1      a
+1      b
+1      b


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to