spark git commit: [SPARK-14001][SQL] support multi-children Union in SQLBuilder

lian Fri, 18 Mar 2016 22:47:20 -0700

Repository: spark
Updated Branches:
  refs/heads/master 7783b6f38 -> 0f1015ffd



[SPARK-14001][SQL] support multi-children Union in SQLBuilder

## What changes were proposed in this pull request?

The fix is simple: use the existing `CombineUnions` rule to combine adjacent 
Unions before building the SQL string.

## How was this patch tested?

By re-enabling the previously ignored "three-child union" test in `LogicalPlanToSQLSuite`.

Author: Wenchen Fan <wenc...@databricks.com>

Closes #11818 from cloud-fan/bug-fix.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0f1015ff
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0f1015ff
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0f1015ff

Branch: refs/heads/master
Commit: 0f1015ffdd40cd8647f6acdd5cdd717b883e4875
Parents: 7783b6f
Author: Wenchen Fan <wenc...@databricks.com>
Authored: Fri Mar 18 19:42:33 2016 +0800
Committer: Cheng Lian <l...@databricks.com>
Committed: Fri Mar 18 19:42:33 2016 +0800

----------------------------------------------------------------------
 .../scala/org/apache/spark/sql/hive/SQLBuilder.scala  | 14 ++++++++++----
 .../apache/spark/sql/hive/LogicalPlanToSQLSuite.scala |  7 +------
 2 files changed, 11 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/0f1015ff/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala
index 249a685..5175bd4 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala
@@ -24,9 +24,8 @@ import scala.util.control.NonFatal
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.{DataFrame, SQLContext}
 import org.apache.spark.sql.catalyst.TableIdentifier
-import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases
 import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.catalyst.optimizer.CollapseProject
+import org.apache.spark.sql.catalyst.optimizer.{CollapseProject, CombineUnions}
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.rules.{Rule, RuleExecutor}
 import org.apache.spark.sql.catalyst.util.quoteIdentifier
@@ -384,11 +383,18 @@ class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: 
SQLContext) extends Loggi
 
   object Canonicalizer extends RuleExecutor[LogicalPlan] {
     override protected def batches: Seq[Batch] = Seq(
-      Batch("Collapse Project", FixedPoint(100),
+      Batch("Prepare", FixedPoint(100),
         // The `WidenSetOperationTypes` analysis rule may introduce extra 
`Project`s over
         // `Aggregate`s to perform type casting.  This rule merges these 
`Project`s into
         // `Aggregate`s.
-        CollapseProject),
+        CollapseProject,
+        // Parser is unable to parse the following query:
+        // SELECT  `u_1`.`id`
+        // FROM (((SELECT  `t0`.`id` FROM `default`.`t0`)
+        // UNION ALL (SELECT  `t0`.`id` FROM `default`.`t0`))
+        // UNION ALL (SELECT  `t0`.`id` FROM `default`.`t0`)) AS u_1
+        // This rule combine adjacent Unions together so we can generate flat 
UNION ALL SQL string.
+        CombineUnions),
       Batch("Recover Scoping Info", Once,
         // A logical plan is allowed to have same-name outputs with different 
qualifiers(e.g. the
         // `Join` operator). However, this kind of plan can't be put under a 
sub query as we will

http://git-wip-us.apache.org/repos/asf/spark/blob/0f1015ff/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala
index f86eba6..f6b9072 100644
--- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala
@@ -141,12 +141,7 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with 
SQLTestUtils {
     checkHiveQl("SELECT * FROM t0 UNION SELECT * FROM t0")
   }
 
-  // Parser is unable to parse the following query:
-  // SELECT  `u_1`.`id`
-  // FROM (((SELECT  `t0`.`id` FROM `default`.`t0`)
-  // UNION ALL (SELECT  `t0`.`id` FROM `default`.`t0`))
-  // UNION ALL (SELECT  `t0`.`id` FROM `default`.`t0`)) AS u_1
-  ignore("three-child union") {
+  test("three-child union") {
     checkHiveQl(
       """
         |SELECT id FROM parquet_t0


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-14001][SQL] support multi-children Union in SQLBuilder

Reply via email to