Repository: spark
Updated Branches:
  refs/heads/master 9dd5399d7 -> 01e10c9fe


[SPARK-13236] SQL Generation for Set Operations

This PR implements SQL generation for the following three set operations:
- Union Distinct
- Intersect
- Except

liancheng Thanks!

Author: gatorsmile <gatorsm...@gmail.com>
Author: xiaoli <lixiao1...@gmail.com>
Author: Xiao Li <xiaoli@Xiaos-MacBook-Pro.local>

Closes #11195 from gatorsmile/setOpSQLGen.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/01e10c9f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/01e10c9f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/01e10c9f

Branch: refs/heads/master
Commit: 01e10c9fef51c69ecf3060929c62d113cfc672b9
Parents: 9dd5399
Author: gatorsmile <gatorsm...@gmail.com>
Authored: Tue Feb 23 15:16:59 2016 +0800
Committer: Cheng Lian <l...@databricks.com>
Committed: Tue Feb 23 15:16:59 2016 +0800

----------------------------------------------------------------------
 .../org/apache/spark/sql/hive/SQLBuilder.scala  | 14 ++++++++++--
 .../spark/sql/hive/LogicalPlanToSQLSuite.scala  | 24 +++++++++++++++++---
 2 files changed, 33 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/01e10c9f/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala
index 32f17f4..e66cc12 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala
@@ -38,7 +38,7 @@ import 
org.apache.spark.sql.execution.datasources.LogicalRelation
  * supported by this builder (yet).
  */
 class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends 
Logging {
-  require(logicalPlan.resolved, "SQLBuilder only supports resloved logical 
query plans")
+  require(logicalPlan.resolved, "SQLBuilder only supports resolved logical 
query plans")
 
   def this(df: DataFrame) = this(df.queryExecution.analyzed, df.sqlContext)
 
@@ -98,10 +98,20 @@ class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: 
SQLContext) extends Loggi
       }
       build(toSQL(p.child), whereOrHaving, p.condition.sql)
 
+    case p @ Distinct(u: Union) if u.children.length > 1 =>
+      val childrenSql = u.children.map(c => s"(${toSQL(c)})")
+      childrenSql.mkString(" UNION DISTINCT ")
+
     case p: Union if p.children.length > 1 =>
-      val childrenSql = p.children.map(toSQL(_))
+      val childrenSql = p.children.map(c => s"(${toSQL(c)})")
       childrenSql.mkString(" UNION ALL ")
 
+    case p: Intersect =>
+      build("(" + toSQL(p.left), ") INTERSECT (", toSQL(p.right) + ")")
+
+    case p: Except =>
+      build("(" + toSQL(p.left), ") EXCEPT (", toSQL(p.right) + ")")
+
     case p: SubqueryAlias =>
       p.child match {
         // Persisted data source relation

http://git-wip-us.apache.org/repos/asf/spark/blob/01e10c9f/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala
index b162adf..28559ea 100644
--- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala
@@ -114,6 +114,27 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with 
SQLTestUtils {
     checkHiveQl("SELECT id FROM t0 UNION ALL SELECT CAST(id AS INT) AS id FROM 
t0")
   }
 
+  test("union distinct") {
+    checkHiveQl("SELECT * FROM t0 UNION SELECT * FROM t0")
+  }
+
+  // Parser is unable to parse the following query:
+  // SELECT  `u_1`.`id`
+  // FROM (((SELECT  `t0`.`id` FROM `default`.`t0`)
+  // UNION ALL (SELECT  `t0`.`id` FROM `default`.`t0`))
+  // UNION ALL (SELECT  `t0`.`id` FROM `default`.`t0`)) AS u_1
+  test("three-child union") {
+    checkHiveQl("SELECT id FROM t0 UNION ALL SELECT id FROM t0 UNION ALL 
SELECT id FROM t0")
+  }
+
+  test("intersect") {
+    checkHiveQl("SELECT * FROM t0 INTERSECT SELECT * FROM t0")
+  }
+
+  test("except") {
+    checkHiveQl("SELECT * FROM t0 EXCEPT SELECT * FROM t0")
+  }
+
   test("self join") {
     checkHiveQl("SELECT x.key FROM t1 x JOIN t1 y ON x.key = y.key")
   }
@@ -122,9 +143,6 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with 
SQLTestUtils {
     checkHiveQl("SELECT x.key, COUNT(*) FROM t1 x JOIN t1 y ON x.key = y.key 
group by x.key")
   }
 
-  test("three-child union") {
-    checkHiveQl("SELECT id FROM t0 UNION ALL SELECT id FROM t0 UNION ALL 
SELECT id FROM t0")
-  }
 
   test("case") {
     checkHiveQl("SELECT CASE WHEN id % 2 > 0 THEN 0 WHEN id % 2 = 0 THEN 1 END 
FROM t0")


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to