Repository: spark Updated Branches: refs/heads/master 9dd5399d7 -> 01e10c9fe
[SPARK-13236] SQL Generation for Set Operations This PR is to implement SQL generation for the following three set operations: - Union Distinct - Intersect - Except liancheng Thanks! Author: gatorsmile <gatorsm...@gmail.com> Author: xiaoli <lixiao1...@gmail.com> Author: Xiao Li <xiaoli@Xiaos-MacBook-Pro.local> Closes #11195 from gatorsmile/setOpSQLGen. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/01e10c9f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/01e10c9f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/01e10c9f Branch: refs/heads/master Commit: 01e10c9fef51c69ecf3060929c62d113cfc672b9 Parents: 9dd5399 Author: gatorsmile <gatorsm...@gmail.com> Authored: Tue Feb 23 15:16:59 2016 +0800 Committer: Cheng Lian <l...@databricks.com> Committed: Tue Feb 23 15:16:59 2016 +0800 ---------------------------------------------------------------------- .../org/apache/spark/sql/hive/SQLBuilder.scala | 14 ++++++++++-- .../spark/sql/hive/LogicalPlanToSQLSuite.scala | 24 +++++++++++++++++--- 2 files changed, 33 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/01e10c9f/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala index 32f17f4..e66cc12 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala @@ -38,7 +38,7 @@ import org.apache.spark.sql.execution.datasources.LogicalRelation * supported by this builder (yet). */ class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends Logging { - require(logicalPlan.resolved, "SQLBuilder only supports resloved logical query plans") + require(logicalPlan.resolved, "SQLBuilder only supports resolved logical query plans") def this(df: DataFrame) = this(df.queryExecution.analyzed, df.sqlContext) @@ -98,10 +98,20 @@ class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends Loggi } build(toSQL(p.child), whereOrHaving, p.condition.sql) + case p @ Distinct(u: Union) if u.children.length > 1 => + val childrenSql = u.children.map(c => s"(${toSQL(c)})") + childrenSql.mkString(" UNION DISTINCT ") + case p: Union if p.children.length > 1 => - val childrenSql = p.children.map(toSQL(_)) + val childrenSql = p.children.map(c => s"(${toSQL(c)})") childrenSql.mkString(" UNION ALL ") + case p: Intersect => + build("(" + toSQL(p.left), ") INTERSECT (", toSQL(p.right) + ")") + + case p: Except => + build("(" + toSQL(p.left), ") EXCEPT (", toSQL(p.right) + ")") + case p: SubqueryAlias => p.child match { // Persisted data source relation http://git-wip-us.apache.org/repos/asf/spark/blob/01e10c9f/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala index b162adf..28559ea 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala @@ -114,6 +114,27 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with SQLTestUtils { checkHiveQl("SELECT id FROM t0 UNION ALL SELECT CAST(id AS INT) AS id FROM t0") } + test("union distinct") { + checkHiveQl("SELECT * FROM t0 UNION SELECT * FROM t0") + } + + // Parser is unable to parse the following query: + // SELECT `u_1`.`id` + // FROM (((SELECT `t0`.`id` FROM `default`.`t0`) + // UNION ALL (SELECT `t0`.`id` FROM `default`.`t0`)) + // UNION ALL (SELECT `t0`.`id` FROM `default`.`t0`)) AS u_1 + test("three-child union") { + checkHiveQl("SELECT id FROM t0 UNION ALL SELECT id FROM t0 UNION ALL SELECT id FROM t0") + } + + test("intersect") { + checkHiveQl("SELECT * FROM t0 INTERSECT SELECT * FROM t0") + } + + test("except") { + checkHiveQl("SELECT * FROM t0 EXCEPT SELECT * FROM t0") + } + test("self join") { checkHiveQl("SELECT x.key FROM t1 x JOIN t1 y ON x.key = y.key") } @@ -122,9 +143,6 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with SQLTestUtils { checkHiveQl("SELECT x.key, COUNT(*) FROM t1 x JOIN t1 y ON x.key = y.key group by x.key") } - test("three-child union") { - checkHiveQl("SELECT id FROM t0 UNION ALL SELECT id FROM t0 UNION ALL SELECT id FROM t0") - } test("case") { checkHiveQl("SELECT CASE WHEN id % 2 > 0 THEN 0 WHEN id % 2 = 0 THEN 1 END FROM t0") --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org