This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 70a98eeb8e1 [SPARK-39444][SQL] Add OptimizeSubqueries into nonExcludableRules list 70a98eeb8e1 is described below commit 70a98eeb8e10bd6557b83ab040f6b7c689c1e9d7 Author: Yuming Wang <yumw...@ebay.com> AuthorDate: Wed Jun 15 09:46:24 2022 +0900 [SPARK-39444][SQL] Add OptimizeSubqueries into nonExcludableRules list ### What changes were proposed in this pull request? This PR adds the `OptimizeSubqueries` rule to the nonExcludableRules list. ### Why are the changes needed? Without this change, Spark throws an exception if the user runs `set spark.sql.optimizer.excludedRules=org.apache.spark.sql.catalyst.optimizer.Optimizer$OptimizeSubqueries` before running this query: ```sql WITH tmp AS ( SELECT id FROM range(2) INTERSECT SELECT id FROM range(4) ) SELECT id FROM range(5) WHERE id > (SELECT max(id) FROM tmp) ``` Exception: ``` logical intersect operator should have been replaced by semi-join in the optimizer java.lang.IllegalStateException: logical intersect operator should have been replaced by semi-join in the optimizer ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #36841 from wangyum/SPARK-39444. 
Authored-by: Yuming Wang <yumw...@ebay.com> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- .../spark/sql/catalyst/optimizer/Optimizer.scala | 3 ++- .../sql-tests/inputs/non-excludable-rule.sql | 9 +++++++++ .../sql-tests/results/non-excludable-rule.sql.out | 23 +++++++++++++++++++++- 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index a84959f0991..12e21faca9f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -270,7 +270,8 @@ abstract class Optimizer(catalogManager: CatalogManager) RewritePredicateSubquery.ruleName :: NormalizeFloatingNumbers.ruleName :: ReplaceUpdateFieldsExpression.ruleName :: - RewriteLateralSubquery.ruleName :: Nil + RewriteLateralSubquery.ruleName :: + OptimizeSubqueries.ruleName :: Nil /** * Apply finish-analysis rules for the entire plan including all subqueries. 
diff --git a/sql/core/src/test/resources/sql-tests/inputs/non-excludable-rule.sql b/sql/core/src/test/resources/sql-tests/inputs/non-excludable-rule.sql index b238d199cc1..e3799de5ff7 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/non-excludable-rule.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/non-excludable-rule.sql @@ -4,3 +4,12 @@ SELECT (SELECT min(id) FROM range(10)), (SELECT sum(id) FROM range(10)), (SELECT count(distinct id) FROM range(10)); + +-- SPARK-39444 +SET spark.sql.optimizer.excludedRules=org.apache.spark.sql.catalyst.optimizer.Optimizer$OptimizeSubqueries; +WITH tmp AS ( + SELECT id FROM range(2) + INTERSECT + SELECT id FROM range(4) +) +SELECT id FROM range(3) WHERE id > (SELECT max(id) FROM tmp); diff --git a/sql/core/src/test/resources/sql-tests/results/non-excludable-rule.sql.out b/sql/core/src/test/resources/sql-tests/results/non-excludable-rule.sql.out index c7fa2f04152..fa8b2bbec4e 100644 --- a/sql/core/src/test/resources/sql-tests/results/non-excludable-rule.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/non-excludable-rule.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 2 +-- Number of queries: 4 -- !query @@ -19,3 +19,24 @@ SELECT struct<scalarsubquery():bigint,scalarsubquery():bigint,scalarsubquery():bigint> -- !query output 0 45 10 + + +-- !query +SET spark.sql.optimizer.excludedRules=org.apache.spark.sql.catalyst.optimizer.Optimizer$OptimizeSubqueries +-- !query schema +struct<key:string,value:string> +-- !query output +spark.sql.optimizer.excludedRules org.apache.spark.sql.catalyst.optimizer.Optimizer$OptimizeSubqueries + + +-- !query +WITH tmp AS ( + SELECT id FROM range(2) + INTERSECT + SELECT id FROM range(4) +) +SELECT id FROM range(3) WHERE id > (SELECT max(id) FROM tmp) +-- !query schema +struct<id:bigint> +-- !query output +2 --------------------------------------------------------------------- To unsubscribe, e-mail: 
commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org