Repository: spark Updated Branches: refs/heads/master c0e333dbe -> 5596ce83c
[MINOR][SQL] Additional test case for CheckCartesianProducts rule ## What changes were proposed in this pull request? While discovering optimization rules and their test coverage, I did not find any tests for `CheckCartesianProducts` in the Catalyst folder. So, I decided to create a new test suite. Once I finished, I found a test in `JoinSuite` for this functionality so feel free to discard this change if it does not make much sense. The proposed test suite covers a few additional use cases. Author: aokolnychyi <anton.okolnyc...@sap.com> Closes #18909 from aokolnychyi/check-cartesian-join-tests. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5596ce83 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5596ce83 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5596ce83 Branch: refs/heads/master Commit: 5596ce83c4d1971510dacfee4c8b0cf438475135 Parents: c0e333d Author: aokolnychyi <anton.okolnyc...@sap.com> Authored: Sun Aug 13 21:33:16 2017 -0700 Committer: gatorsmile <gatorsm...@gmail.com> Committed: Sun Aug 13 21:33:16 2017 -0700 ---------------------------------------------------------------------- .../optimizer/CheckCartesianProductsSuite.scala | 91 ++++++++++++++++++++ .../resources/sql-tests/inputs/cross-join.sql | 3 +- .../sql-tests/results/cross-join.sql.out | 12 ++- .../scala/org/apache/spark/sql/JoinSuite.scala | 32 +++++++ 4 files changed, 136 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/5596ce83/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CheckCartesianProductsSuite.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CheckCartesianProductsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CheckCartesianProductsSuite.scala new file mode 100644 index 0000000..21220b3 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CheckCartesianProductsSuite.scala @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.optimizer + +import org.scalatest.Matchers._ + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.dsl.plans._ +import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.catalyst.plans._ +import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} +import org.apache.spark.sql.catalyst.rules.RuleExecutor +import org.apache.spark.sql.internal.SQLConf.CROSS_JOINS_ENABLED + +class CheckCartesianProductsSuite extends PlanTest { + + object Optimize extends RuleExecutor[LogicalPlan] { + val batches = Batch("Check Cartesian Products", Once, CheckCartesianProducts) :: Nil + } + + val testRelation1 = LocalRelation('a.int, 'b.int) + val testRelation2 = LocalRelation('c.int, 'd.int) + + val joinTypesWithRequiredCondition = Seq(Inner, LeftOuter, RightOuter, FullOuter) + val joinTypesWithoutRequiredCondition = Seq(LeftSemi, LeftAnti, ExistenceJoin('exists)) + + test("CheckCartesianProducts doesn't throw an exception if cross joins are enabled)") { + withSQLConf(CROSS_JOINS_ENABLED.key -> "true") { + noException should be thrownBy { + for (joinType <- joinTypesWithRequiredCondition ++ joinTypesWithoutRequiredCondition) { + performCartesianProductCheck(joinType) + } + } + } + } + + test("CheckCartesianProducts throws an exception for join types that require a join condition") { + withSQLConf(CROSS_JOINS_ENABLED.key -> "false") { + for (joinType <- joinTypesWithRequiredCondition) { + val thrownException = the [AnalysisException] thrownBy { + performCartesianProductCheck(joinType) + } + assert(thrownException.message.contains("Detected cartesian product")) + } + } + } + + test("CheckCartesianProducts doesn't throw an exception if a join condition is present") { + withSQLConf(CROSS_JOINS_ENABLED.key -> "false") { + for (joinType <- joinTypesWithRequiredCondition) { + noException should be thrownBy { + performCartesianProductCheck(joinType, Some('a === 'd)) + } + } + } + } + + test("CheckCartesianProducts doesn't throw an exception if join types don't require conditions") { + withSQLConf(CROSS_JOINS_ENABLED.key -> "false") { + for (joinType <- joinTypesWithoutRequiredCondition) { + noException should be thrownBy { + performCartesianProductCheck(joinType) + } + } + } + } + + private def performCartesianProductCheck( + joinType: JoinType, + condition: Option[Expression] = None): Unit = { + val analyzedPlan = testRelation1.join(testRelation2, joinType, condition).analyze + val optimizedPlan = Optimize.execute(analyzedPlan) + comparePlans(analyzedPlan, optimizedPlan) + } +} http://git-wip-us.apache.org/repos/asf/spark/blob/5596ce83/sql/core/src/test/resources/sql-tests/inputs/cross-join.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/sql-tests/inputs/cross-join.sql b/sql/core/src/test/resources/sql-tests/inputs/cross-join.sql index aa73124..b64197e 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/cross-join.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/cross-join.sql @@ -32,4 +32,5 @@ create temporary view D(d, vd) as select * from nt1; -- Allowed since cross join with C is explicit select * from ((A join B on (a = b)) cross join C) join D on (a = d); - +-- Cross joins with non-equal predicates +SELECT * FROM nt1 CROSS JOIN nt2 ON (nt1.k > nt2.k); http://git-wip-us.apache.org/repos/asf/spark/blob/5596ce83/sql/core/src/test/resources/sql-tests/results/cross-join.sql.out ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/sql-tests/results/cross-join.sql.out b/sql/core/src/test/resources/sql-tests/results/cross-join.sql.out index 562e174..e75cc44 100644 --- a/sql/core/src/test/resources/sql-tests/results/cross-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cross-join.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 12 +-- Number of queries: 13 -- !query 0 @@ -127,3 +127,13 @@ three 3 three 3 two 2 three 3 two 2 two 2 one 1 two 2 two 2 two 2 three 3 two 2 two 2 two 2 two 2 two 2 + +-- !query 12 +SELECT * FROM nt1 CROSS JOIN nt2 ON (nt1.k > nt2.k) +-- !query 12 schema +struct<k:string,v1:int,k:string,v2:int> +-- !query 12 output +three 3 one 1 +three 3 one 5 +two 2 one 1 +two 2 one 5 http://git-wip-us.apache.org/repos/asf/spark/blob/5596ce83/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala index 0008d50..3f0c864 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala @@ -216,6 +216,9 @@ class JoinSuite extends QueryTest with SharedSQLContext { Row(1, null, 2, 2) :: Row(2, 2, 1, null) :: Row(2, 2, 2, 2) :: Nil) + checkAnswer( + testData3.as("x").join(testData3.as("y"), $"x.a" > $"y.a"), + Row(2, 2, 1, null) :: Nil) } withSQLConf(SQLConf.CROSS_JOINS_ENABLED.key -> "false") { val e = intercept[Exception] { @@ -604,6 +607,35 @@ class JoinSuite extends QueryTest with SharedSQLContext { } cartesianQueries.foreach(checkCartesianDetection) + + // Check that left_semi, left_anti, existence joins without conditions do not throw + // an exception if cross joins are disabled + withSQLConf(SQLConf.CROSS_JOINS_ENABLED.key -> "false") { + checkAnswer( + sql("SELECT * FROM testData3 LEFT SEMI JOIN testData2"), + Row(1, null) :: Row (2, 2) :: Nil) + checkAnswer( + sql("SELECT * FROM testData3 LEFT ANTI JOIN testData2"), + Nil) + checkAnswer( + sql( + """ + |SELECT a FROM testData3 + |WHERE + | EXISTS (SELECT * FROM testData) + |OR + | EXISTS (SELECT * FROM testData2)""".stripMargin), + Row(1) :: Row(2) :: Nil) + checkAnswer( + sql( + """ + |SELECT key FROM testData + |WHERE + | key IN (SELECT a FROM testData2) + |OR + | key IN (SELECT a FROM testData3)""".stripMargin), + Row(1) :: Row(2) :: Row(3) :: Nil) + } } test("test SortMergeJoin (without spill)") { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org