cloud-fan commented on a change in pull request #31595: URL: https://github.com/apache/spark/pull/31595#discussion_r581618859
########## File path: sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala ########## @@ -1013,118 +1014,120 @@ class SubquerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark } test("SPARK-23957 Remove redundant sort from subquery plan(in subquery)") { - withTempView("t1", "t2", "t3") { - Seq((1, 1), (2, 2)).toDF("c1", "c2").createOrReplaceTempView("t1") - Seq((1, 1), (2, 2)).toDF("c1", "c2").createOrReplaceTempView("t2") - Seq((1, 1, 1), (2, 2, 2)).toDF("c1", "c2", "c3").createOrReplaceTempView("t3") - - // Simple order by - val query1 = - """ - |SELECT c1 FROM t1 - |WHERE - |c1 IN (SELECT c1 FROM t2 ORDER BY c1) + withSQLConf(SQLConf.OPTIMIZER_EXCLUDED_RULES.key -> RemoveNoopUnion.ruleName) { + withTempView("t1", "t2", "t3") { + Seq((1, 1), (2, 2)).toDF("c1", "c2").createOrReplaceTempView("t1") + Seq((1, 1), (2, 2)).toDF("c1", "c2").createOrReplaceTempView("t2") + Seq((1, 1, 1), (2, 2, 2)).toDF("c1", "c2", "c3").createOrReplaceTempView("t3") + + // Simple order by + val query1 = + """ + |SELECT c1 FROM t1 + |WHERE + |c1 IN (SELECT c1 FROM t2 ORDER BY c1) """.stripMargin - assert(getNumSortsInQuery(query1) == 0) + assert(getNumSortsInQuery(query1) == 0) - // Nested order bys - val query2 = - """ - |SELECT c1 - |FROM t1 - |WHERE c1 IN (SELECT c1 - | FROM (SELECT * - | FROM t2 - | ORDER BY c2) - | ORDER BY c1) + // Nested order bys + val query2 = + """ + |SELECT c1 + |FROM t1 + |WHERE c1 IN (SELECT c1 + | FROM (SELECT * + | FROM t2 + | ORDER BY c2) + | ORDER BY c1) """.stripMargin - assert(getNumSortsInQuery(query2) == 0) + assert(getNumSortsInQuery(query2) == 0) - // nested IN - val query3 = - """ - |SELECT c1 - |FROM t1 - |WHERE c1 IN (SELECT c1 - | FROM t2 - | WHERE c1 IN (SELECT c1 - | FROM t3 - | WHERE c1 = 1 - | ORDER BY c3) - | ORDER BY c2) + // nested IN + val query3 = + """ + |SELECT c1 + |FROM t1 + |WHERE c1 IN (SELECT c1 + | FROM t2 + | WHERE c1 IN (SELECT c1 + | FROM t3 + | WHERE c1 = 1 + | ORDER BY c3) + | ORDER BY c2) """.stripMargin - assert(getNumSortsInQuery(query3) == 0) + assert(getNumSortsInQuery(query3) == 0) - // Complex subplan and multiple sorts - val query4 = - """ - |SELECT c1 - |FROM t1 - |WHERE c1 IN (SELECT c1 - | FROM (SELECT c1, c2, count(*) - | FROM t2 - | GROUP BY c1, c2 - | HAVING count(*) > 0 - | ORDER BY c2) - | ORDER BY c1) + // Complex subplan and multiple sorts + val query4 = + """ + |SELECT c1 + |FROM t1 + |WHERE c1 IN (SELECT c1 + | FROM (SELECT c1, c2, count(*) + | FROM t2 + | GROUP BY c1, c2 + | HAVING count(*) > 0 + | ORDER BY c2) + | ORDER BY c1) """.stripMargin - assert(getNumSortsInQuery(query4) == 0) + assert(getNumSortsInQuery(query4) == 0) - // Join in subplan - val query5 = - """ - |SELECT c1 FROM t1 - |WHERE - |c1 IN (SELECT t2.c1 FROM t2, t3 - | WHERE t2.c1 = t3.c1 - | ORDER BY t2.c1) + // Join in subplan + val query5 = + """ + |SELECT c1 FROM t1 + |WHERE + |c1 IN (SELECT t2.c1 FROM t2, t3 + | WHERE t2.c1 = t3.c1 + | ORDER BY t2.c1) """.stripMargin - assert(getNumSortsInQuery(query5) == 0) + assert(getNumSortsInQuery(query5) == 0) - val query6 = - """ - |SELECT c1 - |FROM t1 - |WHERE (c1, c2) IN (SELECT c1, max(c2) - | FROM (SELECT c1, c2, count(*) - | FROM t2 - | GROUP BY c1, c2 - | HAVING count(*) > 0 - | ORDER BY c2) - | GROUP BY c1 - | HAVING max(c2) > 0 - | ORDER BY c1) + val query6 = + """ + |SELECT c1 + |FROM t1 + |WHERE (c1, c2) IN (SELECT c1, max(c2) + | FROM (SELECT c1, c2, count(*) + | FROM t2 + | GROUP BY c1, c2 + | HAVING count(*) > 0 + | ORDER BY c2) + | GROUP BY c1 + | HAVING max(c2) > 0 + | ORDER BY c1) """.stripMargin - assert(getNumSortsInQuery(query6) == 0) + assert(getNumSortsInQuery(query6) == 0) - // Cases when sort is not removed from the plan - // Limit on top of sort - val query7 = + // Cases when sort is not removed from the plan + // Limit on top of sort + val query7 = """ - |SELECT c1 FROM t1 - |WHERE - |c1 IN (SELECT c1 FROM t2 ORDER BY c1 limit 1) + |SELECT c1 FROM t1 + |WHERE + |c1 IN (SELECT c1 FROM t2 ORDER BY c1 limit 1) """.stripMargin - assert(getNumSortsInQuery(query7) == 1) + assert(getNumSortsInQuery(query7) == 1) - // Sort below a set operations (intersect, union) - val query8 = - """ - |SELECT c1 FROM t1 - |WHERE - |c1 IN (( - | SELECT c1 FROM t2 - | ORDER BY c1 - | ) - | UNION - | ( - | SELECT c1 FROM t2 - | ORDER BY c1 - | )) + // Sort below a set operations (intersect, union) + val query8 = + """ + |SELECT c1 FROM t1 + |WHERE + |c1 IN (( + | SELECT c1 FROM t2 + | ORDER BY c1 + | ) + | UNION Review comment: If this is the only issue, we can probably add a filter in one side to avoid optimizing the Union. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org