beliefer commented on code in PR #44009:
URL: https://github.com/apache/spark/pull/44009#discussion_r1404844214


##########
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala:
##########
@@ -352,4 +352,21 @@ class LimitPushdownSuite extends PlanTest {
       comparePlans(Optimize.execute(originalQuery2), originalQuery2)
     }
   }
+
+  test("SPARK-46097: Push down limit 1 though Union and Aggregate") {
+    val unionQuery = Union(
+      Union(
+        testRelation.groupBy($"a", $"b")($"a", $"b"),
+        testRelation2.groupBy($"d", $"e")($"d", $"e"),
+      ),
+      testRelation2.groupBy($"e", $"f")($"e", $"f")).limit(1)
+
+    val correctAnswer = Union(
+      Union(
+        LocalLimit(1, testRelation).select($"a", $"b"),
+        LocalLimit(1, testRelation2).select($"d", $"e")).limit(1),
+      LocalLimit(1, testRelation2).select($"e", $"f")).limit(1)
+
+    comparePlans(Optimize.execute(unionQuery.analyze), correctAnswer.analyze)

Review Comment:
   This test case only verifies the logical plan. Could you add a test case
that compares the output data as well?



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala:
##########
@@ -771,6 +771,17 @@ object LimitPushDown extends Rule[LogicalPlan] {
       Limit(le, Project(a.aggregateExpressions, LocalLimit(le, a.child)))
     case Limit(le @ IntegerLiteral(1), p @ Project(_, a: Aggregate)) if 
a.groupOnly =>
       Limit(le, p.copy(child = Project(a.aggregateExpressions, LocalLimit(le, 
a.child))))
+    // Push down limit 1 though Union and Aggregate
+    case Limit(le @ IntegerLiteral(1), u: Union) =>
+      val newUnionChildren = u.children.map {
+        case a: Aggregate if a.groupOnly =>

Review Comment:
   I suspect this will change the output:
   `limit 1 after shuffle` is not the same as `limit 1 before shuffle`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to