alamb commented on code in PR #22525:
URL: https://github.com/apache/datafusion/pull/22525#discussion_r3306834780
##########
datafusion/core/tests/physical_optimizer/limit_pushdown.rs:
##########
@@ -714,3 +714,69 @@ fn no_limit_preserves_plan_identity() -> Result<()> {
Ok(())
}
+
+#[test]
+fn outer_offset_does_not_leak_through_sort_into_inner_limit() -> Result<()> {
+ // Regression test for https://github.com/apache/datafusion/issues/22489
+ //
+ // When an outer OFFSET is separated from an inner LIMIT by a SortExec
+ // with different sort keys, the outer skip must not reduce the inner
+ // fetch. Before the fix, combine_limit merged them, producing
+ // GlobalLimitExec(skip=1, fetch=7) instead of preserving the inner
+ // LIMIT 8.
+ //
+ // Plan structure:
+ // GlobalLimitExec: skip=1, fetch=None (outer OFFSET 1)
+ // SortExec: [c1 DESC] (outer sort — different key)
+ // GlobalLimitExec: skip=0, fetch=8 (inner LIMIT 8)
+ // SortExec: [c2 ASC] (inner sort — different key)
+ // EmptyExec
+ let schema = create_schema();
+ let empty = empty_exec(Arc::clone(&schema));
+
+ let inner_ordering: LexOrdering = [PhysicalSortExpr {
+ expr: col("c2", &schema)?,
+ options: SortOptions::default(),
+ }]
+ .into();
+ let inner_sort = sort_exec(inner_ordering, empty);
+ let inner_limit = global_limit_exec(inner_sort, 0, Some(8));
+
+ let outer_ordering: LexOrdering = [PhysicalSortExpr {
+ expr: col("c1", &schema)?,
+ options: SortOptions {
+ descending: true,
+ nulls_first: false,
Review Comment:
I think we should also add a test (or confirm one already exists) where the
two sorts **DO** have the same sort and ensure the limit is still pushed
##########
datafusion/core/tests/physical_optimizer/limit_pushdown.rs:
##########
@@ -714,3 +714,69 @@ fn no_limit_preserves_plan_identity() -> Result<()> {
Ok(())
}
+
+#[test]
+fn outer_offset_does_not_leak_through_sort_into_inner_limit() -> Result<()> {
+ // Regression test for https://github.com/apache/datafusion/issues/22489
+ //
+ // When an outer OFFSET is separated from an inner LIMIT by a SortExec
+ // with different sort keys, the outer skip must not reduce the inner
+ // fetch. Before the fix, combine_limit merged them, producing
+ // GlobalLimitExec(skip=1, fetch=7) instead of preserving the inner
+ // LIMIT 8.
+ //
+ // Plan structure:
+ // GlobalLimitExec: skip=1, fetch=None (outer OFFSET 1)
+ // SortExec: [c1 DESC] (outer sort — different key)
+ // GlobalLimitExec: skip=0, fetch=8 (inner LIMIT 8)
+ // SortExec: [c2 ASC] (inner sort — different key)
+ // EmptyExec
+ let schema = create_schema();
+ let empty = empty_exec(Arc::clone(&schema));
+
+ let inner_ordering: LexOrdering = [PhysicalSortExpr {
+ expr: col("c2", &schema)?,
+ options: SortOptions::default(),
+ }]
+ .into();
+ let inner_sort = sort_exec(inner_ordering, empty);
+ let inner_limit = global_limit_exec(inner_sort, 0, Some(8));
+
+ let outer_ordering: LexOrdering = [PhysicalSortExpr {
+ expr: col("c1", &schema)?,
+ options: SortOptions {
+ descending: true,
+ nulls_first: false,
Review Comment:
maybe worth calling out that this is a different sort order
##########
datafusion/physical-optimizer/src/limit_pushdown.rs:
##########
@@ -375,6 +375,14 @@ pub(crate) fn pushdown_limits(
(new_node, global_state) = pushdown_limit_helper(new_node.data,
global_state)?;
}
+ // Once a limit has been materialized above the current node, child
+ // subtrees should not inherit its `skip`. Keep `fetch`, but clear
+ // `skip` before recursing so child-local limits are not merged with
+ // an `OFFSET` that has already been applied.
Review Comment:
I am surprised this doesn't need to check for the actual sort keys too 🤔
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]