Hi David,
Please find attached patch with addressed issues mentioned before.
Things resolved:
1. Correct position of window function from where order by push down can
happen
is determined, this fixes issue mentioned in case #1.
While writing test cases, I found that optimization do not happen for
case #1
(which is prime candidate for such operation) like
EXPLAIN (COSTS OFF)
SELECT empno,
depname,
min(salary) OVER (PARTITION BY depname ORDER BY empno) depminsalary,
sum(salary) OVER (PARTITION BY depname) depsalary
FROM empsalary
ORDER BY depname, empno, enroll_date
2. Point #2 as in above discussions
a) looks like the best plan to me. What's the point of pushing the
sort below the WindowAgg in this case? The point of this optimisation
is to reduce the number of sorts not to push them as deep into the
plan as possible. We should only be pushing them down when it can
reduce the number of sorts. There's no reduction in the number of
sorts in the above plan.
Works as mentioned.
All test cases (newly added and existing ones) are green.
--
Regards,
Ankit Kumar Pandey
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 000d757bdd..fd3fcd43a0 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -4423,6 +4423,12 @@ create_one_window_path(PlannerInfo *root,
PathTarget *window_target;
ListCell *l;
List *topqual = NIL;
+ bool has_runcondition = false;
+
+ List *window_pathkeys;
+ int index = 0;
+ int i;
+ bool enable_order_by_pushdown = false;
/*
* Since each window clause could require a different sort order, we stack
@@ -4441,10 +4447,45 @@ create_one_window_path(PlannerInfo *root,
*/
window_target = input_target;
+ enable_order_by_pushdown = (root->parse->distinctClause == NIL &&
+ root->parse->sortClause != NIL &&
+ root->parse->limitCount == NULL &&
+ !contain_window_function((Node *) get_sortgrouplist_exprs(root->parse->sortClause,
+ root->processed_tlist)));
+
+
+ i=0;
+ if (enable_order_by_pushdown)
+ {
+ /*
+ * Search for the window function whose path keys are
+ * subset of orderby_path keys, this allows us to perform
+ * order by pushdown from this position of window function onwards
+ */
+ foreach(l, activeWindows)
+ {
+ List *orderby_pathkeys;
+ WindowClause *wc = lfirst_node(WindowClause, l);
+ orderby_pathkeys = make_pathkeys_for_sortclauses(root,
+ root->parse->sortClause,
+ root->processed_tlist);
+ window_pathkeys = make_pathkeys_for_window(root,
+ wc,
+ root->processed_tlist);
+ if (pathkeys_contained_in(window_pathkeys, orderby_pathkeys))
+ {
+
+ index = i;
+ break;
+ }
+ i++;
+ }
+ }
+
+ i=0;
foreach(l, activeWindows)
{
WindowClause *wc = lfirst_node(WindowClause, l);
- List *window_pathkeys;
int presorted_keys;
bool is_sorted;
bool topwindow;
@@ -4457,6 +4498,38 @@ create_one_window_path(PlannerInfo *root,
path->pathkeys,
&presorted_keys);
+ has_runcondition |= (wc->runCondition != NIL);
+
+ /*
+ * If not sorted already and the query has an ORDER BY clause, then we
+ * try to push the entire ORDER BY below the final WindowAgg. We
+ * don't do this when the query has a DISTINCT clause as the planner
+ * might opt to do a Hash Aggregate and spoil our efforts here. We
+ * also can't do this when the ORDER BY contains any WindowFuncs as we
+ * obviously can't sort something that's yet to be evaluated. We also
+ * don't bother with this when there is a WindowClauses with a
+ * runCondition as those can reduce the number of rows to sort in the
+ * ORDER BY and we don't want add complexity to the WindowAgg sort if
+ * the final ORDER BY sort is going to have far fewer rows to sort.
+ * For the same reason, don't bother if the query has a LIMIT clause.
+ */
+ if (enable_order_by_pushdown && !is_sorted && !has_runcondition && (i == index))
+ {
+ List *orderby_pathkeys;
+
+ orderby_pathkeys = make_pathkeys_for_sortclauses(root,
+ root->parse->sortClause,
+ root->processed_tlist);
+
+ if (pathkeys_contained_in(window_pathkeys, orderby_pathkeys))
+ window_pathkeys = orderby_pathkeys;
+
+ is_sorted = pathkeys_count_contained_in(window_pathkeys,
+ path->pathkeys,
+ &presorted_keys);
+ }
+ i++;
+
/* Sort if necessary */
if (!is_sorted)
{
diff --git a/src/test/regress/expected/window.out b/src/test/regress/expected/window.out
index 90e89fb5b6..08663fd914 100644
--- a/src/test/regress/expected/window.out
+++ b/src/test/regress/expected/window.out
@@ -3982,7 +3982,194 @@ ORDER BY depname, empno;
-> Seq Scan on empsalary
(11 rows)
+-- Do not perform additional sort if column is presorted
+CREATE INDEX depname_idx ON empsalary(depname);
+SET enable_seqscan=0;
+EXPLAIN (COSTS OFF)
+SELECT empno,
+ min(salary) OVER (PARTITION BY depname) depminsalary
+FROM empsalary
+ORDER BY depname, empno;
+ QUERY PLAN
+-------------------------------------------------------
+ Incremental Sort
+ Sort Key: depname, empno
+ Presorted Key: depname
+ -> WindowAgg
+ -> Index Scan using depname_idx on empsalary
+(5 rows)
+
+-- Do perform sort pushdown if columns are only partially sorted
+EXPLAIN (COSTS OFF)
+SELECT empno,
+ min(salary) OVER (PARTITION BY depname, empno) depminsalary
+FROM empsalary
+ORDER BY depname, empno, enroll_date;
+ QUERY PLAN
+-------------------------------------------------------
+ WindowAgg
+ -> Incremental Sort
+ Sort Key: depname, empno, enroll_date
+ Presorted Key: depname
+ -> Index Scan using depname_idx on empsalary
+(5 rows)
+
+DROP INDEX depname_idx;
+RESET enable_seqscan;
RESET enable_hashagg;
+-- If window function's ORDER BY is subset of QUERY's ORDER BY, sort as per as
+-- QUERY's ORDER BY to save additional sorting in the end
+EXPLAIN (COSTS OFF)
+SELECT empno,
+ depname,
+ min(salary) OVER (ORDER BY depname) depminsalary
+FROM empsalary
+ORDER BY depname, empno;
+ QUERY PLAN
+-----------------------------------
+ WindowAgg
+ -> Sort
+ Sort Key: depname, empno
+ -> Seq Scan on empsalary
+(4 rows)
+
+-- Same as above, but PARTITION BY clause is included as well
+EXPLAIN (COSTS OFF)
+SELECT empno,
+ depname,
+ min(salary) OVER (PARTITION BY depname ORDER BY empno) depminsalary
+FROM empsalary
+ORDER BY depname, empno, enroll_date;
+ QUERY PLAN
+-----------------------------------------------
+ WindowAgg
+ -> Sort
+ Sort Key: depname, empno, enroll_date
+ -> Seq Scan on empsalary
+(4 rows)
+
+-- Negation of above, when sort optimization is not needed to be performed
+EXPLAIN (COSTS OFF)
+SELECT empno,
+ depname,
+ min(salary) OVER (PARTITION BY depname ORDER BY empno) depminsalary
+FROM empsalary
+ORDER BY empno, enroll_date;
+ QUERY PLAN
+-----------------------------------------
+ Sort
+ Sort Key: empno, enroll_date
+ -> WindowAgg
+ -> Sort
+ Sort Key: depname, empno
+ -> Seq Scan on empsalary
+(6 rows)
+
+-- ORDER BY's in multiple Window functions can be combined into one
+-- if they are subset of QUERY's ORDER BY
+EXPLAIN (COSTS OFF)
+SELECT empno,
+ depname,
+ min(salary) OVER (PARTITION BY depname ORDER BY empno) depminsalary,
+ sum(salary) OVER (PARTITION BY depname) depsalary,
+ count(*) OVER (ORDER BY enroll_date DESC) c
+FROM empsalary
+ORDER BY depname, empno, enroll_date;
+ QUERY PLAN
+------------------------------------------------------
+ WindowAgg
+ -> WindowAgg
+ -> Sort
+ Sort Key: depname, empno, enroll_date
+ -> WindowAgg
+ -> Sort
+ Sort Key: enroll_date DESC
+ -> Seq Scan on empsalary
+(8 rows)
+
+-- order of window function shouldn't affect query's ORDER BY pushdown
+EXPLAIN (COSTS OFF)
+SELECT empno,
+ depname,
+ count(*) OVER (ORDER BY enroll_date DESC) c,
+ sum(salary) OVER (PARTITION BY depname) depsalary,
+ min(salary) OVER (PARTITION BY depname ORDER BY empno) depminsalary
+FROM empsalary
+ORDER BY depname, empno, enroll_date;
+ QUERY PLAN
+------------------------------------------------------
+ WindowAgg
+ -> WindowAgg
+ -> Sort
+ Sort Key: depname, empno, enroll_date
+ -> WindowAgg
+ -> Sort
+ Sort Key: enroll_date DESC
+ -> Seq Scan on empsalary
+(8 rows)
+
+-- SORT OPTIMIZATION (as in cases above) is not needed to be performed
+-- in cases, as mentioned below.
+-- Case #1: When distinct clause is included
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT empno,
+ depname,
+ min(salary) OVER (PARTITION BY depname) depminsalary,
+ sum(salary) OVER (PARTITION BY depname) depsalary,
+ count(*) OVER (ORDER BY enroll_date DESC) c
+FROM empsalary
+ORDER BY depname, empno;
+ QUERY PLAN
+-------------------------------------------------------------------------------------------------------
+ Unique
+ -> Sort
+ Sort Key: depname, empno, (min(salary) OVER (?)), (sum(salary) OVER (?)), (count(*) OVER (?))
+ -> WindowAgg
+ -> Sort
+ Sort Key: depname
+ -> WindowAgg
+ -> Sort
+ Sort Key: enroll_date DESC
+ -> Seq Scan on empsalary
+(10 rows)
+
+-- Case #2: When Limit clause is specified
+EXPLAIN (COSTS OFF)
+SELECT empno,
+ depname,
+ min(salary) OVER (PARTITION BY empno) depminsalary,
+ sum(salary) OVER (PARTITION BY empno) depsalary
+FROM empsalary
+ORDER BY empno, depname LIMIT 1;
+ QUERY PLAN
+-----------------------------------------------
+ Limit
+ -> Incremental Sort
+ Sort Key: empno, depname
+ Presorted Key: empno
+ -> WindowAgg
+ -> Sort
+ Sort Key: empno
+ -> Seq Scan on empsalary
+(8 rows)
+
+-- Case #3: When ORDER BY contains any WindowFuncs
+EXPLAIN (COSTS OFF)
+SELECT empno,
+ row_number() over (PARTITION BY empno) esalary
+FROM empsalary
+ORDER BY empno, esalary;
+ QUERY PLAN
+--------------------------------------------
+ Incremental Sort
+ Sort Key: empno, (row_number() OVER (?))
+ Presorted Key: empno
+ -> WindowAgg
+ -> Sort
+ Sort Key: empno
+ -> Seq Scan on empsalary
+(7 rows)
+
-- Test Sort node reordering
EXPLAIN (COSTS OFF)
SELECT
diff --git a/src/test/regress/sql/window.sql b/src/test/regress/sql/window.sql
index b7bd0a83da..274b7fed7d 100644
--- a/src/test/regress/sql/window.sql
+++ b/src/test/regress/sql/window.sql
@@ -1324,8 +1324,102 @@ SELECT DISTINCT
FROM empsalary
ORDER BY depname, empno;
+-- Do not perform additional sort if column is presorted
+CREATE INDEX depname_idx ON empsalary(depname);
+SET enable_seqscan=0;
+
+EXPLAIN (COSTS OFF)
+SELECT empno,
+ min(salary) OVER (PARTITION BY depname) depminsalary
+FROM empsalary
+ORDER BY depname, empno;
+
+-- Do perform sort pushdown if columns are only partially sorted
+EXPLAIN (COSTS OFF)
+SELECT empno,
+ min(salary) OVER (PARTITION BY depname, empno) depminsalary
+FROM empsalary
+ORDER BY depname, empno, enroll_date;
+
+DROP INDEX depname_idx;
+RESET enable_seqscan;
RESET enable_hashagg;
+-- If window function's ORDER BY is subset of QUERY's ORDER BY, sort as per as
+-- QUERY's ORDER BY to save additional sorting in the end
+EXPLAIN (COSTS OFF)
+SELECT empno,
+ depname,
+ min(salary) OVER (ORDER BY depname) depminsalary
+FROM empsalary
+ORDER BY depname, empno;
+
+-- Same as above, but PARTITION BY clause is included as well
+EXPLAIN (COSTS OFF)
+SELECT empno,
+ depname,
+ min(salary) OVER (PARTITION BY depname ORDER BY empno) depminsalary
+FROM empsalary
+ORDER BY depname, empno, enroll_date;
+
+-- Negation of above, when sort optimization is not needed to be performed
+EXPLAIN (COSTS OFF)
+SELECT empno,
+ depname,
+ min(salary) OVER (PARTITION BY depname ORDER BY empno) depminsalary
+FROM empsalary
+ORDER BY empno, enroll_date;
+
+-- ORDER BY's in multiple Window functions can be combined into one
+-- if they are subset of QUERY's ORDER BY
+EXPLAIN (COSTS OFF)
+SELECT empno,
+ depname,
+ min(salary) OVER (PARTITION BY depname ORDER BY empno) depminsalary,
+ sum(salary) OVER (PARTITION BY depname) depsalary,
+ count(*) OVER (ORDER BY enroll_date DESC) c
+FROM empsalary
+ORDER BY depname, empno, enroll_date;
+
+-- order of window function shouldn't affect query's ORDER BY pushdown
+EXPLAIN (COSTS OFF)
+SELECT empno,
+ depname,
+ count(*) OVER (ORDER BY enroll_date DESC) c,
+ sum(salary) OVER (PARTITION BY depname) depsalary,
+ min(salary) OVER (PARTITION BY depname ORDER BY empno) depminsalary
+FROM empsalary
+ORDER BY depname, empno, enroll_date;
+
+-- SORT OPTIMIZATION (as in cases above) is not needed to be performed
+-- in cases, as mentioned below.
+
+-- Case #1: When distinct clause is included
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT empno,
+ depname,
+ min(salary) OVER (PARTITION BY depname) depminsalary,
+ sum(salary) OVER (PARTITION BY depname) depsalary,
+ count(*) OVER (ORDER BY enroll_date DESC) c
+FROM empsalary
+ORDER BY depname, empno;
+
+-- Case #2: When Limit clause is specified
+EXPLAIN (COSTS OFF)
+SELECT empno,
+ depname,
+ min(salary) OVER (PARTITION BY empno) depminsalary,
+ sum(salary) OVER (PARTITION BY empno) depsalary
+FROM empsalary
+ORDER BY empno, depname LIMIT 1;
+
+-- Case #3: When ORDER BY contains any WindowFuncs
+EXPLAIN (COSTS OFF)
+SELECT empno,
+ row_number() over (PARTITION BY empno) esalary
+FROM empsalary
+ORDER BY empno, esalary;
+
-- Test Sort node reordering
EXPLAIN (COSTS OFF)
SELECT
--
2.37.2