From e7e6a96b2023efde6f7b841055e15731a83972bd Mon Sep 17 00:00:00 2001
From: Richard Guo <guofenglinux@gmail.com>
Date: Tue, 10 Sep 2024 18:09:44 +0900
Subject: [PATCH v1] Allow pushdown of HAVING clauses with grouping sets

In some cases, we may want to transfer a HAVING clause into WHERE in
hopes of eliminating tuples before aggregation instead of after.

Previously, we couldn't do this if there were any nonempty grouping
sets, because we didn't have a way to tell if the HAVING clause
referenced any columns that were nullable by the grouping sets, and
moving such a clause into WHERE could potentially change the results.

Now, with expressions marked nullable by grouping sets with the RT
index of the RTE_GROUP RTE, it is much easier to identify those
clauses that reference any nullable-by-grouping-sets columns: we just
need to check if the RT index of the RTE_GROUP RTE is present in the
clause.  For other HAVING clauses, they can be safely pushed down.
---
 src/backend/optimizer/plan/planner.c       | 17 +++++++++--------
 src/test/regress/expected/groupingsets.out | 21 +++++++++++++++++++++
 src/test/regress/sql/groupingsets.sql      |  5 +++++
 3 files changed, 35 insertions(+), 8 deletions(-)

diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index df35d1ff9c..eb60dcd9a6 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -1041,10 +1041,10 @@ subquery_planner(PlannerGlobal *glob, Query *parse, PlannerInfo *parent_root,
 	 * cannot do so if the HAVING clause contains aggregates (obviously) or
 	 * volatile functions (since a HAVING clause is supposed to be executed
 	 * only once per group).  We also can't do this if there are any nonempty
-	 * grouping sets; moving such a clause into WHERE would potentially change
-	 * the results, if any referenced column isn't present in all the grouping
-	 * sets.  (If there are only empty grouping sets, then the HAVING clause
-	 * must be degenerate as discussed below.)
+	 * grouping sets and the clause references any columns that are nullable
+	 * by the grouping sets; moving such a clause into WHERE would potentially
+	 * change the results.  (If there are only empty grouping sets, then the
+	 * HAVING clause must be degenerate as discussed below.)
 	 *
 	 * Also, it may be that the clause is so expensive to execute that we're
 	 * better off doing it only once per group, despite the loss of
@@ -1082,15 +1082,16 @@ subquery_planner(PlannerGlobal *glob, Query *parse, PlannerInfo *parent_root,
 	{
 		Node	   *havingclause = (Node *) lfirst(l);
 
-		if ((parse->groupClause && parse->groupingSets) ||
-			contain_agg_clause(havingclause) ||
+		if (contain_agg_clause(havingclause) ||
 			contain_volatile_functions(havingclause) ||
-			contain_subplans(havingclause))
+			contain_subplans(havingclause) ||
+			(parse->groupClause && parse->groupingSets &&
+			 bms_is_member(root->group_rtindex, pull_varnos(root, havingclause))))
 		{
 			/* keep it in HAVING */
 			newHaving = lappend(newHaving, havingclause);
 		}
-		else if (parse->groupClause && !parse->groupingSets)
+		else if (parse->groupClause)
 		{
 			Node	   *whereclause;
 
diff --git a/src/test/regress/expected/groupingsets.out b/src/test/regress/expected/groupingsets.out
index 717383c4f3..d7c9b44605 100644
--- a/src/test/regress/expected/groupingsets.out
+++ b/src/test/regress/expected/groupingsets.out
@@ -860,6 +860,27 @@ explain (costs off)
                        ->  Seq Scan on gstest2
 (10 rows)
 
+-- test pushdown of HAVING clause that does not reference any columns that are nullable by grouping sets
+explain (costs off)
+select a, b, count(*) from gstest2 group by grouping sets ((a, b), (a)) having a > 1 and b > 1;
+           QUERY PLAN            
+---------------------------------
+ GroupAggregate
+   Group Key: a, b
+   Group Key: a
+   Filter: (b > 1)
+   ->  Sort
+         Sort Key: a, b
+         ->  Seq Scan on gstest2
+               Filter: (a > 1)
+(8 rows)
+
+select a, b, count(*) from gstest2 group by grouping sets ((a, b), (a)) having a > 1 and b > 1;
+ a | b | count 
+---+---+-------
+ 2 | 2 |     1
+(1 row)
+
 -- HAVING with GROUPING queries
 select ten, grouping(ten) from onek
 group by grouping sets(ten) having grouping(ten) >= 0
diff --git a/src/test/regress/sql/groupingsets.sql b/src/test/regress/sql/groupingsets.sql
index 660ca33efc..21cd312194 100644
--- a/src/test/regress/sql/groupingsets.sql
+++ b/src/test/regress/sql/groupingsets.sql
@@ -279,6 +279,11 @@ explain (costs off)
   select v.c, (select count(*) from gstest2 group by () having v.c)
     from (values (false),(true)) v(c) order by v.c;
 
+-- test pushdown of HAVING clause that does not reference any columns that are nullable by grouping sets
+explain (costs off)
+select a, b, count(*) from gstest2 group by grouping sets ((a, b), (a)) having a > 1 and b > 1;
+select a, b, count(*) from gstest2 group by grouping sets ((a, b), (a)) having a > 1 and b > 1;
+
 -- HAVING with GROUPING queries
 select ten, grouping(ten) from onek
 group by grouping sets(ten) having grouping(ten) >= 0
-- 
2.43.0