From 2d9798207cdbb1bbe7c1858e36d69bdb17103ecf Mon Sep 17 00:00:00 2001
From: Tatsuro Yamada <yamatattsu@gmail.com>
Date: Thu, 5 Jun 2025 18:50:34 +0900
Subject: [PATCH] Add new GUC parameter: enable_groupagg

Previously, there was no GUC parameter to control the use of
GroupAggregate, so we couldn't influence the planner's choice
in certain queries.
This patch adds a new parameter, "enable_groupagg", which
allows users to enable or disable GroupAggregate explicitly.

By disabling GroupAggregate, the planner may choose
HashAggregate instead, potentially resulting in a more
efficient execution plan for some queries.
---
 doc/src/sgml/config.sgml                      |  14 ++
 src/backend/optimizer/path/costsize.c         |   3 +
 src/backend/utils/misc/guc_tables.c           |  10 ++
 src/backend/utils/misc/postgresql.conf.sample |   1 +
 src/include/optimizer/cost.h                  |   1 +
 src/test/regress/expected/aggregates.out      | 120 ++++++++++++++++++
 src/test/regress/expected/sysviews.out        |   3 +-
 src/test/regress/sql/aggregates.sql           |  81 ++++++++++++
 8 files changed, 232 insertions(+), 1 deletion(-)

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 021153b2a5f..edd0f3a13b8 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -5515,6 +5515,20 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
       </listitem>
      </varlistentry>
 
+     <varlistentry id="guc-enable-groupagg" xreflabel="enable_groupagg">
+      <term><varname>enable_groupagg</varname> (<type>boolean</type>)
+      <indexterm>
+       <primary><varname>enable_groupagg</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Enables or disables the query planner's use of grouped
+        aggregation plan types. The default is <literal>on</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="guc-enable-hashjoin" xreflabel="enable_hashjoin">
       <term><varname>enable_hashjoin</varname> (<type>boolean</type>)
       <indexterm>
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 3d44815ed5a..80c68008d85 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -150,6 +150,7 @@ bool		enable_tidscan = true;
 bool		enable_sort = true;
 bool		enable_incremental_sort = true;
 bool		enable_hashagg = true;
+bool		enable_groupagg = true;
 bool		enable_nestloop = true;
 bool		enable_material = true;
 bool		enable_memoize = true;
@@ -2737,6 +2738,8 @@ cost_agg(Path *path, PlannerInfo *root,
 		/* Here we are able to deliver output on-the-fly */
 		startup_cost = input_startup_cost;
 		total_cost = input_total_cost;
+		if (aggstrategy == AGG_SORTED && !enable_groupagg && enable_hashagg)
+			++disabled_nodes;
 		if (aggstrategy == AGG_MIXED && !enable_hashagg)
 			++disabled_nodes;
 		/* calcs phrased this way to match HASHED case, see note above */
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index f04bfedb2fd..a17b7fb1ba1 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -869,6 +869,16 @@ struct config_bool ConfigureNamesBool[] =
 		true,
 		NULL, NULL, NULL
 	},
+	{
+		{"enable_groupagg", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Enables the planner's use of grouped aggregation plans."),
+			NULL,
+			GUC_EXPLAIN
+		},
+		&enable_groupagg,
+		true,
+		NULL, NULL, NULL
+	},
 	{
 		{"enable_material", PGC_USERSET, QUERY_TUNING_METHOD,
 			gettext_noop("Enables the planner's use of materialization."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 341f88adc87..0514c327767 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -408,6 +408,7 @@
 #enable_bitmapscan = on
 #enable_gathermerge = on
 #enable_hashagg = on
+#enable_groupagg = on
 #enable_hashjoin = on
 #enable_incremental_sort = on
 #enable_indexscan = on
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index d397fe27dc1..099d41fd7bd 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -57,6 +57,7 @@ extern PGDLLIMPORT bool enable_tidscan;
 extern PGDLLIMPORT bool enable_sort;
 extern PGDLLIMPORT bool enable_incremental_sort;
 extern PGDLLIMPORT bool enable_hashagg;
+extern PGDLLIMPORT bool enable_groupagg;
 extern PGDLLIMPORT bool enable_nestloop;
 extern PGDLLIMPORT bool enable_material;
 extern PGDLLIMPORT bool enable_memoize;
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 1f1ce2380af..0911cb33c0a 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -3581,3 +3581,123 @@ drop table agg_hash_1;
 drop table agg_hash_2;
 drop table agg_hash_3;
 drop table agg_hash_4;
+-- create table to check enable_groupagg
+CREATE TABLE test_groupagg(
+    id serial primary key,
+    c1 text,
+    c2 text,
+    c3 numeric);
+INSERT INTO test_groupagg (c1, c2, c3) VALUES
+('a', 'GGG', 100),
+('a', 'GGG', 150),
+('a', 'rrr', 200),
+('b', 'ooo', 300),
+('b', 'ooo', 250),
+('b', 'uuu', 100),
+('c', 'ppp', 500),
+('c', 'ppp', 600),
+('c', 'aaa', 550);
+ANALYZE;
+-- default: GroupAgg and HashAgg are mixed and selected
+SET max_parallel_workers to 0;
+SET max_parallel_workers_per_gather to 0;
+SET enable_hashagg  to default;
+SET enable_groupagg to default;
+EXPLAIN(costs off, settings)
+SELECT c1, AVG(total)
+FROM (
+    SELECT c1, c2, SUM(c3) AS total
+    FROM test_groupagg
+    GROUP BY c1, c2
+) AS sub
+GROUP BY c1
+ORDER BY c1;
+                                 QUERY PLAN                                  
+-----------------------------------------------------------------------------
+ GroupAggregate
+   Group Key: sub.c1
+   ->  Sort
+         Sort Key: sub.c1
+         ->  Subquery Scan on sub
+               ->  HashAggregate
+                     Group Key: test_groupagg.c1, test_groupagg.c2
+                     ->  Seq Scan on test_groupagg
+ Settings: max_parallel_workers = '0', max_parallel_workers_per_gather = '0'
+(9 rows)
+
+-- Only GroupAgg is selected
+set enable_hashagg to off;
+EXPLAIN(costs off, settings)
+SELECT c1, AVG(total)
+FROM (
+    SELECT c1, c2, SUM(c3) AS total
+    FROM test_groupagg
+    GROUP BY c1, c2
+) AS sub
+GROUP BY c1
+ORDER BY c1;
+                                             QUERY PLAN                                              
+-----------------------------------------------------------------------------------------------------
+ GroupAggregate
+   Group Key: test_groupagg.c1
+   ->  GroupAggregate
+         Group Key: test_groupagg.c1, test_groupagg.c2
+         ->  Sort
+               Sort Key: test_groupagg.c1, test_groupagg.c2
+               ->  Seq Scan on test_groupagg
+ Settings: max_parallel_workers = '0', max_parallel_workers_per_gather = '0', enable_hashagg = 'off'
+(8 rows)
+
+-- Only HashAgg is selected
+SET enable_hashagg  to on;
+SET enable_groupagg to off;
+EXPLAIN(costs off, settings)
+SELECT c1, AVG(total)
+FROM (
+    SELECT c1, c2, SUM(c3) AS total
+    FROM test_groupagg
+    GROUP BY c1, c2
+) AS sub
+GROUP BY c1
+ORDER BY c1;
+                                              QUERY PLAN                                              
+------------------------------------------------------------------------------------------------------
+ Sort
+   Sort Key: test_groupagg.c1
+   ->  HashAggregate
+         Group Key: test_groupagg.c1
+         ->  HashAggregate
+               Group Key: test_groupagg.c1, test_groupagg.c2
+               ->  Seq Scan on test_groupagg
+ Settings: max_parallel_workers = '0', max_parallel_workers_per_gather = '0', enable_groupagg = 'off'
+(8 rows)
+
+-- Only GroupAgg is selected as a fallback
+SET enable_hashagg  to off;
+SET enable_groupagg to off;
+EXPLAIN(costs off, settings)
+SELECT c1, AVG(total)
+FROM (
+    SELECT c1, c2, SUM(c3) AS total
+    FROM test_groupagg
+    GROUP BY c1, c2
+) AS sub
+GROUP BY c1
+ORDER BY c1;
+                                                          QUERY PLAN                                                          
+------------------------------------------------------------------------------------------------------------------------------
+ GroupAggregate
+   Group Key: test_groupagg.c1
+   ->  GroupAggregate
+         Group Key: test_groupagg.c1, test_groupagg.c2
+         ->  Sort
+               Sort Key: test_groupagg.c1, test_groupagg.c2
+               ->  Seq Scan on test_groupagg
+ Settings: max_parallel_workers = '0', max_parallel_workers_per_gather = '0', enable_hashagg = 'off', enable_groupagg = 'off'
+(8 rows)
+
+RESET enable_hashagg;
+RESET enable_groupagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+DROP TABLE test_groupagg;
diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out
index 83228cfca29..f10371d6e26 100644
--- a/src/test/regress/expected/sysviews.out
+++ b/src/test/regress/expected/sysviews.out
@@ -153,6 +153,7 @@ select name, setting from pg_settings where name like 'enable%';
  enable_distinct_reordering     | on
  enable_gathermerge             | on
  enable_group_by_reordering     | on
+ enable_groupagg                | on
  enable_hashagg                 | on
  enable_hashjoin                | on
  enable_incremental_sort        | on
@@ -172,7 +173,7 @@ select name, setting from pg_settings where name like 'enable%';
  enable_seqscan                 | on
  enable_sort                    | on
  enable_tidscan                 | on
-(24 rows)
+(25 rows)
 
 -- There are always wait event descriptions for various types.  InjectionPoint
 -- may be present or absent, depending on history since last postmaster start.
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 277b4b198cc..92493ead1f9 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -1644,3 +1644,84 @@ drop table agg_hash_1;
 drop table agg_hash_2;
 drop table agg_hash_3;
 drop table agg_hash_4;
+
+-- create table to check enable_groupagg
+CREATE TABLE test_groupagg(
+    id serial primary key,
+    c1 text,
+    c2 text,
+    c3 numeric);
+INSERT INTO test_groupagg (c1, c2, c3) VALUES
+('a', 'GGG', 100),
+('a', 'GGG', 150),
+('a', 'rrr', 200),
+('b', 'ooo', 300),
+('b', 'ooo', 250),
+('b', 'uuu', 100),
+('c', 'ppp', 500),
+('c', 'ppp', 600),
+('c', 'aaa', 550);
+ANALYZE;
+
+-- default: GroupAgg and HashAgg are mixed and selected
+SET max_parallel_workers to 0;
+SET max_parallel_workers_per_gather to 0;
+SET enable_hashagg  to default;
+SET enable_groupagg to default;
+
+EXPLAIN(costs off, settings)
+SELECT c1, AVG(total)
+FROM (
+    SELECT c1, c2, SUM(c3) AS total
+    FROM test_groupagg
+    GROUP BY c1, c2
+) AS sub
+GROUP BY c1
+ORDER BY c1;
+
+-- Only GroupAgg is selected
+set enable_hashagg to off;
+
+EXPLAIN(costs off, settings)
+SELECT c1, AVG(total)
+FROM (
+    SELECT c1, c2, SUM(c3) AS total
+    FROM test_groupagg
+    GROUP BY c1, c2
+) AS sub
+GROUP BY c1
+ORDER BY c1;
+
+-- Only HashAgg is selected
+SET enable_hashagg  to on;
+SET enable_groupagg to off;
+
+EXPLAIN(costs off, settings)
+SELECT c1, AVG(total)
+FROM (
+    SELECT c1, c2, SUM(c3) AS total
+    FROM test_groupagg
+    GROUP BY c1, c2
+) AS sub
+GROUP BY c1
+ORDER BY c1;
+
+-- Only GroupAgg is selected as a fallback
+SET enable_hashagg  to off;
+SET enable_groupagg to off;
+
+EXPLAIN(costs off, settings)
+SELECT c1, AVG(total)
+FROM (
+    SELECT c1, c2, SUM(c3) AS total
+    FROM test_groupagg
+    GROUP BY c1, c2
+) AS sub
+GROUP BY c1
+ORDER BY c1;
+
+RESET enable_hashagg;
+RESET enable_groupagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+DROP TABLE test_groupagg;
-- 
2.43.5

