Re: [PATCH] GROUP BY ALL

Tom Lane Fri, 26 Sep 2025 13:38:24 -0700

[ trimming the cc: list because gmail decided my last was spam ]

David Christensen <[email protected]> writes:
> Great feedback, thanks; attached is v4 which should address these comments.


I did a pass of cleanup over this --- mostly cosmetic, but not
entirely.  Along the way I discovered a pre-existing bug:
transformSelectStmt does
        qry->groupDistinct = stmt->groupDistinct;
but transformPLAssignStmt fails to, with the result that GROUP BY
DISTINCT would misbehave if used in a plpgsql "expression" context.
I'm not hugely surprised that no one has reported that from the
field, but nonetheless it's broken.  In the attached v5 I just
quickly added the missing line and moved on, but we'll need to
back-patch that bit.  (Maybe we'd be well advised to refactor to
reduce the amount of duplicated code that needs to be kept in sync?)

I have not attempted to address the definitional issues I just
queried Peter about.  Other open items:

* Documentation

* The test cases deserve reconsideration now that we think their
charter only goes as far as EXPLAIN'ing the results; some of them
seem pretty redundant in this context.

                        regards, tom lane

diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index b9763ea1714..fe632d3dabf 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -1440,12 +1440,14 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt)
 
 	qry->groupClause = transformGroupClause(pstate,
 											stmt->groupClause,
+											stmt->groupAll,
 											&qry->groupingSets,
 											&qry->targetList,
 											qry->sortClause,
 											EXPR_KIND_GROUP_BY,
 											false /* allow SQL92 rules */ );
 	qry->groupDistinct = stmt->groupDistinct;
+	qry->groupAll = stmt->groupAll;
 
 	if (stmt->distinctClause == NIL)
 	{
@@ -2930,11 +2932,14 @@ transformPLAssignStmt(ParseState *pstate, PLAssignStmt *stmt)
 
 	qry->groupClause = transformGroupClause(pstate,
 											sstmt->groupClause,
+											sstmt->groupAll,
 											&qry->groupingSets,
 											&qry->targetList,
 											qry->sortClause,
 											EXPR_KIND_GROUP_BY,
 											false /* allow SQL92 rules */ );
+	qry->groupDistinct = sstmt->groupDistinct;
+	qry->groupAll = sstmt->groupAll;
 
 	if (sstmt->distinctClause == NIL)
 	{
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 9fd48acb1f8..8a0aa1899e2 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -120,6 +120,7 @@ typedef struct SelectLimit
 typedef struct GroupClause
 {
 	bool		distinct;
+	bool		all;
 	List	   *list;
 } GroupClause;
 
@@ -12993,6 +12994,7 @@ simple_select:
 					n->whereClause = $6;
 					n->groupClause = ($7)->list;
 					n->groupDistinct = ($7)->distinct;
+					n->groupAll = ($7)->all;
 					n->havingClause = $8;
 					n->windowClause = $9;
 					$$ = (Node *) n;
@@ -13010,6 +13012,7 @@ simple_select:
 					n->whereClause = $6;
 					n->groupClause = ($7)->list;
 					n->groupDistinct = ($7)->distinct;
+					n->groupAll = ($7)->all;
 					n->havingClause = $8;
 					n->windowClause = $9;
 					$$ = (Node *) n;
@@ -13507,14 +13510,24 @@ group_clause:
 					GroupClause *n = (GroupClause *) palloc(sizeof(GroupClause));
 
 					n->distinct = $3 == SET_QUANTIFIER_DISTINCT;
+					n->all = false;
 					n->list = $4;
 					$$ = n;
 				}
+			| GROUP_P BY ALL
+				{
+					GroupClause *n = (GroupClause *) palloc(sizeof(GroupClause));
+					n->distinct = false;
+					n->all = true;
+					n->list = NIL;
+					$$ = n;
+				}
 			| /*EMPTY*/
 				{
 					GroupClause *n = (GroupClause *) palloc(sizeof(GroupClause));
 
 					n->distinct = false;
+					n->all = false;
 					n->list = NIL;
 					$$ = n;
 				}
@@ -17618,6 +17631,7 @@ PLpgSQL_Expr: opt_distinct_clause opt_target_list
 					n->whereClause = $4;
 					n->groupClause = ($5)->list;
 					n->groupDistinct = ($5)->distinct;
+					n->groupAll = ($5)->all;
 					n->havingClause = $6;
 					n->windowClause = $7;
 					n->sortClause = $8;
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c
index 9f20a70ce13..562e3e90e34 100644
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -2598,6 +2598,9 @@ transformGroupingSet(List **flatresult,
  * GROUP BY items will be added to the targetlist (as resjunk columns)
  * if not already present, so the targetlist must be passed by reference.
  *
+ * If GROUP BY ALL is specified, the groupClause will be inferred to be all
+ * non-aggregate expressions in the targetlist.
+ *
  * This is also used for window PARTITION BY clauses (which act almost the
  * same, but are always interpreted per SQL99 rules).
  *
@@ -2622,6 +2625,7 @@ transformGroupingSet(List **flatresult,
  *
  * pstate		ParseState
  * grouplist	clause to transform
+ * groupByAll	is this a GROUP BY ALL statement?
  * groupingSets reference to list to contain the grouping set tree
  * targetlist	reference to TargetEntry list
  * sortClause	ORDER BY clause (SortGroupClause nodes)
@@ -2629,7 +2633,8 @@ transformGroupingSet(List **flatresult,
  * useSQL99		SQL99 rather than SQL92 syntax
  */
 List *
-transformGroupClause(ParseState *pstate, List *grouplist, List **groupingSets,
+transformGroupClause(ParseState *pstate, List *grouplist, bool groupByAll,
+					 List **groupingSets,
 					 List **targetlist, List *sortClause,
 					 ParseExprKind exprKind, bool useSQL99)
 {
@@ -2640,6 +2645,41 @@ transformGroupClause(ParseState *pstate, List *grouplist, List **groupingSets,
 	bool		hasGroupingSets = false;
 	Bitmapset  *seen_local = NULL;
 
+	/* Handle GROUP BY ALL */
+	if (groupByAll)
+	{
+		/* There cannot have been any explicit list items */
+		Assert(grouplist == NIL);
+
+		/*
+		 * Iterate over targets, any non-aggregate gets added as a Target.
+		 * Note that it's not enough to check for a top-level Aggref; we need
+		 * to ensure that any sub-expression here does not include an Aggref
+		 * (for instance an expression such as `sum(col) + 4` should not be
+		 * added as a grouping target).
+		 *
+		 * We specify the parse location as the TLE's location, despite the
+		 * comment for addTargetToGroupList discouraging that.  The only other
+		 * thing we could point to is the ALL keyword, which seems unhelpful
+		 * when there are multiple TLEs.
+		 */
+		foreach_ptr(TargetEntry, tle, *targetlist)
+		{
+			/* Ignore junk TLEs */
+			if (tle->resjunk)
+				continue;
+
+			/* Use contain_aggs_of_level to detect Aggrefs in SubLinks */
+			if (!(pstate->p_hasAggs &&
+				  contain_aggs_of_level((Node *) tle->expr, 0)))
+				result = addTargetToGroupList(pstate, tle,
+											  result, *targetlist,
+											  exprLocation((Node *) tle->expr));
+		}
+
+		return result;
+	}
+
 	/*
 	 * Recursively flatten implicit RowExprs. (Technically this is only needed
 	 * for GROUP BY, per the syntax rules for grouping sets, but we do it
@@ -2818,6 +2858,7 @@ transformWindowDefinitions(ParseState *pstate,
 										  true /* force SQL99 rules */ );
 		partitionClause = transformGroupClause(pstate,
 											   windef->partitionClause,
+											   false /* not GROUP BY ALL */ ,
 											   NULL,
 											   targetlist,
 											   orderClause,
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index defcdaa8b34..d0c6d3b55bd 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -6186,7 +6186,9 @@ get_basic_select_query(Query *query, deparse_context *context)
 		save_ingroupby = context->inGroupBy;
 		context->inGroupBy = true;
 
-		if (query->groupingSets == NIL)
+		if (query->groupAll)
+			appendStringInfoString(buf, "ALL");
+		else if (query->groupingSets == NIL)
 		{
 			sep = "";
 			foreach(l, query->groupClause)
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 4ed14fc5b78..d379879ae80 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -214,7 +214,8 @@ typedef struct Query
 	List	   *returningList;	/* return-values list (of TargetEntry) */
 
 	List	   *groupClause;	/* a list of SortGroupClause's */
-	bool		groupDistinct;	/* is the group by clause distinct? */
+	bool		groupDistinct;	/* was GROUP BY DISTINCT used? */
+	bool		groupAll;		/* was GROUP BY ALL used? */
 
 	List	   *groupingSets;	/* a list of GroupingSet's if present */
 
@@ -2192,6 +2193,7 @@ typedef struct SelectStmt
 	Node	   *whereClause;	/* WHERE qualification */
 	List	   *groupClause;	/* GROUP BY clauses */
 	bool		groupDistinct;	/* Is this GROUP BY DISTINCT? */
+	bool		groupAll;		/* Is this GROUP BY ALL? */
 	Node	   *havingClause;	/* HAVING conditional-expression */
 	List	   *windowClause;	/* WINDOW window_name AS (...), ... */
 
diff --git a/src/include/parser/parse_clause.h b/src/include/parser/parse_clause.h
index 3e9894926de..ede3903d1dd 100644
--- a/src/include/parser/parse_clause.h
+++ b/src/include/parser/parse_clause.h
@@ -26,6 +26,7 @@ extern Node *transformLimitClause(ParseState *pstate, Node *clause,
 								  ParseExprKind exprKind, const char *constructName,
 								  LimitOption limitOption);
 extern List *transformGroupClause(ParseState *pstate, List *grouplist,
+								  bool groupByAll,
 								  List **groupingSets,
 								  List **targetlist, List *sortClause,
 								  ParseExprKind exprKind, bool useSQL99);
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 1f24f6ffd1f..aeeb452ec26 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1557,6 +1557,153 @@ drop table t2;
 drop table t3;
 drop table p_t1;
 --
+-- Test GROUP BY ALL
+--
+-- We don't care about the data here, just the proper transformation of the
+-- GROUP BY clause, so test some queries and verify the EXPLAIN plans.
+CREATE TEMP TABLE t1 (
+  a int,
+  b int,
+  c int
+);
+-- basic field check
+EXPLAIN (COSTS OFF) SELECT b, COUNT(*) FROM t1 GROUP BY ALL;
+      QUERY PLAN      
+----------------------
+ HashAggregate
+   Group Key: b
+   ->  Seq Scan on t1
+(3 rows)
+
+-- throw a null in the values too
+EXPLAIN (COSTS OFF) SELECT a, COUNT(a) FROM t1 GROUP BY ALL;
+      QUERY PLAN      
+----------------------
+ HashAggregate
+   Group Key: a
+   ->  Seq Scan on t1
+(3 rows)
+
+-- multiple columns, non-consecutive order
+EXPLAIN (COSTS OFF) SELECT a, SUM(b), b FROM t1 GROUP BY ALL;
+      QUERY PLAN      
+----------------------
+ HashAggregate
+   Group Key: a, b
+   ->  Seq Scan on t1
+(3 rows)
+
+-- multi columns, no aggregate
+EXPLAIN (COSTS OFF) SELECT a + b FROM t1 GROUP BY ALL;
+      QUERY PLAN      
+----------------------
+ HashAggregate
+   Group Key: (a + b)
+   ->  Seq Scan on t1
+(3 rows)
+
+-- non-top-level aggregate
+EXPLAIN (COSTS OFF) SELECT a, SUM(b) + 4 FROM t1 GROUP BY ALL;
+      QUERY PLAN      
+----------------------
+ HashAggregate
+   Group Key: a
+   ->  Seq Scan on t1
+(3 rows)
+
+-- including grouped column
+EXPLAIN (COSTS OFF) SELECT a, SUM(b) + a FROM t1 GROUP BY ALL;
+      QUERY PLAN      
+----------------------
+ HashAggregate
+   Group Key: a
+   ->  Seq Scan on t1
+(3 rows)
+
+-- oops all aggregates
+EXPLAIN (COSTS OFF) SELECT COUNT(a), SUM(b) FROM t1 GROUP BY ALL;
+      QUERY PLAN      
+----------------------
+ Aggregate
+   ->  Seq Scan on t1
+(2 rows)
+
+-- empty column list
+EXPLAIN (COSTS OFF) SELECT FROM t1 GROUP BY ALL;
+   QUERY PLAN   
+----------------
+ Seq Scan on t1
+(1 row)
+
+-- filter
+EXPLAIN (COSTS OFF) SELECT a, COUNT(a) FILTER(WHERE b = 2) FROM t1 GROUP BY ALL;
+      QUERY PLAN      
+----------------------
+ HashAggregate
+   Group Key: a
+   ->  Seq Scan on t1
+(3 rows)
+
+-- all cols
+EXPLAIN (COSTS OFF) SELECT * FROM t1 GROUP BY ALL;
+      QUERY PLAN      
+----------------------
+ HashAggregate
+   Group Key: a, b, c
+   ->  Seq Scan on t1
+(3 rows)
+
+EXPLAIN (COSTS OFF) SELECT *, count(*) FROM t1 GROUP BY ALL;
+      QUERY PLAN      
+----------------------
+ HashAggregate
+   Group Key: a, b, c
+   ->  Seq Scan on t1
+(3 rows)
+
+-- expression without including aggregate columns
+EXPLAIN (COSTS OFF) SELECT a, SUM(b) + c FROM t1 GROUP BY ALL;
+ERROR:  column "t1.c" must appear in the GROUP BY clause or be used in an aggregate function
+LINE 1: EXPLAIN (COSTS OFF) SELECT a, SUM(b) + c FROM t1 GROUP BY AL...
+                                               ^
+-- subquery
+EXPLAIN (COSTS OFF) SELECT (SELECT a FROM t1 LIMIT 1), SUM(b) FROM t1 GROUP BY ALL;
+            QUERY PLAN             
+-----------------------------------
+ GroupAggregate
+   InitPlan 1
+     ->  Limit
+           ->  Seq Scan on t1 t1_1
+   ->  Seq Scan on t1
+(5 rows)
+
+-- cte
+EXPLAIN (COSTS OFF) WITH a_query AS (SELECT a, SUM(b) AS sum_b FROM t1 GROUP BY ALL)
+SELECT AVG(a), sum_b  FROM a_query GROUP BY ALL;
+         QUERY PLAN         
+----------------------------
+ HashAggregate
+   Group Key: sum(t1.b)
+   ->  HashAggregate
+         Group Key: t1.a
+         ->  Seq Scan on t1
+(5 rows)
+
+-- verify deparse
+CREATE VIEW v1 AS SELECT b, COUNT(*) FROM t1 GROUP BY ALL;
+NOTICE:  view "v1" will be a temporary view
+SELECT pg_get_viewdef('v1'::regclass);
+    pg_get_viewdef     
+-----------------------
+  SELECT b,           +
+     count(*) AS count+
+    FROM t1           +
+   GROUP BY ALL;
+(1 row)
+
+DROP VIEW v1;
+DROP TABLE t1;
+--
 -- Test GROUP BY matching of join columns that are type-coerced due to USING
 --
 create temp table t1(f1 int, f2 int);
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 62540b1ffa4..6b2516cd9b1 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -549,6 +549,68 @@ drop table t2;
 drop table t3;
 drop table p_t1;
 
+
+--
+-- Test GROUP BY ALL
+--
+
+-- We don't care about the data here, just the proper transformation of the
+-- GROUP BY clause, so test some queries and verify the EXPLAIN plans.
+
+CREATE TEMP TABLE t1 (
+  a int,
+  b int,
+  c int
+);
+
+-- basic field check
+EXPLAIN (COSTS OFF) SELECT b, COUNT(*) FROM t1 GROUP BY ALL;
+
+-- throw a null in the values too
+EXPLAIN (COSTS OFF) SELECT a, COUNT(a) FROM t1 GROUP BY ALL;
+
+-- multiple columns, non-consecutive order
+EXPLAIN (COSTS OFF) SELECT a, SUM(b), b FROM t1 GROUP BY ALL;
+
+-- multi columns, no aggregate
+EXPLAIN (COSTS OFF) SELECT a + b FROM t1 GROUP BY ALL;
+
+-- non-top-level aggregate
+EXPLAIN (COSTS OFF) SELECT a, SUM(b) + 4 FROM t1 GROUP BY ALL;
+
+-- including grouped column
+EXPLAIN (COSTS OFF) SELECT a, SUM(b) + a FROM t1 GROUP BY ALL;
+
+-- oops all aggregates
+EXPLAIN (COSTS OFF) SELECT COUNT(a), SUM(b) FROM t1 GROUP BY ALL;
+
+-- empty column list
+EXPLAIN (COSTS OFF) SELECT FROM t1 GROUP BY ALL;
+
+-- filter
+EXPLAIN (COSTS OFF) SELECT a, COUNT(a) FILTER(WHERE b = 2) FROM t1 GROUP BY ALL;
+
+-- all cols
+EXPLAIN (COSTS OFF) SELECT * FROM t1 GROUP BY ALL;
+EXPLAIN (COSTS OFF) SELECT *, count(*) FROM t1 GROUP BY ALL;
+
+-- expression without including aggregate columns
+EXPLAIN (COSTS OFF) SELECT a, SUM(b) + c FROM t1 GROUP BY ALL;
+
+-- subquery
+EXPLAIN (COSTS OFF) SELECT (SELECT a FROM t1 LIMIT 1), SUM(b) FROM t1 GROUP BY ALL;
+
+-- cte
+EXPLAIN (COSTS OFF) WITH a_query AS (SELECT a, SUM(b) AS sum_b FROM t1 GROUP BY ALL)
+SELECT AVG(a), sum_b  FROM a_query GROUP BY ALL;
+
+-- verify deparse
+CREATE VIEW v1 AS SELECT b, COUNT(*) FROM t1 GROUP BY ALL;
+SELECT pg_get_viewdef('v1'::regclass);
+
+DROP VIEW v1;
+DROP TABLE t1;
+
 --
 -- Test GROUP BY matching of join columns that are type-coerced due to USING
 --

Re: [PATCH] GROUP BY ALL

Reply via email to