[ trimming the cc: list because gmail decided my last was spam ]
David Christensen <[email protected]> writes:
> Great feedback, thanks; attached is v4 which should address these comments.
I did a pass of cleanup over this --- mostly cosmetic, but not
entirely. Along the way I discovered a pre-existing bug:
transformSelectStmt does
qry->groupDistinct = stmt->groupDistinct;
but transformPLAssignStmt fails to, with the result that GROUP BY
DISTINCT would misbehave if used in a plpgsql "expression" context.
I'm not hugely surprised that no one has reported that from the
field, but nonetheless it's broken. In the attached v5 I just
quickly added the missing line and moved on, but we'll need to
back-patch that bit. (Maybe we'd be well advised to refactor to
reduce the amount of duplicated code that needs to be kept in sync?)
I have not attempted to address the definitional issues I just
queried Peter about. Other open items:
* Documentation
* The test cases deserve reconsideration now that we think their
charter only goes as far as EXPLAIN'ing the results; some of them
seem pretty redundant in this context.
regards, tom lane
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index b9763ea1714..fe632d3dabf 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -1440,12 +1440,14 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt)
qry->groupClause = transformGroupClause(pstate,
stmt->groupClause,
+ stmt->groupAll,
&qry->groupingSets,
&qry->targetList,
qry->sortClause,
EXPR_KIND_GROUP_BY,
false /* allow SQL92 rules */ );
qry->groupDistinct = stmt->groupDistinct;
+ qry->groupAll = stmt->groupAll;
if (stmt->distinctClause == NIL)
{
@@ -2930,11 +2932,14 @@ transformPLAssignStmt(ParseState *pstate, PLAssignStmt *stmt)
qry->groupClause = transformGroupClause(pstate,
sstmt->groupClause,
+ sstmt->groupAll,
&qry->groupingSets,
&qry->targetList,
qry->sortClause,
EXPR_KIND_GROUP_BY,
false /* allow SQL92 rules */ );
+ qry->groupDistinct = sstmt->groupDistinct;
+ qry->groupAll = sstmt->groupAll;
if (sstmt->distinctClause == NIL)
{
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 9fd48acb1f8..8a0aa1899e2 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -120,6 +120,7 @@ typedef struct SelectLimit
typedef struct GroupClause
{
bool distinct;
+ bool all;
List *list;
} GroupClause;
@@ -12993,6 +12994,7 @@ simple_select:
n->whereClause = $6;
n->groupClause = ($7)->list;
n->groupDistinct = ($7)->distinct;
+ n->groupAll = ($7)->all;
n->havingClause = $8;
n->windowClause = $9;
$$ = (Node *) n;
@@ -13010,6 +13012,7 @@ simple_select:
n->whereClause = $6;
n->groupClause = ($7)->list;
n->groupDistinct = ($7)->distinct;
+ n->groupAll = ($7)->all;
n->havingClause = $8;
n->windowClause = $9;
$$ = (Node *) n;
@@ -13507,14 +13510,24 @@ group_clause:
GroupClause *n = (GroupClause *) palloc(sizeof(GroupClause));
n->distinct = $3 == SET_QUANTIFIER_DISTINCT;
+ n->all = false;
n->list = $4;
$$ = n;
}
+ | GROUP_P BY ALL
+ {
+ GroupClause *n = (GroupClause *) palloc(sizeof(GroupClause));
+ n->distinct = false;
+ n->all = true;
+ n->list = NIL;
+ $$ = n;
+ }
| /*EMPTY*/
{
GroupClause *n = (GroupClause *) palloc(sizeof(GroupClause));
n->distinct = false;
+ n->all = false;
n->list = NIL;
$$ = n;
}
@@ -17618,6 +17631,7 @@ PLpgSQL_Expr: opt_distinct_clause opt_target_list
n->whereClause = $4;
n->groupClause = ($5)->list;
n->groupDistinct = ($5)->distinct;
+ n->groupAll = ($5)->all;
n->havingClause = $6;
n->windowClause = $7;
n->sortClause = $8;
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c
index 9f20a70ce13..562e3e90e34 100644
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -2598,6 +2598,9 @@ transformGroupingSet(List **flatresult,
* GROUP BY items will be added to the targetlist (as resjunk columns)
* if not already present, so the targetlist must be passed by reference.
*
+ * If GROUP BY ALL is specified, the groupClause will be inferred to be all
+ * non-aggregate expressions in the targetlist.
+ *
* This is also used for window PARTITION BY clauses (which act almost the
* same, but are always interpreted per SQL99 rules).
*
@@ -2622,6 +2625,7 @@ transformGroupingSet(List **flatresult,
*
* pstate ParseState
* grouplist clause to transform
+ * groupByAll is this a GROUP BY ALL statement?
* groupingSets reference to list to contain the grouping set tree
* targetlist reference to TargetEntry list
* sortClause ORDER BY clause (SortGroupClause nodes)
@@ -2629,7 +2633,8 @@ transformGroupingSet(List **flatresult,
* useSQL99 SQL99 rather than SQL92 syntax
*/
List *
-transformGroupClause(ParseState *pstate, List *grouplist, List **groupingSets,
+transformGroupClause(ParseState *pstate, List *grouplist, bool groupByAll,
+ List **groupingSets,
List **targetlist, List *sortClause,
ParseExprKind exprKind, bool useSQL99)
{
@@ -2640,6 +2645,41 @@ transformGroupClause(ParseState *pstate, List *grouplist, List **groupingSets,
bool hasGroupingSets = false;
Bitmapset *seen_local = NULL;
+ /* Handle GROUP BY ALL */
+ if (groupByAll)
+ {
+ /* There cannot have been any explicit list items */
+ Assert(grouplist == NIL);
+
+ /*
+ * Iterate over targets, any non-aggregate gets added as a Target.
+ * Note that it's not enough to check for a top-level Aggref; we need
+ * to ensure that any sub-expression here does not include an Aggref
+ * (for instance an expression such as `sum(col) + 4` should not be
+ * added as a grouping target).
+ *
+ * We specify the parse location as the TLE's location, despite the
+ * comment for addTargetToGroupList discouraging that. The only other
+ * thing we could point to is the ALL keyword, which seems unhelpful
+ * when there are multiple TLEs.
+ */
+ foreach_ptr(TargetEntry, tle, *targetlist)
+ {
+ /* Ignore junk TLEs */
+ if (tle->resjunk)
+ continue;
+
+ /* Use contain_aggs_of_level to detect Aggrefs in SubLinks */
+ if (!(pstate->p_hasAggs &&
+ contain_aggs_of_level((Node *) tle->expr, 0)))
+ result = addTargetToGroupList(pstate, tle,
+ result, *targetlist,
+ exprLocation((Node *) tle->expr));
+ }
+
+ return result;
+ }
+
/*
* Recursively flatten implicit RowExprs. (Technically this is only needed
* for GROUP BY, per the syntax rules for grouping sets, but we do it
@@ -2818,6 +2858,7 @@ transformWindowDefinitions(ParseState *pstate,
true /* force SQL99 rules */ );
partitionClause = transformGroupClause(pstate,
windef->partitionClause,
+ false /* not GROUP BY ALL */ ,
NULL,
targetlist,
orderClause,
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index defcdaa8b34..d0c6d3b55bd 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -6186,7 +6186,9 @@ get_basic_select_query(Query *query, deparse_context *context)
save_ingroupby = context->inGroupBy;
context->inGroupBy = true;
- if (query->groupingSets == NIL)
+ if (query->groupAll)
+ appendStringInfoString(buf, "ALL");
+ else if (query->groupingSets == NIL)
{
sep = "";
foreach(l, query->groupClause)
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 4ed14fc5b78..d379879ae80 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -214,7 +214,8 @@ typedef struct Query
List *returningList; /* return-values list (of TargetEntry) */
List *groupClause; /* a list of SortGroupClause's */
- bool groupDistinct; /* is the group by clause distinct? */
+ bool groupDistinct; /* was GROUP BY DISTINCT used? */
+ bool groupAll; /* was GROUP BY ALL used? */
List *groupingSets; /* a list of GroupingSet's if present */
@@ -2192,6 +2193,7 @@ typedef struct SelectStmt
Node *whereClause; /* WHERE qualification */
List *groupClause; /* GROUP BY clauses */
bool groupDistinct; /* Is this GROUP BY DISTINCT? */
+ bool groupAll; /* Is this GROUP BY ALL? */
Node *havingClause; /* HAVING conditional-expression */
List *windowClause; /* WINDOW window_name AS (...), ... */
diff --git a/src/include/parser/parse_clause.h b/src/include/parser/parse_clause.h
index 3e9894926de..ede3903d1dd 100644
--- a/src/include/parser/parse_clause.h
+++ b/src/include/parser/parse_clause.h
@@ -26,6 +26,7 @@ extern Node *transformLimitClause(ParseState *pstate, Node *clause,
ParseExprKind exprKind, const char *constructName,
LimitOption limitOption);
extern List *transformGroupClause(ParseState *pstate, List *grouplist,
+ bool groupByAll,
List **groupingSets,
List **targetlist, List *sortClause,
ParseExprKind exprKind, bool useSQL99);
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 1f24f6ffd1f..aeeb452ec26 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1557,6 +1557,153 @@ drop table t2;
drop table t3;
drop table p_t1;
--
+-- Test GROUP BY ALL
+--
+-- We don't care about the data here, just the proper transformation of the
+-- GROUP BY clause, so test some queries and verify the EXPLAIN plans.
+CREATE TEMP TABLE t1 (
+ a int,
+ b int,
+ c int
+);
+-- basic field check
+EXPLAIN (COSTS OFF) SELECT b, COUNT(*) FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: b
+ -> Seq Scan on t1
+(3 rows)
+
+-- throw a null in the values too
+EXPLAIN (COSTS OFF) SELECT a, COUNT(a) FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: a
+ -> Seq Scan on t1
+(3 rows)
+
+-- multiple columns, non-consecutive order
+EXPLAIN (COSTS OFF) SELECT a, SUM(b), b FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: a, b
+ -> Seq Scan on t1
+(3 rows)
+
+-- multi columns, no aggregate
+EXPLAIN (COSTS OFF) SELECT a + b FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: (a + b)
+ -> Seq Scan on t1
+(3 rows)
+
+-- non-top-level aggregate
+EXPLAIN (COSTS OFF) SELECT a, SUM(b) + 4 FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: a
+ -> Seq Scan on t1
+(3 rows)
+
+-- including grouped column
+EXPLAIN (COSTS OFF) SELECT a, SUM(b) + a FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: a
+ -> Seq Scan on t1
+(3 rows)
+
+-- oops all aggregates
+EXPLAIN (COSTS OFF) SELECT COUNT(a), SUM(b) FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------
+ Aggregate
+ -> Seq Scan on t1
+(2 rows)
+
+-- empty column list
+EXPLAIN (COSTS OFF) SELECT FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------
+ Seq Scan on t1
+(1 row)
+
+-- filter
+EXPLAIN (COSTS OFF) SELECT a, COUNT(a) FILTER(WHERE b = 2) FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: a
+ -> Seq Scan on t1
+(3 rows)
+
+-- all cols
+EXPLAIN (COSTS OFF) SELECT * FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: a, b, c
+ -> Seq Scan on t1
+(3 rows)
+
+EXPLAIN (COSTS OFF) SELECT *, count(*) FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: a, b, c
+ -> Seq Scan on t1
+(3 rows)
+
+-- expression without including aggregate columns
+EXPLAIN (COSTS OFF) SELECT a, SUM(b) + c FROM t1 GROUP BY ALL;
+ERROR: column "t1.c" must appear in the GROUP BY clause or be used in an aggregate function
+LINE 1: EXPLAIN (COSTS OFF) SELECT a, SUM(b) + c FROM t1 GROUP BY AL...
+ ^
+-- subquery
+EXPLAIN (COSTS OFF) SELECT (SELECT a FROM t1 LIMIT 1), SUM(b) FROM t1 GROUP BY ALL;
+ QUERY PLAN
+-----------------------------------
+ GroupAggregate
+ InitPlan 1
+ -> Limit
+ -> Seq Scan on t1 t1_1
+ -> Seq Scan on t1
+(5 rows)
+
+-- cte
+EXPLAIN (COSTS OFF) WITH a_query AS (SELECT a, SUM(b) AS sum_b FROM t1 GROUP BY ALL)
+SELECT AVG(a), sum_b FROM a_query GROUP BY ALL;
+ QUERY PLAN
+----------------------------
+ HashAggregate
+ Group Key: sum(t1.b)
+ -> HashAggregate
+ Group Key: t1.a
+ -> Seq Scan on t1
+(5 rows)
+
+-- verify deparse
+CREATE VIEW v1 AS SELECT b, COUNT(*) FROM t1 GROUP BY ALL;
+NOTICE: view "v1" will be a temporary view
+SELECT pg_get_viewdef('v1'::regclass);
+ pg_get_viewdef
+-----------------------
+ SELECT b, +
+ count(*) AS count+
+ FROM t1 +
+ GROUP BY ALL;
+(1 row)
+
+DROP VIEW v1;
+DROP TABLE t1;
+--
-- Test GROUP BY matching of join columns that are type-coerced due to USING
--
create temp table t1(f1 int, f2 int);
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 62540b1ffa4..6b2516cd9b1 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -549,6 +549,68 @@ drop table t2;
drop table t3;
drop table p_t1;
+
+--
+-- Test GROUP BY ALL
+--
+
+-- We don't care about the data here, just the proper transformation of the
+-- GROUP BY clause, so test some queries and verify the EXPLAIN plans.
+
+CREATE TEMP TABLE t1 (
+ a int,
+ b int,
+ c int
+);
+
+-- basic field check
+EXPLAIN (COSTS OFF) SELECT b, COUNT(*) FROM t1 GROUP BY ALL;
+
+-- throw a null in the values too
+EXPLAIN (COSTS OFF) SELECT a, COUNT(a) FROM t1 GROUP BY ALL;
+
+-- multiple columns, non-consecutive order
+EXPLAIN (COSTS OFF) SELECT a, SUM(b), b FROM t1 GROUP BY ALL;
+
+-- multi columns, no aggregate
+EXPLAIN (COSTS OFF) SELECT a + b FROM t1 GROUP BY ALL;
+
+-- non-top-level aggregate
+EXPLAIN (COSTS OFF) SELECT a, SUM(b) + 4 FROM t1 GROUP BY ALL;
+
+-- including grouped column
+EXPLAIN (COSTS OFF) SELECT a, SUM(b) + a FROM t1 GROUP BY ALL;
+
+-- oops all aggregates
+EXPLAIN (COSTS OFF) SELECT COUNT(a), SUM(b) FROM t1 GROUP BY ALL;
+
+-- empty column list
+EXPLAIN (COSTS OFF) SELECT FROM t1 GROUP BY ALL;
+
+-- filter
+EXPLAIN (COSTS OFF) SELECT a, COUNT(a) FILTER(WHERE b = 2) FROM t1 GROUP BY ALL;
+
+-- all cols
+EXPLAIN (COSTS OFF) SELECT * FROM t1 GROUP BY ALL;
+EXPLAIN (COSTS OFF) SELECT *, count(*) FROM t1 GROUP BY ALL;
+
+-- expression without including aggregate columns
+EXPLAIN (COSTS OFF) SELECT a, SUM(b) + c FROM t1 GROUP BY ALL;
+
+-- subquery
+EXPLAIN (COSTS OFF) SELECT (SELECT a FROM t1 LIMIT 1), SUM(b) FROM t1 GROUP BY ALL;
+
+-- cte
+EXPLAIN (COSTS OFF) WITH a_query AS (SELECT a, SUM(b) AS sum_b FROM t1 GROUP BY ALL)
+SELECT AVG(a), sum_b FROM a_query GROUP BY ALL;
+
+-- verify deparse
+CREATE VIEW v1 AS SELECT b, COUNT(*) FROM t1 GROUP BY ALL;
+SELECT pg_get_viewdef('v1'::regclass);
+
+DROP VIEW v1;
+DROP TABLE t1;
+
--
-- Test GROUP BY matching of join columns that are type-coerced due to USING
--