GROUP BY DISTINCT

Vik Fearing Sun, 21 Feb 2021 04:52:48 -0800

When combining multiple grouping items, such as rollups and cubes, the
resulting flattened grouping sets can contain duplicate items.  The
standard provides for this by allowing GROUP BY DISTINCT to deduplicate
them prior to doing the actual work.


For example:
GROUP BY ROLLUP (a,b), ROLLUP (a,c)

expands to the sets:
(a,b,c), (a,b), (a,b), (a,c), (a), (a), (a,c), (a), ()

but:
GROUP BY DISTINCT ROLLUP (a,b), ROLLUP (a,c)

expands to just the sets:
(a,b,c), (a,b), (a,c), (a), ()

Attached is a patch to implement this for PostgreSQL.
-- 
Vik Fearing

>From cfb7fc82dd4da1ea0b985ab4b909fac100acc130 Mon Sep 17 00:00:00 2001
From: Vik Fearing <v...@postgresfriends.org>
Date: Sun, 21 Feb 2021 10:26:57 +0100
Subject: [PATCH] implement GROUP BY DISTINCT

---
 doc/src/sgml/queries.sgml                  |  41 ++++++++
 doc/src/sgml/ref/select.sgml               |   9 +-
 src/backend/catalog/sql_features.txt       |   2 +-
 src/backend/nodes/copyfuncs.c              |   2 +
 src/backend/nodes/equalfuncs.c             |   2 +
 src/backend/nodes/list.c                   |  16 +++
 src/backend/nodes/outfuncs.c               |   2 +
 src/backend/nodes/readfuncs.c              |   1 +
 src/backend/optimizer/plan/planner.c       |   2 +-
 src/backend/parser/analyze.c               |   1 +
 src/backend/parser/gram.y                  |  22 ++--
 src/backend/parser/parse_agg.c             |  58 ++++++++++-
 src/include/nodes/parsenodes.h             |   2 +
 src/include/nodes/pg_list.h                |   1 +
 src/include/parser/parse_agg.h             |   2 +-
 src/test/regress/expected/groupingsets.out | 111 +++++++++++++++++++++
 src/test/regress/sql/groupingsets.sql      |  26 +++++
 17 files changed, 284 insertions(+), 16 deletions(-)

diff --git a/doc/src/sgml/queries.sgml b/doc/src/sgml/queries.sgml
index 4741506eb5..3c8facff87 100644
--- a/doc/src/sgml/queries.sgml
+++ b/doc/src/sgml/queries.sgml
@@ -1372,6 +1372,47 @@ GROUP BY GROUPING SETS (
 </programlisting>
    </para>
 
+   <para>
+    When specifying multiple grouping items together, the final set of grouping
+    sets might contain duplicates. For example:
+<programlisting>
+GROUP BY ROLLUP (a, b), ROLLUP (a, c)
+</programlisting>
+    is equivalent to
+<programlisting>
+GROUP BY GROUPING SETS (
+    (a, b, c),
+    (a, b),
+    (a, b),
+    (a, c),
+    (a),
+    (a),
+    (a, c),
+    (a),
+    ()
+)
+</programlisting>
+    If these duplicates are undesirable, they can be removed using the
+    <literal>DISTINCT</literal> clause directly on the <literal>GROUP BY</literal>.
+    Therefore:
+<programlisting>
+GROUP BY <emphasis>DISTINCT</emphasis> ROLLUP (a, b), ROLLUP (a, c)
+</programlisting>
+    is equivalent to
+<programlisting>
+GROUP BY GROUPING SETS (
+    (a, b, c),
+    (a, b),
+    (a, c),
+    (a),
+    ()
+)
+</programlisting>
+    This is not the same as using <literal>SELECT DISTINCT</literal> because the output
+    rows may still contain duplicates.  If any of the ungrouped columns contains NULL,
+    it will be indistiguishable from the NULL used when that same column is grouped.
+   </para>
+
   <note>
    <para>
     The construct <literal>(a, b)</literal> is normally recognized in expressions as
diff --git a/doc/src/sgml/ref/select.sgml b/doc/src/sgml/ref/select.sgml
index eb8b524951..0e2b2bef49 100644
--- a/doc/src/sgml/ref/select.sgml
+++ b/doc/src/sgml/ref/select.sgml
@@ -37,7 +37,7 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="parameter">expression</replac
     [ * | <replaceable class="parameter">expression</replaceable> [ [ AS ] <replaceable class="parameter">output_name</replaceable> ] [, ...] ]
     [ FROM <replaceable class="parameter">from_item</replaceable> [, ...] ]
     [ WHERE <replaceable class="parameter">condition</replaceable> ]
-    [ GROUP BY <replaceable class="parameter">grouping_element</replaceable> [, ...] ]
+    [ GROUP BY [ ALL | DISTINCT ] <replaceable class="parameter">grouping_element</replaceable> [, ...] ]
     [ HAVING <replaceable class="parameter">condition</replaceable> ]
     [ WINDOW <replaceable class="parameter">window_name</replaceable> AS ( <replaceable class="parameter">window_definition</replaceable> ) [, ...] ]
     [ { UNION | INTERSECT | EXCEPT } [ ALL | DISTINCT ] <replaceable class="parameter">select</replaceable> ]
@@ -776,7 +776,7 @@ WHERE <replaceable class="parameter">condition</replaceable>
    <para>
     The optional <literal>GROUP BY</literal> clause has the general form
 <synopsis>
-GROUP BY <replaceable class="parameter">grouping_element</replaceable> [, ...]
+GROUP BY [ ALL | DISTINCT ] <replaceable class="parameter">grouping_element</replaceable> [, ...]
 </synopsis>
    </para>
 
@@ -800,7 +800,10 @@ GROUP BY <replaceable class="parameter">grouping_element</replaceable> [, ...]
     independent <replaceable>grouping sets</replaceable>.  The effect of this is
     equivalent to constructing a <literal>UNION ALL</literal> between
     subqueries with the individual grouping sets as their
-    <literal>GROUP BY</literal> clauses.  For further details on the handling
+    <literal>GROUP BY</literal> clauses.  The optional <literal>DISTINCT</literal>
+    clause removes duplicate sets before processing; it does <emphasis>not</emphasis>
+    transform the <literal>UNION ALL</literal> into a <literal>UNION DISTINCT</literal>.
+    For further details on the handling
     of grouping sets see <xref linkend="queries-grouping-sets"/>.
    </para>
 
diff --git a/src/backend/catalog/sql_features.txt b/src/backend/catalog/sql_features.txt
index a24387c1e7..1ab7ef1f86 100644
--- a/src/backend/catalog/sql_features.txt
+++ b/src/backend/catalog/sql_features.txt
@@ -481,7 +481,7 @@ T351	Bracketed SQL comments (/*...*/ comments)			YES
 T431	Extended grouping capabilities			YES	
 T432	Nested and concatenated GROUPING SETS			YES	
 T433	Multiargument GROUPING function			YES	
-T434	GROUP BY DISTINCT			NO	
+T434	GROUP BY DISTINCT			YES
 T441	ABS and MOD functions			YES	
 T461	Symmetric BETWEEN predicate			YES	
 T471	Result sets return value			NO	
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 65bbc18ecb..9be432ea7b 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -3113,6 +3113,7 @@ _copyQuery(const Query *from)
 	COPY_NODE_FIELD(onConflict);
 	COPY_NODE_FIELD(returningList);
 	COPY_NODE_FIELD(groupClause);
+	COPY_SCALAR_FIELD(groupDistinct);
 	COPY_NODE_FIELD(groupingSets);
 	COPY_NODE_FIELD(havingQual);
 	COPY_NODE_FIELD(windowClause);
@@ -3199,6 +3200,7 @@ _copySelectStmt(const SelectStmt *from)
 	COPY_NODE_FIELD(fromClause);
 	COPY_NODE_FIELD(whereClause);
 	COPY_NODE_FIELD(groupClause);
+	COPY_SCALAR_FIELD(groupDistinct);
 	COPY_NODE_FIELD(havingClause);
 	COPY_NODE_FIELD(windowClause);
 	COPY_NODE_FIELD(valuesLists);
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index c2d73626fc..bc5e9e52fe 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -977,6 +977,7 @@ _equalQuery(const Query *a, const Query *b)
 	COMPARE_NODE_FIELD(onConflict);
 	COMPARE_NODE_FIELD(returningList);
 	COMPARE_NODE_FIELD(groupClause);
+	COMPARE_SCALAR_FIELD(groupDistinct);
 	COMPARE_NODE_FIELD(groupingSets);
 	COMPARE_NODE_FIELD(havingQual);
 	COMPARE_NODE_FIELD(windowClause);
@@ -1053,6 +1054,7 @@ _equalSelectStmt(const SelectStmt *a, const SelectStmt *b)
 	COMPARE_NODE_FIELD(fromClause);
 	COMPARE_NODE_FIELD(whereClause);
 	COMPARE_NODE_FIELD(groupClause);
+	COMPARE_SCALAR_FIELD(groupDistinct);
 	COMPARE_NODE_FIELD(havingClause);
 	COMPARE_NODE_FIELD(windowClause);
 	COMPARE_NODE_FIELD(valuesLists);
diff --git a/src/backend/nodes/list.c b/src/backend/nodes/list.c
index dbf6b30233..94fb236daf 100644
--- a/src/backend/nodes/list.c
+++ b/src/backend/nodes/list.c
@@ -1506,6 +1506,22 @@ list_sort(List *list, list_sort_comparator cmp)
 		qsort(list->elements, len, sizeof(ListCell), (qsort_comparator) cmp);
 }
 
+/*
+ * list_sort comparator for sorting a list into ascending int order.
+ */
+int
+list_int_cmp(const ListCell *p1, const ListCell *p2)
+{
+	int			v1 = lfirst_int(p1);
+	int			v2 = lfirst_int(p2);
+
+	if (v1 < v2)
+		return -1;
+	if (v1 > v2)
+		return 1;
+	return 0;
+}
+
 /*
  * list_sort comparator for sorting a list into ascending OID order.
  */
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index f5dcedf6e8..7c7ef697f9 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -2758,6 +2758,7 @@ _outSelectStmt(StringInfo str, const SelectStmt *node)
 	WRITE_NODE_FIELD(fromClause);
 	WRITE_NODE_FIELD(whereClause);
 	WRITE_NODE_FIELD(groupClause);
+	WRITE_BOOL_FIELD(groupDistinct);
 	WRITE_NODE_FIELD(havingClause);
 	WRITE_NODE_FIELD(windowClause);
 	WRITE_NODE_FIELD(valuesLists);
@@ -2983,6 +2984,7 @@ _outQuery(StringInfo str, const Query *node)
 	WRITE_NODE_FIELD(onConflict);
 	WRITE_NODE_FIELD(returningList);
 	WRITE_NODE_FIELD(groupClause);
+	WRITE_BOOL_FIELD(groupDistinct);
 	WRITE_NODE_FIELD(groupingSets);
 	WRITE_NODE_FIELD(havingQual);
 	WRITE_NODE_FIELD(windowClause);
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
index 4388aae71d..0bc9429d67 100644
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -271,6 +271,7 @@ _readQuery(void)
 	READ_NODE_FIELD(onConflict);
 	READ_NODE_FIELD(returningList);
 	READ_NODE_FIELD(groupClause);
+	READ_BOOL_FIELD(groupDistinct);
 	READ_NODE_FIELD(groupingSets);
 	READ_NODE_FIELD(havingQual);
 	READ_NODE_FIELD(windowClause);
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index adf68d8790..66bad90f60 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -2427,7 +2427,7 @@ preprocess_grouping_sets(PlannerInfo *root)
 	ListCell   *lc_set;
 	grouping_sets_data *gd = palloc0(sizeof(grouping_sets_data));
 
-	parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1);
+	parse->groupingSets = expand_grouping_sets(parse->groupingSets, parse->groupDistinct, -1);
 
 	gd->any_hashable = false;
 	gd->unhashable_refs = NULL;
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index 0f3a70c49a..7149724953 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -1264,6 +1264,7 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt)
 											qry->sortClause,
 											EXPR_KIND_GROUP_BY,
 											false /* allow SQL92 rules */ );
+	qry->groupDistinct = stmt->groupDistinct;
 
 	if (stmt->distinctClause == NIL)
 	{
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index dd72a9fc3c..66e5401684 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -443,7 +443,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 %type <node>	for_locking_item
 %type <list>	for_locking_clause opt_for_locking_clause for_locking_items
 %type <list>	locked_rels_list
-%type <boolean>	all_or_distinct
+%type <boolean>	all_or_distinct distinct_or_all
 
 %type <node>	join_qual
 %type <jtype>	join_type
@@ -11294,7 +11294,8 @@ simple_select:
 					n->intoClause = $4;
 					n->fromClause = $5;
 					n->whereClause = $6;
-					n->groupClause = $7;
+					n->groupClause = linitial($7);
+					n->groupDistinct = (bool) intVal(lsecond($7));
 					n->havingClause = $8;
 					n->windowClause = $9;
 					$$ = (Node *)n;
@@ -11309,7 +11310,8 @@ simple_select:
 					n->intoClause = $4;
 					n->fromClause = $5;
 					n->whereClause = $6;
-					n->groupClause = $7;
+					n->groupClause = linitial($7);
+					n->groupDistinct = (bool) intVal(lsecond($7));
 					n->havingClause = $8;
 					n->windowClause = $9;
 					$$ = (Node *)n;
@@ -11531,12 +11533,19 @@ opt_table:	TABLE
 			| /*EMPTY*/
 		;
 
+/* The difference between these two is the default */
 all_or_distinct:
 			ALL										{ $$ = true; }
 			| DISTINCT								{ $$ = false; }
 			| /*EMPTY*/								{ $$ = false; }
 		;
 
+distinct_or_all:
+			DISTINCT								{ $$ = true; }
+			| ALL									{ $$ = false; }
+			| /*EMPTY*/								{ $$ = false; }
+		;
+
 /* We use (NIL) as a placeholder to indicate that all target expressions
  * should be placed in the DISTINCT list during parsetree analysis.
  */
@@ -11760,8 +11769,8 @@ first_or_next: FIRST_P								{ $$ = 0; }
  * GroupingSet node of some type.
  */
 group_clause:
-			GROUP_P BY group_by_list				{ $$ = $3; }
-			| /*EMPTY*/								{ $$ = NIL; }
+			GROUP_P BY distinct_or_all group_by_list	{ $$ = list_make2($4, makeInteger((int) $3)); }
+			| /*EMPTY*/									{ $$ = list_make2(NIL, makeInteger((int) false)); }
 		;
 
 group_by_list:
@@ -15134,7 +15143,8 @@ PLpgSQL_Expr: opt_distinct_clause opt_target_list
 					n->targetList = $2;
 					n->fromClause = $3;
 					n->whereClause = $4;
-					n->groupClause = $5;
+					n->groupClause = linitial($5);
+					n->groupDistinct = (bool) intVal(lsecond($5));
 					n->havingClause = $6;
 					n->windowClause = $7;
 					n->sortClause = $8;
diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c
index fd08b9eeff..899327aaf4 100644
--- a/src/backend/parser/parse_agg.c
+++ b/src/backend/parser/parse_agg.c
@@ -1071,7 +1071,7 @@ parseCheckAggregates(ParseState *pstate, Query *qry)
 		 * The limit of 4096 is arbitrary and exists simply to avoid resource
 		 * issues from pathological constructs.
 		 */
-		List	   *gsets = expand_grouping_sets(qry->groupingSets, 4096);
+		List	   *gsets = expand_grouping_sets(qry->groupingSets, qry->groupDistinct, 4096);
 
 		if (!gsets)
 			ereport(ERROR,
@@ -1735,6 +1735,33 @@ cmp_list_len_asc(const ListCell *a, const ListCell *b)
 	return (la > lb) ? 1 : (la == lb) ? 0 : -1;
 }
 
+/* list_sort comparator to sort sub-lists by length and contents */
+static int
+cmp_list_len_contents_asc(const ListCell *a, const ListCell *b)
+{
+	int		res = cmp_list_len_asc(a, b);
+
+	if (res == 0)
+	{
+		List		   *la = (List *) lfirst(a);
+		List		   *lb = (List *) lfirst(b);
+		ListCell	   *lca;
+		ListCell	   *lcb;
+
+		forboth(lca, la, lcb, lb)
+		{
+			int		va = intVal(lca);
+			int		vb = intVal(lcb);
+			if (va > vb)
+				return 1;
+			if (va < vb)
+				return -1;
+		}
+	}
+
+	return res;
+}
+
 /*
  * Expand a groupingSets clause to a flat list of grouping sets.
  * The returned list is sorted by length, shortest sets first.
@@ -1743,7 +1770,7 @@ cmp_list_len_asc(const ListCell *a, const ListCell *b)
  * some consistency checks.
  */
 List *
-expand_grouping_sets(List *groupingSets, int limit)
+expand_grouping_sets(List *groupingSets, bool groupDistinct, int limit)
 {
 	List	   *expanded_groups = NIL;
 	List	   *result = NIL;
@@ -1801,8 +1828,31 @@ expand_grouping_sets(List *groupingSets, int limit)
 		result = new_result;
 	}
 
-	/* Now sort the lists by length */
-	list_sort(result, cmp_list_len_asc);
+	/* Now sort the lists by length and deduplicate if necessary */
+	if (!groupDistinct || list_length(result) < 2)
+		list_sort(result, cmp_list_len_asc);
+	else
+	{
+		ListCell   *cell;
+		List	   *prev;
+
+		/* Sort each groupset individually */
+		foreach(cell, result)
+			list_sort(lfirst(cell), list_int_cmp);
+
+		/* Now sort the list of groupsets by length and contents */
+		list_sort(result, cmp_list_len_contents_asc);
+
+		/* Finally, remove duplicates */
+		prev = list_nth_node(List, result, 0);
+		for_each_from(cell, result, 1)
+		{
+			if (equal(lfirst(cell), prev))
+				foreach_delete_current(result, cell);
+			else
+				prev = lfirst(cell);
+		}
+	}
 
 	return result;
 }
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 236832a2ca..5af9f6bd9d 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -146,6 +146,7 @@ typedef struct Query
 	List	   *returningList;	/* return-values list (of TargetEntry) */
 
 	List	   *groupClause;	/* a list of SortGroupClause's */
+	bool		groupDistinct;	/* is the group by clause distinct? */
 
 	List	   *groupingSets;	/* a list of GroupingSet's if present */
 
@@ -1629,6 +1630,7 @@ typedef struct SelectStmt
 	List	   *fromClause;		/* the FROM clause */
 	Node	   *whereClause;	/* WHERE qualification */
 	List	   *groupClause;	/* GROUP BY clauses */
+	bool		groupDistinct;	/* Is this GROUP BY DISTINCT? */
 	Node	   *havingClause;	/* HAVING conditional-expression */
 	List	   *windowClause;	/* WINDOW window_name AS (...), ... */
 
diff --git a/src/include/nodes/pg_list.h b/src/include/nodes/pg_list.h
index 404e03f132..30f98c4595 100644
--- a/src/include/nodes/pg_list.h
+++ b/src/include/nodes/pg_list.h
@@ -604,6 +604,7 @@ extern pg_nodiscard List *list_copy_deep(const List *oldlist);
 typedef int (*list_sort_comparator) (const ListCell *a, const ListCell *b);
 extern void list_sort(List *list, list_sort_comparator cmp);
 
+extern int	list_int_cmp(const ListCell *p1, const ListCell *p2);
 extern int	list_oid_cmp(const ListCell *p1, const ListCell *p2);
 
 #endif							/* PG_LIST_H */
diff --git a/src/include/parser/parse_agg.h b/src/include/parser/parse_agg.h
index 0a2546c3ea..4dea01752a 100644
--- a/src/include/parser/parse_agg.h
+++ b/src/include/parser/parse_agg.h
@@ -26,7 +26,7 @@ extern void transformWindowFuncCall(ParseState *pstate, WindowFunc *wfunc,
 
 extern void parseCheckAggregates(ParseState *pstate, Query *qry);
 
-extern List *expand_grouping_sets(List *groupingSets, int limit);
+extern List *expand_grouping_sets(List *groupingSets, bool groupDistinct, int limit);
 
 extern int	get_aggregate_argtypes(Aggref *aggref, Oid *inputTypes);
 
diff --git a/src/test/regress/expected/groupingsets.out b/src/test/regress/expected/groupingsets.out
index 7c844c6e09..4c467c1b15 100644
--- a/src/test/regress/expected/groupingsets.out
+++ b/src/test/regress/expected/groupingsets.out
@@ -1929,4 +1929,115 @@ set work_mem to default;
 
 drop table gs_group_1;
 drop table gs_hash_1;
+-- GROUP BY DISTINCT
+-- "normal" behavior...
+select a, b, c
+from (values (1, 2, 3), (4, null, 6), (7, 8, 9)) as t (a, b, c)
+group by all rollup(a, b), rollup(a, c)
+order by a, b, c;
+ a | b | c 
+---+---+---
+ 1 | 2 | 3
+ 1 | 2 |  
+ 1 | 2 |  
+ 1 |   | 3
+ 1 |   | 3
+ 1 |   |  
+ 1 |   |  
+ 1 |   |  
+ 4 |   | 6
+ 4 |   | 6
+ 4 |   | 6
+ 4 |   |  
+ 4 |   |  
+ 4 |   |  
+ 4 |   |  
+ 4 |   |  
+ 7 | 8 | 9
+ 7 | 8 |  
+ 7 | 8 |  
+ 7 |   | 9
+ 7 |   | 9
+ 7 |   |  
+ 7 |   |  
+ 7 |   |  
+   |   |  
+(25 rows)
+
+-- ...which is also the default
+select a, b, c
+from (values (1, 2, 3), (4, null, 6), (7, 8, 9)) as t (a, b, c)
+group by rollup(a, b), rollup(a, c)
+order by a, b, c;
+ a | b | c 
+---+---+---
+ 1 | 2 | 3
+ 1 | 2 |  
+ 1 | 2 |  
+ 1 |   | 3
+ 1 |   | 3
+ 1 |   |  
+ 1 |   |  
+ 1 |   |  
+ 4 |   | 6
+ 4 |   | 6
+ 4 |   | 6
+ 4 |   |  
+ 4 |   |  
+ 4 |   |  
+ 4 |   |  
+ 4 |   |  
+ 7 | 8 | 9
+ 7 | 8 |  
+ 7 | 8 |  
+ 7 |   | 9
+ 7 |   | 9
+ 7 |   |  
+ 7 |   |  
+ 7 |   |  
+   |   |  
+(25 rows)
+
+-- "group by distinct" behavior...
+select a, b, c
+from (values (1, 2, 3), (4, null, 6), (7, 8, 9)) as t (a, b, c)
+group by distinct rollup(a, b), rollup(a, c)
+order by a, b, c;
+ a | b | c 
+---+---+---
+ 1 | 2 | 3
+ 1 | 2 |  
+ 1 |   | 3
+ 1 |   |  
+ 4 |   | 6
+ 4 |   | 6
+ 4 |   |  
+ 4 |   |  
+ 7 | 8 | 9
+ 7 | 8 |  
+ 7 |   | 9
+ 7 |   |  
+   |   |  
+(13 rows)
+
+-- ...which is not the same as "select distinct"
+select distinct a, b, c
+from (values (1, 2, 3), (4, null, 6), (7, 8, 9)) as t (a, b, c)
+group by rollup(a, b), rollup(a, c)
+order by a, b, c;
+ a | b | c 
+---+---+---
+ 1 | 2 | 3
+ 1 | 2 |  
+ 1 |   | 3
+ 1 |   |  
+ 4 |   | 6
+ 4 |   |  
+ 7 | 8 | 9
+ 7 | 8 |  
+ 7 |   | 9
+ 7 |   |  
+   |   |  
+(11 rows)
+
 -- end
diff --git a/src/test/regress/sql/groupingsets.sql b/src/test/regress/sql/groupingsets.sql
index 18ae803e9d..3944944704 100644
--- a/src/test/regress/sql/groupingsets.sql
+++ b/src/test/regress/sql/groupingsets.sql
@@ -529,4 +529,30 @@ set work_mem to default;
 drop table gs_group_1;
 drop table gs_hash_1;
 
+-- GROUP BY DISTINCT
+
+-- "normal" behavior...
+select a, b, c
+from (values (1, 2, 3), (4, null, 6), (7, 8, 9)) as t (a, b, c)
+group by all rollup(a, b), rollup(a, c)
+order by a, b, c;
+
+-- ...which is also the default
+select a, b, c
+from (values (1, 2, 3), (4, null, 6), (7, 8, 9)) as t (a, b, c)
+group by rollup(a, b), rollup(a, c)
+order by a, b, c;
+
+-- "group by distinct" behavior...
+select a, b, c
+from (values (1, 2, 3), (4, null, 6), (7, 8, 9)) as t (a, b, c)
+group by distinct rollup(a, b), rollup(a, c)
+order by a, b, c;
+
+-- ...which is not the same as "select distinct"
+select distinct a, b, c
+from (values (1, 2, 3), (4, null, 6), (7, 8, 9)) as t (a, b, c)
+group by rollup(a, b), rollup(a, c)
+order by a, b, c;
+
 -- end
-- 
2.25.1

GROUP BY DISTINCT

Reply via email to