From 2db5d944c7f353c4d1e618ff27ca7eaeb1f9244f Mon Sep 17 00:00:00 2001
From: Richard Guo <guofenglinux@gmail.com>
Date: Wed, 22 Oct 2025 12:25:08 +0900
Subject: [PATCH v2] Fix distinctness check for queries with grouping sets

query_is_distinct_for() is intended to determine whether a query never
returns duplicates of the specified columns.  For queries using
grouping sets, if there are no grouping expressions, the query may
contain one or more empty grouping sets.  The goal is to detect
whether there is exactly one empty grouping set, in which case the
query would return a single row and thus be distinct.

The previous logic in query_is_distinct_for() was incomplete.  It
failed to consider cases where the DISTINCT clause is used on the
GROUP BY, in which case duplicate empty grouping sets are removed,
leaving only one.  It also did not correctly handle all possible
structures of GroupingSet nodes that represent a single empty grouping
set.

To fix, add a check for the groupDistinct flag, and expand the query's
groupingSets tree into a flat list, then verify that the expanded list
contains only one element.

No backpatch as this could result in plan changes.

Author: Richard Guo <guofenglinux@gmail.com>
Discussion: https://postgr.es/m/CAMbWs480Z04NtP8-O55uROq2Zego309+h3hhaZhz6ztmgWLEBw@mail.gmail.com
---
 src/backend/optimizer/plan/analyzejoins.c | 19 ++++--
 src/test/regress/expected/join.out        | 83 +++++++++++++++++++++++
 src/test/regress/sql/join.sql             | 32 +++++++++
 3 files changed, 127 insertions(+), 7 deletions(-)

diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c
index 6a3c030e8ef..de7cb59e5c2 100644
--- a/src/backend/optimizer/plan/analyzejoins.c
+++ b/src/backend/optimizer/plan/analyzejoins.c
@@ -31,6 +31,7 @@
 #include "optimizer/placeholder.h"
 #include "optimizer/planmain.h"
 #include "optimizer/restrictinfo.h"
+#include "parser/parse_agg.h"
 #include "rewrite/rewriteManip.h"
 #include "utils/lsyscache.h"
 
@@ -1175,6 +1176,8 @@ query_is_distinct_for(Query *query, List *colnos, List *opids)
 	}
 	else if (query->groupingSets)
 	{
+		List	   *gsets;
+
 		/*
 		 * If we have grouping sets with expressions, we probably don't have
 		 * uniqueness and analysis would be hard. Punt.
@@ -1184,15 +1187,17 @@ query_is_distinct_for(Query *query, List *colnos, List *opids)
 
 		/*
 		 * If we have no groupClause (therefore no grouping expressions), we
-		 * might have one or many empty grouping sets. If there's just one,
-		 * then we're returning only one row and are certainly unique. But
-		 * otherwise, we know we're certainly not unique.
+		 * might have one or many empty grouping sets. If there's just one, or
+		 * if the DISTINCT clause is used on the GROUP BY, then we're
+		 * returning only one row and are certainly unique. But otherwise, we
+		 * know we're certainly not unique.
 		 */
-		if (list_length(query->groupingSets) == 1 &&
-			((GroupingSet *) linitial(query->groupingSets))->kind == GROUPING_SET_EMPTY)
+		if (query->groupDistinct)
 			return true;
-		else
-			return false;
+
+		gsets = expand_grouping_sets(query->groupingSets, false, -1);
+
+		return (list_length(gsets) == 1);
 	}
 	else
 	{
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index d10095de70f..fafe132af3d 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -6134,6 +6134,40 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s
  Seq Scan on d
 (1 row)
 
+-- check that join removal works for a left join when joining a subquery
+-- that is guaranteed to be unique by GROUPING SETS
+explain (costs off)
+select d.* from d left join (select 1 as x from b group by ()) s
+  on d.a = s.x;
+  QUERY PLAN   
+---------------
+ Seq Scan on d
+(1 row)
+
+explain (costs off)
+select d.* from d left join (select 1 as x from b group by grouping sets(())) s
+  on d.a = s.x;
+  QUERY PLAN   
+---------------
+ Seq Scan on d
+(1 row)
+
+explain (costs off)
+select d.* from d left join (select 1 as x from b group by grouping sets(()), grouping sets(())) s
+  on d.a = s.x;
+  QUERY PLAN   
+---------------
+ Seq Scan on d
+(1 row)
+
+explain (costs off)
+select d.* from d left join (select 1 as x from b group by distinct grouping sets((), ())) s
+  on d.a = s.x;
+  QUERY PLAN   
+---------------
+ Seq Scan on d
+(1 row)
+
 -- similarly, but keying off a DISTINCT clause
 explain (costs off)
 select d.* from d left join (select distinct * from b) s
@@ -6162,6 +6196,55 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s
          ->  Seq Scan on d
 (8 rows)
 
+-- join removal is not possible when the GROUP BY contains non-empty grouping
+-- sets or multiple empty grouping sets
+explain (costs off)
+select d.* from d left join (select 1 as x from b group by rollup(x)) s
+  on d.a = s.x;
+           QUERY PLAN            
+---------------------------------
+ Hash Left Join
+   Hash Cond: (d.a = (1))
+   ->  Seq Scan on d
+   ->  Hash
+         ->  MixedAggregate
+               Hash Key: 1
+               Group Key: ()
+               ->  Seq Scan on b
+(8 rows)
+
+explain (costs off)
+select d.* from d left join (select 1 as x from b group by grouping sets((), ())) s
+  on d.a = s.x;
+               QUERY PLAN                
+-----------------------------------------
+ Hash Left Join
+   Hash Cond: (d.a = (1))
+   ->  Seq Scan on d
+   ->  Hash
+         ->  Append
+               ->  Result
+                     Replaces: Aggregate
+               ->  Result
+                     Replaces: Aggregate
+(9 rows)
+
+explain (costs off)
+select d.* from d left join (select 1 as x from b group by grouping sets((), grouping sets(()))) s
+  on d.a = s.x;
+               QUERY PLAN                
+-----------------------------------------
+ Hash Left Join
+   Hash Cond: (d.a = (1))
+   ->  Seq Scan on d
+   ->  Hash
+         ->  Append
+               ->  Result
+                     Replaces: Aggregate
+               ->  Result
+                     Replaces: Aggregate
+(9 rows)
+
 -- similarly, but keying off a DISTINCT clause
 explain (costs off)
 select d.* from d left join (select distinct * from b) s
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index b1732453e8d..40d901a5307 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -2212,6 +2212,24 @@ explain (costs off)
 select d.* from d left join (select * from b group by b.id, b.c_id) s
   on d.a = s.id and d.b = s.c_id;
 
+-- check that join removal works for a left join when joining a subquery
+-- that is guaranteed to be unique by GROUPING SETS
+explain (costs off)
+select d.* from d left join (select 1 as x from b group by ()) s
+  on d.a = s.x;
+
+explain (costs off)
+select d.* from d left join (select 1 as x from b group by grouping sets(())) s
+  on d.a = s.x;
+
+explain (costs off)
+select d.* from d left join (select 1 as x from b group by grouping sets(()), grouping sets(())) s
+  on d.a = s.x;
+
+explain (costs off)
+select d.* from d left join (select 1 as x from b group by distinct grouping sets((), ())) s
+  on d.a = s.x;
+
 -- similarly, but keying off a DISTINCT clause
 explain (costs off)
 select d.* from d left join (select distinct * from b) s
@@ -2225,6 +2243,20 @@ explain (costs off)
 select d.* from d left join (select * from b group by b.id, b.c_id) s
   on d.a = s.id;
 
+-- join removal is not possible when the GROUP BY contains non-empty grouping
+-- sets or multiple empty grouping sets
+explain (costs off)
+select d.* from d left join (select 1 as x from b group by rollup(x)) s
+  on d.a = s.x;
+
+explain (costs off)
+select d.* from d left join (select 1 as x from b group by grouping sets((), ())) s
+  on d.a = s.x;
+
+explain (costs off)
+select d.* from d left join (select 1 as x from b group by grouping sets((), grouping sets(()))) s
+  on d.a = s.x;
+
 -- similarly, but keying off a DISTINCT clause
 explain (costs off)
 select d.* from d left join (select distinct * from b) s
-- 
2.39.5 (Apple Git-154)