From 20282bc40bb59686a11eb8180b9c4d39d224f570 Mon Sep 17 00:00:00 2001
From: "dgrowley@gmail.com" <dgrowley@gmail.com>
Date: Tue, 14 Apr 2020 20:21:07 +1200
Subject: [PATCH v5 2/2] Skip DISTINCT / GROUP BY if input is already unique

If we can detect that the input to a DISTINCT is already unique on the
SELECT list, then we can completely skip doing any uniquification work.
Likewise with GROUP BY, however, we can only do this when there are no
aggregate functions.
---
 .../postgres_fdw/expected/postgres_fdw.out    |  28 +-
 src/backend/optimizer/plan/planner.c          |  27 +-
 src/test/regress/expected/aggregates.out      |  65 ++--
 src/test/regress/expected/select_distinct.out | 335 ++++++++++++++++++
 src/test/regress/sql/select_distinct.sql      | 118 ++++++
 5 files changed, 515 insertions(+), 58 deletions(-)

diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index e941b7e538..88441568b7 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -2904,22 +2904,20 @@ select sum(c1%3), sum(distinct c1%3 order by c1%3) filter (where c1%3 < 2), c2 f
 -- Outer query is aggregation query
 explain (verbose, costs off)
 select distinct (select count(*) filter (where t2.c2 = 6 and t2.c1 < 10) from ft1 t1 where t1.c1 = 6) from ft2 t2 where t2.c2 % 6 = 0 order by 1;
-                                                          QUERY PLAN                                                          
-------------------------------------------------------------------------------------------------------------------------------
- Unique
+                                                       QUERY PLAN                                                       
+------------------------------------------------------------------------------------------------------------------------
+ Sort
    Output: ((SubPlan 1))
-   ->  Sort
-         Output: ((SubPlan 1))
-         Sort Key: ((SubPlan 1))
-         ->  Foreign Scan
-               Output: (SubPlan 1)
-               Relations: Aggregate on (public.ft2 t2)
-               Remote SQL: SELECT count(*) FILTER (WHERE ((c2 = 6) AND ("C 1" < 10))) FROM "S 1"."T 1" WHERE (((c2 % 6) = 0))
-               SubPlan 1
-                 ->  Foreign Scan on public.ft1 t1
-                       Output: (count(*) FILTER (WHERE ((t2.c2 = 6) AND (t2.c1 < 10))))
-                       Remote SQL: SELECT NULL FROM "S 1"."T 1" WHERE (("C 1" = 6))
-(13 rows)
+   Sort Key: ((SubPlan 1))
+   ->  Foreign Scan
+         Output: (SubPlan 1)
+         Relations: Aggregate on (public.ft2 t2)
+         Remote SQL: SELECT count(*) FILTER (WHERE ((c2 = 6) AND ("C 1" < 10))) FROM "S 1"."T 1" WHERE (((c2 % 6) = 0))
+         SubPlan 1
+           ->  Foreign Scan on public.ft1 t1
+                 Output: (count(*) FILTER (WHERE ((t2.c2 = 6) AND (t2.c1 < 10))))
+                 Remote SQL: SELECT NULL FROM "S 1"."T 1" WHERE (("C 1" = 6))
+(11 rows)
 
 select distinct (select count(*) filter (where t2.c2 = 6 and t2.c1 < 10) from ft1 t1 where t1.c1 = 6) from ft2 t2 where t2.c2 % 6 = 0 order by 1;
  count 
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 6f1d9b6b2f..f5b2794c14 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -3815,6 +3815,23 @@ create_grouping_paths(PlannerInfo *root,
 	Query	   *parse = root->parse;
 	RelOptInfo *grouped_rel;
 	RelOptInfo *partially_grouped_rel;
+	List	*groupExprs = NIL;
+
+	if (root->parse->groupingSets == NIL)
+	{
+		groupExprs  = get_sortgrouplist_exprs(parse->groupClause,
+														parse->targetList);
+		/*
+		 * If the groupby clauses is unique already,  groupping node is not necessary
+		 * if there is no aggreation functions
+		 */
+		if (groupExprs != NIL &&
+			!parse->hasAggs &&
+			!parse->hasWindowFuncs &&
+			parse->havingQual == NULL &&
+			relation_has_uniquekeys_for(root, input_rel, groupExprs))
+			return input_rel;
+	}
 
 	/*
 	 * Create grouping relation to hold fully aggregated grouping and/or
@@ -4741,6 +4758,12 @@ create_distinct_paths(PlannerInfo *root,
 	bool		allow_hash;
 	Path	   *path;
 	ListCell   *lc;
+	List	   *distinctExprs =  get_sortgrouplist_exprs(parse->distinctClause,
+														 parse->targetList);
+
+	/* If the result is unique already, we just return the input_rel directly */
+	if (relation_has_uniquekeys_for(root, input_rel, distinctExprs))
+		return input_rel;
 
 	/* For now, do all work in the (DISTINCT, NULL) upperrel */
 	distinct_rel = fetch_upper_rel(root, UPPERREL_DISTINCT, NULL);
@@ -4778,10 +4801,6 @@ create_distinct_paths(PlannerInfo *root,
 		/*
 		 * Otherwise, the UNIQUE filter has effects comparable to GROUP BY.
 		 */
-		List	   *distinctExprs;
-
-		distinctExprs = get_sortgrouplist_exprs(parse->distinctClause,
-												parse->targetList);
 		numDistinctRows = estimate_num_groups(root, distinctExprs,
 											  cheapest_input_path->rows,
 											  NULL);
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 7b574efb0c..42bd180895 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -870,14 +870,12 @@ explain (costs off)
   select distinct max(unique2) from tenk1;
                              QUERY PLAN                              
 ---------------------------------------------------------------------
- HashAggregate
-   Group Key: $0
+ Result
    InitPlan 1 (returns $0)
      ->  Limit
            ->  Index Only Scan Backward using tenk1_unique2 on tenk1
                  Index Cond: (unique2 IS NOT NULL)
-   ->  Result
-(7 rows)
+(5 rows)
 
 select distinct max(unique2) from tenk1;
  max  
@@ -1036,7 +1034,7 @@ explain (costs off)
   select distinct min(f1), max(f1) from minmaxtest;
                                          QUERY PLAN                                          
 ---------------------------------------------------------------------------------------------
- Unique
+ Result
    InitPlan 1 (returns $0)
      ->  Limit
            ->  Merge Append
@@ -1059,10 +1057,7 @@ explain (costs off)
                  ->  Index Only Scan using minmaxtest2i on minmaxtest2 minmaxtest_8
                        Index Cond: (f1 IS NOT NULL)
                  ->  Index Only Scan Backward using minmaxtest3i on minmaxtest3 minmaxtest_9
-   ->  Sort
-         Sort Key: ($0), ($1)
-         ->  Result
-(26 rows)
+(23 rows)
 
 select distinct min(f1), max(f1) from minmaxtest;
  min | max 
@@ -1092,12 +1087,10 @@ create temp table t2 (x int, y int, z int, primary key (x, y));
 create temp table t3 (a int, b int, c int, primary key(a, b) deferrable);
 -- Non-primary-key columns can be removed from GROUP BY
 explain (costs off) select * from t1 group by a,b,c,d;
-      QUERY PLAN      
-----------------------
- HashAggregate
-   Group Key: a, b
-   ->  Seq Scan on t1
-(3 rows)
+   QUERY PLAN   
+----------------
+ Seq Scan on t1
+(1 row)
 
 -- No removal can happen if the complete PK is not present in GROUP BY
 explain (costs off) select a,c from t1 group by a,c,d;
@@ -1112,16 +1105,14 @@ explain (costs off) select a,c from t1 group by a,c,d;
 explain (costs off) select t2.*
 from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y
 group by t1.a,t1.c,t1.d,t2.x,t2.y,t2.z;
-                      QUERY PLAN                      
-------------------------------------------------------
- HashAggregate
-   Group Key: t1.a, t1.c, t1.d, t2.x, t2.y
-   ->  Hash Join
-         Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b))
-         ->  Seq Scan on t2
-         ->  Hash
-               ->  Seq Scan on t1
-(7 rows)
+                   QUERY PLAN                   
+------------------------------------------------
+ Hash Join
+   Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b))
+   ->  Seq Scan on t2
+   ->  Hash
+         ->  Seq Scan on t1
+(5 rows)
 
 -- Test case where t1 can be optimized but not t2
 explain (costs off) select t1.*,t2.x,t2.z
@@ -1161,12 +1152,10 @@ explain (costs off) select * from t1 group by a,b,c,d;
 
 -- Okay to remove columns if we're only querying the parent.
 explain (costs off) select * from only t1 group by a,b,c,d;
-      QUERY PLAN      
-----------------------
- HashAggregate
-   Group Key: a, b
-   ->  Seq Scan on t1
-(3 rows)
+   QUERY PLAN   
+----------------
+ Seq Scan on t1
+(1 row)
 
 create temp table p_t1 (
   a int,
@@ -1179,14 +1168,12 @@ create temp table p_t1_1 partition of p_t1 for values in(1);
 create temp table p_t1_2 partition of p_t1 for values in(2);
 -- Ensure we can remove non-PK columns for partitioned tables.
 explain (costs off) select * from p_t1 group by a,b,c,d;
-           QUERY PLAN           
---------------------------------
- HashAggregate
-   Group Key: p_t1.a, p_t1.b
-   ->  Append
-         ->  Seq Scan on p_t1_1
-         ->  Seq Scan on p_t1_2
-(5 rows)
+        QUERY PLAN        
+--------------------------
+ Append
+   ->  Seq Scan on p_t1_1
+   ->  Seq Scan on p_t1_2
+(3 rows)
 
 drop table t1 cascade;
 NOTICE:  drop cascades to table t1c
diff --git a/src/test/regress/expected/select_distinct.out b/src/test/regress/expected/select_distinct.out
index 11c6f50fbf..227bc27af4 100644
--- a/src/test/regress/expected/select_distinct.out
+++ b/src/test/regress/expected/select_distinct.out
@@ -306,3 +306,338 @@ SELECT null IS NOT DISTINCT FROM null as "yes";
  t
 (1 row)
 
+CREATE TABLE uqk1(a int, pk int primary key, c int,  d int);
+CREATE TABLE uqk2(a int, pk int primary key, c int,  d int);
+INSERT INTO uqk1 VALUES(1, 1, 1, 1), (2, 2, 2, 2), (3, 3, 3, 3);
+INSERT INTO uqk2 VALUES(1, 1, 1, 1), (4, 4, 4, 4), (5, 5, 5, 5);
+ANALYZE uqk1;
+ANALYZE uqk2;
+-- Test single table
+EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM uqk1;
+    QUERY PLAN    
+------------------
+ Seq Scan on uqk1
+(1 row)
+
+EXPLAIN (COSTS OFF) SELECT DISTINCT c, d FROM uqk1;
+          QUERY PLAN          
+------------------------------
+ Unique
+   ->  Sort
+         Sort Key: c, d
+         ->  Seq Scan on uqk1
+(4 rows)
+
+CREATE UNIQUE INDEX uqk1_ukcd ON uqk1(c, d);
+EXPLAIN (COSTS OFF) SELECT DISTINCT c, d FROM uqk1;
+          QUERY PLAN          
+------------------------------
+ Unique
+   ->  Sort
+         Sort Key: c, d
+         ->  Seq Scan on uqk1
+(4 rows)
+
+EXPLAIN (COSTS OFF) SELECT DISTINCT c, d FROM uqk1 WHERE c is NOT NULL;
+              QUERY PLAN               
+---------------------------------------
+ Unique
+   ->  Sort
+         Sort Key: c, d
+         ->  Seq Scan on uqk1
+               Filter: (c IS NOT NULL)
+(5 rows)
+
+ALTER TABLE uqk1 ALTER COLUMN d SET NOT NULL;
+EXPLAIN (COSTS OFF) SELECT DISTINCT c, d FROM uqk1 WHERE c is NOT NULL;
+        QUERY PLAN         
+---------------------------
+ Seq Scan on uqk1
+   Filter: (c IS NOT NULL)
+(2 rows)
+
+EXPLAIN (COSTS OFF) SELECT DISTINCT d FROM uqk1 WHERE c is NOT NULL;
+           QUERY PLAN            
+---------------------------------
+ HashAggregate
+   Group Key: d
+   ->  Seq Scan on uqk1
+         Filter: (c IS NOT NULL)
+(4 rows)
+
+EXPLAIN (COSTS OFF) SELECT DISTINCT a FROM uqk1 WHERE pk = 1;
+     QUERY PLAN     
+--------------------
+ Seq Scan on uqk1
+   Filter: (pk = 1)
+(2 rows)
+
+-- Test join
+-- both uqk1 (c, d) and uqk2(pk) are unique key, so distinct is not needed.
+EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.c, uqk1.d FROM uqk1, uqk2
+WHERE uqk1.c is NOT NULL AND uqk1.a = uqk2.pk;
+           QUERY PLAN            
+---------------------------------
+ Hash Join
+   Hash Cond: (uqk1.a = uqk2.pk)
+   ->  Seq Scan on uqk1
+         Filter: (c IS NOT NULL)
+   ->  Hash
+         ->  Seq Scan on uqk2
+(6 rows)
+
+-- Distinct is needed since the outer join
+EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.pk FROM uqk1 RIGHT JOIN uqk2 ON (uqk1.a = uqk2.pk)
+order BY 1;
+                 QUERY PLAN                  
+---------------------------------------------
+ Unique
+   ->  Sort
+         Sort Key: uqk1.pk
+         ->  Hash Right Join
+               Hash Cond: (uqk1.a = uqk2.pk)
+               ->  Seq Scan on uqk1
+               ->  Hash
+                     ->  Seq Scan on uqk2
+(8 rows)
+
+SELECT DISTINCT uqk1.pk FROM uqk1 RIGHT JOIN uqk2 ON (uqk1.a = uqk2.pk) order BY 1;
+ pk 
+----
+  1
+   
+(2 rows)
+
+-- Distinct is not needed since uqk1 is the left table in outer join
+EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.c, uqk1.d FROM uqk1 LEFT JOIN uqk2 ON (uqk1.a = uqk2.pk)
+WHERE uqk1.c is NOT NULL order BY 1, 2;
+           QUERY PLAN            
+---------------------------------
+ Sort
+   Sort Key: uqk1.c, uqk1.d
+   ->  Seq Scan on uqk1
+         Filter: (c IS NOT NULL)
+(4 rows)
+
+SELECT DISTINCT uqk1.c, uqk1.d FROM uqk1 LEFT JOIN uqk2 ON (uqk1.a = uqk2.pk)
+WHERE uqk1.c is NOT NULL order BY 1, 2;
+ c | d 
+---+---
+ 1 | 1
+ 2 | 2
+ 3 | 3
+(3 rows)
+
+-- Distinct is ok even with NOT clause-list both UNIQUE keys shown in targetlist
+EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM uqk1, uqk2;
+          QUERY PLAN          
+------------------------------
+ Nested Loop
+   ->  Seq Scan on uqk1
+   ->  Materialize
+         ->  Seq Scan on uqk2
+(4 rows)
+
+SELECT DISTINCT * FROM uqk1, uqk2 order BY 1, 2, 3, 4, 5, 6;
+ a | pk | c | d | a | pk | c | d 
+---+----+---+---+---+----+---+---
+ 1 |  1 | 1 | 1 | 1 |  1 | 1 | 1
+ 1 |  1 | 1 | 1 | 4 |  4 | 4 | 4
+ 1 |  1 | 1 | 1 | 5 |  5 | 5 | 5
+ 2 |  2 | 2 | 2 | 1 |  1 | 1 | 1
+ 2 |  2 | 2 | 2 | 4 |  4 | 4 | 4
+ 2 |  2 | 2 | 2 | 5 |  5 | 5 | 5
+ 3 |  3 | 3 | 3 | 1 |  1 | 1 | 1
+ 3 |  3 | 3 | 3 | 4 |  4 | 4 | 4
+ 3 |  3 | 3 | 3 | 5 |  5 | 5 | 5
+(9 rows)
+
+-- Test Semi/Anti JOIN
+EXPLAIN (COSTS OFF) SELECT DISTINCT pk FROM uqk1 WHERE d in (SELECT d FROM uqk2);
+           QUERY PLAN           
+--------------------------------
+ Hash Semi Join
+   Hash Cond: (uqk1.d = uqk2.d)
+   ->  Seq Scan on uqk1
+   ->  Hash
+         ->  Seq Scan on uqk2
+(5 rows)
+
+EXPLAIN (COSTS OFF) SELECT DISTINCT pk FROM uqk1 WHERE d NOT in (SELECT d FROM uqk2);
+             QUERY PLAN             
+------------------------------------
+ Seq Scan on uqk1
+   Filter: (NOT (hashed SubPlan 1))
+   SubPlan 1
+     ->  Seq Scan on uqk2
+(4 rows)
+
+-- Test Unique Key FOR one-row case, DISTINCT is NOT needed as well.
+-- uqk1.d is the a uniquekey due to onerow rule. uqk2.pk is pk
+EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.d FROM uqk1, uqk2 WHERE uqk1.pk = 2 AND uqk1.d = uqk2.pk;
+            QUERY PLAN             
+-----------------------------------
+ Nested Loop
+   Join Filter: (uqk1.d = uqk2.pk)
+   ->  Seq Scan on uqk1
+         Filter: (pk = 2)
+   ->  Seq Scan on uqk2
+(5 rows)
+
+SELECT DISTINCT uqk1.d FROM uqk1, uqk2 WHERE uqk1.pk = 2 AND uqk1.d = uqk2.pk order BY 1;
+ d 
+---
+(0 rows)
+
+-- Both uqk1.d AND uqk2.c are the a uniquekey due to onerow rule
+EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.d FROM uqk1, uqk2 WHERE uqk1.pk = 2
+AND uqk2.pk = 1 AND uqk1.d = uqk2.d ;
+            QUERY PLAN            
+----------------------------------
+ Nested Loop
+   Join Filter: (uqk1.d = uqk2.d)
+   ->  Seq Scan on uqk1
+         Filter: (pk = 2)
+   ->  Seq Scan on uqk2
+         Filter: (pk = 1)
+(6 rows)
+
+SELECT DISTINCT uqk1.d FROM uqk1, uqk2 WHERE uqk1.pk = 2 AND uqk2.pk = 1
+AND uqk1.d = uqk2.d order BY 1;
+ d 
+---
+(0 rows)
+
+-- Both UniqueKey in targetList
+EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.c, uqk2.c FROM uqk1, uqk2 WHERE uqk1.pk = 2 AND uqk2.pk = 1;
+        QUERY PLAN        
+--------------------------
+ Nested Loop
+   ->  Seq Scan on uqk1
+         Filter: (pk = 2)
+   ->  Seq Scan on uqk2
+         Filter: (pk = 1)
+(5 rows)
+
+SELECT DISTINCT uqk1.c, uqk2.c FROM uqk1, uqk2 WHERE uqk1.pk = 2 AND uqk2.pk = 1 order BY 1, 2;
+ c | c 
+---+---
+ 2 | 1
+(1 row)
+
+-- Test SubQuery
+-- t2(a, c) is UNIQUE key because OF group BY
+EXPLAIN (COSTS OFF) SELECT DISTINCT t2.a, t2.c FROM uqk1 t1 inner JOIN
+(SELECT a, c, sum(pk) as t2b FROM uqk2 group BY a, c) t2
+ON (t2.t2b = t1.pk);
+              QUERY PLAN               
+---------------------------------------
+ Hash Join
+   Hash Cond: ((sum(uqk2.pk)) = t1.pk)
+   ->  HashAggregate
+         Group Key: uqk2.a, uqk2.c
+         ->  Seq Scan on uqk2
+   ->  Hash
+         ->  Seq Scan on uqk1 t1
+(7 rows)
+
+-- Test Partition TABLE
+-- Test partitioned TABLE
+CREATE TABLE dist_p (a int, b int NOT NULL, c int NOT NULL, d int) PARTITION BY RANGE (b);
+CREATE TABLE dist_p0 PARTITION OF dist_p FOR VALUES FROM (1) to (10);
+CREATE TABLE dist_p1 PARTITION OF dist_p FOR VALUES FROM (11) to (20);
+-- CREATE unqiue INDEX ON dist_p
+CREATE UNIQUE INDEX dist_p_uk_b_c ON dist_p(b, c);
+EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM dist_p;
+             QUERY PLAN             
+------------------------------------
+ Append
+   ->  Seq Scan on dist_p0 dist_p_1
+   ->  Seq Scan on dist_p1 dist_p_2
+(3 rows)
+
+DROP INDEX dist_p_uk_b_c;
+-- we also support CREATE unqiue INDEX ON each child tables
+CREATE UNIQUE INDEX dist_p0_uk_bc ON dist_p0(b, c);
+-- NOT ok, since dist_p1 have no such INDEX
+EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM dist_p;
+                     QUERY PLAN                      
+-----------------------------------------------------
+ HashAggregate
+   Group Key: dist_p.a, dist_p.b, dist_p.c, dist_p.d
+   ->  Append
+         ->  Seq Scan on dist_p0 dist_p_1
+         ->  Seq Scan on dist_p1 dist_p_2
+(5 rows)
+
+CREATE UNIQUE INDEX dist_p1_uk_bc ON dist_p1(b, c);
+-- OK now
+EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM dist_p;
+             QUERY PLAN             
+------------------------------------
+ Append
+   ->  Seq Scan on dist_p0 dist_p_1
+   ->  Seq Scan on dist_p1 dist_p_2
+(3 rows)
+
+DROP INDEX dist_p0_uk_bc;
+DROP INDEX dist_p1_uk_bc;
+-- uk is same ON all child tables, however it doesn't include the partkey, so NOT ok as well.
+CREATE UNIQUE INDEX dist_p0_uk_c ON dist_p0(c);
+CREATE UNIQUE INDEX dist_p1_uk_c ON dist_p1(c);
+EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM dist_p;
+                     QUERY PLAN                      
+-----------------------------------------------------
+ HashAggregate
+   Group Key: dist_p.a, dist_p.b, dist_p.c, dist_p.d
+   ->  Append
+         ->  Seq Scan on dist_p0 dist_p_1
+         ->  Seq Scan on dist_p1 dist_p_2
+(5 rows)
+
+DROP TABLE dist_p;
+-- Test some VIEW
+CREATE VIEW distinct_v1 as SELECT DISTINCT c, d FROM uqk1 WHERE c is NOT NULL;
+EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM distinct_v1;
+        QUERY PLAN         
+---------------------------
+ Seq Scan on uqk1
+   Filter: (c IS NOT NULL)
+(2 rows)
+
+ALTER TABLE uqk1 ALTER COLUMN d DROP NOT NULL;
+EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM distinct_v1;
+              QUERY PLAN               
+---------------------------------------
+ Unique
+   ->  Sort
+         Sort Key: uqk1.c, uqk1.d
+         ->  Seq Scan on uqk1
+               Filter: (c IS NOT NULL)
+(5 rows)
+
+-- Test generic plan
+ALTER TABLE uqk1 ALTER COLUMN d SET NOT NULL;
+prepare pt as SELECT * FROM distinct_v1;
+EXPLAIN (COSTS OFF)  execute pt;
+        QUERY PLAN         
+---------------------------
+ Seq Scan on uqk1
+   Filter: (c IS NOT NULL)
+(2 rows)
+
+ALTER TABLE uqk1 ALTER COLUMN d DROP NOT NULL;
+EXPLAIN (COSTS OFF) execute pt;
+              QUERY PLAN               
+---------------------------------------
+ Unique
+   ->  Sort
+         Sort Key: uqk1.c, uqk1.d
+         ->  Seq Scan on uqk1
+               Filter: (c IS NOT NULL)
+(5 rows)
+
+DEALLOCATE pt;
+DROP VIEW distinct_v1;
+DROP TABLE uqk1;
+DROP TABLE uqk2;
diff --git a/src/test/regress/sql/select_distinct.sql b/src/test/regress/sql/select_distinct.sql
index 33102744eb..72716217c0 100644
--- a/src/test/regress/sql/select_distinct.sql
+++ b/src/test/regress/sql/select_distinct.sql
@@ -135,3 +135,121 @@ SELECT 1 IS NOT DISTINCT FROM 2 as "no";
 SELECT 2 IS NOT DISTINCT FROM 2 as "yes";
 SELECT 2 IS NOT DISTINCT FROM null as "no";
 SELECT null IS NOT DISTINCT FROM null as "yes";
+
+
+CREATE TABLE uqk1(a int, pk int primary key, c int,  d int);
+CREATE TABLE uqk2(a int, pk int primary key, c int,  d int);
+INSERT INTO uqk1 VALUES(1, 1, 1, 1), (2, 2, 2, 2), (3, 3, 3, 3);
+INSERT INTO uqk2 VALUES(1, 1, 1, 1), (4, 4, 4, 4), (5, 5, 5, 5);
+ANALYZE uqk1;
+ANALYZE uqk2;
+
+-- Test single table
+EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM uqk1;
+EXPLAIN (COSTS OFF) SELECT DISTINCT c, d FROM uqk1;
+
+CREATE UNIQUE INDEX uqk1_ukcd ON uqk1(c, d);
+
+EXPLAIN (COSTS OFF) SELECT DISTINCT c, d FROM uqk1;
+EXPLAIN (COSTS OFF) SELECT DISTINCT c, d FROM uqk1 WHERE c is NOT NULL;
+ALTER TABLE uqk1 ALTER COLUMN d SET NOT NULL;
+EXPLAIN (COSTS OFF) SELECT DISTINCT c, d FROM uqk1 WHERE c is NOT NULL;
+EXPLAIN (COSTS OFF) SELECT DISTINCT d FROM uqk1 WHERE c is NOT NULL;
+EXPLAIN (COSTS OFF) SELECT DISTINCT a FROM uqk1 WHERE pk = 1;
+
+
+-- Test join
+-- both uqk1 (c, d) and uqk2(pk) are unique key, so distinct is not needed.
+
+EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.c, uqk1.d FROM uqk1, uqk2
+WHERE uqk1.c is NOT NULL AND uqk1.a = uqk2.pk;
+
+-- Distinct is needed since the outer join
+EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.pk FROM uqk1 RIGHT JOIN uqk2 ON (uqk1.a = uqk2.pk)
+order BY 1;
+
+SELECT DISTINCT uqk1.pk FROM uqk1 RIGHT JOIN uqk2 ON (uqk1.a = uqk2.pk) order BY 1;
+
+-- Distinct is not needed since uqk1 is the left table in outer join
+EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.c, uqk1.d FROM uqk1 LEFT JOIN uqk2 ON (uqk1.a = uqk2.pk)
+WHERE uqk1.c is NOT NULL order BY 1, 2;
+
+SELECT DISTINCT uqk1.c, uqk1.d FROM uqk1 LEFT JOIN uqk2 ON (uqk1.a = uqk2.pk)
+WHERE uqk1.c is NOT NULL order BY 1, 2;
+
+-- Distinct is ok even with NOT clause-list both UNIQUE keys shown in targetlist
+EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM uqk1, uqk2;
+SELECT DISTINCT * FROM uqk1, uqk2 order BY 1, 2, 3, 4, 5, 6;
+
+-- Test Semi/Anti JOIN
+EXPLAIN (COSTS OFF) SELECT DISTINCT pk FROM uqk1 WHERE d in (SELECT d FROM uqk2);
+EXPLAIN (COSTS OFF) SELECT DISTINCT pk FROM uqk1 WHERE d NOT in (SELECT d FROM uqk2);
+
+-- Test Unique Key FOR one-row case, DISTINCT is NOT needed as well.
+-- uqk1.d is the a uniquekey due to onerow rule. uqk2.pk is pk
+EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.d FROM uqk1, uqk2 WHERE uqk1.pk = 2 AND uqk1.d = uqk2.pk;
+SELECT DISTINCT uqk1.d FROM uqk1, uqk2 WHERE uqk1.pk = 2 AND uqk1.d = uqk2.pk order BY 1;
+
+-- Both uqk1.d AND uqk2.c are the a uniquekey due to onerow rule
+EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.d FROM uqk1, uqk2 WHERE uqk1.pk = 2
+AND uqk2.pk = 1 AND uqk1.d = uqk2.d ;
+
+SELECT DISTINCT uqk1.d FROM uqk1, uqk2 WHERE uqk1.pk = 2 AND uqk2.pk = 1
+AND uqk1.d = uqk2.d order BY 1;
+
+-- Both UniqueKey in targetList
+EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.c, uqk2.c FROM uqk1, uqk2 WHERE uqk1.pk = 2 AND uqk2.pk = 1;
+SELECT DISTINCT uqk1.c, uqk2.c FROM uqk1, uqk2 WHERE uqk1.pk = 2 AND uqk2.pk = 1 order BY 1, 2;
+
+-- Test SubQuery
+-- t2(a, c) is UNIQUE key because OF group BY
+EXPLAIN (COSTS OFF) SELECT DISTINCT t2.a, t2.c FROM uqk1 t1 inner JOIN
+(SELECT a, c, sum(pk) as t2b FROM uqk2 group BY a, c) t2
+ON (t2.t2b = t1.pk);
+
+-- Test Partition TABLE
+-- Test partitioned TABLE
+CREATE TABLE dist_p (a int, b int NOT NULL, c int NOT NULL, d int) PARTITION BY RANGE (b);
+CREATE TABLE dist_p0 PARTITION OF dist_p FOR VALUES FROM (1) to (10);
+CREATE TABLE dist_p1 PARTITION OF dist_p FOR VALUES FROM (11) to (20);
+
+-- CREATE unqiue INDEX ON dist_p
+CREATE UNIQUE INDEX dist_p_uk_b_c ON dist_p(b, c);
+EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM dist_p;
+DROP INDEX dist_p_uk_b_c;
+
+-- we also support CREATE unqiue INDEX ON each child tables
+CREATE UNIQUE INDEX dist_p0_uk_bc ON dist_p0(b, c);
+-- NOT ok, since dist_p1 have no such INDEX
+EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM dist_p;
+CREATE UNIQUE INDEX dist_p1_uk_bc ON dist_p1(b, c);
+-- OK now
+EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM dist_p;
+
+DROP INDEX dist_p0_uk_bc;
+DROP INDEX dist_p1_uk_bc;
+
+-- uk is same ON all child tables, however it doesn't include the partkey, so NOT ok as well.
+CREATE UNIQUE INDEX dist_p0_uk_c ON dist_p0(c);
+CREATE UNIQUE INDEX dist_p1_uk_c ON dist_p1(c);
+EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM dist_p;
+
+DROP TABLE dist_p;
+
+-- Test some VIEW
+CREATE VIEW distinct_v1 as SELECT DISTINCT c, d FROM uqk1 WHERE c is NOT NULL;
+EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM distinct_v1;
+ALTER TABLE uqk1 ALTER COLUMN d DROP NOT NULL;
+EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM distinct_v1;
+
+-- Test generic plan
+ALTER TABLE uqk1 ALTER COLUMN d SET NOT NULL;
+prepare pt as SELECT * FROM distinct_v1;
+EXPLAIN (COSTS OFF)  execute pt;
+ALTER TABLE uqk1 ALTER COLUMN d DROP NOT NULL;
+EXPLAIN (COSTS OFF) execute pt;
+DEALLOCATE pt;
+
+DROP VIEW distinct_v1;
+DROP TABLE uqk1;
+DROP TABLE uqk2;
-- 
2.25.1

