diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 85805ff5c70..927b56db837 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -143,6 +143,9 @@ static double preprocess_limit(PlannerInfo *root,
 				 int64 *offset_est, int64 *count_est);
 static bool limit_needed(Query *parse);
 static void remove_useless_groupby_columns(PlannerInfo *root);
+static void remove_useless_distinct_columns(PlannerInfo *root);
+static List *remove_functionally_dependent_clauses(PlannerInfo *root,
+									  List *clauselist);
 static List *preprocess_groupclause(PlannerInfo *root, List *force);
 static List *extract_rollup_sets(List *groupingSets);
 static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
@@ -919,6 +922,9 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
 	/* Remove any redundant GROUP BY columns */
 	remove_useless_groupby_columns(root);
 
+	/* Likewise for redundant DISTINCT columns */
+	remove_useless_distinct_columns(root);
+
 	/*
 	 * If we have any outer joins, try to reduce them to plain inner joins.
 	 * This step is most easily done after we've done expression
@@ -2929,27 +2935,70 @@ static void
 remove_useless_groupby_columns(PlannerInfo *root)
 {
 	Query	   *parse = root->parse;
-	Bitmapset **groupbyattnos;
-	Bitmapset **surplusvars;
-	ListCell   *lc;
-	int			relid;
-
-	/* No chance to do anything if there are less than two GROUP BY items */
-	if (list_length(parse->groupClause) < 2)
-		return;
 
 	/* Don't fiddle with the GROUP BY clause if the query has grouping sets */
 	if (parse->groupingSets)
 		return;
 
+	parse->groupClause = remove_functionally_dependent_clauses(root,
+														parse->groupClause);
+}
+
+/*
+ * remove_useless_distinct_columns
+ *		Remove functionally dependent items from a plain DISTINCT clause
+ */
+static void
+remove_useless_distinct_columns(PlannerInfo *root)
+{
+	Query	   *parse = root->parse;
+
+	/*
+	 * Leave DISTINCT ON queries alone.  In that case the distinctClause
+	 * items are closely entwined with the ORDER BY clause, so we'd better
+	 * not drop any of them here.  Perhaps we could adjust the ORDER BY
+	 * clause to match, but DISTINCT ON is probably not worth going to that
+	 * much trouble over.  For plain DISTINCT, the clause list is pruned by
+	 * remove_functionally_dependent_clauses below.
+	 */
+	if (parse->hasDistinctOn)
+		return;
+
+	parse->distinctClause = remove_functionally_dependent_clauses(root,
+													parse->distinctClause);
+}
+
+/*
+ * remove_functionally_dependent_clauses
+ *		Processes clauselist and removes any items which are deemed to be
+ *		functionally dependent on other clauselist items.
+ *
+ * If any item from the list can be removed, then a new list is built which
+ * does not contain the removed items.  If no item can be removed then the
+ * original list is returned.
+ */
+static List *
+remove_functionally_dependent_clauses(PlannerInfo *root,
+									  List *clauselist)
+{
+	Query	   *parse = root->parse;
+	Bitmapset **clauseattnos;
+	Bitmapset **surplusvars;
+	ListCell   *lc;
+	int			relid;
+
+	/* No chance of removing anything if there are fewer than two items */
+	if (list_length(clauselist) < 2)
+		return clauselist;
+
 	/*
-	 * Scan the GROUP BY clause to find GROUP BY items that are simple Vars.
-	 * Fill groupbyattnos[k] with a bitmapset of the column attnos of RTE k
-	 * that are GROUP BY items.
+	 * Scan the clauselist to find items that are simple Vars. Fill
+	 * clauseattnos[k] with a bitmapset of the column attnos of RTE k that are
+	 * in the clauselist.
 	 */
-	groupbyattnos = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
+	clauseattnos = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
 										   (list_length(parse->rtable) + 1));
-	foreach(lc, parse->groupClause)
+	foreach(lc, clauselist)
 	{
 		SortGroupClause *sgc = lfirst_node(SortGroupClause, lc);
 		TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
@@ -2959,9 +3008,9 @@ remove_useless_groupby_columns(PlannerInfo *root)
 		 * Ignore non-Vars and Vars from other query levels.
 		 *
 		 * XXX in principle, stable expressions containing Vars could also be
-		 * removed, if all the Vars are functionally dependent on other GROUP
-		 * BY items.  But it's not clear that such cases occur often enough to
-		 * be worth troubling over.
+		 * removed, if all the Vars are functionally dependent on other items
+		 * in the clauselist.  But it's not clear that such cases occur often
+		 * enough to be worth troubling over.
 		 */
 		if (!IsA(var, Var) ||
 			var->varlevelsup > 0)
@@ -2970,15 +3019,16 @@ remove_useless_groupby_columns(PlannerInfo *root)
 		/* OK, remember we have this Var */
 		relid = var->varno;
 		Assert(relid <= list_length(parse->rtable));
-		groupbyattnos[relid] = bms_add_member(groupbyattnos[relid],
-											  var->varattno - FirstLowInvalidHeapAttributeNumber);
+		clauseattnos[relid] = bms_add_member(clauseattnos[relid],
+											 var->varattno - FirstLowInvalidHeapAttributeNumber);
 	}
 
 	/*
 	 * Consider each relation and see if it is possible to remove some of its
-	 * Vars from GROUP BY.  For simplicity and speed, we do the actual removal
-	 * in a separate pass.  Here, we just fill surplusvars[k] with a bitmapset
-	 * of the column attnos of RTE k that are removable GROUP BY items.
+	 * Vars from the clauselist.  For simplicity and speed, we do the actual
+	 * removal in a separate pass.  Here, we just fill surplusvars[k] with a
+	 * bitmapset of the column attnos of RTE k that are removable clauselist
+	 * items.
 	 */
 	surplusvars = NULL;			/* don't allocate array unless required */
 	relid = 0;
@@ -2995,8 +3045,8 @@ remove_useless_groupby_columns(PlannerInfo *root)
 		if (rte->rtekind != RTE_RELATION)
 			continue;
 
-		/* Nothing to do unless this rel has multiple Vars in GROUP BY */
-		relattnos = groupbyattnos[relid];
+		/* Nothing to do unless this rel has multiple Vars in clauselist */
+		relattnos = clauseattnos[relid];
 		if (bms_membership(relattnos) != BMS_MULTIPLE)
 			continue;
 
@@ -3010,7 +3060,7 @@ remove_useless_groupby_columns(PlannerInfo *root)
 
 		/*
 		 * If the primary key is a proper subset of relattnos then we have
-		 * some items in the GROUP BY that can be removed.
+		 * some items in the clauselist that can be removed.
 		 */
 		if (bms_subset_compare(pkattnos, relattnos) == BMS_SUBSET1)
 		{
@@ -3032,15 +3082,15 @@ remove_useless_groupby_columns(PlannerInfo *root)
 	}
 
 	/*
-	 * If we found any surplus Vars, build a new GROUP BY clause without them.
+	 * If we found any surplus Vars, build a new clause list without them.
 	 * (Note: this may leave some TLEs with unreferenced ressortgroupref
 	 * markings, but that's harmless.)
 	 */
 	if (surplusvars != NULL)
 	{
-		List	   *new_groupby = NIL;
+		List	   *new_clauselist = NIL;
 
-		foreach(lc, parse->groupClause)
+		foreach(lc, clauselist)
 		{
 			SortGroupClause *sgc = lfirst_node(SortGroupClause, lc);
 			TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
@@ -3054,11 +3104,14 @@ remove_useless_groupby_columns(PlannerInfo *root)
 				var->varlevelsup > 0 ||
 				!bms_is_member(var->varattno - FirstLowInvalidHeapAttributeNumber,
 							   surplusvars[var->varno]))
-				new_groupby = lappend(new_groupby, sgc);
+				new_clauselist = lappend(new_clauselist, sgc);
 		}
 
-		parse->groupClause = new_groupby;
+		return new_clauselist;
 	}
+
+	/* nothing to change, just return the old list */
+	return clauselist;
 }
 
 /*
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 92082b3a7a2..4ccce48cf86 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -151,7 +151,8 @@ typedef struct Query
 
 	List	   *windowClause;	/* a list of WindowClause's */
 
-	List	   *distinctClause; /* a list of SortGroupClause's */
+	List	   *distinctClause; /* a list of SortGroupClause's. Not equivalent
+								 * to non-junk targetList items. */
 
 	List	   *sortClause;		/* a list of SortGroupClause's */
 
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index f85e9138504..2976c4b7fe9 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1017,6 +1017,65 @@ explain (costs off) select * from t3 group by a,b,c;
    ->  Seq Scan on t3
 (3 rows)
 
+--
+-- Test removal of redundant DISTINCT columns
+--
+-- Non-primary-key columns can be removed from DISTINCT clause
+explain (costs off) select distinct a,b,c,d from t1;
+      QUERY PLAN      
+----------------------
+ HashAggregate
+   Group Key: a, b
+   ->  Seq Scan on t1
+(3 rows)
+
+-- No removal can happen if the complete PK is not present in DISTINCT clause
+explain (costs off) select distinct a,c,d from t1;
+      QUERY PLAN      
+----------------------
+ HashAggregate
+   Group Key: a, c, d
+   ->  Seq Scan on t1
+(3 rows)
+
+-- Test removal across multiple relations
+explain (costs off) select distinct t1.a,t1.b,t1.c,t1.d,t2.x,t2.y,t2.z
+from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y;
+                      QUERY PLAN                      
+------------------------------------------------------
+ HashAggregate
+   Group Key: t1.a, t1.b, t2.x, t2.y
+   ->  Hash Join
+         Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b))
+         ->  Seq Scan on t2
+         ->  Hash
+               ->  Seq Scan on t1
+(7 rows)
+
+-- Test case where t1 can be optimized but not t2
+explain (costs off) select distinct t1.a,t1.b,t1.c,t1.d,t2.x,t2.z
+from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y;
+                      QUERY PLAN                      
+------------------------------------------------------
+ HashAggregate
+   Group Key: t1.a, t1.b, t2.x, t2.z
+   ->  Hash Join
+         Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b))
+         ->  Seq Scan on t2
+         ->  Hash
+               ->  Seq Scan on t1
+(7 rows)
+
+-- Ensure we don't remove DISTINCT ON items
+explain (costs off) select distinct on (a,b,c) d from t1 order by a,b,c,d;
+          QUERY PLAN          
+------------------------------
+ Unique
+   ->  Sort
+         Sort Key: a, b, c, d
+         ->  Seq Scan on t1
+(4 rows)
+
 drop table t1;
 drop table t2;
 drop table t3;
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 84c6e9b5a40..f6ad81f952b 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -4130,18 +4130,17 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s
 explain (costs off)
 select d.* from d left join (select distinct * from b) s
   on d.a = s.id;
-              QUERY PLAN              
---------------------------------------
- Merge Right Join
-   Merge Cond: (b.id = d.a)
-   ->  Unique
-         ->  Sort
-               Sort Key: b.id, b.c_id
-               ->  Seq Scan on b
-   ->  Sort
-         Sort Key: d.a
-         ->  Seq Scan on d
-(9 rows)
+              QUERY PLAN               
+---------------------------------------
+ Hash Left Join
+   Hash Cond: (d.a = s.id)
+   ->  Seq Scan on d
+   ->  Hash
+         ->  Subquery Scan on s
+               ->  HashAggregate
+                     Group Key: b.id
+                     ->  Seq Scan on b
+(8 rows)
 
 -- check join removal works when uniqueness of the join condition is enforced
 -- by a UNION
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 506d0442d79..1ee26ca7f39 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -362,6 +362,26 @@ group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.z;
 -- Cannot optimize when PK is deferrable
 explain (costs off) select * from t3 group by a,b,c;
 
+--
+-- Test removal of redundant DISTINCT columns
+--
+-- Non-primary-key columns can be removed from DISTINCT clause
+explain (costs off) select distinct a,b,c,d from t1;
+
+-- No removal can happen if the complete PK is not present in DISTINCT clause
+explain (costs off) select distinct a,c,d from t1;
+
+-- Test removal across multiple relations
+explain (costs off) select distinct t1.a,t1.b,t1.c,t1.d,t2.x,t2.y,t2.z
+from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y;
+
+-- Test case where t1 can be optimized but not t2
+explain (costs off) select distinct t1.a,t1.b,t1.c,t1.d,t2.x,t2.z
+from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y;
+
+-- Ensure we don't remove DISTINCT ON items
+explain (costs off) select distinct on (a,b,c) d from t1 order by a,b,c,d;
+
 drop table t1;
 drop table t2;
 drop table t3;