diff --git a/src/backend/optimizer/geqo/geqo_eval.c b/src/backend/optimizer/geqo/geqo_eval.c
index 0be2a73e05b..e9cd8e60f57 100644
--- a/src/backend/optimizer/geqo/geqo_eval.c
+++ b/src/backend/optimizer/geqo/geqo_eval.c
@@ -40,7 +40,7 @@ typedef struct
 } Clump;
 
 static List *merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump,
-			bool force);
+			int num_gene, bool force);
 static bool desirable_join(PlannerInfo *root,
 			   RelOptInfo *outer_rel, RelOptInfo *inner_rel);
 
@@ -196,7 +196,7 @@ gimme_tree(PlannerInfo *root, Gene *tour, int num_gene)
 		cur_clump->size = 1;
 
 		/* Merge it into the clumps list, using only desirable joins */
-		clumps = merge_clump(root, clumps, cur_clump, false);
+		clumps = merge_clump(root, clumps, cur_clump, num_gene, false);
 	}
 
 	if (list_length(clumps) > 1)
@@ -210,7 +210,7 @@ gimme_tree(PlannerInfo *root, Gene *tour, int num_gene)
 		{
 			Clump	   *clump = (Clump *) lfirst(lc);
 
-			fclumps = merge_clump(root, fclumps, clump, true);
+			fclumps = merge_clump(root, fclumps, clump, num_gene, true);
 		}
 		clumps = fclumps;
 	}
@@ -235,7 +235,8 @@ gimme_tree(PlannerInfo *root, Gene *tour, int num_gene)
  * "desirable" joins.
  */
 static List *
-merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, bool force)
+merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, int num_gene,
+			bool force)
 {
 	ListCell   *prev;
 	ListCell   *lc;
@@ -267,8 +268,13 @@ merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, bool force)
 				/* Create paths for partitionwise joins. */
 				generate_partitionwise_join_paths(root, joinrel);
 
-				/* Create GatherPaths for any useful partial paths for rel */
-				generate_gather_paths(root, joinrel, false);
+				/*
+				 * Except for the topmost join rel, consider gathering partial
+				 * paths.  We'll do the same for the topmost join rel once we
+				 * know the final targetlist (see grouping_planner).
+				 */
+				if (old_clump->size + new_clump->size < num_gene)
+					generate_gather_paths(root, joinrel, NULL, false);
 
 				/* Find and save the cheapest paths for this joinrel */
 				set_cheapest(joinrel);
@@ -286,7 +292,7 @@ merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, bool force)
 				 * others.  When no further merge is possible, we'll reinsert
 				 * it into the list.
 				 */
-				return merge_clump(root, clumps, old_clump, force);
+				return merge_clump(root, clumps, old_clump, num_gene, force);
 			}
 		}
 		prev = lc;
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 1c792a00eb2..bf0469a4649 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -481,14 +481,21 @@ set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
 	}
 
 	/*
-	 * If this is a baserel, consider gathering any partial paths we may have
-	 * created for it.  (If we tried to gather inheritance children, we could
+	 * If this is a baserel, we should normally consider gathering any partial
+	 * paths we may have created for it.
+	 *
+	 * However, if this is an inheritance child, skip it.  Otherwise, we could
 	 * end up with a very large number of gather nodes, each trying to grab
-	 * its own pool of workers, so don't do this for otherrels.  Instead,
-	 * we'll consider gathering partial paths for the parent appendrel.)
+	 * its own pool of workers.  Instead, we'll consider gathering partial
+	 * paths for the parent appendrel.
+	 *
+	 * Also, if this is the topmost scan rel (that is, the only baserel), we
+	 * postpone this until the final scan targetlist is available (see
+	 * grouping_planner).
 	 */
-	if (rel->reloptkind == RELOPT_BASEREL)
-		generate_gather_paths(root, rel, false);
+	if (rel->reloptkind == RELOPT_BASEREL &&
+		bms_membership(root->all_baserels) != BMS_SINGLETON)
+		generate_gather_paths(root, rel, NULL, false);
 
 	/*
 	 * Allow a plugin to editorialize on the set of Paths for this base
@@ -2445,6 +2452,9 @@ set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
  * for the specified relation.  (Otherwise, add_partial_path might delete a
  * path that some GatherPath or GatherMergePath has a reference to.)
  *
+ * Since the paths made here will produce the given target (if non-NULL), it
+ * should also not be called until we know what target list we want.
+ *
  * If we're generating paths for a scan or join relation, override_rows will
  * be false, and we'll just use the relation's size estimate.  When we're
  * being called for a partially-grouped path, though, we need to override
@@ -2453,7 +2463,8 @@ set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
  * we must do something.)
  */
 void
-generate_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows)
+generate_gather_paths(PlannerInfo *root, RelOptInfo *rel, PathTarget *target,
+					  bool override_rows)
 {
 	Path	   *cheapest_partial_path;
 	Path	   *simple_gather_path;
@@ -2480,6 +2491,14 @@ generate_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows)
 	simple_gather_path = (Path *)
 		create_gather_path(root, rel, cheapest_partial_path, rel->reltarget,
 						   NULL, rowsp);
+
+	/* Add projection step if needed */
+	if (target && simple_gather_path->pathtarget != target)
+		simple_gather_path = apply_projection_to_path(root,
+													  rel,
+													  simple_gather_path,
+													  target);
+
 	add_path(rel, simple_gather_path);
 
 	/*
@@ -2489,15 +2508,20 @@ generate_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows)
 	foreach(lc, rel->partial_pathlist)
 	{
 		Path	   *subpath = (Path *) lfirst(lc);
-		GatherMergePath *path;
+		Path	   *path;
 
 		if (subpath->pathkeys == NIL)
 			continue;
 
 		rows = subpath->rows * subpath->parallel_workers;
-		path = create_gather_merge_path(root, rel, subpath, rel->reltarget,
-										subpath->pathkeys, NULL, rowsp);
-		add_path(rel, &path->path);
+		path = (Path *) create_gather_merge_path(root, rel, subpath,
+							rel->reltarget, subpath->pathkeys, NULL, rowsp);
+
+		/* Add projection step if needed */
+		if (target && path->pathtarget != target)
+			path = apply_projection_to_path(root, rel, path, target);
+
+		add_path(rel, path);
 	}
 }
 
@@ -2668,8 +2692,13 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
 			/* Create paths for partitionwise joins. */
 			generate_partitionwise_join_paths(root, rel);
 
-			/* Create GatherPaths for any useful partial paths for rel */
-			generate_gather_paths(root, rel, false);
+			/*
+			 * Except for the topmost join rel, consider gathering partial
+			 * paths.  We'll do the same for the topmost join rel once we know
+			 * the final targetlist (see grouping_planner).
+			 */
+			if (lev < levels_needed)
+				generate_gather_paths(root, rel, NULL, false);
 
 			/* Find and save the cheapest paths for this rel */
 			set_cheapest(rel);
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 14b7becf3e8..01a62e2038f 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -1948,6 +1948,28 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 			}
 		}
 
+		/*
+		 * When possible, we want target list evaluation to happen in parallel
+		 * worker processes rather than in the leader.  To facilitate this,
+		 * scan/join planning avoids generating Gather or Gather Merge paths
+		 * for the topmost scan/join relation, so that we can add them here.
+		 *
+		 * Formerly, we generated Gather or Gather Merge paths first and then
+		 * modified the target lists of their subpaths after the fact, but
+		 * that wasn't good because by that point it was too late for the
+		 * associated cost savings to affect which plans got chosen.  A plan
+		 * that uses parallel query for the entire scan/join tree may gain a
+		 * significant advantage over a serial plan if target list evaluation
+		 * is expensive.
+		 */
+		generate_gather_paths(root, current_rel, scanjoin_target, false);
+
+		/*
+		 * Since generate_gather_paths has likely added new paths to
+		 * current_rel, the cheapest path might have changed.
+		 */
+		set_cheapest(current_rel);
+
 		/*
 		 * Upper planning steps which make use of the top scan/join rel's
 		 * partial pathlist will expect partial paths for that rel to produce
@@ -6370,7 +6392,7 @@ add_paths_to_partial_grouping_rel(PlannerInfo *root,
 	 * Try adding Gather or Gather Merge to partial paths to produce
 	 * non-partial paths.
 	 */
-	generate_gather_paths(root, partially_grouped_rel, true);
+	generate_gather_paths(root, partially_grouped_rel, NULL, true);
 
 	/* Get cheapest partial path from partially_grouped_rel */
 	cheapest_partial_path = linitial(partially_grouped_rel->partial_pathlist);
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index fe3b4582d42..d2b845cc854 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -2454,6 +2454,8 @@ apply_projection_to_path(PlannerInfo *root,
 						 PathTarget *target)
 {
 	QualCost	oldcost;
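+	/*
+	 * nrows is the row count at which the new target will be evaluated, and
+	 * resultPath records whether a separate Result node will perform the
+	 * projection; both feed into the cost adjustments below.
+	 */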
+	double		nrows;
+	bool		resultPath = false;
 
 	/*
 	 * If given path can't project, we might need a Result node, so make a
@@ -2464,14 +2466,16 @@ apply_projection_to_path(PlannerInfo *root,
 
 	/*
 	 * We can just jam the desired tlist into the existing path, being sure to
-	 * update its cost estimates appropriately.
+	 * update its cost estimates appropriately.  Also, ensure that the cost
+	 * estimates reflect the fact that target list evaluation will happen in
+	 * workers if the path is a Gather or GatherMerge path.
 	 */
 	oldcost = path->pathtarget->cost;
 	path->pathtarget = target;
 
+	nrows = path->rows;
 	path->startup_cost += target->cost.startup - oldcost.startup;
-	path->total_cost += target->cost.startup - oldcost.startup +
-		(target->cost.per_tuple - oldcost.per_tuple) * path->rows;
+	path->total_cost += target->cost.startup - oldcost.startup;
 
 	/*
 	 * If the path happens to be a Gather or GatherMerge path, we'd like to
@@ -2487,10 +2491,6 @@ apply_projection_to_path(PlannerInfo *root,
 		 * projection-capable, so as to avoid modifying the subpath in place.
 		 * It seems unlikely at present that there could be any other
 		 * references to the subpath, but better safe than sorry.
-		 *
-		 * Note that we don't change the parallel path's cost estimates; it
-		 * might be appropriate to do so, to reflect the fact that the bulk of
-		 * the target evaluation will happen in workers.
 		 */
 		if (IsA(path, GatherPath))
 		{
@@ -2501,6 +2501,10 @@ apply_projection_to_path(PlannerInfo *root,
 									   gpath->subpath->parent,
 									   gpath->subpath,
 									   target);
+
+			nrows = gpath->subpath->rows;
+			if (!((ProjectionPath *) gpath->subpath)->dummypp)
+				resultPath = true;
 		}
 		else
 		{
@@ -2511,6 +2515,10 @@ apply_projection_to_path(PlannerInfo *root,
 									   gmpath->subpath->parent,
 									   gmpath->subpath,
 									   target);
+
+			nrows = gmpath->subpath->rows;
+			if (!((ProjectionPath *) gmpath->subpath)->dummypp)
+				resultPath = true;
 		}
 	}
 	else if (path->parallel_safe &&
@@ -2524,6 +2532,20 @@ apply_projection_to_path(PlannerInfo *root,
 		path->parallel_safe = false;
 	}
 
+	/*
+	 * Update the cost estimates based on whether a Result node is required.
+	 * See create_projection_path for the logic behind these adjustments.
+	 */
+	if (resultPath)
+	{
+		Assert(IsA(path, GatherPath) || IsA(path, GatherMergePath));
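+		/*
+		 * A Result node will evaluate the new target below the Gather, so
+		 * charge its per-row overhead plus the full cost of the new target
+		 * for every subpath row, as create_projection_path would.
+		 */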
+		path->total_cost += (cpu_tuple_cost + target->cost.per_tuple) * nrows;
+	}
+	else
+	{
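+		/*
+		 * The new target is evaluated in place of the old one, so charge
+		 * only the difference in per-tuple evaluation cost.
+		 */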
+		path->total_cost += (target->cost.per_tuple - oldcost.per_tuple) * nrows;
+	}
+
 	return path;
 }
 
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 94f9bb2b574..b21e98b4ae2 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -54,7 +54,7 @@ extern RelOptInfo *standard_join_search(PlannerInfo *root, int levels_needed,
 					 List *initial_rels);
 
 extern void generate_gather_paths(PlannerInfo *root, RelOptInfo *rel,
-					  bool override_rows);
+						PathTarget *target, bool override_rows);
 extern int compute_parallel_worker(RelOptInfo *rel, double heap_pages,
 						double index_pages, int max_workers);
 extern void create_partial_bitmap_paths(PlannerInfo *root, RelOptInfo *rel,
diff --git a/src/test/regress/expected/select_parallel.out b/src/test/regress/expected/select_parallel.out
index 0a782616385..0aff0c94dd1 100644
--- a/src/test/regress/expected/select_parallel.out
+++ b/src/test/regress/expected/select_parallel.out
@@ -262,6 +262,23 @@ execute tenk1_count(1);
 (1 row)
 
 deallocate tenk1_count;
+-- test that a parallel plan gets selected when the target list contains a
+-- costly function
+create or replace function costly_func(var1 integer) returns integer
+as $$
+begin
+        return var1 + 10;
+end;
+$$ language plpgsql PARALLEL SAFE Cost 100000;
+explain (costs off) select ten, costly_func(ten) from tenk1;
+            QUERY PLAN            
+----------------------------------
+ Gather
+   Workers Planned: 4
+   ->  Parallel Seq Scan on tenk1
+(3 rows)
+
+drop function costly_func(var1 integer);
 -- test parallel plans for queries containing un-correlated subplans.
 alter table tenk2 set (parallel_workers = 0);
 explain (costs off)
diff --git a/src/test/regress/sql/select_parallel.sql b/src/test/regress/sql/select_parallel.sql
index fa03aae0c03..eef9ace6c9f 100644
--- a/src/test/regress/sql/select_parallel.sql
+++ b/src/test/regress/sql/select_parallel.sql
@@ -97,6 +97,17 @@ explain (costs off) execute tenk1_count(1);
 execute tenk1_count(1);
 deallocate tenk1_count;
 
+-- test that a parallel plan gets selected when the target list contains a
+-- costly function
+create or replace function costly_func(var1 integer) returns integer
+as $$
+begin
+        return var1 + 10;
+end;
+$$ language plpgsql PARALLEL SAFE Cost 100000;
+explain (costs off) select ten, costly_func(ten) from tenk1;
+drop function costly_func(var1 integer);
+
 -- test parallel plans for queries containing un-correlated subplans.
 alter table tenk2 set (parallel_workers = 0);
 explain (costs off)
