diff --git a/src/backend/optimizer/geqo/geqo_eval.c b/src/backend/optimizer/geqo/geqo_eval.c
index 9053cfd0b9..8666555ca5 100644
--- a/src/backend/optimizer/geqo/geqo_eval.c
+++ b/src/backend/optimizer/geqo/geqo_eval.c
@@ -40,7 +40,7 @@ typedef struct
 } Clump;
 
 static List *merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump,
-			bool force);
+			int num_gene, bool force);
 static bool desirable_join(PlannerInfo *root,
 			   RelOptInfo *outer_rel, RelOptInfo *inner_rel);
 
@@ -196,7 +196,7 @@ gimme_tree(PlannerInfo *root, Gene *tour, int num_gene)
 		cur_clump->size = 1;
 
 		/* Merge it into the clumps list, using only desirable joins */
-		clumps = merge_clump(root, clumps, cur_clump, false);
+		clumps = merge_clump(root, clumps, cur_clump, num_gene, false);
 	}
 
 	if (list_length(clumps) > 1)
@@ -210,7 +210,7 @@ gimme_tree(PlannerInfo *root, Gene *tour, int num_gene)
 		{
 			Clump	   *clump = (Clump *) lfirst(lc);
 
-			fclumps = merge_clump(root, fclumps, clump, true);
+			fclumps = merge_clump(root, fclumps, clump, num_gene, true);
 		}
 		clumps = fclumps;
 	}
@@ -235,7 +235,8 @@ gimme_tree(PlannerInfo *root, Gene *tour, int num_gene)
  * "desirable" joins.
  */
 static List *
-merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, bool force)
+merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, int num_gene,
+			bool force)
 {
 	ListCell   *prev;
 	ListCell   *lc;
@@ -267,8 +268,14 @@ merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, bool force)
 				/* Create paths for partition-wise joins. */
 				generate_partition_wise_join_paths(root, joinrel);
 
-				/* Create GatherPaths for any useful partial paths for rel */
-				generate_gather_paths(root, joinrel);
+				/*
+				 * Create GatherPaths for any useful partial paths for rel
+				 * other than top-level rel.  The gather path for top-level
+				 * rel is generated once path target is available.  See
+				 * grouping_planner.
+				 */
+				if (old_clump->size + new_clump->size < num_gene)
+					generate_gather_paths(root, joinrel, NULL);
 
 				/* Find and save the cheapest paths for this joinrel */
 				set_cheapest(joinrel);
@@ -286,7 +293,7 @@ merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, bool force)
 				 * others.  When no further merge is possible, we'll reinsert
 				 * it into the list.
 				 */
-				return merge_clump(root, clumps, old_clump, force);
+				return merge_clump(root, clumps, old_clump, num_gene, force);
 			}
 		}
 		prev = lc;
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index fd1a58336b..140c4f121d 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -481,14 +481,19 @@ set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
 	}
 
 	/*
-	 * If this is a baserel, consider gathering any partial paths we may have
-	 * created for it.  (If we tried to gather inheritance children, we could
-	 * end up with a very large number of gather nodes, each trying to grab
-	 * its own pool of workers, so don't do this for otherrels.  Instead,
-	 * we'll consider gathering partial paths for the parent appendrel.)
+	 * If this is a baserel and not the top-level rel, consider gathering any
+	 * partial paths we may have created for it.  (If we tried to gather
+	 * inheritance children, we could end up with a very large number of
+	 * gather nodes, each trying to grab its own pool of workers, so don't do
+	 * this for otherrels.  Instead, we'll consider gathering partial paths
+	 * for the parent appendrel.)  We can check for joins by counting the
+	 * membership of all_baserels (note that this correctly counts inheritance
+	 * trees as single rels).  The gather path for top-level rel is generated
+	 * once path target is available.  See grouping_planner.
 	 */
-	if (rel->reloptkind == RELOPT_BASEREL)
-		generate_gather_paths(root, rel);
+	if (rel->reloptkind == RELOPT_BASEREL &&
+		bms_membership(root->all_baserels) != BMS_SINGLETON)
+		generate_gather_paths(root, rel, NULL);
 
 	/*
 	 * Allow a plugin to editorialize on the set of Paths for this base
@@ -2441,11 +2446,12 @@ set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
  *		Gather Merge on top of a partial path.
  *
  * This must not be called until after we're done creating all partial paths
- * for the specified relation.  (Otherwise, add_partial_path might delete a
- * path that some GatherPath or GatherMergePath has a reference to.)
+ * for the specified relation.  (Otherwise, add_partial_path might delete a
+ * path that some GatherPath or GatherMergePath has a reference to.)  For the
+ * top-level scan/join rel, also wait until its path target is available.
  */
 void
-generate_gather_paths(PlannerInfo *root, RelOptInfo *rel)
+generate_gather_paths(PlannerInfo *root, RelOptInfo *rel, PathTarget *target)
 {
 	Path	   *cheapest_partial_path;
 	Path	   *simple_gather_path;
@@ -2455,6 +2461,9 @@ generate_gather_paths(PlannerInfo *root, RelOptInfo *rel)
 	if (rel->partial_pathlist == NIL)
 		return;
 
+	if (target == NULL)
+		target = rel->reltarget;
+
 	/*
 	 * The output of Gather is always unsorted, so there's only one partial
 	 * path of interest: the cheapest one.  That will be the one at the front
@@ -2462,7 +2471,7 @@ generate_gather_paths(PlannerInfo *root, RelOptInfo *rel)
 	 */
 	cheapest_partial_path = linitial(rel->partial_pathlist);
 	simple_gather_path = (Path *)
-		create_gather_path(root, rel, cheapest_partial_path, rel->reltarget,
+		create_gather_path(root, rel, cheapest_partial_path, target,
 						   NULL, NULL);
 	add_path(rel, simple_gather_path);
 
@@ -2478,7 +2487,7 @@ generate_gather_paths(PlannerInfo *root, RelOptInfo *rel)
 		if (subpath->pathkeys == NIL)
 			continue;
 
-		path = create_gather_merge_path(root, rel, subpath, rel->reltarget,
+		path = create_gather_merge_path(root, rel, subpath, target,
 										subpath->pathkeys, NULL, NULL);
 		add_path(rel, &path->path);
 	}
@@ -2651,8 +2660,13 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
 			/* Create paths for partition-wise joins. */
 			generate_partition_wise_join_paths(root, rel);
 
-			/* Create GatherPaths for any useful partial paths for rel */
-			generate_gather_paths(root, rel);
+			/*
+			 * Create GatherPaths for any useful partial paths for rel other
+			 * than top-level rel.  The gather path for top-level rel is
+			 * generated once path target is available.  See grouping_planner.
+			 */
+			if (lev < levels_needed)
+				generate_gather_paths(root, rel, NULL);
 
 			/* Find and save the cheapest paths for this rel */
 			set_cheapest(rel);
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 8679b14b29..3c4d6a44be 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -374,6 +374,14 @@ cost_gather(GatherPath *path, PlannerInfo *root,
 	startup_cost += parallel_setup_cost;
 	run_cost += parallel_tuple_cost * path->path.rows;
 
+	/* add tlist eval costs only if projecting */
+	if (path->path.pathtarget != path->subpath->pathtarget)
+	{
+		/* tlist eval costs are paid per output row, not per tuple scanned */
+		startup_cost += path->path.pathtarget->cost.startup;
+		run_cost += path->path.pathtarget->cost.per_tuple * path->path.rows;
+	}
+
 	path->path.startup_cost = startup_cost;
 	path->path.total_cost = (startup_cost + run_cost);
 }
@@ -441,6 +449,14 @@ cost_gather_merge(GatherMergePath *path, PlannerInfo *root,
 	startup_cost += parallel_setup_cost;
 	run_cost += parallel_tuple_cost * path->path.rows * 1.05;
 
+	/* add tlist eval costs only if projecting */
+	if (path->path.pathtarget != path->subpath->pathtarget)
+	{
+		/* tlist eval costs are paid per output row, not per tuple scanned */
+		startup_cost += path->path.pathtarget->cost.startup;
+		run_cost += path->path.pathtarget->cost.per_tuple * path->path.rows;
+	}
+
 	path->path.startup_cost = startup_cost + input_startup_cost;
 	path->path.total_cost = (startup_cost + run_cost + input_total_cost);
 }
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 2a4e22b6c8..d66364e718 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -1918,46 +1918,78 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 		}
 
 		/*
-		 * Upper planning steps which make use of the top scan/join rel's
-		 * partial pathlist will expect partial paths for that rel to produce
-		 * the same output as complete paths ... and we just changed the
-		 * output for the complete paths, so we'll need to do the same thing
-		 * for partial paths.  But only parallel-safe expressions can be
-		 * computed by partial paths.
+		 * When possible, we want target list evaluation to happen in parallel
+		 * worker processes rather than in the leader.  To facilitate this,
+		 * scan/join planning avoids generating Gather or Gather Merge paths
+		 * for the topmost scan/join relation.  That lets us do it here,
+		 * possibly after adjusting the target lists of the partial paths.
+		 *
+		 * In the past, we used to generate Gather or Gather Merge paths first
+		 * and then modify the target lists of their subpaths after the fact,
+		 * but that wasn't good because at that point it's too late for the
+		 * associated cost savings to affect which plans get chosen.  A plan
+		 * that involves using parallel query for the entire scan/join tree
+		 * may gain a significant advantage as compared with a serial plan if
+		 * target list evaluation is expensive.
 		 */
-		if (current_rel->partial_pathlist &&
-			is_parallel_safe(root, (Node *) scanjoin_target->exprs))
+		if (current_rel->partial_pathlist != NIL)
 		{
-			/* Apply the scan/join target to each partial path */
-			foreach(lc, current_rel->partial_pathlist)
+			bool		scanjoin_target_parallel_safe = false;
+
+			/*
+			 * If scanjoin_target is parallel-safe, apply it to all partial
+			 * paths, just like we already did for non-partial paths.
+			 */
+			if (is_parallel_safe(root, (Node *) scanjoin_target->exprs))
 			{
-				Path	   *subpath = (Path *) lfirst(lc);
-				Path	   *newpath;
+				/* Remember that the target list is parallel safe. */
+				scanjoin_target_parallel_safe = true;
 
-				/* Shouldn't have any parameterized paths anymore */
-				Assert(subpath->param_info == NULL);
+				/* Apply the scan/join target to each partial path */
+				foreach(lc, current_rel->partial_pathlist)
+				{
+					Path	   *subpath = (Path *) lfirst(lc);
+					Path	   *newpath;
 
-				/*
-				 * Don't use apply_projection_to_path() here, because there
-				 * could be other pointers to these paths, and therefore we
-				 * mustn't modify them in place.
-				 */
-				newpath = (Path *) create_projection_path(root,
-														  current_rel,
-														  subpath,
-														  scanjoin_target);
-				lfirst(lc) = newpath;
+					/* Shouldn't have any parameterized paths anymore */
+					Assert(subpath->param_info == NULL);
+
+					/*
+					 * Don't use apply_projection_to_path() here, because
+					 * there could be other pointers to these paths, and
+					 * therefore we mustn't modify them in place.
+					 */
+					newpath = (Path *) create_projection_path(root,
+															  current_rel,
+															  subpath,
+															  scanjoin_target);
+					lfirst(lc) = newpath;
+				}
 			}
-		}
-		else
-		{
+
+			/*
+			 * Try building Gather or Gather Merge paths.  We can do this even
+			 * if scanjoin_target isn't parallel-safe; for such queries,
+			 * Gather or Gather Merge will perform projection.  However, we
+			 * must be sure that the paths we generate produce
+			 * scanjoin_target, because the paths already in
+			 * current_rel->pathlist have already been adjusted to do so.
+			 */
+			generate_gather_paths(root, current_rel, scanjoin_target);
+
 			/*
-			 * In the unfortunate event that scanjoin_target is not
-			 * parallel-safe, we can't apply it to the partial paths; in that
-			 * case, we'll need to forget about the partial paths, which
-			 * aren't valid input for upper planning steps.
+			 * If scanjoin_target isn't parallel-safe, the partial paths for
+			 * this relation haven't been adjusted to generate it, which means
+			 * they can't safely be used for upper planning steps.
 			 */
-			current_rel->partial_pathlist = NIL;
+			if (!scanjoin_target_parallel_safe)
+				current_rel->partial_pathlist = NIL;
+
+			/*
+			 * Since generate_gather_paths has likely added new paths to
+			 * current_rel, the cheapest path might have changed.
+			 */
+			set_cheapest(current_rel);
 		}
 
 		/* Now fix things up if scan/join target contains SRFs */
@@ -4700,8 +4732,21 @@ create_ordered_paths(PlannerInfo *root,
 											 ordered_rel,
 											 cheapest_partial_path,
 											 root->sort_pathkeys,
+
 											 limit_tuples);
 
+			/*
+			 * If projection is required, and it's safe to do it before
+			 * Gather Merge, then do so.
+			 */
+			if (path->pathtarget != target &&
+				is_parallel_safe(root, (Node *) target->exprs))
+				path = (Path *)
+					create_projection_path(root,
+										   ordered_rel,
+										   path,
+										   target);
+
 			total_groups = cheapest_partial_path->rows *
 				cheapest_partial_path->parallel_workers;
 			path = (Path *)
@@ -4711,7 +4756,10 @@ create_ordered_paths(PlannerInfo *root,
 										 root->sort_pathkeys, NULL,
 										 &total_groups);
 
-			/* Add projection step if needed */
+			/*
+			 * If projection is required and we didn't do it before Gather
+			 * Merge, do it now.
+			 */
 			if (path->pathtarget != target)
 				path = apply_projection_to_path(root, ordered_rel,
 												path, target);
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index fe3b4582d4..26fe5ca6a8 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -2435,10 +2435,6 @@ create_projection_path(PlannerInfo *root,
  * knows that the given path isn't referenced elsewhere and so can be modified
  * in-place.
  *
- * If the input path is a GatherPath or GatherMergePath, we try to push the
- * new target down to its input as well; this is a yet more invasive
- * modification of the input path, which create_projection_path() can't do.
- *
  * Note that we mustn't change the source path's parent link; so when it is
  * add_path'd to "rel" things will be a bit inconsistent.  So far that has
  * not caused any trouble.
@@ -2473,57 +2469,6 @@ apply_projection_to_path(PlannerInfo *root,
 	path->total_cost += target->cost.startup - oldcost.startup +
 		(target->cost.per_tuple - oldcost.per_tuple) * path->rows;
 
-	/*
-	 * If the path happens to be a Gather or GatherMerge path, we'd like to
-	 * arrange for the subpath to return the required target list so that
-	 * workers can help project.  But if there is something that is not
-	 * parallel-safe in the target expressions, then we can't.
-	 */
-	if ((IsA(path, GatherPath) ||IsA(path, GatherMergePath)) &&
-		is_parallel_safe(root, (Node *) target->exprs))
-	{
-		/*
-		 * We always use create_projection_path here, even if the subpath is
-		 * projection-capable, so as to avoid modifying the subpath in place.
-		 * It seems unlikely at present that there could be any other
-		 * references to the subpath, but better safe than sorry.
-		 *
-		 * Note that we don't change the parallel path's cost estimates; it
-		 * might be appropriate to do so, to reflect the fact that the bulk of
-		 * the target evaluation will happen in workers.
-		 */
-		if (IsA(path, GatherPath))
-		{
-			GatherPath *gpath = (GatherPath *) path;
-
-			gpath->subpath = (Path *)
-				create_projection_path(root,
-									   gpath->subpath->parent,
-									   gpath->subpath,
-									   target);
-		}
-		else
-		{
-			GatherMergePath *gmpath = (GatherMergePath *) path;
-
-			gmpath->subpath = (Path *)
-				create_projection_path(root,
-									   gmpath->subpath->parent,
-									   gmpath->subpath,
-									   target);
-		}
-	}
-	else if (path->parallel_safe &&
-			 !is_parallel_safe(root, (Node *) target->exprs))
-	{
-		/*
-		 * We're inserting a parallel-restricted target list into a path
-		 * currently marked parallel-safe, so we have to mark it as no longer
-		 * safe.
-		 */
-		path->parallel_safe = false;
-	}
-
 	return path;
 }
 
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 0072b7aa0d..4afbcbe13e 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -53,7 +53,8 @@ extern void set_dummy_rel_pathlist(RelOptInfo *rel);
 extern RelOptInfo *standard_join_search(PlannerInfo *root, int levels_needed,
 					 List *initial_rels);
 
-extern void generate_gather_paths(PlannerInfo *root, RelOptInfo *rel);
+extern void generate_gather_paths(PlannerInfo *root, RelOptInfo *rel,
+					  PathTarget *target);
 extern int compute_parallel_worker(RelOptInfo *rel, double heap_pages,
 						double index_pages);
 extern void create_partial_bitmap_paths(PlannerInfo *root, RelOptInfo *rel,
diff --git a/src/test/regress/expected/select_parallel.out b/src/test/regress/expected/select_parallel.out
index 452494fbfa..e5a91bd200 100644
--- a/src/test/regress/expected/select_parallel.out
+++ b/src/test/regress/expected/select_parallel.out
@@ -251,6 +251,23 @@ execute tenk1_count(1);
 (1 row)
 
 deallocate tenk1_count;
+-- test that parallel plan gets selected when target list contains costly
+-- function
+create or replace function costly_func(var1 integer) returns integer
+as $$
+begin
+        return var1 + 10;
+end;
+$$ language plpgsql PARALLEL SAFE Cost 100000;
+explain (costs off) select ten, costly_func(ten) from tenk1;
+            QUERY PLAN            
+----------------------------------
+ Gather
+   Workers Planned: 4
+   ->  Parallel Seq Scan on tenk1
+(3 rows)
+
+drop function costly_func(var1 integer);
 -- test parallel plans for queries containing un-correlated subplans.
 alter table tenk2 set (parallel_workers = 0);
 explain (costs off)
diff --git a/src/test/regress/sql/select_parallel.sql b/src/test/regress/sql/select_parallel.sql
index b12ba0b74a..0f95b63c23 100644
--- a/src/test/regress/sql/select_parallel.sql
+++ b/src/test/regress/sql/select_parallel.sql
@@ -91,6 +91,17 @@ explain (costs off) execute tenk1_count(1);
 execute tenk1_count(1);
 deallocate tenk1_count;
 
+-- test that parallel plan gets selected when target list contains costly
+-- function
+create or replace function costly_func(var1 integer) returns integer
+as $$
+begin
+        return var1 + 10;
+end;
+$$ language plpgsql PARALLEL SAFE Cost 100000;
+explain (costs off) select ten, costly_func(ten) from tenk1;
+drop function costly_func(var1 integer);
+
 -- test parallel plans for queries containing un-correlated subplans.
 alter table tenk2 set (parallel_workers = 0);
 explain (costs off)
