From 50303bdd7f316c4247efaaa510071681185893df Mon Sep 17 00:00:00 2001
From: "dgrowley@gmail.com" <dgrowley@gmail.com>
Date: Fri, 26 Oct 2018 09:18:09 +1300
Subject: [PATCH v9] Allow Append to be used in place of MergeAppend for some
 cases

For RANGE partitioned tables with no default partition the subpaths of a
MergeAppend are always arranged in range order. This means that
MergeAppend, when sorting by the partition key or a superset of the
partition key, will always output tuples from earlier subpaths before later
subpaths.  LIST partitioned tables provide the same guarantee if they also
don't have a default partition, providing that none of the partitions are
defined to allow Datums with values which are interleaved with other
partitions.  For simplicity and speed of checking we currently just
disallow the optimization if any partition allows more than one Datum.
We may want to expand this later, but for now, it's a very cheap check to
implement.  A more thorough check would require performing analysis on the
partition bound.
---
 src/backend/nodes/outfuncs.c                  |   1 +
 src/backend/optimizer/path/allpaths.c         | 211 +++++++++++++++++++++-----
 src/backend/optimizer/path/costsize.c         |  51 ++++++-
 src/backend/optimizer/path/joinrels.c         |   2 +-
 src/backend/optimizer/path/pathkeys.c         |  59 +++++++
 src/backend/optimizer/plan/createplan.c       |  90 ++++++++---
 src/backend/optimizer/plan/planner.c          |   6 +-
 src/backend/optimizer/prep/prepunion.c        |   6 +-
 src/backend/optimizer/util/pathnode.c         |  23 ++-
 src/backend/partitioning/partbounds.c         |   4 +
 src/backend/partitioning/partprune.c          |  59 +++++++
 src/include/nodes/pathnodes.h                 |   1 +
 src/include/optimizer/cost.h                  |   2 +-
 src/include/optimizer/pathnode.h              |   2 +-
 src/include/optimizer/paths.h                 |   2 +
 src/include/partitioning/partprune.h          |   2 +
 src/test/regress/expected/inherit.out         | 211 +++++++++++++++++++++++++-
 src/test/regress/expected/partition_prune.out |  64 ++++----
 src/test/regress/sql/inherit.sql              |  93 ++++++++++++
 src/test/regress/sql/partition_prune.sql      |  10 +-
 20 files changed, 773 insertions(+), 126 deletions(-)

diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 65302fe65b..d94944e62f 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -1838,6 +1838,7 @@ _outAppendPath(StringInfo str, const AppendPath *node)
 	WRITE_NODE_FIELD(partitioned_rels);
 	WRITE_NODE_FIELD(subpaths);
 	WRITE_INT_FIELD(first_partial_path);
+	WRITE_FLOAT_FIELD(limit_tuples, "%.0f");
 }
 
 static void
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 0debac75c6..a1e592b1f8 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -96,15 +96,16 @@ static void set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
 					Index rti, RangeTblEntry *rte);
 static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
 						Index rti, RangeTblEntry *rte);
-static void generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
-						   List *live_childrels,
-						   List *all_child_pathkeys,
-						   List *partitioned_rels);
+static void generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel,
+							 List *live_childrels,
+							 List *all_child_pathkeys,
+							 List *partitioned_rels);
 static Path *get_cheapest_parameterized_child_path(PlannerInfo *root,
 									  RelOptInfo *rel,
 									  Relids required_outer);
 static void accumulate_append_subpath(Path *path,
 						  List **subpaths, List **special_subpaths);
+static Path *get_singleton_append_subpath(Path *path);
 static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
 					  Index rti, RangeTblEntry *rte);
 static void set_function_pathlist(PlannerInfo *root, RelOptInfo *rel,
@@ -1550,7 +1551,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
 	 */
 	if (subpaths_valid)
 		add_path(rel, (Path *) create_append_path(root, rel, subpaths, NIL,
-												  NULL, 0, false,
+												  NIL, NULL, 0, false,
 												  partitioned_rels, -1));
 
 	/*
@@ -1592,7 +1593,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
 
 		/* Generate a partial append path. */
 		appendpath = create_append_path(root, rel, NIL, partial_subpaths,
-										NULL, parallel_workers,
+										NIL, NULL, parallel_workers,
 										enable_parallel_append,
 										partitioned_rels, -1);
 
@@ -1642,19 +1643,19 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
 
 		appendpath = create_append_path(root, rel, pa_nonpartial_subpaths,
 										pa_partial_subpaths,
-										NULL, parallel_workers, true,
+										NIL, NULL, parallel_workers, true,
 										partitioned_rels, partial_rows);
 		add_partial_path(rel, (Path *) appendpath);
 	}
 
 	/*
-	 * Also build unparameterized MergeAppend paths based on the collected
+	 * Also build unparameterized ordered append paths based on the collected
 	 * list of child pathkeys.
 	 */
 	if (subpaths_valid)
-		generate_mergeappend_paths(root, rel, live_childrels,
-								   all_child_pathkeys,
-								   partitioned_rels);
+		generate_orderedappend_paths(root, rel, live_childrels,
+									 all_child_pathkeys,
+									 partitioned_rels);
 
 	/*
 	 * Build Append paths for each parameterization seen among the child rels.
@@ -1704,41 +1705,67 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
 		if (subpaths_valid)
 			add_path(rel, (Path *)
 					 create_append_path(root, rel, subpaths, NIL,
-										required_outer, 0, false,
+										NIL, required_outer, 0, false,
 										partitioned_rels, -1));
 	}
 }
 
 /*
- * generate_mergeappend_paths
- *		Generate MergeAppend paths for an append relation
+ * generate_orderedappend_paths
+ *		Generate ordered append paths for an append relation.
  *
- * Generate a path for each ordering (pathkey list) appearing in
+ * Generally we generate MergeAppend paths here, but there are some special
+ * cases where we can generate simple Append paths when we're able to
+ * determine that the list of subpaths provide tuples in the required order.
+ *
+ * We generate a path for each ordering (pathkey list) appearing in
  * all_child_pathkeys.
  *
  * We consider both cheapest-startup and cheapest-total cases, ie, for each
  * interesting ordering, collect all the cheapest startup subpaths and all the
- * cheapest total paths, and build a MergeAppend path for each case.
- *
- * We don't currently generate any parameterized MergeAppend paths.  While
- * it would not take much more code here to do so, it's very unclear that it
- * is worth the planning cycles to investigate such paths: there's little
- * use for an ordered path on the inside of a nestloop.  In fact, it's likely
- * that the current coding of add_path would reject such paths out of hand,
- * because add_path gives no credit for sort ordering of parameterized paths,
- * and a parameterized MergeAppend is going to be more expensive than the
+ * cheapest total paths, and build a suitable path for each case.
+ *
+ * We don't currently generate any parameterized paths here.  While it would
+ * not take much more code to do so, it's very unclear that it is worth the
+ * planning cycles to investigate such paths: there's little use for an
+ * ordered path on the inside of a nestloop.  In fact, it's likely that the
+ * current coding of add_path would reject such paths out of hand, because
+ * add_path gives no credit for sort ordering of parameterized paths, and a
+ * parameterized MergeAppend is going to be more expensive than the
  * corresponding parameterized Append path.  If we ever try harder to support
  * parameterized mergejoin plans, it might be worth adding support for
- * parameterized MergeAppends to feed such joins.  (See notes in
+ * parameterized paths here to feed such joins.  (See notes in
  * optimizer/README for why that might not ever happen, though.)
  */
 static void
-generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
-						   List *live_childrels,
-						   List *all_child_pathkeys,
-						   List *partitioned_rels)
+generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel,
+							 List *live_childrels,
+							 List *all_child_pathkeys,
+							 List *partitioned_rels)
 {
 	ListCell   *lcp;
+	List	   *partition_pathkeys = NIL;
+	List	   *partition_pathkeys_desc = NIL;
+	bool		partition_pathkeys_partial = true;
+	bool		partition_pathkeys_desc_partial = true;
+
+	/*
+	 * Some partitioned table setups may allow us to use an Append node
+	 * instead of a MergeAppend.  This is possible in cases such as RANGE
+	 * partitioned tables where it's guaranteed that an earlier partition must
+	 * contain rows which come earlier in the sort order.
+	 */
+	if (rel->part_scheme != NULL && IS_SIMPLE_REL(rel) &&
+		partitions_are_ordered(root, rel))
+	{
+		partition_pathkeys = build_partition_pathkeys(root, rel,
+													  ForwardScanDirection,
+												&partition_pathkeys_partial);
+
+		partition_pathkeys_desc = build_partition_pathkeys(root, rel,
+													BackwardScanDirection,
+											&partition_pathkeys_desc_partial);
+	}
 
 	foreach(lcp, all_child_pathkeys)
 	{
@@ -1747,6 +1774,29 @@ generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
 		List	   *total_subpaths = NIL;
 		bool		startup_neq_total = false;
 		ListCell   *lcr;
+		bool		partition_order;
+		bool		partition_order_desc;
+
+		/*
+		 * Determine if these pathkeys are contained in the partition pathkeys
+		 * for both ascending and decending partition order.  If the
+		 * partitioned pathkeys happened to be contained in pathkeys then this
+		 * is fine too, providing that the partition pathkeys are complete and
+		 * not just a prefix of the partition order.  In this case an Append
+		 * scan cannot produce any out of order tuples.
+		 */
+		partition_order = pathkeys_contained_in(pathkeys,
+												partition_pathkeys) ||
+						(!partition_pathkeys_partial &&
+						 pathkeys_contained_in(partition_pathkeys, pathkeys));
+
+		partition_order_desc = !partition_order &&
+								(pathkeys_contained_in(pathkeys,
+												partition_pathkeys_desc) ||
+						(!partition_pathkeys_desc_partial &&
+							pathkeys_contained_in(partition_pathkeys_desc,
+												  pathkeys)));
+
 
 		/* Select the child paths for this ordering... */
 		foreach(lcr, live_childrels)
@@ -1789,26 +1839,81 @@ generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
 			if (cheapest_startup != cheapest_total)
 				startup_neq_total = true;
 
-			accumulate_append_subpath(cheapest_startup,
-									  &startup_subpaths, NULL);
-			accumulate_append_subpath(cheapest_total,
-									  &total_subpaths, NULL);
+			/*
+			 * When in partition order or decending partition order don't
+			 * flatten any sub-partition's paths unless they're an Append or
+			 * MergeAppend with a single subpath.  For the desceding order
+			 * case we build the path list in reverse so that the Append scan
+			 * correctly scans the partitions in reverse order.
+			 */
+			if (partition_order)
+			{
+				/* Do the Append/MergeAppend flattening, when possible */
+				cheapest_startup = get_singleton_append_subpath(cheapest_startup);
+				cheapest_total = get_singleton_append_subpath(cheapest_total);
+
+				startup_subpaths = lappend(startup_subpaths, cheapest_startup);
+				total_subpaths = lappend(total_subpaths, cheapest_total);
+			}
+			else if (partition_order_desc)
+			{
+				cheapest_startup = get_singleton_append_subpath(cheapest_startup);
+				cheapest_total = get_singleton_append_subpath(cheapest_total);
+
+				startup_subpaths = lcons(cheapest_startup, startup_subpaths);
+				total_subpaths = lcons(cheapest_total, total_subpaths);
+			}
+			else
+			{
+				accumulate_append_subpath(cheapest_startup,
+										  &startup_subpaths, NULL);
+				accumulate_append_subpath(cheapest_total,
+										  &total_subpaths, NULL);
+			}
 		}
 
-		/* ... and build the MergeAppend paths */
-		add_path(rel, (Path *) create_merge_append_path(root,
+		/* Build simple Append paths if in partition asc/desc order */
+		if (partition_order || partition_order_desc)
+		{
+			add_path(rel, (Path *) create_append_path(root,
 														rel,
 														startup_subpaths,
+														NIL,
 														pathkeys,
 														NULL,
-														partitioned_rels));
-		if (startup_neq_total)
-			add_path(rel, (Path *) create_merge_append_path(root,
+														0,
+														false,
+														partitioned_rels,
+														-1));
+			if (startup_neq_total)
+				add_path(rel, (Path *) create_append_path(root,
 															rel,
 															total_subpaths,
+															NIL,
+															pathkeys,
+															NULL,
+															0,
+															false,
+															partitioned_rels,
+															-1));
+		}
+		else
+		{
+			/* else just build the MergeAppend paths */
+			add_path(rel, (Path *) create_merge_append_path(root,
+															rel,
+															startup_subpaths,
 															pathkeys,
 															NULL,
 															partitioned_rels));
+			if (startup_neq_total)
+				add_path(rel, (Path *) create_merge_append_path(root,
+																rel,
+																total_subpaths,
+																pathkeys,
+																NULL,
+																partitioned_rels));
+		}
 	}
 }
 
@@ -1949,6 +2054,34 @@ accumulate_append_subpath(Path *path, List **subpaths, List **special_subpaths)
 	*subpaths = lappend(*subpaths, path);
 }
 
+/*
+ * get_singleton_append_subpath
+ *		Returns the singleton subpath of an Append/MergeAppend or
+ *		return 'path' if it's not a single sub-path Append/MergeAppend.
+ */
+static Path *
+get_singleton_append_subpath(Path *path)
+{
+	if (IsA(path, AppendPath))
+	{
+		AppendPath *apath = (AppendPath *) path;
+
+		Assert(!apath->path.parallel_aware);
+
+		if (list_length(apath->subpaths) == 1)
+			return (Path *) linitial(apath->subpaths);
+	}
+	else if (IsA(path, MergeAppendPath))
+	{
+		MergeAppendPath *mpath = (MergeAppendPath *) path;
+
+		if (list_length(mpath->subpaths) == 1)
+			return (Path *) linitial(mpath->subpaths);
+	}
+
+	return path;
+}
+
 /*
  * set_dummy_rel_pathlist
  *	  Build a dummy path for a relation that's been excluded by constraints
@@ -1969,7 +2102,7 @@ set_dummy_rel_pathlist(RelOptInfo *rel)
 	rel->pathlist = NIL;
 	rel->partial_pathlist = NIL;
 
-	add_path(rel, (Path *) create_append_path(NULL, rel, NIL, NIL, NULL,
+	add_path(rel, (Path *) create_append_path(NULL, rel, NIL, NIL, NIL, NULL,
 											  0, false, NIL, -1));
 
 	/*
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 4b9be13f08..bf45b15453 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1872,7 +1872,7 @@ append_nonpartial_cost(List *subpaths, int numpaths, int parallel_workers)
  *	  Determines and returns the cost of an Append node.
  */
 void
-cost_append(AppendPath *apath)
+cost_append(PlannerInfo *root, AppendPath *apath)
 {
 	ListCell   *l;
 
@@ -1884,21 +1884,56 @@ cost_append(AppendPath *apath)
 
 	if (!apath->path.parallel_aware)
 	{
-		Path	   *subpath = (Path *) linitial(apath->subpaths);
-
+		Path	   *isubpath = (Path *) linitial(apath->subpaths);
+		List	   *pathkeys = apath->path.pathkeys;
 		/*
 		 * Startup cost of non-parallel-aware Append is the startup cost of
-		 * first subpath.
+		 * the first subpath. This may be overwritten below if the initial
+		 * path requires a sort.
 		 */
-		apath->path.startup_cost = subpath->startup_cost;
+		apath->path.startup_cost = isubpath->startup_cost;
 
-		/* Compute rows and costs as sums of subplan rows and costs. */
+		/*
+		 * Compute rows and costs as sums of subplan rows and costs taking
+		 * into account the cost of any sorts which may be required on
+		 * subplans.
+		 */
 		foreach(l, apath->subpaths)
 		{
 			Path	   *subpath = (Path *) lfirst(l);
 
 			apath->path.rows += subpath->rows;
-			apath->path.total_cost += subpath->total_cost;
+
+			if (pathkeys != NIL &&
+				!pathkeys_contained_in(pathkeys, subpath->pathkeys))
+			{
+				Path		sort_path;	/* dummy for result of cost_sort */
+
+				/*
+				 * We'll need to insert a Sort node, so include cost for that.
+				 */
+				cost_sort(&sort_path,
+						  root,
+						  pathkeys,
+						  subpath->total_cost,
+						  subpath->parent->tuples,
+						  subpath->pathtarget->width,
+						  0.0,
+						  work_mem,
+						  apath->limit_tuples);
+				apath->path.total_cost += sort_path.total_cost;
+
+				/*
+				 * When the first subpath needs to be sorted, set the startup
+				 * cost of the sort as the startup cost of the Append.
+				 */
+				if (subpath == isubpath)
+					apath->path.startup_cost = sort_path.startup_cost;
+			}
+			else
+			{
+				apath->path.total_cost += subpath->total_cost;
+			}
 		}
 	}
 	else						/* parallel-aware */
@@ -1906,6 +1941,8 @@ cost_append(AppendPath *apath)
 		int			i = 0;
 		double		parallel_divisor = get_parallel_divisor(&apath->path);
 
+		Assert(apath->path.pathkeys == NIL);
+
 		/* Calculate startup cost. */
 		foreach(l, apath->subpaths)
 		{
diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c
index dfbbfdac6d..de28228adf 100644
--- a/src/backend/optimizer/path/joinrels.c
+++ b/src/backend/optimizer/path/joinrels.c
@@ -1237,7 +1237,7 @@ mark_dummy_rel(RelOptInfo *rel)
 	rel->partial_pathlist = NIL;
 
 	/* Set up the dummy path */
-	add_path(rel, (Path *) create_append_path(NULL, rel, NIL, NIL, NULL,
+	add_path(rel, (Path *) create_append_path(NULL, rel, NIL, NIL, NIL, NULL,
 											  0, false, NIL, -1));
 
 	/* Set or update cheapest_total_path and related fields */
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 56d839bb31..753a1c24a2 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -24,6 +24,7 @@
 #include "optimizer/optimizer.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
+#include "partitioning/partbounds.h"
 #include "utils/lsyscache.h"
 
 
@@ -546,6 +547,64 @@ build_index_pathkeys(PlannerInfo *root,
 	return retval;
 }
 
+/*
+ * build_partition_pathkeys
+ *	  Build a pathkeys list that describes the ordering induced by the
+ *	  partitions of 'partrel'.  (Callers must ensure that this partitioned
+ *	  table guarantees that lower order tuples never will be found in a
+ *	  later partition.).  Sets *partialkeys to false if pathkeys were only
+ *	  built for a prefix of the partition key, otherwise sets it to true.
+ */
+List *
+build_partition_pathkeys(PlannerInfo *root, RelOptInfo *partrel,
+						 ScanDirection scandir, bool *partialkeys)
+{
+	PartitionScheme		partscheme;
+	List	   *retval = NIL;
+	int			i;
+
+	partscheme = partrel->part_scheme;
+
+	for (i = 0; i < partscheme->partnatts; i++)
+	{
+		PathKey    *cpathkey;
+		Expr	   *keyCol = linitial(partrel->partexprs[i]);
+
+		/*
+		 * OK, try to make a canonical pathkey for this part key.  Note we're
+		 * underneath any outer joins, so nullable_relids should be NULL.
+		 */
+		cpathkey = make_pathkey_from_sortinfo(root,
+											  keyCol,
+											  NULL,
+											  partscheme->partopfamily[i],
+											  partscheme->partopcintype[i],
+											  partscheme->partcollation[i],
+											 ScanDirectionIsBackward(scandir),
+											 ScanDirectionIsBackward(scandir),
+											  0,
+											  partrel->relids,
+											  false);
+
+		/*
+		 * When unable to create the pathkey we'll just need to return
+		 * whatever ones we have so far.
+		 */
+		if (cpathkey == NULL)
+		{
+			*partialkeys = true;
+			return retval;
+		}
+
+		/* Add it to list, unless it's redundant. */
+		if (!pathkey_is_redundant(cpathkey, retval))
+			retval = lappend(retval, cpathkey);
+	}
+
+	*partialkeys = false;
+	return retval;
+}
+
 /*
  * build_expression_pathkey
  *	  Build a pathkeys list that describes an ordering by a single expression
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 236f506cfb..3a90934933 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -205,8 +205,6 @@ static NamedTuplestoreScan *make_namedtuplestorescan(List *qptlist, List *qpqual
 						 Index scanrelid, char *enrname);
 static WorkTableScan *make_worktablescan(List *qptlist, List *qpqual,
 				   Index scanrelid, int wtParam);
-static Append *make_append(List *appendplans, int first_partial_plan,
-			List *tlist, PartitionPruneInfo *partpruneinfo);
 static RecursiveUnion *make_recursive_union(List *tlist,
 					 Plan *lefttree,
 					 Plan *righttree,
@@ -1058,12 +1056,24 @@ create_join_plan(PlannerInfo *root, JoinPath *best_path)
 static Plan *
 create_append_plan(PlannerInfo *root, AppendPath *best_path)
 {
-	Append	   *plan;
+	Append	   *node = makeNode(Append);
+	Plan	   *plan = &node->plan;
 	List	   *tlist = build_path_tlist(root, &best_path->path);
+	List	   *pathkeys = best_path->path.pathkeys;
 	List	   *subplans = NIL;
 	ListCell   *subpaths;
 	RelOptInfo *rel = best_path->path.parent;
 	PartitionPruneInfo *partpruneinfo = NULL;
+	int			nodenumsortkeys;
+	AttrNumber *nodeSortColIdx;
+	Oid		   *nodeSortOperators;
+	Oid		   *nodeCollations;
+	bool	   *nodeNullsFirst;
+
+	plan->targetlist = tlist;
+	plan->qual = NIL;
+	plan->lefttree = NULL;
+	plan->righttree = NULL;
 
 	/*
 	 * The subpaths list could be empty, if every child was proven empty by
@@ -1089,6 +1099,23 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path)
 		return plan;
 	}
 
+	if (pathkeys != NIL)
+	{
+		/*
+		 * Compute sort column info, and adjust the Append's tlist as needed.
+		 * We only need the 'nodeSortColIdx' from all of the output params.
+		 */
+		(void) prepare_sort_from_pathkeys(plan, pathkeys,
+										  best_path->path.parent->relids,
+										  NULL,
+										  true,
+										  &nodenumsortkeys,
+										  &nodeSortColIdx,
+										  &nodeSortOperators,
+										  &nodeCollations,
+										  &nodeNullsFirst);
+	}
+
 	/* Build the plan for each child */
 	foreach(subpaths, best_path->subpaths)
 	{
@@ -1098,6 +1125,39 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path)
 		/* Must insist that all children return the same tlist */
 		subplan = create_plan_recurse(root, subpath, CP_EXACT_TLIST);
 
+		/*
+		 * Now, for appends with pathkeys, insert a Sort node if subplan isn't
+		 * sufficiently ordered.
+		 */
+		if (pathkeys != NIL)
+		{
+			int			numsortkeys;
+			AttrNumber *sortColIdx;
+			Oid		   *sortOperators;
+			Oid		   *collations;
+			bool	   *nullsFirst;
+
+			/* Compute sort column info, and adjust subplan's tlist as needed */
+			subplan = prepare_sort_from_pathkeys(subplan, pathkeys,
+												 subpath->parent->relids,
+												 nodeSortColIdx,
+												 false,
+												 &numsortkeys,
+												 &sortColIdx,
+												 &sortOperators,
+												 &collations,
+												 &nullsFirst);
+
+			if (!pathkeys_contained_in(pathkeys, subpath->pathkeys))
+			{
+				Sort	   *sort = make_sort(subplan, numsortkeys,
+											 sortColIdx, sortOperators,
+											 collations, nullsFirst);
+
+				label_sort_with_costsize(root, sort, best_path->limit_tuples);
+				subplan = (Plan *) sort;
+			}
+		}
 		subplans = lappend(subplans, subplan);
 	}
 
@@ -1140,10 +1200,11 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path)
 	 * parent-rel Vars it'll be asked to emit.
 	 */
 
-	plan = make_append(subplans, best_path->first_partial_path,
-					   tlist, partpruneinfo);
+	node->appendplans = subplans;
+	node->first_partial_plan = best_path->first_partial_path;
+	node->part_prune_info = partpruneinfo;
 
-	copy_generic_path_info(&plan->plan, (Path *) best_path);
+	copy_generic_path_info(plan, (Path *) best_path);
 
 	return (Plan *) plan;
 }
@@ -5281,23 +5342,6 @@ make_foreignscan(List *qptlist,
 	return node;
 }
 
-static Append *
-make_append(List *appendplans, int first_partial_plan,
-			List *tlist, PartitionPruneInfo *partpruneinfo)
-{
-	Append	   *node = makeNode(Append);
-	Plan	   *plan = &node->plan;
-
-	plan->targetlist = tlist;
-	plan->qual = NIL;
-	plan->lefttree = NULL;
-	plan->righttree = NULL;
-	node->appendplans = appendplans;
-	node->first_partial_plan = first_partial_plan;
-	node->part_prune_info = partpruneinfo;
-	return node;
-}
-
 static RecursiveUnion *
 make_recursive_union(List *tlist,
 					 Plan *lefttree,
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index bc81535905..5ffbad48d9 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -1602,7 +1602,8 @@ inheritance_planner(PlannerInfo *root)
 
 		/* Make a dummy path, cf set_dummy_rel_pathlist() */
 		dummy_path = (Path *) create_append_path(NULL, final_rel, NIL, NIL,
-												 NULL, 0, false, NIL, -1);
+												 NIL, NULL, 0, false, NIL,
+												 -1);
 
 		/* These lists must be nonempty to make a valid ModifyTable node */
 		subpaths = list_make1(dummy_path);
@@ -3915,6 +3916,7 @@ create_degenerate_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
 							   grouped_rel,
 							   paths,
 							   NIL,
+							   NIL,
 							   NULL,
 							   0,
 							   false,
@@ -6951,7 +6953,7 @@ apply_scanjoin_target_to_paths(PlannerInfo *root,
 		 * node, which would cause this relation to stop appearing to be a
 		 * dummy rel.)
 		 */
-		rel->pathlist = list_make1(create_append_path(root, rel, NIL, NIL,
+		rel->pathlist = list_make1(create_append_path(root, rel, NIL, NIL, NIL,
 													  NULL, 0, false, NIL,
 													  -1));
 		rel->partial_pathlist = NIL;
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index eb815c2f12..65d77a5402 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -616,7 +616,7 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
 	/*
 	 * Append the child results together.
 	 */
-	path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
+	path = (Path *) create_append_path(root, result_rel, pathlist, NIL, NIL,
 									   NULL, 0, false, NIL, -1);
 
 	/*
@@ -671,7 +671,7 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
 		Assert(parallel_workers > 0);
 
 		ppath = (Path *)
-			create_append_path(root, result_rel, NIL, partial_pathlist,
+			create_append_path(root, result_rel, NIL, partial_pathlist, NIL,
 							   NULL, parallel_workers, enable_parallel_append,
 							   NIL, -1);
 		ppath = (Path *)
@@ -782,7 +782,7 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
 	/*
 	 * Append the child results together.
 	 */
-	path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
+	path = (Path *) create_append_path(root, result_rel, pathlist, NIL, NIL,
 									   NULL, 0, false, NIL, -1);
 
 	/* Identify the grouping semantics */
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 169e51e792..356d2469c4 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1208,7 +1208,7 @@ AppendPath *
 create_append_path(PlannerInfo *root,
 				   RelOptInfo *rel,
 				   List *subpaths, List *partial_subpaths,
-				   Relids required_outer,
+				   List *pathkeys, Relids required_outer,
 				   int parallel_workers, bool parallel_aware,
 				   List *partitioned_rels, double rows)
 {
@@ -1242,7 +1242,7 @@ create_append_path(PlannerInfo *root,
 	pathnode->path.parallel_aware = parallel_aware;
 	pathnode->path.parallel_safe = rel->consider_parallel;
 	pathnode->path.parallel_workers = parallel_workers;
-	pathnode->path.pathkeys = NIL;	/* result is always considered unsorted */
+	pathnode->path.pathkeys = pathkeys;
 	pathnode->partitioned_rels = list_copy(partitioned_rels);
 
 	/*
@@ -1252,10 +1252,14 @@ create_append_path(PlannerInfo *root,
 	 * costs.  There may be some paths that require to do startup work by a
 	 * single worker.  In such case, it's better for workers to choose the
 	 * expensive ones first, whereas the leader should choose the cheapest
-	 * startup plan.
+	 * startup plan.  Note: We mustn't fiddle with the order of subpaths
+	 * when the Append has valid pathkeys.  The order they're listed in
+	 * is critical to keeping the pathkeys valid.
 	 */
 	if (pathnode->path.parallel_aware)
 	{
+		Assert(pathkeys == NIL);
+
 		subpaths = list_qsort(subpaths, append_total_cost_compare);
 		partial_subpaths = list_qsort(partial_subpaths,
 									  append_startup_cost_compare);
@@ -1263,6 +1267,15 @@ create_append_path(PlannerInfo *root,
 	pathnode->first_partial_path = list_length(subpaths);
 	pathnode->subpaths = list_concat(subpaths, partial_subpaths);
 
+	/*
+	 * Apply query-wide LIMIT if known and path is for sole base relation.
+	 * (Handling this at this low level is a bit klugy.)
+	 */
+	if (root != NULL && bms_equal(rel->relids, root->all_baserels))
+		pathnode->limit_tuples = root->limit_tuples;
+	else
+		pathnode->limit_tuples = -1.0;
+
 	foreach(l, pathnode->subpaths)
 	{
 		Path	   *subpath = (Path *) lfirst(l);
@@ -1276,7 +1289,7 @@ create_append_path(PlannerInfo *root,
 
 	Assert(!parallel_aware || pathnode->path.parallel_safe);
 
-	cost_append(pathnode);
+	cost_append(root, pathnode);
 
 	/* If the caller provided a row estimate, override the computed value. */
 	if (rows >= 0)
@@ -3708,7 +3721,7 @@ reparameterize_path(PlannerInfo *root, Path *path,
 				}
 				return (Path *)
 					create_append_path(root, rel, childpaths, partialpaths,
-									   required_outer,
+									   NIL, required_outer,
 									   apath->path.parallel_workers,
 									   apath->path.parallel_aware,
 									   apath->partitioned_rels,
diff --git a/src/backend/partitioning/partbounds.c b/src/backend/partitioning/partbounds.c
index e71eb3793b..b32529090b 100644
--- a/src/backend/partitioning/partbounds.c
+++ b/src/backend/partitioning/partbounds.c
@@ -1682,6 +1682,8 @@ qsort_partition_hbound_cmp(const void *a, const void *b)
  * qsort_partition_list_value_cmp
  *
  * Compare two list partition bound datums.
+ *
+ * Note: If changing this, see build_partition_pathkeys()
  */
 static int32
 qsort_partition_list_value_cmp(const void *a, const void *b, void *arg)
@@ -1699,6 +1701,8 @@ qsort_partition_list_value_cmp(const void *a, const void *b, void *arg)
  * qsort_partition_rbound_cmp
  *
  * Used when sorting range bounds across all range partitions.
+ *
+ * Note: If changing this, see build_partition_pathkeys()
  */
 static int32
 qsort_partition_rbound_cmp(const void *a, const void *b, void *arg)
diff --git a/src/backend/partitioning/partprune.c b/src/backend/partitioning/partprune.c
index 8c9721935d..9187e71399 100644
--- a/src/backend/partitioning/partprune.c
+++ b/src/backend/partitioning/partprune.c
@@ -175,7 +175,66 @@ static bool partkey_datum_from_expr(PartitionPruneContext *context,
 						Expr *expr, int stateidx,
 						Datum *value, bool *isnull);
 
+/*
+ * partitions_are_ordered
+ *		For the partitioned table given in 'partrel', returns true if the
+ *		partitioned table guarantees that tuples which sort earlier according
+ *		to the partition bound are stored in an earlier partition.  Returns
+ *		false this is not possible, or if we have insufficient means to prove
+ *		it.
+ *
+ * This assumes nothing about the order of tuples inside the actual
+ * partitions.
+ */
+bool
+partitions_are_ordered(PlannerInfo *root, RelOptInfo *partrel)
+{
+	PartitionBoundInfo	boundinfo = partrel->boundinfo;
+
+	Assert(boundinfo != NULL);
+
+	switch (boundinfo->strategy)
+	{
+		/*
+		 * RANGE type partitions guarantee that the partitions can be scanned
+		 * in the order that they're defined in the PartitionDesc to provide
+		 * non-overlapping ranges of tuples.  We must disallow when a DEFAULT
+		 * partition exists as this could contain tuples from either below or
+		 * above the defined range, or contain tuples belonging to gaps in the
+		 * defined range.
+		 */
+		case PARTITION_STRATEGY_RANGE:
+			if (partition_bound_has_default(boundinfo))
+				return false;
+			break;
 
+		/*
+		 * LIST partitions can also guarantee ordering, but we'd need to
+		 * ensure that partitions don't allow interleaved values.  We could
+		 * likely check for this looking at each partition, in order, and
+		 * checking which Datums are accepted.  If we find a Datum in a
+		 * partition that's greater than one previously already seen, then
+		 * values could become out of order and we'd have to disable the
+		 * optimization.  For now, let's just keep it simple and just accept
+		 * LIST partitions without a DEFAULT partition which only accept a
+		 * single Datum per partition.  This is cheap as it does not require
+		 * any per-partition processing.  Maybe we'd like to handle more
+		 * complex cases in the future.
+		 */
+		case PARTITION_STRATEGY_LIST:
+			if (partition_bound_has_default(boundinfo))
+				return false;
+
+			if (boundinfo->ndatums + partition_bound_accepts_nulls(boundinfo) != partrel->nparts)
+				return false;
+			break;
+
+		default:
+			return false;
+	}
+
+	return true;
+}
 /*
  * make_partition_pruneinfo
  *		Builds a PartitionPruneInfo which can be used in the executor to allow
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index a008ae07da..66e34a162a 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1359,6 +1359,7 @@ typedef struct AppendPath
 
 	/* Index of first partial path in subpaths */
 	int			first_partial_path;
+	double		limit_tuples;	/* hard limit on output tuples, or -1 */
 } AppendPath;
 
 #define IS_DUMMY_PATH(p) \
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index ac6de0f6be..9da3f19c6e 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -103,7 +103,7 @@ extern void cost_sort(Path *path, PlannerInfo *root,
 		  List *pathkeys, Cost input_cost, double tuples, int width,
 		  Cost comparison_cost, int sort_mem,
 		  double limit_tuples);
-extern void cost_append(AppendPath *path);
+extern void cost_append(PlannerInfo *root, AppendPath *path);
 extern void cost_merge_append(Path *path, PlannerInfo *root,
 				  List *pathkeys, int n_streams,
 				  Cost input_startup_cost, Cost input_total_cost,
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 574bb85b50..1bcd0e4235 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -64,7 +64,7 @@ extern BitmapOrPath *create_bitmap_or_path(PlannerInfo *root,
 extern TidPath *create_tidscan_path(PlannerInfo *root, RelOptInfo *rel,
 					List *tidquals, Relids required_outer);
 extern AppendPath *create_append_path(PlannerInfo *root, RelOptInfo *rel,
-				   List *subpaths, List *partial_subpaths,
+				   List *subpaths, List *partial_subpaths, List *pathkeys,
 				   Relids required_outer,
 				   int parallel_workers, bool parallel_aware,
 				   List *partitioned_rels, double rows);
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 040335a7c5..66246136d9 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -195,6 +195,8 @@ extern Path *get_cheapest_fractional_path_for_pathkeys(List *paths,
 extern Path *get_cheapest_parallel_safe_total_inner(List *paths);
 extern List *build_index_pathkeys(PlannerInfo *root, IndexOptInfo *index,
 					 ScanDirection scandir);
+extern List *build_partition_pathkeys(PlannerInfo *root, RelOptInfo *partrel,
+						 ScanDirection scandir, bool *partialkeys);
 extern List *build_expression_pathkey(PlannerInfo *root, Expr *expr,
 						 Relids nullable_relids, Oid opno,
 						 Relids rel, bool create_it);
diff --git a/src/include/partitioning/partprune.h b/src/include/partitioning/partprune.h
index 2f75717ffb..53e492108b 100644
--- a/src/include/partitioning/partprune.h
+++ b/src/include/partitioning/partprune.h
@@ -74,6 +74,8 @@ typedef struct PartitionPruneContext
 #define PruneCxtStateIdx(partnatts, step_id, keyno) \
 	((partnatts) * (step_id) + (keyno))
 
+extern bool partitions_are_ordered(struct PlannerInfo *root,
+					   struct RelOptInfo *partrel);
 extern PartitionPruneInfo *make_partition_pruneinfo(struct PlannerInfo *root,
 						 struct RelOptInfo *parentrel,
 						 List *subpaths,
diff --git a/src/test/regress/expected/inherit.out b/src/test/regress/expected/inherit.out
index 565d947b6d..11f7ec1888 100644
--- a/src/test/regress/expected/inherit.out
+++ b/src/test/regress/expected/inherit.out
@@ -2042,7 +2042,210 @@ explain (costs off) select * from mcrparted where a = 20 and c > 20; -- scans mc
          Filter: ((c > 20) AND (a = 20))
 (9 rows)
 
+-- Test code that uses Append nodes in place of MergeAppend when the
+-- partitions guarantee earlier partitions means lower sort order of the
+-- tuples contained within.
+create index mcrparted_a_abs_c_idx on mcrparted (a, abs(b), c);
+-- check MergeAppend is uses when a default partition exists
+explain (costs off) select * from mcrparted order by a, abs(b), c;
+                            QUERY PLAN                             
+-------------------------------------------------------------------
+ Merge Append
+   Sort Key: mcrparted0.a, (abs(mcrparted0.b)), mcrparted0.c
+   ->  Index Scan using mcrparted0_a_abs_c_idx on mcrparted0
+   ->  Index Scan using mcrparted1_a_abs_c_idx on mcrparted1
+   ->  Index Scan using mcrparted2_a_abs_c_idx on mcrparted2
+   ->  Index Scan using mcrparted3_a_abs_c_idx on mcrparted3
+   ->  Index Scan using mcrparted4_a_abs_c_idx on mcrparted4
+   ->  Index Scan using mcrparted5_a_abs_c_idx on mcrparted5
+   ->  Index Scan using mcrparted_def_a_abs_c_idx on mcrparted_def
+(9 rows)
+
+drop table mcrparted_def;
+-- check Append is used for RANGE partitioned table with no default and no subpartitions
+explain (costs off) select * from mcrparted order by a, abs(b), c;
+                         QUERY PLAN                          
+-------------------------------------------------------------
+ Append
+   ->  Index Scan using mcrparted0_a_abs_c_idx on mcrparted0
+   ->  Index Scan using mcrparted1_a_abs_c_idx on mcrparted1
+   ->  Index Scan using mcrparted2_a_abs_c_idx on mcrparted2
+   ->  Index Scan using mcrparted3_a_abs_c_idx on mcrparted3
+   ->  Index Scan using mcrparted4_a_abs_c_idx on mcrparted4
+   ->  Index Scan using mcrparted5_a_abs_c_idx on mcrparted5
+(7 rows)
+
+-- check Append is used with subpaths in reverse order with backwards index scans.
+explain (costs off) select * from mcrparted order by a desc, abs(b) desc, c desc;
+                              QUERY PLAN                              
+----------------------------------------------------------------------
+ Append
+   ->  Index Scan Backward using mcrparted5_a_abs_c_idx on mcrparted5
+   ->  Index Scan Backward using mcrparted4_a_abs_c_idx on mcrparted4
+   ->  Index Scan Backward using mcrparted3_a_abs_c_idx on mcrparted3
+   ->  Index Scan Backward using mcrparted2_a_abs_c_idx on mcrparted2
+   ->  Index Scan Backward using mcrparted1_a_abs_c_idx on mcrparted1
+   ->  Index Scan Backward using mcrparted0_a_abs_c_idx on mcrparted0
+(7 rows)
+
+-- check that Append plan is used containing a MergeAppend for sub-partitions
+-- that are unordered.
+drop table mcrparted5;
+create table mcrparted5 partition of mcrparted for values from (20, 20, 20) to (maxvalue, maxvalue, maxvalue) partition by list (a);
+create table mcrparted5a partition of mcrparted5 for values in(20);
+create table mcrparted5_def partition of mcrparted5 default;
+explain (costs off) select * from mcrparted order by a, abs(b), c;
+                                QUERY PLAN                                 
+---------------------------------------------------------------------------
+ Append
+   ->  Index Scan using mcrparted0_a_abs_c_idx on mcrparted0
+   ->  Index Scan using mcrparted1_a_abs_c_idx on mcrparted1
+   ->  Index Scan using mcrparted2_a_abs_c_idx on mcrparted2
+   ->  Index Scan using mcrparted3_a_abs_c_idx on mcrparted3
+   ->  Index Scan using mcrparted4_a_abs_c_idx on mcrparted4
+   ->  Merge Append
+         Sort Key: mcrparted5a.a, (abs(mcrparted5a.b)), mcrparted5a.c
+         ->  Index Scan using mcrparted5a_a_abs_c_idx on mcrparted5a
+         ->  Index Scan using mcrparted5_def_a_abs_c_idx on mcrparted5_def
+(10 rows)
+
+drop table mcrparted5_def;
+-- check that an Append plan is used and the sub-partitions are flattened
+-- into the main Append when the sub-partition is unordered but contains
+-- just a single sub-partition.
+explain (costs off) select a, abs(b) from mcrparted order by a, abs(b), c;
+                          QUERY PLAN                           
+---------------------------------------------------------------
+ Append
+   ->  Index Scan using mcrparted0_a_abs_c_idx on mcrparted0
+   ->  Index Scan using mcrparted1_a_abs_c_idx on mcrparted1
+   ->  Index Scan using mcrparted2_a_abs_c_idx on mcrparted2
+   ->  Index Scan using mcrparted3_a_abs_c_idx on mcrparted3
+   ->  Index Scan using mcrparted4_a_abs_c_idx on mcrparted4
+   ->  Index Scan using mcrparted5a_a_abs_c_idx on mcrparted5a
+(7 rows)
+
+-- check that Append is used when the sub-partitioned tables are pruned during planning.
+explain (costs off) select * from mcrparted where a < 20 order by a, abs(b), c;
+                         QUERY PLAN                          
+-------------------------------------------------------------
+ Append
+   ->  Index Scan using mcrparted0_a_abs_c_idx on mcrparted0
+         Index Cond: (a < 20)
+   ->  Index Scan using mcrparted1_a_abs_c_idx on mcrparted1
+         Index Cond: (a < 20)
+   ->  Index Scan using mcrparted2_a_abs_c_idx on mcrparted2
+         Index Cond: (a < 20)
+   ->  Index Scan using mcrparted3_a_abs_c_idx on mcrparted3
+         Index Cond: (a < 20)
+(9 rows)
+
+create table mclparted (a int) partition by list(a);
+create table mclparted1 partition of mclparted for values in(1);
+create table mclparted2 partition of mclparted for values in(2);
+create index on mclparted (a);
+-- Ensure an Append is used to for a list partition with an order by.
+explain (costs off) select * from mclparted order by a;
+                         QUERY PLAN                         
+------------------------------------------------------------
+ Append
+   ->  Index Only Scan using mclparted1_a_idx on mclparted1
+   ->  Index Only Scan using mclparted2_a_idx on mclparted2
+(3 rows)
+
+-- Ensure a MergeAppend is used when a partition exists with interleaved
+-- datums in the partition bound.
+create table mclparted3_5 partition of mclparted for values in(3,5);
+create table mclparted4 partition of mclparted for values in(4);
+explain (costs off) select * from mclparted order by a;
+                           QUERY PLAN                           
+----------------------------------------------------------------
+ Merge Append
+   Sort Key: mclparted1.a
+   ->  Index Only Scan using mclparted1_a_idx on mclparted1
+   ->  Index Only Scan using mclparted2_a_idx on mclparted2
+   ->  Index Only Scan using mclparted3_5_a_idx on mclparted3_5
+   ->  Index Only Scan using mclparted4_a_idx on mclparted4
+(6 rows)
+
+drop table mclparted;
+-- Ensure subplans which don't have a path with the correct pathkeys get
+-- sorted correctly.
+drop index mcrparted_a_abs_c_idx;
+create index on mcrparted1 (a, abs(b), c);
+create index on mcrparted2 (a, abs(b), c);
+create index on mcrparted3 (a, abs(b), c);
+create index on mcrparted4 (a, abs(b), c);
+set enable_seqscan = 0;
+explain (costs off) select * from mcrparted where a < 20 order by a, abs(b), c;
+                            QUERY PLAN                             
+-------------------------------------------------------------------
+ Append
+   ->  Sort
+         Sort Key: mcrparted0.a, (abs(mcrparted0.b)), mcrparted0.c
+         ->  Seq Scan on mcrparted0
+               Filter: (a < 20)
+   ->  Index Scan using mcrparted1_a_abs_c_idx on mcrparted1
+         Index Cond: (a < 20)
+   ->  Index Scan using mcrparted2_a_abs_c_idx on mcrparted2
+         Index Cond: (a < 20)
+   ->  Index Scan using mcrparted3_a_abs_c_idx on mcrparted3
+         Index Cond: (a < 20)
+(11 rows)
+
+reset enable_seqscan;
+set enable_bitmapscan = 0;
+-- Ensure Append node can be used when the partition is ordered by some
+-- pathkeys which were deemed redundant.
+explain (costs off) select * from mcrparted where a = 10 order by a, abs(b), c;
+                         QUERY PLAN                          
+-------------------------------------------------------------
+ Append
+   ->  Index Scan using mcrparted1_a_abs_c_idx on mcrparted1
+         Index Cond: (a = 10)
+   ->  Index Scan using mcrparted2_a_abs_c_idx on mcrparted2
+         Index Cond: (a = 10)
+(5 rows)
+
+reset enable_bitmapscan;
 drop table mcrparted;
+-- Ensure LIST partitions allow an Append to be used instead of a MergeAppend
+create table bool_rp (b bool) partition by list(b);
+create table bool_rp_true partition of bool_rp for values in(true);
+create table bool_rp_false partition of bool_rp for values in(false);
+create index on bool_rp (b);
+explain (costs off) select * from bool_rp order by b;
+                            QUERY PLAN                            
+------------------------------------------------------------------
+ Append
+   ->  Index Only Scan using bool_rp_false_b_idx on bool_rp_false
+   ->  Index Only Scan using bool_rp_true_b_idx on bool_rp_true
+(3 rows)
+
+drop table bool_rp;
+-- Ensure an Append scan is chosen when the partition order is a subset of
+-- the required order.
+create table range_parted (a int, b int, c int) partition by range(a, b);
+create table range_parted1 partition of range_parted for values from (0,0) to (10,10);
+create table range_parted2 partition of range_parted for values from (10,10) to (20,20);
+create index on range_parted (a,b,c);
+explain (costs off) select * from range_parted order by a,b,c;
+                              QUERY PLAN                              
+----------------------------------------------------------------------
+ Append
+   ->  Index Only Scan using range_parted1_a_b_c_idx on range_parted1
+   ->  Index Only Scan using range_parted2_a_b_c_idx on range_parted2
+(3 rows)
+
+explain (costs off) select * from range_parted order by a desc,b desc,c desc;
+                                  QUERY PLAN                                   
+-------------------------------------------------------------------------------
+ Append
+   ->  Index Only Scan Backward using range_parted2_a_b_c_idx on range_parted2
+   ->  Index Only Scan Backward using range_parted1_a_b_c_idx on range_parted1
+(3 rows)
+
+drop table range_parted;
 -- check that partitioned table Appends cope with being referenced in
 -- subplans
 create table parted_minmax (a int, b varchar(16)) partition by range (a);
@@ -2055,17 +2258,15 @@ explain (costs off) select min(a), max(a) from parted_minmax where b = '12345';
  Result
    InitPlan 1 (returns $0)
      ->  Limit
-           ->  Merge Append
-                 Sort Key: parted_minmax1.a
+           ->  Append
                  ->  Index Only Scan using parted_minmax1i on parted_minmax1
                        Index Cond: ((a IS NOT NULL) AND (b = '12345'::text))
    InitPlan 2 (returns $1)
      ->  Limit
-           ->  Merge Append
-                 Sort Key: parted_minmax1_1.a DESC
+           ->  Append
                  ->  Index Only Scan Backward using parted_minmax1i on parted_minmax1 parted_minmax1_1
                        Index Cond: ((a IS NOT NULL) AND (b = '12345'::text))
-(13 rows)
+(11 rows)
 
 select min(a), max(a) from parted_minmax where b = '12345';
  min | max 
diff --git a/src/test/regress/expected/partition_prune.out b/src/test/regress/expected/partition_prune.out
index 30946f77b6..2c9e42b51a 100644
--- a/src/test/regress/expected/partition_prune.out
+++ b/src/test/regress/expected/partition_prune.out
@@ -3063,14 +3063,14 @@ drop table boolp;
 --
 set enable_seqscan = off;
 set enable_sort = off;
-create table ma_test (a int) partition by range (a);
+create table ma_test (a int, b int) partition by range (a);
 create table ma_test_p1 partition of ma_test for values from (0) to (10);
 create table ma_test_p2 partition of ma_test for values from (10) to (20);
 create table ma_test_p3 partition of ma_test for values from (20) to (30);
-insert into ma_test select x from generate_series(0,29) t(x);
-create index on ma_test (a);
+insert into ma_test select x,x from generate_series(0,29) t(x);
+create index on ma_test (b);
 analyze ma_test;
-prepare mt_q1 (int) as select * from ma_test where a >= $1 and a % 10 = 5 order by a;
+prepare mt_q1 (int) as select a from ma_test where a >= $1 and a % 10 = 5 order by b;
 -- Execute query 5 times to allow choose_custom_plan
 -- to start considering a generic plan.
 execute mt_q1(0);
@@ -3117,17 +3117,15 @@ explain (analyze, costs off, summary off, timing off) execute mt_q1(15);
                                   QUERY PLAN                                   
 -------------------------------------------------------------------------------
  Merge Append (actual rows=2 loops=1)
-   Sort Key: ma_test_p2.a
+   Sort Key: ma_test_p2.b
    Subplans Removed: 1
-   ->  Index Scan using ma_test_p2_a_idx on ma_test_p2 (actual rows=1 loops=1)
-         Index Cond: (a >= $1)
-         Filter: ((a % 10) = 5)
-         Rows Removed by Filter: 4
-   ->  Index Scan using ma_test_p3_a_idx on ma_test_p3 (actual rows=1 loops=1)
-         Index Cond: (a >= $1)
-         Filter: ((a % 10) = 5)
+   ->  Index Scan using ma_test_p2_b_idx on ma_test_p2 (actual rows=1 loops=1)
+         Filter: ((a >= $1) AND ((a % 10) = 5))
          Rows Removed by Filter: 9
-(11 rows)
+   ->  Index Scan using ma_test_p3_b_idx on ma_test_p3 (actual rows=1 loops=1)
+         Filter: ((a >= $1) AND ((a % 10) = 5))
+         Rows Removed by Filter: 9
+(9 rows)
 
 execute mt_q1(15);
  a  
@@ -3140,13 +3138,12 @@ explain (analyze, costs off, summary off, timing off) execute mt_q1(25);
                                   QUERY PLAN                                   
 -------------------------------------------------------------------------------
  Merge Append (actual rows=1 loops=1)
-   Sort Key: ma_test_p3.a
+   Sort Key: ma_test_p3.b
    Subplans Removed: 2
-   ->  Index Scan using ma_test_p3_a_idx on ma_test_p3 (actual rows=1 loops=1)
-         Index Cond: (a >= $1)
-         Filter: ((a % 10) = 5)
-         Rows Removed by Filter: 4
-(7 rows)
+   ->  Index Scan using ma_test_p3_b_idx on ma_test_p3 (actual rows=1 loops=1)
+         Filter: ((a >= $1) AND ((a % 10) = 5))
+         Rows Removed by Filter: 9
+(6 rows)
 
 execute mt_q1(25);
  a  
@@ -3159,12 +3156,11 @@ explain (analyze, costs off, summary off, timing off) execute mt_q1(35);
                                QUERY PLAN                               
 ------------------------------------------------------------------------
  Merge Append (actual rows=0 loops=1)
-   Sort Key: ma_test_p1.a
+   Sort Key: ma_test_p1.b
    Subplans Removed: 2
-   ->  Index Scan using ma_test_p1_a_idx on ma_test_p1 (never executed)
-         Index Cond: (a >= $1)
-         Filter: ((a % 10) = 5)
-(6 rows)
+   ->  Index Scan using ma_test_p1_b_idx on ma_test_p1 (never executed)
+         Filter: ((a >= $1) AND ((a % 10) = 5))
+(5 rows)
 
 execute mt_q1(35);
  a 
@@ -3173,23 +3169,23 @@ execute mt_q1(35);
 
 deallocate mt_q1;
 -- ensure initplan params properly prune partitions
-explain (analyze, costs off, summary off, timing off) select * from ma_test where a >= (select min(a) from ma_test_p2) order by a;
+explain (analyze, costs off, summary off, timing off) select * from ma_test where a >= (select min(b) from ma_test_p2) order by b;
                                                  QUERY PLAN                                                 
 ------------------------------------------------------------------------------------------------------------
  Merge Append (actual rows=20 loops=1)
-   Sort Key: ma_test_p1.a
+   Sort Key: ma_test_p1.b
    InitPlan 2 (returns $1)
      ->  Result (actual rows=1 loops=1)
            InitPlan 1 (returns $0)
              ->  Limit (actual rows=1 loops=1)
-                   ->  Index Scan using ma_test_p2_a_idx on ma_test_p2 ma_test_p2_1 (actual rows=1 loops=1)
-                         Index Cond: (a IS NOT NULL)
-   ->  Index Scan using ma_test_p1_a_idx on ma_test_p1 (never executed)
-         Index Cond: (a >= $1)
-   ->  Index Scan using ma_test_p2_a_idx on ma_test_p2 (actual rows=10 loops=1)
-         Index Cond: (a >= $1)
-   ->  Index Scan using ma_test_p3_a_idx on ma_test_p3 (actual rows=10 loops=1)
-         Index Cond: (a >= $1)
+                   ->  Index Scan using ma_test_p2_b_idx on ma_test_p2 ma_test_p2_1 (actual rows=1 loops=1)
+                         Index Cond: (b IS NOT NULL)
+   ->  Index Scan using ma_test_p1_b_idx on ma_test_p1 (never executed)
+         Filter: (a >= $1)
+   ->  Index Scan using ma_test_p2_b_idx on ma_test_p2 (actual rows=10 loops=1)
+         Filter: (a >= $1)
+   ->  Index Scan using ma_test_p3_b_idx on ma_test_p3 (actual rows=10 loops=1)
+         Filter: (a >= $1)
 (14 rows)
 
 reset enable_seqscan;
diff --git a/src/test/regress/sql/inherit.sql b/src/test/regress/sql/inherit.sql
index 5480fe7db4..f696808a98 100644
--- a/src/test/regress/sql/inherit.sql
+++ b/src/test/regress/sql/inherit.sql
@@ -715,8 +715,101 @@ explain (costs off) select * from mcrparted where abs(b) = 5;	-- scans all parti
 explain (costs off) select * from mcrparted where a > -1;	-- scans all partitions
 explain (costs off) select * from mcrparted where a = 20 and abs(b) = 10 and c > 10;	-- scans mcrparted4
 explain (costs off) select * from mcrparted where a = 20 and c > 20; -- scans mcrparted3, mcrparte4, mcrparte5, mcrparted_def
+
+-- Test code that uses Append nodes in place of MergeAppend when the
+-- partitions guarantee earlier partitions means lower sort order of the
+-- tuples contained within.
+create index mcrparted_a_abs_c_idx on mcrparted (a, abs(b), c);
+
+-- check MergeAppend is uses when a default partition exists
+explain (costs off) select * from mcrparted order by a, abs(b), c;
+
+drop table mcrparted_def;
+
+-- check Append is used for RANGE partitioned table with no default and no subpartitions
+explain (costs off) select * from mcrparted order by a, abs(b), c;
+
+-- check Append is used with subpaths in reverse order with backwards index scans.
+explain (costs off) select * from mcrparted order by a desc, abs(b) desc, c desc;
+
+-- check that Append plan is used containing a MergeAppend for sub-partitions
+-- that are unordered.
+drop table mcrparted5;
+create table mcrparted5 partition of mcrparted for values from (20, 20, 20) to (maxvalue, maxvalue, maxvalue) partition by list (a);
+create table mcrparted5a partition of mcrparted5 for values in(20);
+create table mcrparted5_def partition of mcrparted5 default;
+
+explain (costs off) select * from mcrparted order by a, abs(b), c;
+
+drop table mcrparted5_def;
+
+-- check that an Append plan is used and the sub-partitions are flattened
+-- into the main Append when the sub-partition is unordered but contains
+-- just a single sub-partition.
+explain (costs off) select a, abs(b) from mcrparted order by a, abs(b), c;
+
+-- check that Append is used when the sub-partitioned tables are pruned during planning.
+explain (costs off) select * from mcrparted where a < 20 order by a, abs(b), c;
+
+create table mclparted (a int) partition by list(a);
+create table mclparted1 partition of mclparted for values in(1);
+create table mclparted2 partition of mclparted for values in(2);
+create index on mclparted (a);
+
+-- Ensure an Append is used to for a list partition with an order by.
+explain (costs off) select * from mclparted order by a;
+
+-- Ensure a MergeAppend is used when a partition exists with interleaved
+-- datums in the partition bound.
+create table mclparted3_5 partition of mclparted for values in(3,5);
+create table mclparted4 partition of mclparted for values in(4);
+
+explain (costs off) select * from mclparted order by a;
+
+drop table mclparted;
+
+-- Ensure subplans which don't have a path with the correct pathkeys get
+-- sorted correctly.
+drop index mcrparted_a_abs_c_idx;
+create index on mcrparted1 (a, abs(b), c);
+create index on mcrparted2 (a, abs(b), c);
+create index on mcrparted3 (a, abs(b), c);
+create index on mcrparted4 (a, abs(b), c);
+
+set enable_seqscan = 0;
+explain (costs off) select * from mcrparted where a < 20 order by a, abs(b), c;
+reset enable_seqscan;
+
+set enable_bitmapscan = 0;
+-- Ensure Append node can be used when the partition is ordered by some
+-- pathkeys which were deemed redundant.
+explain (costs off) select * from mcrparted where a = 10 order by a, abs(b), c;
+reset enable_bitmapscan;
+
 drop table mcrparted;
 
+-- Ensure LIST partitions allow an Append to be used instead of a MergeAppend
+create table bool_rp (b bool) partition by list(b);
+create table bool_rp_true partition of bool_rp for values in(true);
+create table bool_rp_false partition of bool_rp for values in(false);
+create index on bool_rp (b);
+
+explain (costs off) select * from bool_rp order by b;
+
+drop table bool_rp;
+
+-- Ensure an Append scan is chosen when the partition order is a subset of
+-- the required order.
+create table range_parted (a int, b int, c int) partition by range(a, b);
+create table range_parted1 partition of range_parted for values from (0,0) to (10,10);
+create table range_parted2 partition of range_parted for values from (10,10) to (20,20);
+create index on range_parted (a,b,c);
+
+explain (costs off) select * from range_parted order by a,b,c;
+explain (costs off) select * from range_parted order by a desc,b desc,c desc;
+
+drop table range_parted;
+
 -- check that partitioned table Appends cope with being referenced in
 -- subplans
 create table parted_minmax (a int, b varchar(16)) partition by range (a);
diff --git a/src/test/regress/sql/partition_prune.sql b/src/test/regress/sql/partition_prune.sql
index dc327caffd..102893b6f6 100644
--- a/src/test/regress/sql/partition_prune.sql
+++ b/src/test/regress/sql/partition_prune.sql
@@ -768,15 +768,15 @@ drop table boolp;
 --
 set enable_seqscan = off;
 set enable_sort = off;
-create table ma_test (a int) partition by range (a);
+create table ma_test (a int, b int) partition by range (a);
 create table ma_test_p1 partition of ma_test for values from (0) to (10);
 create table ma_test_p2 partition of ma_test for values from (10) to (20);
 create table ma_test_p3 partition of ma_test for values from (20) to (30);
-insert into ma_test select x from generate_series(0,29) t(x);
-create index on ma_test (a);
+insert into ma_test select x,x from generate_series(0,29) t(x);
+create index on ma_test (b);
 
 analyze ma_test;
-prepare mt_q1 (int) as select * from ma_test where a >= $1 and a % 10 = 5 order by a;
+prepare mt_q1 (int) as select a from ma_test where a >= $1 and a % 10 = 5 order by b;
 
 -- Execute query 5 times to allow choose_custom_plan
 -- to start considering a generic plan.
@@ -797,7 +797,7 @@ execute mt_q1(35);
 deallocate mt_q1;
 
 -- ensure initplan params properly prune partitions
-explain (analyze, costs off, summary off, timing off) select * from ma_test where a >= (select min(a) from ma_test_p2) order by a;
+explain (analyze, costs off, summary off, timing off) select * from ma_test where a >= (select min(b) from ma_test_p2) order by b;
 
 reset enable_seqscan;
 reset enable_sort;
-- 
2.16.2.windows.1

