diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 781a736..6b19e7e 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -81,7 +81,7 @@ static void show_agg_keys(AggState *astate, List *ancestors,
 static void show_group_keys(GroupState *gstate, List *ancestors,
 				ExplainState *es);
 static void show_sort_group_keys(PlanState *planstate, const char *qlabel,
-					 int nkeys, AttrNumber *keycols,
+					 int nkeys, int nPresortedKeys, AttrNumber *keycols,
 					 List *ancestors, ExplainState *es);
 static void show_sort_info(SortState *sortstate, ExplainState *es);
 static void show_hash_info(HashState *hashstate, ExplainState *es);
@@ -940,7 +940,10 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			pname = sname = "Materialize";
 			break;
 		case T_Sort:
-			pname = sname = "Sort";
+			if (((Sort *) plan)->skipCols > 0)
+				pname = sname = "Partial sort";
+			else
+				pname = sname = "Sort";
 			break;
 		case T_Group:
 			pname = sname = "Group";
@@ -1751,7 +1754,7 @@ show_sort_keys(SortState *sortstate, List *ancestors, ExplainState *es)
 	Sort	   *plan = (Sort *) sortstate->ss.ps.plan;
 
 	show_sort_group_keys((PlanState *) sortstate, "Sort Key",
-						 plan->numCols, plan->sortColIdx,
+						 plan->numCols, plan->skipCols, plan->sortColIdx,
 						 ancestors, es);
 }
 
@@ -1765,7 +1768,7 @@ show_merge_append_keys(MergeAppendState *mstate, List *ancestors,
 	MergeAppend *plan = (MergeAppend *) mstate->ps.plan;
 
 	show_sort_group_keys((PlanState *) mstate, "Sort Key",
-						 plan->numCols, plan->sortColIdx,
+						 plan->numCols, 0, plan->sortColIdx,
 						 ancestors, es);
 }
 
@@ -1783,7 +1786,7 @@ show_agg_keys(AggState *astate, List *ancestors,
 		/* The key columns refer to the tlist of the child plan */
 		ancestors = lcons(astate, ancestors);
 		show_sort_group_keys(outerPlanState(astate), "Group Key",
-							 plan->numCols, plan->grpColIdx,
+							 plan->numCols, 0, plan->grpColIdx,
 							 ancestors, es);
 		ancestors = list_delete_first(ancestors);
 	}
@@ -1801,7 +1804,7 @@ show_group_keys(GroupState *gstate, List *ancestors,
 	/* The key columns refer to the tlist of the child plan */
 	ancestors = lcons(gstate, ancestors);
 	show_sort_group_keys(outerPlanState(gstate), "Group Key",
-						 plan->numCols, plan->grpColIdx,
+						 plan->numCols, 0, plan->grpColIdx,
 						 ancestors, es);
 	ancestors = list_delete_first(ancestors);
 }
@@ -1811,13 +1814,14 @@ show_group_keys(GroupState *gstate, List *ancestors,
  * as arrays of targetlist indexes
  */
 static void
-show_sort_group_keys(PlanState *planstate, const char *qlabel,
-					 int nkeys, AttrNumber *keycols,
+show_sort_group_keys(PlanState *planstate,  const char *qlabel,
+					 int nkeys, int nPresortedKeys, AttrNumber *keycols,
 					 List *ancestors, ExplainState *es)
 {
 	Plan	   *plan = planstate->plan;
 	List	   *context;
-	List	   *result = NIL;
+	List	   *resultSort = NIL;
+	List	   *resultPresorted = NIL;
 	bool		useprefix;
 	int			keyno;
 	char	   *exprstr;
@@ -1844,10 +1848,15 @@ show_sort_group_keys(PlanState *planstate, const char *qlabel,
 		/* Deparse the expression, showing any top-level cast */
 		exprstr = deparse_expression((Node *) target->expr, context,
 									 useprefix, true);
-		result = lappend(result, exprstr);
+
+		if (keyno < nPresortedKeys)
+			resultPresorted = lappend(resultPresorted, exprstr);
+		resultSort = lappend(resultSort, exprstr);
 	}
 
-	ExplainPropertyList(qlabel, result, es);
+	ExplainPropertyList(qlabel, resultSort, es);
+	if (nPresortedKeys > 0)
+		ExplainPropertyList("Presorted Key", resultPresorted, es);
 }
 
 /*
diff --git a/src/backend/executor/execAmi.c b/src/backend/executor/execAmi.c
index 640964c..a8e69d2 100644
--- a/src/backend/executor/execAmi.c
+++ b/src/backend/executor/execAmi.c
@@ -379,7 +379,7 @@ ExecRestrPos(PlanState *node)
  * and valuesscan support is actually useless code at present.)
  */
 bool
-ExecSupportsMarkRestore(NodeTag plantype)
+ExecSupportsMarkRestore(NodeTag plantype, Plan *node)
 {
 	switch (plantype)
 	{
@@ -389,9 +389,15 @@ ExecSupportsMarkRestore(NodeTag plantype)
 		case T_TidScan:
 		case T_ValuesScan:
 		case T_Material:
-		case T_Sort:
 			return true;
 
+		case T_Sort:
+			/* With skipCols sort node holds only last bucket */
+			if (node && ((Sort *)node)->skipCols == 0)
+				return true;
+			else
+				return false;
+
 		case T_Result:
 
 			/*
@@ -466,10 +472,16 @@ ExecSupportsBackwardScan(Plan *node)
 				TargetListSupportsBackwardScan(node->targetlist);
 
 		case T_Material:
-		case T_Sort:
 			/* these don't evaluate tlist */
 			return true;
 
+		case T_Sort:
+			/* With skipCols sort node holds only last bucket */
+			if (((Sort *)node)->skipCols == 0)
+				return true;
+			else
+				return false;
+
 		case T_LockRows:
 		case T_Limit:
 			/* these don't evaluate tlist */
@@ -535,7 +547,7 @@ IndexSupportsBackwardScan(Oid indexid)
  * very low per-tuple cost.
  */
 bool
-ExecMaterializesOutput(NodeTag plantype)
+ExecMaterializesOutput(NodeTag plantype, Plan *node)
 {
 	switch (plantype)
 	{
@@ -543,9 +555,15 @@ ExecMaterializesOutput(NodeTag plantype)
 		case T_FunctionScan:
 		case T_CteScan:
 		case T_WorkTableScan:
-		case T_Sort:
 			return true;
 
+		case T_Sort:
+			/* With skipCols sort node holds only last bucket */
+			if (node && ((Sort *)node)->skipCols == 0)
+				return true;
+			else
+				return false;
+
 		default:
 			break;
 	}
diff --git a/src/backend/executor/nodeMergeAppend.c b/src/backend/executor/nodeMergeAppend.c
index 47ed068..c51a144 100644
--- a/src/backend/executor/nodeMergeAppend.c
+++ b/src/backend/executor/nodeMergeAppend.c
@@ -126,19 +126,11 @@ ExecInitMergeAppend(MergeAppend *node, EState *estate, int eflags)
 	 * initialize sort-key information
 	 */
 	mergestate->ms_nkeys = node->numCols;
-	mergestate->ms_sortkeys = palloc0(sizeof(SortSupportData) * node->numCols);
-
-	for (i = 0; i < node->numCols; i++)
-	{
-		SortSupport sortKey = mergestate->ms_sortkeys + i;
-
-		sortKey->ssup_cxt = CurrentMemoryContext;
-		sortKey->ssup_collation = node->collations[i];
-		sortKey->ssup_nulls_first = node->nullsFirst[i];
-		sortKey->ssup_attno = node->sortColIdx[i];
-
-		PrepareSortSupportFromOrderingOp(node->sortOperators[i], sortKey);
-	}
+	mergestate->ms_sortkeys = MakeSortSupportKeys(mergestate->ms_nkeys,
+												  node->sortColIdx,
+												  node->sortOperators,
+												  node->collations,
+												  node->nullsFirst);
 
 	/*
 	 * initialize to show we have not run the subplans yet
diff --git a/src/backend/executor/nodeSort.c b/src/backend/executor/nodeSort.c
index b88571b..f38190d 100644
--- a/src/backend/executor/nodeSort.c
+++ b/src/backend/executor/nodeSort.c
@@ -15,11 +15,37 @@
 
 #include "postgres.h"
 
+#include "access/htup_details.h"
 #include "executor/execdebug.h"
 #include "executor/nodeSort.h"
 #include "miscadmin.h"
 #include "utils/tuplesort.h"
 
+/*
+ * cmpSortSkipCols
+ *	  Report whether heap tuple "a" and slot "b" agree on every one of the
+ *	  leading plan->skipCols sort columns, judged by node->skipKeys.
+ */
+static bool
+cmpSortSkipCols(SortState *node, TupleDesc tupDesc, HeapTuple a, TupleTableSlot *b)
+{
+	Sort	   *sort = (Sort *) node->ss.ps.plan;
+	int			keyno;
+
+	for (keyno = 0; keyno < sort->skipCols; keyno++)
+	{
+		SortSupport key = &node->skipKeys[keyno];
+		bool		leftNull, rightNull;
+		Datum		left = heap_getattr(a, key->ssup_attno, tupDesc, &leftNull);
+		Datum		right = slot_getattr(b, key->ssup_attno, &rightNull);
+
+		/* any nonzero comparator result means the prefix differs */
+		if (ApplySortComparator(left, leftNull, right, rightNull, key) != 0)
+			return false;
+	}
+	return true;
+}
+
 
 /* ----------------------------------------------------------------
  *		ExecSort
@@ -42,6 +68,11 @@ ExecSort(SortState *node)
 	ScanDirection dir;
 	Tuplesortstate *tuplesortstate;
 	TupleTableSlot *slot;
+	Sort	   *plannode = (Sort *) node->ss.ps.plan;
+	PlanState  *outerNode;
+	TupleDesc	tupDesc;
+	int			skipCols = plannode->skipCols;
+	int64		nTuples = 0;
 
 	/*
 	 * get state info from node
@@ -54,79 +85,148 @@ ExecSort(SortState *node)
 	tuplesortstate = (Tuplesortstate *) node->tuplesortstate;
 
 	/*
+	 * Return next tuple from sorted set if any.
+	 */
+	if (node->sort_Done)
+	{
+		slot = node->ss.ps.ps_ResultTupleSlot;
+		if (tuplesort_gettupleslot(tuplesortstate,
+									  ScanDirectionIsForward(dir),
+									  slot) || node->finished)
+			return slot;
+	}
+
+	/*
 	 * If first time through, read all tuples from outer plan and pass them to
 	 * tuplesort.c. Subsequent calls just fetch tuples from tuplesort.
 	 */
 
-	if (!node->sort_Done)
-	{
-		Sort	   *plannode = (Sort *) node->ss.ps.plan;
-		PlanState  *outerNode;
-		TupleDesc	tupDesc;
-
-		SO1_printf("ExecSort: %s\n",
-				   "sorting subplan");
+	SO1_printf("ExecSort: %s\n",
+			   "sorting subplan");
 
-		/*
-		 * Want to scan subplan in the forward direction while creating the
-		 * sorted data.
-		 */
-		estate->es_direction = ForwardScanDirection;
+	/*
+	 * Want to scan subplan in the forward direction while creating the
+	 * sorted data.
+	 */
+	estate->es_direction = ForwardScanDirection;
 
-		/*
-		 * Initialize tuplesort module.
-		 */
-		SO1_printf("ExecSort: %s\n",
-				   "calling tuplesort_begin");
+	/*
+	 * Initialize tuplesort module.
+	 */
+	SO1_printf("ExecSort: %s\n",
+			   "calling tuplesort_begin");
 
-		outerNode = outerPlanState(node);
-		tupDesc = ExecGetResultType(outerNode);
+	outerNode = outerPlanState(node);
+	tupDesc = ExecGetResultType(outerNode);
 
+	if (node->tuplesortstate != NULL)
+		tuplesort_reset((Tuplesortstate *) node->tuplesortstate);
+	else
+	{
+		/* Support structures for cmpSortSkipCols - already sorted columns */
+		if (skipCols)
+			node->skipKeys = MakeSortSupportKeys(skipCols,
+												 plannode->sortColIdx,
+												 plannode->sortOperators,
+												 plannode->collations,
+												 plannode->nullsFirst);
+
+		/* Only pass on remaining columns that are unsorted */
 		tuplesortstate = tuplesort_begin_heap(tupDesc,
-											  plannode->numCols,
-											  plannode->sortColIdx,
-											  plannode->sortOperators,
-											  plannode->collations,
-											  plannode->nullsFirst,
+											  plannode->numCols - skipCols,
+											  &(plannode->sortColIdx[skipCols]),
+											  &(plannode->sortOperators[skipCols]),
+											  &(plannode->collations[skipCols]),
+											  &(plannode->nullsFirst[skipCols]),
 											  work_mem,
 											  node->randomAccess);
-		if (node->bounded)
-			tuplesort_set_bound(tuplesortstate, node->bound);
 		node->tuplesortstate = (void *) tuplesortstate;
+	}
 
-		/*
-		 * Scan the subplan and feed all the tuples to tuplesort.
-		 */
+	if (node->bounded)
+		tuplesort_set_bound(tuplesortstate, node->bound - node->bound_Done);
 
-		for (;;)
-		{
-			slot = ExecProcNode(outerNode);
+	/*
+	 * Put the next group of tuples, whose first "skipCols" sort values are
+	 * all equal, into tuplesort.
+	 */
+	for (;;)
+	{
+		slot = ExecProcNode(outerNode);
 
+		if (skipCols == 0)
+		{
 			if (TupIsNull(slot))
+			{
+				node->finished = true;
 				break;
-
+			}
 			tuplesort_puttupleslot(tuplesortstate, slot);
+			nTuples++;
 		}
+		else if (node->prev)
+		{
+			ExecStoreTuple(node->prev, node->ss.ps.ps_ResultTupleSlot, InvalidBuffer, false);
+			tuplesort_puttupleslot(tuplesortstate, node->ss.ps.ps_ResultTupleSlot);
+			nTuples++;
 
-		/*
-		 * Complete the sort.
-		 */
-		tuplesort_performsort(tuplesortstate);
+			if (TupIsNull(slot))
+			{
+				node->finished = true;
+				break;
+			}
+			else
+			{
+				bool cmp;
+				cmp = cmpSortSkipCols(node, tupDesc, node->prev, slot);
+				node->prev = ExecCopySlotTuple(slot);
+				if (!cmp)
+					break;
+			}
+		}
+		else
+		{
+			if (TupIsNull(slot))
+			{
+				node->finished = true;
+				break;
+			}
+			else
+			{
+				node->prev = ExecCopySlotTuple(slot);
+			}
+		}
+	}
 
-		/*
-		 * restore to user specified direction
-		 */
-		estate->es_direction = dir;
+	/*
+	 * Complete the sort.
+	 */
+	tuplesort_performsort(tuplesortstate);
 
-		/*
-		 * finally set the sorted flag to true
-		 */
-		node->sort_Done = true;
-		node->bounded_Done = node->bounded;
-		node->bound_Done = node->bound;
-		SO1_printf("ExecSort: %s\n", "sorting done");
+	/*
+	 * restore to user specified direction
+	 */
+	estate->es_direction = dir;
+
+	/*
+	 * finally set the sorted flag to true
+	 */
+	node->sort_Done = true;
+	node->bounded_Done = node->bounded;
+
+	/*
+	 * Adjust bound_Done with number of tuples we've actually sorted.
+	 */
+	if (node->bounded)
+	{
+		if (node->finished)
+			node->bound_Done = node->bound;
+		else
+			node->bound_Done = Min(node->bound, node->bound_Done + nTuples);
 	}
 
+	SO1_printf("ExecSort: %s\n", "sorting done");
+
 	SO1_printf("ExecSort: %s\n",
 			   "retrieving tuple from tuplesort");
 
@@ -157,6 +257,15 @@ ExecInitSort(Sort *node, EState *estate, int eflags)
 			   "initializing sort node");
 
 	/*
+	 * skipCols can't be used with either EXEC_FLAG_REWIND, EXEC_FLAG_BACKWARD
+	 * or EXEC_FLAG_MARK, because we hold only current bucket in
+	 * tuplesortstate.
+	 */
+	Assert(node->skipCols == 0 || (eflags & (EXEC_FLAG_REWIND |
+											 EXEC_FLAG_BACKWARD |
+											 EXEC_FLAG_MARK)) == 0);
+
+	/*
 	 * create state structure
 	 */
 	sortstate = makeNode(SortState);
@@ -174,7 +283,10 @@ ExecInitSort(Sort *node, EState *estate, int eflags)
 
 	sortstate->bounded = false;
 	sortstate->sort_Done = false;
+	sortstate->finished = false;
 	sortstate->tuplesortstate = NULL;
+	sortstate->prev = NULL;
+	sortstate->bound_Done = 0;
 
 	/*
 	 * Miscellaneous initialization
@@ -316,6 +428,7 @@ ExecReScanSort(SortState *node)
 		node->sort_Done = false;
 		tuplesort_end((Tuplesortstate *) node->tuplesortstate);
 		node->tuplesortstate = NULL;
+		node->bound_Done = 0;
 
 		/*
 		 * if chgParam of subnode is not null then plan will be re-scanned by
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 3088578..43f7089 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -735,6 +735,7 @@ _copySort(const Sort *from)
 	CopyPlanFields((const Plan *) from, (Plan *) newnode);
 
 	COPY_SCALAR_FIELD(numCols);
+	COPY_SCALAR_FIELD(skipCols);
 	COPY_POINTER_FIELD(sortColIdx, from->numCols * sizeof(AttrNumber));
 	COPY_POINTER_FIELD(sortOperators, from->numCols * sizeof(Oid));
 	COPY_POINTER_FIELD(collations, from->numCols * sizeof(Oid));
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 0cdb790..314d3ab 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1235,15 +1235,22 @@ cost_recursive_union(Plan *runion, Plan *nrterm, Plan *rterm)
  */
 void
 cost_sort(Path *path, PlannerInfo *root,
-		  List *pathkeys, Cost input_cost, double tuples, int width,
-		  Cost comparison_cost, int sort_mem,
+		  List *pathkeys, int presorted_keys,
+		  Cost input_startup_cost, Cost input_total_cost,
+		  double tuples, int width, Cost comparison_cost, int sort_mem,
 		  double limit_tuples)
 {
-	Cost		startup_cost = input_cost;
-	Cost		run_cost = 0;
+	Cost		startup_cost = input_startup_cost;
+	Cost		run_cost = 0,
+				rest_cost,
+				group_cost,
+				input_run_cost = input_total_cost - input_startup_cost;
 	double		input_bytes = relation_byte_size(tuples, width);
 	double		output_bytes;
 	double		output_tuples;
+	double		num_groups,
+				group_input_bytes,
+				group_tuples;
 	long		sort_mem_bytes = sort_mem * 1024L;
 
 	if (!enable_sort)
@@ -1273,13 +1280,47 @@ cost_sort(Path *path, PlannerInfo *root,
 		output_bytes = input_bytes;
 	}
 
-	if (output_bytes > sort_mem_bytes)
+	/*
+	 * Estimate number of groups which dataset is divided by presorted keys.
+	 */
+	if (presorted_keys > 0)
+	{
+		List *groupExprs = NIL;
+		ListCell *l;
+		int i = 0;
+
+		foreach(l, pathkeys)
+		{
+			PathKey *key = (PathKey *)lfirst(l);
+			EquivalenceMember *member = (EquivalenceMember *)
+								lfirst(list_head(key->pk_eclass->ec_members));
+
+			groupExprs = lappend(groupExprs, member->em_expr);
+
+			i++;
+			if (i >= presorted_keys)
+				break;
+		}
+
+		num_groups = estimate_num_groups(root, groupExprs, tuples);
+	}
+	else
+	{
+		num_groups = 1.0;
+	}
+
+	/*
+	 * Estimate average cost of one group sorting
+	 */
+	group_input_bytes = input_bytes / num_groups;
+	group_tuples = tuples / num_groups;
+	if (output_bytes > sort_mem_bytes && group_input_bytes > sort_mem_bytes)
 	{
 		/*
 		 * We'll have to use a disk-based sort of all the tuples
 		 */
-		double		npages = ceil(input_bytes / BLCKSZ);
-		double		nruns = (input_bytes / sort_mem_bytes) * 0.5;
+		double		npages = ceil(group_input_bytes / BLCKSZ);
+		double		nruns = (group_input_bytes / sort_mem_bytes) * 0.5;
 		double		mergeorder = tuplesort_merge_order(sort_mem_bytes);
 		double		log_runs;
 		double		npageaccesses;
@@ -1289,7 +1330,7 @@ cost_sort(Path *path, PlannerInfo *root,
 		 *
 		 * Assume about N log2 N comparisons
 		 */
-		startup_cost += comparison_cost * tuples * LOG2(tuples);
+		group_cost = comparison_cost * group_tuples * LOG2(group_tuples);
 
 		/* Disk costs */
 
@@ -1300,10 +1341,10 @@ cost_sort(Path *path, PlannerInfo *root,
 			log_runs = 1.0;
 		npageaccesses = 2.0 * npages * log_runs;
 		/* Assume 3/4ths of accesses are sequential, 1/4th are not */
-		startup_cost += npageaccesses *
+		group_cost += npageaccesses *
 			(seq_page_cost * 0.75 + random_page_cost * 0.25);
 	}
-	else if (tuples > 2 * output_tuples || input_bytes > sort_mem_bytes)
+	else if (group_tuples > 2 * output_tuples || group_input_bytes > sort_mem_bytes)
 	{
 		/*
 		 * We'll use a bounded heap-sort keeping just K tuples in memory, for
@@ -1311,15 +1352,26 @@ cost_sort(Path *path, PlannerInfo *root,
 		 * factor is a bit higher than for quicksort.  Tweak it so that the
 		 * cost curve is continuous at the crossover point.
 		 */
-		startup_cost += comparison_cost * tuples * LOG2(2.0 * output_tuples);
+		group_cost = comparison_cost * group_tuples * LOG2(2.0 * output_tuples);
 	}
 	else
 	{
 		/* We'll use plain quicksort on all the input tuples */
-		startup_cost += comparison_cost * tuples * LOG2(tuples);
+		group_cost = comparison_cost * group_tuples * LOG2(group_tuples);
 	}
 
 	/*
+	 * We have to sort the first group before the node can start its output.
+	 * Sorting the rest of the groups is required to return all the tuples.
+	 */
+	startup_cost += group_cost;
+	rest_cost = (num_groups * (output_tuples / tuples) - 1.0) * group_cost;
+	if (rest_cost > 0.0)
+		run_cost += rest_cost;
+	startup_cost += input_run_cost / num_groups;
+	run_cost += input_run_cost * ((num_groups - 1.0) / num_groups);
+
+	/*
 	 * Also charge a small amount (arbitrarily set equal to operator cost) per
 	 * extracted tuple.  We don't charge cpu_tuple_cost because a Sort node
 	 * doesn't do qual-checking or projection, so it has less overhead than
@@ -2029,6 +2081,8 @@ initial_cost_mergejoin(PlannerInfo *root, JoinCostWorkspace *workspace,
 		cost_sort(&sort_path,
 				  root,
 				  outersortkeys,
+				  pathkeys_common(outer_path->pathkeys, outersortkeys),
+				  outer_path->startup_cost,
 				  outer_path->total_cost,
 				  outer_path_rows,
 				  outer_path->parent->width,
@@ -2055,6 +2109,8 @@ initial_cost_mergejoin(PlannerInfo *root, JoinCostWorkspace *workspace,
 		cost_sort(&sort_path,
 				  root,
 				  innersortkeys,
+				  pathkeys_common(inner_path->pathkeys, innersortkeys),
+				  inner_path->startup_cost,
 				  inner_path->total_cost,
 				  inner_path_rows,
 				  inner_path->parent->width,
@@ -2266,7 +2322,7 @@ final_cost_mergejoin(PlannerInfo *root, MergePath *path,
 	 * it off does not entitle us to deliver an invalid plan.
 	 */
 	else if (innersortkeys == NIL &&
-			 !ExecSupportsMarkRestore(inner_path->pathtype))
+			 !ExecSupportsMarkRestore(inner_path->pathtype, NULL))
 		path->materialize_inner = true;
 
 	/*
@@ -2780,7 +2836,7 @@ cost_subplan(PlannerInfo *root, SubPlan *subplan, Plan *plan)
 		 * every time.
 		 */
 		if (subplan->parParam == NIL &&
-			ExecMaterializesOutput(nodeTag(plan)))
+			ExecMaterializesOutput(nodeTag(plan), plan))
 			sp_cost.startup += plan->startup_cost;
 		else
 			sp_cost.per_tuple += plan->startup_cost;
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index be54f3d..7bbad4f 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -820,7 +820,7 @@ match_unsorted_outer(PlannerInfo *root,
 		 * output anyway.
 		 */
 		if (enable_material && inner_cheapest_total != NULL &&
-			!ExecMaterializesOutput(inner_cheapest_total->pathtype))
+			!ExecMaterializesOutput(inner_cheapest_total->pathtype, NULL))
 			matpath = (Path *)
 				create_material_path(innerrel, inner_cheapest_total);
 	}
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 5d953df..0a9d6f7 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -21,11 +21,13 @@
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
 #include "nodes/plannodes.h"
+#include "optimizer/cost.h"
 #include "optimizer/clauses.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "optimizer/tlist.h"
 #include "utils/lsyscache.h"
+#include "utils/selfuncs.h"
 
 
 static PathKey *make_canonical_pathkey(PlannerInfo *root,
@@ -312,6 +314,32 @@ compare_pathkeys(List *keys1, List *keys2)
 }
 
 /*
+ * pathkeys_common
+ *	  Count how many leading pathkeys keys1 and keys2 share, i.e. the length
+ *	  of their longest common prefix.  Entries are compared by pointer
+ *	  identity, not by content.
+ */
+int
+pathkeys_common(List *keys1, List *keys2)
+{
+	int			nmatched = 0;
+	ListCell   *lc1;
+	ListCell   *lc2;
+
+	forboth(lc1, keys1, lc2, keys2)
+	{
+		/* stop at the first position where the two lists diverge */
+		if ((PathKey *) lfirst(lc1) != (PathKey *) lfirst(lc2))
+			break;
+		nmatched++;
+	}
+
+	/* also reached when either list runs out before any mismatch */
+	return nmatched;
+}
+
+
+/*
  * pathkeys_contained_in
  *	  Common special case of compare_pathkeys: we just want to know
  *	  if keys2 are at least as well sorted as keys1.
@@ -369,9 +397,36 @@ get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 }
 
 /*
+ * compare_bifractional_path_costs
+ *	  Compare two paths, charging each its startup cost plus its own fraction
+ *	  of run cost; fractions <= 0 or >= 1 fall back to total-cost comparison.
+ */
+static int
+compare_bifractional_path_costs(Path *path1, Path *path2,
+							  double fraction1, double fraction2)
+{
+	Cost		run1 = path1->total_cost - path1->startup_cost;
+	Cost		run2 = path2->total_cost - path2->startup_cost;
+	Cost		est1, est2;
+
+	if (fraction1 <= 0.0 || fraction1 >= 1.0)
+		return compare_path_costs(path1, path2, TOTAL_COST);
+	if (fraction2 <= 0.0 || fraction2 >= 1.0)
+		return compare_path_costs(path1, path2, TOTAL_COST);
+	/* -1 if path1 is cheaper, +1 if path2 is cheaper, 0 on a tie */
+	est1 = path1->startup_cost + fraction1 * run1;
+	est2 = path2->startup_cost + fraction2 * run2;
+	if (est1 > est2)
+		return +1;
+	return (est1 < est2) ? -1 : 0;
+}
+
+/*
  * get_cheapest_fractional_path_for_pathkeys
  *	  Find the cheapest path (for retrieving a specified fraction of all
- *	  the tuples) that satisfies the given pathkeys and parameterization.
+ *	  the tuples) that satisfies the given parameterization and at least
+ *	  partially satisfies the given pathkeys.  Paths are compared using
+ *	  per-path fractions of the tuples to fetch before a partial sort starts.
  *	  Return NULL if no such path.
  *
  * See compare_fractional_path_costs() for the interpretation of the fraction
@@ -386,26 +441,84 @@ Path *
 get_cheapest_fractional_path_for_pathkeys(List *paths,
 										  List *pathkeys,
 										  Relids required_outer,
-										  double fraction)
+										  double fraction,
+										  PlannerInfo *root,
+										  double tuples)
 {
 	Path	   *matched_path = NULL;
+	int			matched_n_common_pathkeys = 0,
+				costs_cmp, n_common_pathkeys,
+				n_pathkeys = list_length(pathkeys);
 	ListCell   *l;
+	List	   *groupExprs = NIL;
+	double	   *num_groups, matched_fraction = 0.0;
+	int			i;
+
+	/*
+	 * num_groups[k]: estimated group count for the first k + 1 pathkeys.
+	 */
+	i = 0;
+	num_groups = (double *)palloc(sizeof(double) * list_length(pathkeys));
+	foreach(l, pathkeys)
+	{
+		PathKey *key = (PathKey *)lfirst(l);
+		EquivalenceMember *member = (EquivalenceMember *)
+							lfirst(list_head(key->pk_eclass->ec_members));
+
+		groupExprs = lappend(groupExprs, member->em_expr);
+
+		num_groups[i] = estimate_num_groups(root, groupExprs, tuples);
+		i++;
+	}
+
 
 	foreach(l, paths)
 	{
 		Path	   *path = (Path *) lfirst(l);
+		double		current_fraction;
+
+		n_common_pathkeys = pathkeys_common(pathkeys, path->pathkeys);
+		if (n_common_pathkeys < matched_n_common_pathkeys ||
+				n_common_pathkeys == 0)
+			continue;
 
 		/*
-		 * Since cost comparison is a lot cheaper than pathkey comparison, do
-		 * that first.  (XXX is that still true?)
+		 * A partial sort reads one whole group before emitting, so charge an
+		 * extra 1/num_groups of the input; cap at 1.0.  Skip if fraction <= 0.
 		 */
-		if (matched_path != NULL &&
-			compare_fractional_path_costs(matched_path, path, fraction) <= 0)
-			continue;
+		current_fraction = fraction;
+		if (n_common_pathkeys < n_pathkeys && fraction > 0.0)
+		{
+			current_fraction += 1.0 / num_groups[n_common_pathkeys - 1];
+			current_fraction = Min(current_fraction, 1.0);
+		}
 
-		if (pathkeys_contained_in(pathkeys, path->pathkeys) &&
+		/*
+		 * Compare against the best path so far, each at its own fraction.
+		 */
+		if (matched_path != NULL)
+		{
+			costs_cmp = compare_bifractional_path_costs(matched_path, path,
+					matched_fraction, current_fraction);
+		}
+		else
+		{
+			costs_cmp = 1;
+		}
+
+		/*
+		 * More common pathkeys always wins; cost only breaks ties.
+		 */
+		if ((
+				n_common_pathkeys > matched_n_common_pathkeys
+				||	(n_common_pathkeys == matched_n_common_pathkeys
+					 && costs_cmp > 0)) &&
 			bms_is_subset(PATH_REQ_OUTER(path), required_outer))
+		{
 			matched_path = path;
+			matched_n_common_pathkeys = n_common_pathkeys;
+			matched_fraction = current_fraction;
+		}
 	}
 	return matched_path;
 }
@@ -1450,23 +1563,26 @@ right_merge_direction(PlannerInfo *root, PathKey *pathkey)
  *		Count the number of pathkeys that are useful for meeting the
  *		query's requested output ordering.
  *
- * Unlike merge pathkeys, this is an all-or-nothing affair: it does us
- * no good to order by just the first key(s) of the requested ordering.
- * So the result is always either 0 or list_length(root->query_pathkeys).
+ * Returns the number of leading pathkeys that match the given argument.  The
+ * remaining ones can be satisfied by a partial sort.
  */
 static int
 pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
 {
+	int n;
+
 	if (root->query_pathkeys == NIL)
 		return 0;				/* no special ordering requested */
 
 	if (pathkeys == NIL)
 		return 0;				/* unordered path */
 
-	if (pathkeys_contained_in(root->query_pathkeys, pathkeys))
+	n = pathkeys_common(root->query_pathkeys, pathkeys);
+
+	if (n != 0)
 	{
 		/* It's useful ... or at least the first N keys are */
-		return list_length(root->query_pathkeys);
+		return n;
 	}
 
 	return 0;					/* path ordering not useful */
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 4b641a2..129ea40 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -149,6 +149,7 @@ static MergeJoin *make_mergejoin(List *tlist,
 			   Plan *lefttree, Plan *righttree,
 			   JoinType jointype);
 static Sort *make_sort(PlannerInfo *root, Plan *lefttree, int numCols,
+		  List *pathkeys, int skipCols,
 		  AttrNumber *sortColIdx, Oid *sortOperators,
 		  Oid *collations, bool *nullsFirst,
 		  double limit_tuples);
@@ -774,6 +775,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path)
 		Oid		   *sortOperators;
 		Oid		   *collations;
 		bool	   *nullsFirst;
+		int			n_common_pathkeys;
 
 		/* Build the child plan */
 		subplan = create_plan_recurse(root, subpath);
@@ -807,8 +809,10 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path)
 					  numsortkeys * sizeof(bool)) == 0);
 
 		/* Now, insert a Sort node if subplan isn't sufficiently ordered */
-		if (!pathkeys_contained_in(pathkeys, subpath->pathkeys))
+		n_common_pathkeys = pathkeys_common(pathkeys, subpath->pathkeys);
+		if (n_common_pathkeys < list_length(pathkeys))
 			subplan = (Plan *) make_sort(root, subplan, numsortkeys,
+										 pathkeys, n_common_pathkeys,
 										 sortColIdx, sortOperators,
 										 collations, nullsFirst,
 										 best_path->limit_tuples);
@@ -2181,9 +2185,11 @@ create_mergejoin_plan(PlannerInfo *root,
 		disuse_physical_tlist(root, outer_plan, best_path->jpath.outerjoinpath);
 		outer_plan = (Plan *)
 			make_sort_from_pathkeys(root,
-									outer_plan,
-									best_path->outersortkeys,
-									-1.0);
+								outer_plan,
+								best_path->outersortkeys,
+								-1.0,
+								pathkeys_common(best_path->outersortkeys,
+									best_path->jpath.outerjoinpath->pathkeys));
 		outerpathkeys = best_path->outersortkeys;
 	}
 	else
@@ -2194,9 +2200,11 @@ create_mergejoin_plan(PlannerInfo *root,
 		disuse_physical_tlist(root, inner_plan, best_path->jpath.innerjoinpath);
 		inner_plan = (Plan *)
 			make_sort_from_pathkeys(root,
-									inner_plan,
-									best_path->innersortkeys,
-									-1.0);
+								inner_plan,
+								best_path->innersortkeys,
+								-1.0,
+								pathkeys_common(best_path->innersortkeys,
+									best_path->jpath.innerjoinpath->pathkeys));
 		innerpathkeys = best_path->innersortkeys;
 	}
 	else
@@ -3736,6 +3744,7 @@ make_mergejoin(List *tlist,
  */
 static Sort *
 make_sort(PlannerInfo *root, Plan *lefttree, int numCols,
+          List *pathkeys, int skipCols,
 		  AttrNumber *sortColIdx, Oid *sortOperators,
 		  Oid *collations, bool *nullsFirst,
 		  double limit_tuples)
@@ -3745,7 +3754,8 @@ make_sort(PlannerInfo *root, Plan *lefttree, int numCols,
 	Path		sort_path;		/* dummy for result of cost_sort */
 
 	copy_plan_costsize(plan, lefttree); /* only care about copying size */
-	cost_sort(&sort_path, root, NIL,
+	cost_sort(&sort_path, root, pathkeys, skipCols,
+			  lefttree->startup_cost,
 			  lefttree->total_cost,
 			  lefttree->plan_rows,
 			  lefttree->plan_width,
@@ -3759,6 +3769,7 @@ make_sort(PlannerInfo *root, Plan *lefttree, int numCols,
 	plan->lefttree = lefttree;
 	plan->righttree = NULL;
 	node->numCols = numCols;
+	node->skipCols = skipCols;
 	node->sortColIdx = sortColIdx;
 	node->sortOperators = sortOperators;
 	node->collations = collations;
@@ -4087,7 +4098,7 @@ find_ec_member_for_tle(EquivalenceClass *ec,
  */
 Sort *
 make_sort_from_pathkeys(PlannerInfo *root, Plan *lefttree, List *pathkeys,
-						double limit_tuples)
+						double limit_tuples, int skipCols)
 {
 	int			numsortkeys;
 	AttrNumber *sortColIdx;
@@ -4107,7 +4118,7 @@ make_sort_from_pathkeys(PlannerInfo *root, Plan *lefttree, List *pathkeys,
 										  &nullsFirst);
 
 	/* Now build the Sort node */
-	return make_sort(root, lefttree, numsortkeys,
+	return make_sort(root, lefttree, numsortkeys, pathkeys, skipCols,
 					 sortColIdx, sortOperators, collations,
 					 nullsFirst, limit_tuples);
 }
@@ -4150,7 +4161,7 @@ make_sort_from_sortclauses(PlannerInfo *root, List *sortcls, Plan *lefttree)
 		numsortkeys++;
 	}
 
-	return make_sort(root, lefttree, numsortkeys,
+	return make_sort(root, lefttree, numsortkeys, NIL, 0,
 					 sortColIdx, sortOperators, collations,
 					 nullsFirst, -1.0);
 }
@@ -4172,7 +4183,8 @@ Sort *
 make_sort_from_groupcols(PlannerInfo *root,
 						 List *groupcls,
 						 AttrNumber *grpColIdx,
-						 Plan *lefttree)
+						 Plan *lefttree,
+						 List *pathkeys, int skipCols)
 {
 	List	   *sub_tlist = lefttree->targetlist;
 	ListCell   *l;
@@ -4205,7 +4217,7 @@ make_sort_from_groupcols(PlannerInfo *root,
 		numsortkeys++;
 	}
 
-	return make_sort(root, lefttree, numsortkeys,
+	return make_sort(root, lefttree, numsortkeys, pathkeys, skipCols,
 					 sortColIdx, sortOperators, collations,
 					 nullsFirst, -1.0);
 }
diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c
index 94ca92d..7eea24e 100644
--- a/src/backend/optimizer/plan/planagg.c
+++ b/src/backend/optimizer/plan/planagg.c
@@ -494,7 +494,9 @@ build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
 		get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist,
 												  subroot->query_pathkeys,
 												  NULL,
-												  path_fraction);
+												  path_fraction,
+												  subroot,
+												  final_rel->rows);
 	if (!sorted_path)
 		return false;
 
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index e1480cd..58abc43 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -1394,7 +1394,9 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 			get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist,
 													  root->query_pathkeys,
 													  NULL,
-													  tuple_fraction);
+													  tuple_fraction,
+													  root,
+													  path_rows);
 
 		/* Don't consider same path in both guises; just wastes effort */
 		if (sorted_path == cheapest_path)
@@ -1410,10 +1412,14 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 		if (sorted_path)
 		{
 			Path		sort_path;		/* dummy for result of cost_sort */
+			Path		partial_sort_path;	/* dummy for result of cost_sort */
+			int			n_common_pathkeys;
+
+			n_common_pathkeys = pathkeys_common(root->query_pathkeys,
+												cheapest_path->pathkeys);
 
 			if (root->query_pathkeys == NIL ||
-				pathkeys_contained_in(root->query_pathkeys,
-									  cheapest_path->pathkeys))
+					n_common_pathkeys == list_length(root->query_pathkeys))
 			{
 				/* No sort needed for cheapest path */
 				sort_path.startup_cost = cheapest_path->startup_cost;
@@ -1423,12 +1429,35 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 			{
 				/* Figure cost for sorting */
 				cost_sort(&sort_path, root, root->query_pathkeys,
+						  n_common_pathkeys,
+						  cheapest_path->startup_cost,
 						  cheapest_path->total_cost,
 						  path_rows, path_width,
 						  0.0, work_mem, root->limit_tuples);
 			}
 
-			if (compare_fractional_path_costs(sorted_path, &sort_path,
+			n_common_pathkeys = pathkeys_common(root->query_pathkeys,
+												sorted_path->pathkeys);
+
+			if (root->query_pathkeys == NIL ||
+					n_common_pathkeys == list_length(root->query_pathkeys))
+			{
+				/* No sort needed for presorted path */
+				partial_sort_path.startup_cost = sorted_path->startup_cost;
+				partial_sort_path.total_cost = sorted_path->total_cost;
+			}
+			else
+			{
+				/* Figure cost for sorting */
+				cost_sort(&partial_sort_path, root, root->query_pathkeys,
+						  n_common_pathkeys,
+						  sorted_path->startup_cost,
+						  sorted_path->total_cost,
+						  path_rows, path_width,
+						  0.0, work_mem, root->limit_tuples);
+			}
+
+			if (compare_fractional_path_costs(&partial_sort_path, &sort_path,
 											  tuple_fraction) > 0)
 			{
 				/* Presorted path is a loser */
@@ -1509,13 +1538,16 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 			 * results.
 			 */
 			bool		need_sort_for_grouping = false;
+			int			n_common_pathkeys_grouping;
 
 			result_plan = create_plan(root, best_path);
 			current_pathkeys = best_path->pathkeys;
 
 			/* Detect if we'll need an explicit sort for grouping */
+			n_common_pathkeys_grouping = pathkeys_common(root->group_pathkeys,
+														 current_pathkeys);
 			if (parse->groupClause && !use_hashed_grouping &&
-			  !pathkeys_contained_in(root->group_pathkeys, current_pathkeys))
+				n_common_pathkeys_grouping < list_length(root->group_pathkeys))
 			{
 				need_sort_for_grouping = true;
 
@@ -1609,7 +1641,9 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 							make_sort_from_groupcols(root,
 													 parse->groupClause,
 													 groupColIdx,
-													 result_plan);
+													 result_plan,
+													 root->group_pathkeys,
+													n_common_pathkeys_grouping);
 						current_pathkeys = root->group_pathkeys;
 					}
 					aggstrategy = AGG_SORTED;
@@ -1652,7 +1686,9 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 						make_sort_from_groupcols(root,
 												 parse->groupClause,
 												 groupColIdx,
-												 result_plan);
+												 result_plan,
+												 root->group_pathkeys,
+												 n_common_pathkeys_grouping);
 					current_pathkeys = root->group_pathkeys;
 				}
 
@@ -1769,13 +1805,17 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 				if (window_pathkeys)
 				{
 					Sort	   *sort_plan;
+					int			n_common_pathkeys;
+
+					n_common_pathkeys = pathkeys_common(window_pathkeys,
+													    current_pathkeys);
 
 					sort_plan = make_sort_from_pathkeys(root,
 														result_plan,
 														window_pathkeys,
-														-1.0);
-					if (!pathkeys_contained_in(window_pathkeys,
-											   current_pathkeys))
+														-1.0,
+														n_common_pathkeys);
+					if (n_common_pathkeys < list_length(window_pathkeys))
 					{
 						/* we do indeed need to sort */
 						result_plan = (Plan *) sort_plan;
@@ -1921,19 +1961,21 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 			{
 				if (list_length(root->distinct_pathkeys) >=
 					list_length(root->sort_pathkeys))
-					current_pathkeys = root->distinct_pathkeys;
+					needed_pathkeys = root->distinct_pathkeys;
 				else
 				{
-					current_pathkeys = root->sort_pathkeys;
+					needed_pathkeys = root->sort_pathkeys;
 					/* Assert checks that parser didn't mess up... */
 					Assert(pathkeys_contained_in(root->distinct_pathkeys,
-												 current_pathkeys));
+												 needed_pathkeys));
 				}
 
 				result_plan = (Plan *) make_sort_from_pathkeys(root,
 															   result_plan,
-															current_pathkeys,
-															   -1.0);
+															   needed_pathkeys,
+															   -1.0,
+							pathkeys_common(needed_pathkeys, current_pathkeys));
+				current_pathkeys = needed_pathkeys;
 			}
 
 			result_plan = (Plan *) make_unique(result_plan,
@@ -1949,12 +1991,15 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 	 */
 	if (parse->sortClause)
 	{
-		if (!pathkeys_contained_in(root->sort_pathkeys, current_pathkeys))
+		int common = pathkeys_common(root->sort_pathkeys, current_pathkeys);
+		
+		if (common < list_length(root->sort_pathkeys))
 		{
 			result_plan = (Plan *) make_sort_from_pathkeys(root,
 														   result_plan,
 														 root->sort_pathkeys,
-														   limit_tuples);
+														   limit_tuples,
+														   common);
 			current_pathkeys = root->sort_pathkeys;
 		}
 	}
@@ -2698,6 +2743,7 @@ choose_hashed_grouping(PlannerInfo *root,
 	List	   *current_pathkeys;
 	Path		hashed_p;
 	Path		sorted_p;
+	int			n_common_pathkeys;
 
 	/*
 	 * Executor doesn't support hashed aggregation with DISTINCT or ORDER BY
@@ -2779,7 +2825,8 @@ choose_hashed_grouping(PlannerInfo *root,
 			 path_rows);
 	/* Result of hashed agg is always unsorted */
 	if (target_pathkeys)
-		cost_sort(&hashed_p, root, target_pathkeys, hashed_p.total_cost,
+		cost_sort(&hashed_p, root, target_pathkeys, 0,
+				  hashed_p.startup_cost, hashed_p.total_cost,
 				  dNumGroups, path_width,
 				  0.0, work_mem, limit_tuples);
 
@@ -2795,9 +2842,12 @@ choose_hashed_grouping(PlannerInfo *root,
 		sorted_p.total_cost = cheapest_path->total_cost;
 		current_pathkeys = cheapest_path->pathkeys;
 	}
-	if (!pathkeys_contained_in(root->group_pathkeys, current_pathkeys))
+
+	n_common_pathkeys = pathkeys_common(root->group_pathkeys, current_pathkeys);
+	if (n_common_pathkeys < list_length(root->group_pathkeys))
 	{
-		cost_sort(&sorted_p, root, root->group_pathkeys, sorted_p.total_cost,
+		cost_sort(&sorted_p, root, root->group_pathkeys,
+				  n_common_pathkeys, sorted_p.startup_cost, sorted_p.total_cost,
 				  path_rows, path_width,
 				  0.0, work_mem, -1.0);
 		current_pathkeys = root->group_pathkeys;
@@ -2812,10 +2862,12 @@ choose_hashed_grouping(PlannerInfo *root,
 		cost_group(&sorted_p, root, numGroupCols, dNumGroups,
 				   sorted_p.startup_cost, sorted_p.total_cost,
 				   path_rows);
+
 	/* The Agg or Group node will preserve ordering */
-	if (target_pathkeys &&
-		!pathkeys_contained_in(target_pathkeys, current_pathkeys))
-		cost_sort(&sorted_p, root, target_pathkeys, sorted_p.total_cost,
+	n_common_pathkeys = pathkeys_common(target_pathkeys, current_pathkeys);
+	if (target_pathkeys && n_common_pathkeys < list_length(target_pathkeys))
+		cost_sort(&sorted_p, root, target_pathkeys, n_common_pathkeys,
+				  sorted_p.startup_cost, sorted_p.total_cost,
 				  dNumGroups, path_width,
 				  0.0, work_mem, limit_tuples);
 
@@ -2868,6 +2920,7 @@ choose_hashed_distinct(PlannerInfo *root,
 	List	   *needed_pathkeys;
 	Path		hashed_p;
 	Path		sorted_p;
+	int			n_common_pathkeys;
 
 	/*
 	 * If we have a sortable DISTINCT ON clause, we always use sorting. This
@@ -2933,7 +2986,8 @@ choose_hashed_distinct(PlannerInfo *root,
 	 * need to charge for the final sort.
 	 */
 	if (parse->sortClause)
-		cost_sort(&hashed_p, root, root->sort_pathkeys, hashed_p.total_cost,
+		cost_sort(&hashed_p, root, root->sort_pathkeys, 0,
+				  hashed_p.startup_cost, hashed_p.total_cost,
 				  dNumDistinctRows, path_width,
 				  0.0, work_mem, limit_tuples);
 
@@ -2950,23 +3004,30 @@ choose_hashed_distinct(PlannerInfo *root,
 		needed_pathkeys = root->sort_pathkeys;
 	else
 		needed_pathkeys = root->distinct_pathkeys;
-	if (!pathkeys_contained_in(needed_pathkeys, current_pathkeys))
+
+	n_common_pathkeys = pathkeys_common(needed_pathkeys, current_pathkeys);
+	if (n_common_pathkeys < list_length(needed_pathkeys))
 	{
 		if (list_length(root->distinct_pathkeys) >=
 			list_length(root->sort_pathkeys))
 			current_pathkeys = root->distinct_pathkeys;
 		else
 			current_pathkeys = root->sort_pathkeys;
-		cost_sort(&sorted_p, root, current_pathkeys, sorted_p.total_cost,
+		cost_sort(&sorted_p, root, current_pathkeys,
+				  n_common_pathkeys, sorted_p.startup_cost, sorted_p.total_cost,
 				  path_rows, path_width,
 				  0.0, work_mem, -1.0);
 	}
 	cost_group(&sorted_p, root, numDistinctCols, dNumDistinctRows,
 			   sorted_p.startup_cost, sorted_p.total_cost,
 			   path_rows);
+
+
+	n_common_pathkeys = pathkeys_common(root->sort_pathkeys, current_pathkeys);
 	if (parse->sortClause &&
-		!pathkeys_contained_in(root->sort_pathkeys, current_pathkeys))
-		cost_sort(&sorted_p, root, root->sort_pathkeys, sorted_p.total_cost,
+		n_common_pathkeys < list_length(root->sort_pathkeys))
+		cost_sort(&sorted_p, root, root->sort_pathkeys, n_common_pathkeys,
+				  sorted_p.startup_cost, sorted_p.total_cost,
 				  dNumDistinctRows, path_width,
 				  0.0, work_mem, limit_tuples);
 
@@ -3756,8 +3817,9 @@ plan_cluster_use_sort(Oid tableOid, Oid indexOid)
 
 	/* Estimate the cost of seq scan + sort */
 	seqScanPath = create_seqscan_path(root, rel, NULL);
-	cost_sort(&seqScanAndSortPath, root, NIL,
-			  seqScanPath->total_cost, rel->tuples, rel->width,
+	cost_sort(&seqScanAndSortPath, root, NIL, 0,
+			  seqScanPath->startup_cost, seqScanPath->total_cost,
+			  rel->tuples, rel->width,
 			  comparisonCost, maintenance_work_mem, -1.0);
 
 	/* Estimate the cost of index scan */
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index 3e7dc85..3f7fbd4 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -780,7 +780,7 @@ build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot,
 		 * unnecessarily, so we don't.
 		 */
 		else if (splan->parParam == NIL && enable_material &&
-				 !ExecMaterializesOutput(nodeTag(plan)))
+				 !ExecMaterializesOutput(nodeTag(plan), plan))
 			plan = materialize_finished_plan(plan);
 
 		result = (Node *) splan;
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index 0410fdd..0f5fee2 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -860,7 +860,8 @@ choose_hashed_setop(PlannerInfo *root, List *groupClauses,
 	sorted_p.startup_cost = input_plan->startup_cost;
 	sorted_p.total_cost = input_plan->total_cost;
 	/* XXX cost_sort doesn't actually look at pathkeys, so just pass NIL */
-	cost_sort(&sorted_p, root, NIL, sorted_p.total_cost,
+	cost_sort(&sorted_p, root, NIL, 0,
+			  sorted_p.startup_cost, sorted_p.total_cost,
 			  input_plan->plan_rows, input_plan->plan_width,
 			  0.0, work_mem, -1.0);
 	cost_group(&sorted_p, root, numGroupCols, dNumGroups,
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 319e8b2..48966df 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -970,10 +970,11 @@ create_merge_append_path(PlannerInfo *root,
 	foreach(l, subpaths)
 	{
 		Path	   *subpath = (Path *) lfirst(l);
+		int			n_common_pathkeys = pathkeys_common(pathkeys, subpath->pathkeys);
 
 		pathnode->path.rows += subpath->rows;
 
-		if (pathkeys_contained_in(pathkeys, subpath->pathkeys))
+		if (n_common_pathkeys == list_length(pathkeys))
 		{
 			/* Subpath is adequately ordered, we won't need to sort it */
 			input_startup_cost += subpath->startup_cost;
@@ -987,6 +988,8 @@ create_merge_append_path(PlannerInfo *root,
 			cost_sort(&sort_path,
 					  root,
 					  pathkeys,
+					  n_common_pathkeys,
+					  subpath->startup_cost,
 					  subpath->total_cost,
 					  subpath->parent->tuples,
 					  subpath->parent->width,
@@ -1346,7 +1349,8 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 		/*
 		 * Estimate cost for sort+unique implementation
 		 */
-		cost_sort(&sort_path, root, NIL,
+		cost_sort(&sort_path, root, NIL, 0,
+				  subpath->startup_cost,
 				  subpath->total_cost,
 				  rel->rows,
 				  rel->width,
diff --git a/src/backend/utils/sort/sortsupport.c b/src/backend/utils/sort/sortsupport.c
index 2240fd0..de26b7c 100644
--- a/src/backend/utils/sort/sortsupport.c
+++ b/src/backend/utils/sort/sortsupport.c
@@ -86,6 +86,35 @@ PrepareSortSupportComparisonShim(Oid cmpFunc, SortSupport ssup)
 }
 
 /*
+ * Build an array of SortSupportData structures from separate per-key arrays.
+ */
+SortSupport
+MakeSortSupportKeys(int nkeys, AttrNumber *attNums,
+					Oid *sortOperators, Oid *sortCollations,
+					bool *nullsFirstFlags)
+{
+	SortSupport keys;
+	int			keyno;
+
+	/* Zero-filled, as PrepareSortSupportFromOrderingOp requires of callers */
+	keys = (SortSupport) palloc0(nkeys * sizeof(SortSupportData));
+
+	for (keyno = 0; keyno < nkeys; keyno++)
+	{
+		SortSupport key = &keys[keyno];
+
+		AssertArg(attNums[keyno] != 0);
+		AssertArg(sortOperators[keyno] != 0);
+		key->ssup_cxt = CurrentMemoryContext;
+		key->ssup_attno = attNums[keyno];
+		key->ssup_collation = sortCollations[keyno];
+		key->ssup_nulls_first = nullsFirstFlags[keyno];
+		PrepareSortSupportFromOrderingOp(sortOperators[keyno], key);
+	}
+	return keys;
+}
+
+/*
  * Fill in SortSupport given an ordering operator (btree "<" or ">" operator).
  *
  * Caller must previously have zeroed the SortSupportData structure and then
diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c
index 8e57505..6e28a40 100644
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -604,7 +604,6 @@ tuplesort_begin_heap(TupleDesc tupDesc,
 {
 	Tuplesortstate *state = tuplesort_begin_common(workMem, randomAccess);
 	MemoryContext oldcontext;
-	int			i;
 
 	oldcontext = MemoryContextSwitchTo(state->sortcontext);
 
@@ -632,24 +631,11 @@ tuplesort_begin_heap(TupleDesc tupDesc,
 	state->reversedirection = reversedirection_heap;
 
 	state->tupDesc = tupDesc;	/* assume we need not copy tupDesc */
-
-	/* Prepare SortSupport data for each column */
-	state->sortKeys = (SortSupport) palloc0(nkeys * sizeof(SortSupportData));
-
-	for (i = 0; i < nkeys; i++)
-	{
-		SortSupport sortKey = state->sortKeys + i;
-
-		AssertArg(attNums[i] != 0);
-		AssertArg(sortOperators[i] != 0);
-
-		sortKey->ssup_cxt = CurrentMemoryContext;
-		sortKey->ssup_collation = sortCollations[i];
-		sortKey->ssup_nulls_first = nullsFirstFlags[i];
-		sortKey->ssup_attno = attNums[i];
-
-		PrepareSortSupportFromOrderingOp(sortOperators[i], sortKey);
-	}
+	state->sortKeys = MakeSortSupportKeys(nkeys,
+										  attNums,
+										  sortOperators,
+										  sortCollations,
+										  nullsFirstFlags);
 
 	if (nkeys == 1)
 		state->onlyKey = state->sortKeys;
@@ -960,6 +946,26 @@ tuplesort_end(Tuplesortstate *state)
 	MemoryContextDelete(state->sortcontext);
 }
 
+/* tuplesort_reset: free tapes and in-memory tuples, return to TSS_INITIAL. */
+void
+tuplesort_reset(Tuplesortstate *state)
+{
+	int			tupno;
+
+	if (state->tapeset != NULL)
+		LogicalTapeSetClose(state->tapeset);
+	for (tupno = 0; tupno < state->memtupcount; tupno++)
+		free_sort_tuple(state, &state->memtuples[tupno]);
+
+	state->tapeset = NULL;
+	state->memtupcount = 0;
+	state->status = TSS_INITIAL;
+	state->bounded = false;
+	state->boundUsed = false;
+	state->currentRun = 0;
+	state->result_tape = -1;
+}
+
 /*
  * Grow the memtuples[] array, if possible within our memory constraint.  We
  * must not exceed INT_MAX tuples in memory or the caller-provided memory
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index 239aff3..f0ce4b2 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -102,9 +102,9 @@ extern PGDLLIMPORT ExecutorCheckPerms_hook_type ExecutorCheckPerms_hook;
 extern void ExecReScan(PlanState *node);
 extern void ExecMarkPos(PlanState *node);
 extern void ExecRestrPos(PlanState *node);
-extern bool ExecSupportsMarkRestore(NodeTag plantype);
+extern bool ExecSupportsMarkRestore(NodeTag plantype, Plan *node);
 extern bool ExecSupportsBackwardScan(Plan *node);
-extern bool ExecMaterializesOutput(NodeTag plantype);
+extern bool ExecMaterializesOutput(NodeTag plantype, Plan *node);
 
 /*
  * prototypes from functions in execCurrent.c
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index b271f21..5d86cd5 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1671,8 +1671,11 @@ typedef struct SortState
 	int64		bound;			/* if bounded, how many tuples are needed */
 	bool		sort_Done;		/* sort completed yet? */
 	bool		bounded_Done;	/* value of bounded we did the sort with */
+	bool		finished;
 	int64		bound_Done;		/* value of bound we did the sort with */
 	void	   *tuplesortstate; /* private state of tuplesort.c */
+	SortSupport skipKeys;		/* columns already sorted in input */
+	HeapTuple	prev;
 } SortState;
 
 /* ---------------------
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 3b9c683..f4f01e2 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -582,6 +582,7 @@ typedef struct Sort
 {
 	Plan		plan;
 	int			numCols;		/* number of sort-key columns */
+	int			skipCols;
 	AttrNumber *sortColIdx;		/* their indexes in the target list */
 	Oid		   *sortOperators;	/* OIDs of operators to sort them by */
 	Oid		   *collations;		/* OIDs of collations */
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index 75e2afb..bb761f9 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -88,8 +88,9 @@ extern void cost_ctescan(Path *path, PlannerInfo *root,
 			 RelOptInfo *baserel, ParamPathInfo *param_info);
 extern void cost_recursive_union(Plan *runion, Plan *nrterm, Plan *rterm);
 extern void cost_sort(Path *path, PlannerInfo *root,
-		  List *pathkeys, Cost input_cost, double tuples, int width,
-		  Cost comparison_cost, int sort_mem,
+		  List *pathkeys, int presorted_keys,
+		  Cost input_startup_cost, Cost input_total_cost,
+		  double tuples, int width, Cost comparison_cost, int sort_mem,
 		  double limit_tuples);
 extern void cost_merge_append(Path *path, PlannerInfo *root,
 				  List *pathkeys, int n_streams,
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 9b22fda..9179b4e 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -148,13 +148,16 @@ typedef enum
 
 extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
 extern bool pathkeys_contained_in(List *keys1, List *keys2);
+extern int pathkeys_common(List *keys1, List *keys2);
 extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 							   Relids required_outer,
 							   CostSelector cost_criterion);
 extern Path *get_cheapest_fractional_path_for_pathkeys(List *paths,
 										  List *pathkeys,
 										  Relids required_outer,
-										  double fraction);
+										  double fraction,
+										  PlannerInfo *root,
+										  double tuples);
 extern List *build_index_pathkeys(PlannerInfo *root, IndexOptInfo *index,
 					 ScanDirection scandir);
 extern List *build_expression_pathkey(PlannerInfo *root, Expr *expr,
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index 4504250..7b3aa98 100644
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -50,11 +50,12 @@ extern RecursiveUnion *make_recursive_union(List *tlist,
 					 Plan *lefttree, Plan *righttree, int wtParam,
 					 List *distinctList, long numGroups);
 extern Sort *make_sort_from_pathkeys(PlannerInfo *root, Plan *lefttree,
-						List *pathkeys, double limit_tuples);
+						List *pathkeys, double limit_tuples, int skipCols);
 extern Sort *make_sort_from_sortclauses(PlannerInfo *root, List *sortcls,
 						   Plan *lefttree);
 extern Sort *make_sort_from_groupcols(PlannerInfo *root, List *groupcls,
-						 AttrNumber *grpColIdx, Plan *lefttree);
+						 AttrNumber *grpColIdx, Plan *lefttree, List *pathkeys,
+						 int skipCols);
 extern Agg *make_agg(PlannerInfo *root, List *tlist, List *qual,
 		 AggStrategy aggstrategy, const AggClauseCosts *aggcosts,
 		 int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators,
diff --git a/src/include/utils/sortsupport.h b/src/include/utils/sortsupport.h
index 8b6b0de..9c4297c 100644
--- a/src/include/utils/sortsupport.h
+++ b/src/include/utils/sortsupport.h
@@ -150,6 +150,9 @@ ApplySortComparator(Datum datum1, bool isNull1,
 #endif   /*-- PG_USE_INLINE || SORTSUPPORT_INCLUDE_DEFINITIONS */
 
 /* Other functions in utils/sort/sortsupport.c */
+extern SortSupport MakeSortSupportKeys(int nkeys, AttrNumber *attNums,
+					Oid *sortOperators, Oid *sortCollations,
+					bool *nullsFirstFlags);
 extern void PrepareSortSupportComparisonShim(Oid cmpFunc, SortSupport ssup);
 extern void PrepareSortSupportFromOrderingOp(Oid orderingOp, SortSupport ssup);
 
diff --git a/src/include/utils/tuplesort.h b/src/include/utils/tuplesort.h
index 2537883..195e6c1 100644
--- a/src/include/utils/tuplesort.h
+++ b/src/include/utils/tuplesort.h
@@ -24,6 +24,7 @@
 #include "executor/tuptable.h"
 #include "fmgr.h"
 #include "utils/relcache.h"
+#include "utils/sortsupport.h"
 
 
 /* Tuplesortstate is an opaque type whose details are not known outside
@@ -106,6 +107,8 @@ extern bool tuplesort_skiptuples(Tuplesortstate *state, int64 ntuples,
 
 extern void tuplesort_end(Tuplesortstate *state);
 
+extern void tuplesort_reset(Tuplesortstate *state);
+
 extern void tuplesort_get_stats(Tuplesortstate *state,
 					const char **sortMethod,
 					const char **spaceType,
diff --git a/src/test/regress/expected/inherit.out b/src/test/regress/expected/inherit.out
index 56e2c99..d0de260 100644
--- a/src/test/regress/expected/inherit.out
+++ b/src/test/regress/expected/inherit.out
@@ -1323,10 +1323,11 @@ ORDER BY thousand, tenthous;
  Merge Append
    Sort Key: tenk1.thousand, tenk1.tenthous
    ->  Index Only Scan using tenk1_thous_tenthous on tenk1
-   ->  Sort
+   ->  Partial sort
          Sort Key: tenk1_1.thousand, tenk1_1.thousand
+         Presorted Key: tenk1_1.thousand
          ->  Index Only Scan using tenk1_thous_tenthous on tenk1 tenk1_1
-(6 rows)
+(7 rows)
 
 explain (costs off)
 SELECT thousand, tenthous, thousand+tenthous AS x FROM tenk1
@@ -1407,10 +1408,11 @@ ORDER BY x, y;
  Merge Append
    Sort Key: a.thousand, a.tenthous
    ->  Index Only Scan using tenk1_thous_tenthous on tenk1 a
-   ->  Sort
+   ->  Partial sort
          Sort Key: b.unique2, b.unique2
+         Presorted Key: b.unique2
          ->  Index Only Scan using tenk1_unique2 on tenk1 b
-(6 rows)
+(7 rows)
 
 -- exercise rescan code path via a repeatedly-evaluated subquery
 explain (costs off)
