From eeec8b83542a722876231ceb14e4f2fffb1fc001 Mon Sep 17 00:00:00 2001
From: Richard Guo <guofenglinux@gmail.com>
Date: Mon, 9 Sep 2024 19:57:53 +0900
Subject: [PATCH v1] Consider explicit incremental sort for mergejoins

---
 src/backend/optimizer/path/costsize.c   | 60 +++++++++++++++----
 src/backend/optimizer/plan/createplan.c | 80 ++++++++++++++++++++++---
 2 files changed, 120 insertions(+), 20 deletions(-)

diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index e1523d15df..210fe5e754 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -3532,7 +3532,8 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path,
  * join quals here, except for obtaining the scan selectivity estimate which
  * is really essential (but fortunately, use of caching keeps the cost of
  * getting that down to something reasonable).
- * We also assume that cost_sort is cheap enough to use here.
+ * We also assume that cost_sort/cost_incremental_sort is cheap enough to use
+ * here.
  *
  * 'workspace' is to be filled with startup_cost, total_cost, and perhaps
  *		other data to be used by final_cost_mergejoin
@@ -3569,7 +3570,8 @@ initial_cost_mergejoin(PlannerInfo *root, JoinCostWorkspace *workspace,
 				outerendsel,
 				innerstartsel,
 				innerendsel;
-	Path		sort_path;		/* dummy for result of cost_sort */
+	Path		sort_path;		/* dummy for result of
+								 * cost_sort/cost_incremental_sort */
 
 	/* Protect some assumptions below that rowcounts aren't zero */
 	if (outer_path_rows <= 0)
@@ -3682,16 +3684,50 @@ initial_cost_mergejoin(PlannerInfo *root, JoinCostWorkspace *workspace,
 
 	if (outersortkeys)			/* do we need to sort outer? */
 	{
-		cost_sort(&sort_path,
-				  root,
-				  outersortkeys,
-				  outer_path->disabled_nodes,
-				  outer_path->total_cost,
-				  outer_path_rows,
-				  outer_path->pathtarget->width,
-				  0.0,
-				  work_mem,
-				  -1.0);
+		bool		use_incremental_sort = false;
+		int			presorted_keys;
+
+		if (enable_incremental_sort)
+		{
+			bool		is_sorted PG_USED_FOR_ASSERTS_ONLY;
+
+			is_sorted = pathkeys_count_contained_in(outersortkeys,
+													outer_path->pathkeys,
+													&presorted_keys);
+			Assert(!is_sorted);
+
+			if (presorted_keys > 0)
+				use_incremental_sort = true;
+		}
+
+		if (!use_incremental_sort)
+		{
+			cost_sort(&sort_path,
+					  root,
+					  outersortkeys,
+					  outer_path->disabled_nodes,
+					  outer_path->total_cost,
+					  outer_path_rows,
+					  outer_path->pathtarget->width,
+					  0.0,
+					  work_mem,
+					  -1.0);
+		}
+		else
+		{
+			cost_incremental_sort(&sort_path,
+								  root,
+								  outersortkeys,
+								  presorted_keys,
+								  outer_path->disabled_nodes,
+								  outer_path->startup_cost,
+								  outer_path->total_cost,
+								  outer_path_rows,
+								  outer_path->pathtarget->width,
+								  0.0,
+								  work_mem,
+								  -1.0);
+		}
 		disabled_nodes += sort_path.disabled_nodes;
 		startup_cost += sort_path.startup_cost;
 		startup_cost += (sort_path.total_cost - sort_path.startup_cost)
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index bb45ef318f..bff48d266f 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -179,6 +179,8 @@ static void copy_generic_path_info(Plan *dest, Path *src);
 static void copy_plan_costsize(Plan *dest, Plan *src);
 static void label_sort_with_costsize(PlannerInfo *root, Sort *plan,
 									 double limit_tuples);
+static void label_incrementalsort_with_costsize(PlannerInfo *root, IncrementalSort *plan,
+												List *pathkeys, double limit_tuples);
 static SeqScan *make_seqscan(List *qptlist, List *qpqual, Index scanrelid);
 static SampleScan *make_samplescan(List *qptlist, List *qpqual, Index scanrelid,
 								   TableSampleClause *tsc);
@@ -4523,12 +4525,47 @@ create_mergejoin_plan(PlannerInfo *root,
 	if (best_path->outersortkeys)
 	{
 		Relids		outer_relids = outer_path->parent->relids;
-		Sort	   *sort = make_sort_from_pathkeys(outer_plan,
+		Plan	   *sort_plan;
+		bool		use_incremental_sort = false;
+		int			presorted_keys;
+
+		if (enable_incremental_sort)
+		{
+			bool		is_sorted PG_USED_FOR_ASSERTS_ONLY;
+
+			is_sorted = pathkeys_count_contained_in(best_path->outersortkeys,
+													outer_path->pathkeys,
+													&presorted_keys);
+			Assert(!is_sorted);
+
+			if (presorted_keys > 0)
+				use_incremental_sort = true;
+		}
+
+		if (!use_incremental_sort)
+		{
+			sort_plan = (Plan *)
+				make_sort_from_pathkeys(outer_plan,
+										best_path->outersortkeys,
+										outer_relids);
+
+			label_sort_with_costsize(root, (Sort *) sort_plan, -1.0);
+		}
+		else
+		{
+			sort_plan = (Plan *)
+				make_incrementalsort_from_pathkeys(outer_plan,
 												   best_path->outersortkeys,
-												   outer_relids);
+												   outer_relids,
+												   presorted_keys);
 
-		label_sort_with_costsize(root, sort, -1.0);
-		outer_plan = (Plan *) sort;
+			label_incrementalsort_with_costsize(root,
+												(IncrementalSort *) sort_plan,
+												best_path->outersortkeys,
+												-1.0);
+		}
+
+		outer_plan = sort_plan;
 		outerpathkeys = best_path->outersortkeys;
 	}
 	else
@@ -5447,10 +5484,6 @@ label_sort_with_costsize(PlannerInfo *root, Sort *plan, double limit_tuples)
 	Plan	   *lefttree = plan->plan.lefttree;
 	Path		sort_path;		/* dummy for result of cost_sort */
 
-	/*
-	 * This function shouldn't have to deal with IncrementalSort plans because
-	 * they are only created from corresponding Path nodes.
-	 */
 	Assert(IsA(plan, Sort));
 
 	cost_sort(&sort_path, root, NIL,
@@ -5469,6 +5502,37 @@ label_sort_with_costsize(PlannerInfo *root, Sort *plan, double limit_tuples)
 	plan->plan.parallel_safe = lefttree->parallel_safe;
 }
 
+/*
+ * Same as label_sort_with_costsize, but labels the IncrementalSort node
+ * instead.
+ */
+static void
+label_incrementalsort_with_costsize(PlannerInfo *root, IncrementalSort *plan,
+									List *pathkeys, double limit_tuples)
+{
+	Plan	   *lefttree = plan->sort.plan.lefttree;
+	Path		sort_path;		/* dummy for result of cost_incremental_sort */
+
+	Assert(IsA(plan, IncrementalSort));
+
+	cost_incremental_sort(&sort_path, root, pathkeys,
+						  plan->nPresortedCols,
+						  plan->sort.plan.disabled_nodes,
+						  lefttree->startup_cost,
+						  lefttree->total_cost,
+						  lefttree->plan_rows,
+						  lefttree->plan_width,
+						  0.0,
+						  work_mem,
+						  limit_tuples);
+	plan->sort.plan.startup_cost = sort_path.startup_cost;
+	plan->sort.plan.total_cost = sort_path.total_cost;
+	plan->sort.plan.plan_rows = lefttree->plan_rows;
+	plan->sort.plan.plan_width = lefttree->plan_width;
+	plan->sort.plan.parallel_aware = false;
+	plan->sort.plan.parallel_safe = lefttree->parallel_safe;
+}
+
 /*
  * bitmap_subplan_mark_shared
  *	 Set isshared flag in bitmap subplan so that it will be created in
-- 
2.43.0

