From fe7d85a3f30481a8c92e12a6e5843d8279bcdac6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E4=B8=80=E6=8C=83?= <yizhi.fzh@alibaba-inc.com>
Date: Thu, 14 Jan 2021 21:39:28 +0800
Subject: [PATCH v1] Improve the cost_sort v1

---
 src/backend/optimizer/path/costsize.c   | 20 +++++++++++---------
 src/backend/optimizer/plan/createplan.c |  2 +-
 src/backend/optimizer/plan/planner.c    |  2 +-
 src/backend/optimizer/prep/prepunion.c  |  2 +-
 src/backend/optimizer/util/pathnode.c   | 10 +++++-----
 src/include/optimizer/cost.h            |  2 +-
 6 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 380336518f..2652a7eeba 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1696,7 +1696,7 @@ static void
 cost_tuplesort(Cost *startup_cost, Cost *run_cost,
 			   double tuples, int width,
 			   Cost comparison_cost, int sort_mem,
-			   double limit_tuples)
+			   double limit_tuples, int numSortCols)
 {
 	double		input_bytes = relation_byte_size(tuples, width);
 	double		output_bytes;
@@ -1710,8 +1710,9 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
 	if (tuples < 2.0)
 		tuples = 2.0;
 
+	Assert(numSortCols);
 	/* Include the default cost-per-comparison */
-	comparison_cost += 2.0 * cpu_operator_cost;
+	comparison_cost += (2.0 * cpu_operator_cost * numSortCols);
 
 	/* Do we have a useful LIMIT? */
 	if (limit_tuples > 0 && limit_tuples < tuples)
@@ -1784,7 +1785,7 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
 
 /*
  * cost_incremental_sort
- * 	Determines and returns the cost of sorting a relation incrementally, when
+ *	Determines and returns the cost of sorting a relation incrementally, when
  *  the input path is presorted by a prefix of the pathkeys.
  *
  * 'presorted_keys' is the number of leading pathkeys by which the input path
@@ -1813,6 +1814,7 @@ cost_incremental_sort(Path *path,
 	ListCell   *l;
 	int			i = 0;
 	bool		unknown_varno = false;
+	int			num_sort_cols = list_length(pathkeys) - presorted_keys;
 
 	Assert(presorted_keys != 0);
 
@@ -1888,7 +1890,7 @@ cost_incremental_sort(Path *path,
 	 */
 	cost_tuplesort(&group_startup_cost, &group_run_cost,
 				   1.5 * group_tuples, width, comparison_cost, sort_mem,
-				   limit_tuples);
+				   limit_tuples, num_sort_cols);
 
 	/*
 	 * Startup cost of incremental sort is the startup cost of its first group
@@ -1935,7 +1937,7 @@ cost_incremental_sort(Path *path,
  */
 void
 cost_sort(Path *path, PlannerInfo *root,
-		  List *pathkeys, Cost input_cost, double tuples, int width,
+		  int numSortCols, Cost input_cost, double tuples, int width,
 		  Cost comparison_cost, int sort_mem,
 		  double limit_tuples)
 
@@ -1946,7 +1948,7 @@ cost_sort(Path *path, PlannerInfo *root,
 	cost_tuplesort(&startup_cost, &run_cost,
 				   tuples, width,
 				   comparison_cost, sort_mem,
-				   limit_tuples);
+				   limit_tuples, numSortCols);
 
 	if (!enable_sort)
 		startup_cost += disable_cost;
@@ -2110,7 +2112,7 @@ cost_append(AppendPath *apath)
 					 */
 					cost_sort(&sort_path,
 							  NULL, /* doesn't currently need root */
-							  pathkeys,
+							  list_length(pathkeys),
 							  subpath->total_cost,
 							  subpath->rows,
 							  subpath->pathtarget->width,
@@ -3072,7 +3074,7 @@ initial_cost_mergejoin(PlannerInfo *root, JoinCostWorkspace *workspace,
 	{
 		cost_sort(&sort_path,
 				  root,
-				  outersortkeys,
+				  list_length(outersortkeys),
 				  outer_path->total_cost,
 				  outer_path_rows,
 				  outer_path->pathtarget->width,
@@ -3098,7 +3100,7 @@ initial_cost_mergejoin(PlannerInfo *root, JoinCostWorkspace *workspace,
 	{
 		cost_sort(&sort_path,
 				  root,
-				  innersortkeys,
+				  list_length(innersortkeys),
 				  inner_path->total_cost,
 				  inner_path_rows,
 				  inner_path->pathtarget->width,
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 25d4750ca6..f391ece7ec 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -5169,7 +5169,7 @@ label_sort_with_costsize(PlannerInfo *root, Sort *plan, double limit_tuples)
 	 */
 	Assert(IsA(plan, Sort));
 
-	cost_sort(&sort_path, root, NIL,
+	cost_sort(&sort_path, root, plan->numCols,
 			  lefttree->total_cost,
 			  lefttree->plan_rows,
 			  lefttree->plan_width,
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 4e6497ff32..6c7525becb 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6331,7 +6331,7 @@ plan_cluster_use_sort(Oid tableOid, Oid indexOid)
 
 	/* Estimate the cost of seq scan + sort */
 	seqScanPath = create_seqscan_path(root, rel, NULL, 0);
-	cost_sort(&seqScanAndSortPath, root, NIL,
+	cost_sort(&seqScanAndSortPath, root, list_length(rel->reltarget->exprs),
 			  seqScanPath->total_cost, rel->tuples, rel->reltarget->width,
 			  comparisonCost, maintenance_work_mem, -1.0);
 
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index 86f794c193..011b18233a 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -1083,7 +1083,7 @@ choose_hashed_setop(PlannerInfo *root, List *groupClauses,
 	sorted_p.startup_cost = input_path->startup_cost;
 	sorted_p.total_cost = input_path->total_cost;
 	/* XXX cost_sort doesn't actually look at pathkeys, so just pass NIL */
-	cost_sort(&sorted_p, root, NIL, sorted_p.total_cost,
+	cost_sort(&sorted_p, root, list_length(groupClauses), sorted_p.total_cost,
 			  input_path->rows, input_path->pathtarget->width,
 			  0.0, work_mem, -1.0);
 	cost_group(&sorted_p, root, numGroupCols, dNumGroups,
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index d465b9e213..3a73fc3cb8 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1434,7 +1434,7 @@ create_merge_append_path(PlannerInfo *root,
 
 			cost_sort(&sort_path,
 					  root,
-					  pathkeys,
+					  list_length(pathkeys),
 					  subpath->total_cost,
 					  subpath->parent->tuples,
 					  subpath->pathtarget->width,
@@ -1696,7 +1696,7 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 		/*
 		 * Estimate cost for sort+unique implementation
 		 */
-		cost_sort(&sort_path, root, NIL,
+		cost_sort(&sort_path, root, numCols,
 				  subpath->total_cost,
 				  rel->rows,
 				  subpath->pathtarget->width,
@@ -1820,7 +1820,7 @@ create_gather_merge_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 
 		cost_sort(&sort_path,
 				  root,
-				  pathkeys,
+				  list_length(pathkeys),
 				  subpath->total_cost,
 				  subpath->rows,
 				  subpath->pathtarget->width,
@@ -2870,7 +2870,7 @@ create_sort_path(PlannerInfo *root,
 
 	pathnode->subpath = subpath;
 
-	cost_sort(&pathnode->path, root, pathkeys,
+	cost_sort(&pathnode->path, root, list_length(pathkeys),
 			  subpath->total_cost,
 			  subpath->rows,
 			  subpath->pathtarget->width,
@@ -3186,7 +3186,7 @@ create_groupingsets_path(PlannerInfo *root,
 			else
 			{
 				/* Account for cost of sort, but don't charge input cost again */
-				cost_sort(&sort_path, root, NIL,
+				cost_sort(&sort_path, root,  numGroupCols,
 						  0.0,
 						  subpath->rows,
 						  subpath->pathtarget->width,
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index ed2e4af4be..4d8cbd2c87 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -99,7 +99,7 @@ extern void cost_resultscan(Path *path, PlannerInfo *root,
 							RelOptInfo *baserel, ParamPathInfo *param_info);
 extern void cost_recursive_union(Path *runion, Path *nrterm, Path *rterm);
 extern void cost_sort(Path *path, PlannerInfo *root,
-					  List *pathkeys, Cost input_cost, double tuples, int width,
+					  int numSortCols, Cost input_cost, double tuples, int width,
 					  Cost comparison_cost, int sort_mem,
 					  double limit_tuples);
 extern void cost_incremental_sort(Path *path,
-- 
2.21.0

