From e1e589f5c330a68dbe3e8ccd11510e97bd9b1d01 Mon Sep 17 00:00:00 2001
From: Nikita Malakhov <n.malakhov@postgrespro.ru>
Date: Fri, 29 Nov 2024 10:27:02 +0300
Subject: [PATCH] Considering fractional hashjoin path.

TPC-DS benchmark query 60 shows usage of very ineffective plan instead
of optimal because of partial plans are rejected by add_partial_path_precheck,
discussion [1]. Modifying add_partial_path_precheck to reject path only
if it is clearly less effective (cost is considerably bigger), and passing
partial startup cost instead of using total for both startup and total
allows to use more effective partial paths.

Author: Nikita Malakhov <n.malakhov@postgrespro.ru>

[1] https://www.postgresql.org/message-id/flat/SEZPR06MB649422CDEBEBBA3915154EE58A232%40SEZPR06MB6494.apcprd06.prod.outlook.com
---
 contrib/is_jsonb_valid                |  1 +
 src/backend/optimizer/path/joinpath.c |  6 +++---
 src/backend/optimizer/util/pathnode.c | 11 ++++-------
 src/include/optimizer/pathnode.h      |  2 +-
 4 files changed, 9 insertions(+), 11 deletions(-)
 create mode 160000 contrib/is_jsonb_valid

diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index 3971138480..1c7238f8d2 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -999,7 +999,7 @@ try_partial_nestloop_path(PlannerInfo *root,
 	 */
 	initial_cost_nestloop(root, &workspace, jointype,
 						  outer_path, inner_path, extra);
-	if (!add_partial_path_precheck(joinrel, workspace.disabled_nodes,
+	if (!add_partial_path_precheck(joinrel, workspace.disabled_nodes, workspace.startup_cost,
 								   workspace.total_cost, pathkeys))
 		return;
 
@@ -1169,7 +1169,7 @@ try_partial_mergejoin_path(PlannerInfo *root,
 						   outersortkeys, innersortkeys,
 						   extra);
 
-	if (!add_partial_path_precheck(joinrel, workspace.disabled_nodes,
+	if (!add_partial_path_precheck(joinrel, workspace.disabled_nodes, workspace.startup_cost,
 								   workspace.total_cost, pathkeys))
 		return;
 
@@ -1300,7 +1300,7 @@ try_partial_hashjoin_path(PlannerInfo *root,
 	 */
 	initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
 						  outer_path, inner_path, extra, parallel_hash);
-	if (!add_partial_path_precheck(joinrel, workspace.disabled_nodes,
+	if (!add_partial_path_precheck(joinrel, workspace.disabled_nodes, workspace.startup_cost,
 								   workspace.total_cost, NIL))
 		return;
 
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index fc97bf6ee2..2a1088e4bc 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -918,7 +918,7 @@ add_partial_path(RelOptInfo *parent_rel, Path *new_path)
  * is surely a loser.
  */
 bool
-add_partial_path_precheck(RelOptInfo *parent_rel, int disabled_nodes,
+add_partial_path_precheck(RelOptInfo *parent_rel, int disabled_nodes, Cost startup_cost,
 						  Cost total_cost, List *pathkeys)
 {
 	ListCell   *p1;
@@ -937,16 +937,13 @@ add_partial_path_precheck(RelOptInfo *parent_rel, int disabled_nodes,
 	foreach(p1, parent_rel->partial_pathlist)
 	{
 		Path	   *old_path = (Path *) lfirst(p1);
-		PathKeysComparison keyscmp;
 
 		keyscmp = compare_pathkeys(pathkeys, old_path->pathkeys);
 		if (keyscmp != PATHKEYS_DIFFERENT)
 		{
-			if (total_cost > old_path->total_cost * STD_FUZZ_FACTOR &&
-				keyscmp != PATHKEYS_BETTER1)
+			if (total_cost > old_path->total_cost * STD_FUZZ_FACTOR)
 				return false;
-			if (old_path->total_cost > total_cost * STD_FUZZ_FACTOR &&
-				keyscmp != PATHKEYS_BETTER2)
+			if (old_path->total_cost > total_cost * STD_FUZZ_FACTOR)
 				return true;
 		}
 	}
@@ -962,7 +959,7 @@ add_partial_path_precheck(RelOptInfo *parent_rel, int disabled_nodes,
 	 * partial path; the resulting plans, if run in parallel, will be run to
 	 * completion.
 	 */
-	if (!add_path_precheck(parent_rel, disabled_nodes, total_cost, total_cost,
+	if (!add_path_precheck(parent_rel, disabled_nodes, startup_cost, total_cost,
 						   pathkeys, NULL))
 		return false;
 
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 1035e6560c..e6081de370 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -32,7 +32,7 @@ extern bool add_path_precheck(RelOptInfo *parent_rel, int disabled_nodes,
 							  List *pathkeys, Relids required_outer);
 extern void add_partial_path(RelOptInfo *parent_rel, Path *new_path);
 extern bool add_partial_path_precheck(RelOptInfo *parent_rel,
-									  int disabled_nodes,
+									  int disabled_nodes, Cost startup_cost,
 									  Cost total_cost, List *pathkeys);
 
 extern Path *create_seqscan_path(PlannerInfo *root, RelOptInfo *rel,
-- 
2.25.1

