From 1c218a235ac8dc800150a11c176cc924fb53f6de Mon Sep 17 00:00:00 2001
From: Richard Guo <guofenglinux@gmail.com>
Date: Wed, 24 May 2023 11:06:14 +0800
Subject: [PATCH v1] WIP: Fix the outer-join removal problem

In remove_rel_from_query we are using RINFO_IS_PUSHED_DOWN to tell if a
joinqual should be removed or kept once we'd decided to remove that
join.  This is provably not always correct.

Ideally we should only remove joinquals that syntactically belonged to
the outer join being discarded and put other joinquals back.  This patch
does that by recording the rinfo_serial of quals syntactically belonging
to an outer join and deciding whether to remove a qual according to the
serial numbers.
---
 src/backend/optimizer/path/costsize.c     |  2 ++
 src/backend/optimizer/path/joinrels.c     |  1 +
 src/backend/optimizer/plan/analyzejoins.c |  9 +++++----
 src/backend/optimizer/plan/initsplan.c    | 10 ++++++++++
 src/backend/optimizer/util/orclauses.c    |  1 +
 src/include/nodes/pathnodes.h             |  2 ++
 6 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index ef475d95a1..1df570e987 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -4797,6 +4797,7 @@ compute_semi_anti_join_factors(PlannerInfo *root,
 	norm_sjinfo.semi_can_hash = false;
 	norm_sjinfo.semi_operators = NIL;
 	norm_sjinfo.semi_rhs_exprs = NIL;
+	norm_sjinfo.syn_serials = NULL;
 
 	nselec = clauselist_selectivity(root,
 									joinquals,
@@ -4966,6 +4967,7 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals)
 	sjinfo.semi_can_hash = false;
 	sjinfo.semi_operators = NIL;
 	sjinfo.semi_rhs_exprs = NIL;
+	sjinfo.syn_serials = NULL;
 
 	/* Get the approximate selectivity */
 	foreach(l, quals)
diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c
index 2feab2184f..2316d0b4be 100644
--- a/src/backend/optimizer/path/joinrels.c
+++ b/src/backend/optimizer/path/joinrels.c
@@ -752,6 +752,7 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
 		sjinfo->semi_can_hash = false;
 		sjinfo->semi_operators = NIL;
 		sjinfo->semi_rhs_exprs = NIL;
+		sjinfo->syn_serials = NULL;
 	}
 
 	/*
diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c
index a61e35f92d..564eba6877 100644
--- a/src/backend/optimizer/plan/analyzejoins.c
+++ b/src/backend/optimizer/plan/analyzejoins.c
@@ -36,7 +36,7 @@
 /* local functions */
 static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
 static void remove_rel_from_query(PlannerInfo *root, int relid, int ojrelid,
-								  Relids joinrelids);
+								  Bitmapset *syn_serials, Relids joinrelids);
 static void remove_rel_from_restrictinfo(RestrictInfo *rinfo,
 										 int relid, int ojrelid);
 static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
@@ -93,7 +93,8 @@ restart:
 		if (sjinfo->ojrelid != 0)
 			joinrelids = bms_add_member(joinrelids, sjinfo->ojrelid);
 
-		remove_rel_from_query(root, innerrelid, sjinfo->ojrelid, joinrelids);
+		remove_rel_from_query(root, innerrelid, sjinfo->ojrelid,
+							  sjinfo->syn_serials, joinrelids);
 
 		/* We verify that exactly one reference gets removed from joinlist */
 		nremoved = 0;
@@ -332,7 +333,7 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
  */
 static void
 remove_rel_from_query(PlannerInfo *root, int relid, int ojrelid,
-					  Relids joinrelids)
+					  Bitmapset *syn_serials, Relids joinrelids)
 {
 	RelOptInfo *rel = find_base_rel(root, relid);
 	List	   *joininfos;
@@ -467,7 +468,7 @@ remove_rel_from_query(PlannerInfo *root, int relid, int ojrelid,
 
 		remove_join_clause_from_rels(root, rinfo, rinfo->required_relids);
 
-		if (RINFO_IS_PUSHED_DOWN(rinfo, joinrelids))
+		if (!bms_is_member(rinfo->rinfo_serial, syn_serials))
 		{
 			/*
 			 * There might be references to relid or ojrelid in the
diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c
index 5cbb7b9a86..bce594df38 100644
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -1402,6 +1402,7 @@ make_outerjoininfo(PlannerInfo *root,
 	sjinfo->commute_above_r = NULL;
 	sjinfo->commute_below_l = NULL;
 	sjinfo->commute_below_r = NULL;
+	sjinfo->syn_serials = NULL;
 
 	compute_semijoin_info(root, sjinfo, clause);
 
@@ -2386,6 +2387,15 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
 	restrictinfo->has_clone = has_clone;
 	restrictinfo->is_clone = is_clone;
 
+	/*
+	 * Record the RestrictInfo's rinfo_serial in the SpecialJoinInfo of the
+	 * join.  It is needed to tell whether this RestrictInfo should be put back
+	 * once we've decided to remove this join.
+	 */
+	if (sjinfo)
+		sjinfo->syn_serials =
+			bms_add_member(sjinfo->syn_serials, restrictinfo->rinfo_serial);
+
 	/*
 	 * If it's a join clause, add vars used in the clause to targetlists of
 	 * their relations, so that they will be emitted by the plan nodes that
diff --git a/src/backend/optimizer/util/orclauses.c b/src/backend/optimizer/util/orclauses.c
index ca8b5ff92f..4f3bda4540 100644
--- a/src/backend/optimizer/util/orclauses.c
+++ b/src/backend/optimizer/util/orclauses.c
@@ -342,6 +342,7 @@ consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel,
 		sjinfo.semi_can_hash = false;
 		sjinfo.semi_operators = NIL;
 		sjinfo.semi_rhs_exprs = NIL;
+		sjinfo.syn_serials = NULL;
 
 		/* Compute inner-join size */
 		orig_selec = clause_selectivity(root, (Node *) join_or_rinfo,
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 23dd671bf4..27233d7591 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -2841,6 +2841,8 @@ struct SpecialJoinInfo
 	bool		semi_can_hash;	/* true if semi_operators are all hash */
 	List	   *semi_operators; /* OIDs of equality join operators */
 	List	   *semi_rhs_exprs; /* righthand-side expressions of these ops */
+	Bitmapset  *syn_serials;	/* set of rinfo_serial for quals syntactically
+								   from this join level */
 };
 
 /*
-- 
2.31.0

