From 111e9bdc610f0e79da8c78c34b91dd3d2fadf1a1 Mon Sep 17 00:00:00 2001
From: Robert Haas <rhaas@postgresql.org>
Date: Tue, 16 Sep 2025 10:05:02 -0400
Subject: [PATCH v6 3/3] Ensure that all joinrel RTIs are discoverable from
 join plans.

Every RTI associated with a joinrel appears either on the outer or inner
side of the joinrel or is an outer join completed by the joinrel.
Previously, the RTIs of outer joins completed by the joinrel were not
stored anywhere; now, we store them in a new 'ojrelids' field of the
Join itself, for the benefit of code that wants to study Plan trees.

We also now assert when constructing a Join plan that we can find all of
the joinrel's RTIs and no others in the plan tree -- either associated
with a node below the outer or inner side of the join, or in the
'ojrelids' field of the Join itself. Any RTIs appearing in this last
place must be of type RTE_JOIN; scanned relations need to be associated
with underlying scan nodes.

All of this is intended as infrastructure to make it possible to
reliably determine the chosen join order from the final plan, although
it's not sufficient for that goal by itself, due to further problems
created by setrefs-time processing.

Note that this depends on the earlier commit to add a relids field to
Result nodes; without that change, a join tree involving two or more
Result nodes would be fundamentally ambiguous (and even a join tree
involving one could only be interpreted by guessing at its origin).
---
 .../expected/pg_overexplain.out               |  40 +++-
 contrib/pg_overexplain/pg_overexplain.c       |  21 ++
 contrib/pg_overexplain/sql/pg_overexplain.sql |  14 +-
 src/backend/optimizer/plan/createplan.c       | 200 +++++++++++++++++-
 src/include/nodes/plannodes.h                 |   2 +
 5 files changed, 272 insertions(+), 5 deletions(-)

diff --git a/contrib/pg_overexplain/expected/pg_overexplain.out b/contrib/pg_overexplain/expected/pg_overexplain.out
index 55d34666d87..57c997e8b32 100644
--- a/contrib/pg_overexplain/expected/pg_overexplain.out
+++ b/contrib/pg_overexplain/expected/pg_overexplain.out
@@ -377,14 +377,15 @@ $$);
 (15 rows)
 
 -- Create an index, and then attempt to force a nested loop with inner index
--- scan so that we can see parameter-related information. Also, let's try
--- actually running the query, but try to suppress potentially variable output.
+-- scan so that we can see parameter-related information.
 CREATE INDEX ON vegetables (id);
 ANALYZE vegetables;
 SET enable_hashjoin = false;
 SET enable_material = false;
 SET enable_mergejoin = false;
 SET enable_seqscan = false;
+-- Let's try actually running the query, but try to suppress potentially
+-- variable output.
 SELECT explain_filter($$
 EXPLAIN (BUFFERS OFF, COSTS OFF, SUMMARY OFF, TIMING OFF, ANALYZE, DEBUG)
 SELECT * FROM vegetables v1, vegetables v2 WHERE v1.id = v2.id;
@@ -440,6 +441,41 @@ $$);
    Parse Location: 0 to end
 (47 rows)
 
+-- Test the RANGE_TABLE otion with a case that involves an outer join.
+SELECT explain_filter($$
+EXPLAIN (RANGE_TABLE, COSTS OFF)
+SELECT * FROM daucus d LEFT JOIN brassica b ON d.id = b.id;
+$$);
+                     explain_filter                      
+---------------------------------------------------------
+ Nested Loop Left Join
+   Outer Join RTIs: 3
+   ->  Index Scan using daucus_id_idx on daucus d
+         Scan RTI: 1
+   ->  Index Scan using brassica_id_idx on brassica b
+         Index Cond: (id = d.id)
+         Scan RTI: 2
+ RTI 1 (relation, in-from-clause):
+   Alias: d ()
+   Eref: d (id, name, genus)
+   Relation: daucus
+   Relation Kind: relation
+   Relation Lock Mode: AccessShareLock
+   Permission Info Index: 1
+ RTI 2 (relation, in-from-clause):
+   Alias: b ()
+   Eref: b (id, name, genus)
+   Relation: brassica
+   Relation Kind: relation
+   Relation Lock Mode: AccessShareLock
+   Permission Info Index: 2
+ RTI 3 (join, in-from-clause):
+   Eref: unnamed_join (id, name, genus, id, name, genus)
+   Join Type: Left
+ Unprunable RTIs: 1 2
+(25 rows)
+
+-- Restore default settings.
 RESET enable_hashjoin;
 RESET enable_material;
 RESET enable_mergejoin;
diff --git a/contrib/pg_overexplain/pg_overexplain.c b/contrib/pg_overexplain/pg_overexplain.c
index bd70b6d9d5e..92cfd8af2eb 100644
--- a/contrib/pg_overexplain/pg_overexplain.c
+++ b/contrib/pg_overexplain/pg_overexplain.c
@@ -248,6 +248,27 @@ overexplain_per_node_hook(PlanState *planstate, List *ancestors,
 					overexplain_bitmapset("RTIs",
 										  ((Result *) plan)->relids,
 										  es);
+				break;
+
+			case T_MergeJoin:
+			case T_NestLoop:
+			case T_HashJoin:
+				{
+					Join	   *join = (Join *) plan;
+
+					/*
+					 * 'ojrelids' is only meaningful for non-inner joins, but
+					 * if it somehow ends up set for an inner join, print it
+					 * anyway.
+					 */
+					if (join->jointype != JOIN_INNER ||
+						join->ojrelids != NULL)
+						overexplain_bitmapset("Outer Join RTIs",
+											  join->ojrelids,
+											  es);
+					break;
+				}
+
 			default:
 				break;
 		}
diff --git a/contrib/pg_overexplain/sql/pg_overexplain.sql b/contrib/pg_overexplain/sql/pg_overexplain.sql
index 42e275ac2f9..53aa9ff788e 100644
--- a/contrib/pg_overexplain/sql/pg_overexplain.sql
+++ b/contrib/pg_overexplain/sql/pg_overexplain.sql
@@ -86,18 +86,28 @@ INSERT INTO vegetables (name, genus)
 $$);
 
 -- Create an index, and then attempt to force a nested loop with inner index
--- scan so that we can see parameter-related information. Also, let's try
--- actually running the query, but try to suppress potentially variable output.
+-- scan so that we can see parameter-related information.
 CREATE INDEX ON vegetables (id);
 ANALYZE vegetables;
 SET enable_hashjoin = false;
 SET enable_material = false;
 SET enable_mergejoin = false;
 SET enable_seqscan = false;
+
+-- Let's try actually running the query, but try to suppress potentially
+-- variable output.
 SELECT explain_filter($$
 EXPLAIN (BUFFERS OFF, COSTS OFF, SUMMARY OFF, TIMING OFF, ANALYZE, DEBUG)
 SELECT * FROM vegetables v1, vegetables v2 WHERE v1.id = v2.id;
 $$);
+
+-- Test the RANGE_TABLE otion with a case that involves an outer join.
+SELECT explain_filter($$
+EXPLAIN (RANGE_TABLE, COSTS OFF)
+SELECT * FROM daucus d LEFT JOIN brassica b ON d.id = b.id;
+$$);
+
+-- Restore default settings.
 RESET enable_hashjoin;
 RESET enable_material;
 RESET enable_mergejoin;
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index c9dba7ff346..6fa782d7c58 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -161,6 +161,7 @@ static CustomScan *create_customscan_plan(PlannerInfo *root,
 static NestLoop *create_nestloop_plan(PlannerInfo *root, NestPath *best_path);
 static MergeJoin *create_mergejoin_plan(PlannerInfo *root, MergePath *best_path);
 static HashJoin *create_hashjoin_plan(PlannerInfo *root, HashPath *best_path);
+static Bitmapset *compute_join_ojrelids(PlannerInfo *root, JoinPath *jpath);
 static Node *replace_nestloop_params(PlannerInfo *root, Node *expr);
 static Node *replace_nestloop_params_mutator(Node *node, PlannerInfo *root);
 static void fix_indexqual_references(PlannerInfo *root, IndexPath *index_path,
@@ -232,6 +233,7 @@ static BitmapOr *make_bitmap_or(List *bitmapplans);
 static NestLoop *make_nestloop(List *tlist,
 							   List *joinclauses, List *otherclauses, List *nestParams,
 							   Plan *lefttree, Plan *righttree,
+							   Bitmapset *ojrelids,
 							   JoinType jointype, bool inner_unique);
 static HashJoin *make_hashjoin(List *tlist,
 							   List *joinclauses, List *otherclauses,
@@ -239,6 +241,7 @@ static HashJoin *make_hashjoin(List *tlist,
 							   List *hashoperators, List *hashcollations,
 							   List *hashkeys,
 							   Plan *lefttree, Plan *righttree,
+							   Bitmapset *ojrelids,
 							   JoinType jointype, bool inner_unique);
 static Hash *make_hash(Plan *lefttree,
 					   List *hashkeys,
@@ -253,6 +256,7 @@ static MergeJoin *make_mergejoin(List *tlist,
 								 bool *mergereversals,
 								 bool *mergenullsfirst,
 								 Plan *lefttree, Plan *righttree,
+								 Bitmapset *ojrelids,
 								 JoinType jointype, bool inner_unique,
 								 bool skip_mark_restore);
 static Sort *make_sort(Plan *lefttree, int numCols,
@@ -320,7 +324,15 @@ static ModifyTable *make_modifytable(PlannerInfo *root, Plan *subplan,
 									 int epqParam);
 static GatherMerge *create_gather_merge_plan(PlannerInfo *root,
 											 GatherMergePath *best_path);
-
+static void assert_join_preserves_rtis(PlannerInfo *root,
+									   RelOptInfo *rel,
+									   Plan *outer_plan,
+									   Plan *inner_plan,
+									   Bitmapset *ojrelids);
+
+#ifdef USE_ASSERT_CHECKING
+static Bitmapset *get_plan_rtis(PlannerInfo *root, Plan *plan);
+#endif
 
 /*
  * create_plan
@@ -4335,11 +4347,16 @@ create_nestloop_plan(PlannerInfo *root,
 							  nestParams,
 							  outer_plan,
 							  inner_plan,
+							  compute_join_ojrelids(root, &best_path->jpath),
 							  best_path->jpath.jointype,
 							  best_path->jpath.inner_unique);
 
 	copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path);
 
+	assert_join_preserves_rtis(root, best_path->jpath.path.parent,
+							   outer_plan, inner_plan,
+							   join_plan->join.ojrelids);
+
 	return join_plan;
 }
 
@@ -4687,6 +4704,7 @@ create_mergejoin_plan(PlannerInfo *root,
 							   mergenullsfirst,
 							   outer_plan,
 							   inner_plan,
+							   compute_join_ojrelids(root, &best_path->jpath),
 							   best_path->jpath.jointype,
 							   best_path->jpath.inner_unique,
 							   best_path->skip_mark_restore);
@@ -4694,6 +4712,10 @@ create_mergejoin_plan(PlannerInfo *root,
 	/* Costs of sort and material steps are included in path cost already */
 	copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path);
 
+	assert_join_preserves_rtis(root, best_path->jpath.path.parent,
+							   outer_plan, inner_plan,
+							   join_plan->join.ojrelids);
+
 	return join_plan;
 }
 
@@ -4862,14 +4884,71 @@ create_hashjoin_plan(PlannerInfo *root,
 							  outer_hashkeys,
 							  outer_plan,
 							  (Plan *) hash_plan,
+							  compute_join_ojrelids(root, &best_path->jpath),
 							  best_path->jpath.jointype,
 							  best_path->jpath.inner_unique);
 
 	copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path);
 
+	assert_join_preserves_rtis(root, best_path->jpath.path.parent,
+							   outer_plan, inner_plan,
+							   join_plan->join.ojrelids);
+
 	return join_plan;
 }
 
+/*
+ * compute_join_ojrelids
+ *	  Determine the set of outer joins completed by this join.
+ *
+ * The result is the joinrel's relids minus those of both input rels; see
+ * add_outer_joins_to_relids for related logic. For joins in syntactic order
+ * that is one RTI for an outer join and the empty set for an inner join.
+ * When outer-join identity 3 rearranges the join order, an outer join isn't
+ * fully calculated until the commuted join has also been computed, so an
+ * upper outer join can complete multiple outer joins and a lower one none.
+ */
+static Bitmapset *
+compute_join_ojrelids(PlannerInfo *root, JoinPath *jpath)
+{
+	Bitmapset  *ojrelids;
+
+	ojrelids = jpath->path.parent->relids;
+	ojrelids = bms_difference(ojrelids, jpath->outerjoinpath->parent->relids);
+	ojrelids = bms_difference(ojrelids, jpath->innerjoinpath->parent->relids);
+
+#ifdef USE_ASSERT_CHECKING
+	switch (jpath->jointype)
+	{
+		case JOIN_INNER:
+			/* Inner joins should never complete outer joins. */
+			Assert(ojrelids == NULL);
+			break;
+		case JOIN_FULL:
+			/* Full joins cannot be commuted, so expect exactly one RTI. */
+			Assert(bms_membership(ojrelids) == BMS_SINGLETON);
+			break;
+		default:
+			/* Other join types can be rearranged; no count is enforced. */
+			break;
+	}
+
+	/* Whatever is left over must refer to RTE_JOIN range table entries. */
+	if (ojrelids != NULL)
+	{
+		int			rti = -1;
+
+		while ((rti = bms_next_member(ojrelids, rti)) >= 0)
+		{
+			RangeTblEntry *rte = planner_rt_fetch(rti, root);
+
+			Assert(rte->rtekind == RTE_JOIN);
+		}
+	}
+#endif
+
+	return ojrelids;
+}
 
 /*****************************************************************************
  *
@@ -5934,6 +6013,7 @@ make_nestloop(List *tlist,
 			  List *nestParams,
 			  Plan *lefttree,
 			  Plan *righttree,
+			  Bitmapset *ojrelids,
 			  JoinType jointype,
 			  bool inner_unique)
 {
@@ -5947,6 +6027,7 @@ make_nestloop(List *tlist,
 	node->join.jointype = jointype;
 	node->join.inner_unique = inner_unique;
 	node->join.joinqual = joinclauses;
+	node->join.ojrelids = ojrelids;
 	node->nestParams = nestParams;
 
 	return node;
@@ -5962,6 +6043,7 @@ make_hashjoin(List *tlist,
 			  List *hashkeys,
 			  Plan *lefttree,
 			  Plan *righttree,
+			  Bitmapset *ojrelids,
 			  JoinType jointype,
 			  bool inner_unique)
 {
@@ -5979,6 +6061,7 @@ make_hashjoin(List *tlist,
 	node->join.jointype = jointype;
 	node->join.inner_unique = inner_unique;
 	node->join.joinqual = joinclauses;
+	node->join.ojrelids = ojrelids;
 
 	return node;
 }
@@ -6017,6 +6100,7 @@ make_mergejoin(List *tlist,
 			   bool *mergenullsfirst,
 			   Plan *lefttree,
 			   Plan *righttree,
+			   Bitmapset *ojrelids,
 			   JoinType jointype,
 			   bool inner_unique,
 			   bool skip_mark_restore)
@@ -6037,6 +6121,7 @@ make_mergejoin(List *tlist,
 	node->join.jointype = jointype;
 	node->join.inner_unique = inner_unique;
 	node->join.joinqual = joinclauses;
+	node->join.ojrelids = ojrelids;
 
 	return node;
 }
@@ -7316,3 +7401,116 @@ is_projection_capable_plan(Plan *plan)
 	}
 	return true;
 }
+
+/*
+ * Assert that the outer plan's RTIs, the inner plan's RTIs, and 'ojrelids'
+ * are disjoint and together equal rel->relids; no-op without assertions.
+ */
+static void
+assert_join_preserves_rtis(PlannerInfo *root, RelOptInfo *rel,
+						   Plan *outer_plan, Plan *inner_plan,
+						   Bitmapset *ojrelids)
+{
+#ifdef USE_ASSERT_CHECKING
+	Bitmapset  *outerrelids;
+	Bitmapset  *innerrelids;
+	Bitmapset  *joinrelids;
+
+	/* Collect the RTIs reported by get_plan_rtis for each input plan. */
+	outerrelids = get_plan_rtis(root, outer_plan);
+	innerrelids = get_plan_rtis(root, inner_plan);
+
+	/* No RTI may appear in more than one of the three sets. */
+	Assert(!bms_overlap(innerrelids, outerrelids));
+	Assert(!bms_overlap(outerrelids, ojrelids));
+	Assert(!bms_overlap(innerrelids, ojrelids));
+
+	/* Combine all three sets and check they match the joinrel exactly. */
+	joinrelids = bms_union(ojrelids, bms_union(innerrelids, outerrelids));
+	Assert(bms_equal(joinrelids, rel->relids));
+#endif
+}
+
+#ifdef USE_ASSERT_CHECKING
+/*
+ * Get the set of range table indexes for a scan or join node, or any executor
+ * node that could appear beneath a scan or join node.
+ *
+ * We're only interested in RTIs from within the same subquery, so we do not
+ * look through T_SubqueryScan here. Unrecognized node types yield NULL.
+ *
+ * When adding new cases to this function, be sure to also update
+ * ExplainPreScanNode, ExplainNode, and overexplain_per_node_hook as
+ * appropriate.
+ */
+static Bitmapset *
+get_plan_rtis(PlannerInfo *root, Plan *plan)
+{
+	switch (nodeTag(plan))
+	{
+		case T_SeqScan:			/* scans: just the node's scanrelid */
+		case T_SampleScan:
+		case T_IndexScan:
+		case T_IndexOnlyScan:
+		case T_BitmapHeapScan:
+		case T_TidScan:
+		case T_TidRangeScan:
+		case T_SubqueryScan:
+		case T_FunctionScan:
+		case T_TableFuncScan:
+		case T_ValuesScan:
+		case T_CteScan:
+		case T_NamedTuplestoreScan:
+		case T_WorkTableScan:
+			return bms_make_singleton(((Scan *) plan)->scanrelid);
+			break;
+		case T_ForeignScan:		/* node carries its own RTI set */
+			return ((ForeignScan *) plan)->fs_relids;
+			break;
+		case T_CustomScan:		/* node carries its own RTI set */
+			return ((CustomScan *) plan)->custom_relids;
+			break;
+		case T_Append:			/* node carries its own RTI set */
+			return ((Append *) plan)->apprelids;
+			break;
+		case T_MergeAppend:		/* node carries its own RTI set */
+			return ((MergeAppend *) plan)->apprelids;
+			break;
+		case T_Result:			/* use the child if any, else own relids */
+			if (plan->lefttree)
+				return get_plan_rtis(root, plan->lefttree);
+			else
+				return ((Result *) plan)->relids;
+			break;
+		case T_HashJoin:		/* both inputs plus this join's ojrelids */
+		case T_MergeJoin:
+		case T_NestLoop:
+			{
+				Bitmapset  *outerrelids;
+				Bitmapset  *innerrelids;
+
+				outerrelids = get_plan_rtis(root, plan->lefttree);
+				innerrelids = get_plan_rtis(root, plan->righttree);
+
+				return bms_union(bms_union(outerrelids, innerrelids),
+								 ((Join *) plan)->ojrelids);
+				break;
+			}
+		case T_Sort:			/* these defer to their lefttree child */
+		case T_IncrementalSort:
+		case T_Unique:
+		case T_Agg:
+		case T_Hash:
+		case T_Gather:
+		case T_GatherMerge:
+		case T_Material:
+		case T_Memoize:
+			return get_plan_rtis(root, plan->lefttree);
+			break;
+		default:				/* anything else contributes no RTIs */
+			break;
+	}
+
+	return NULL;
+}
+#endif
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 3d196f5078e..16f3f5a7925 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -938,6 +938,7 @@ typedef struct CustomScan
  * inner_unique each outer tuple can match to no more than one inner tuple
  * joinqual:	qual conditions that came from JOIN/ON or JOIN/USING
  *				(plan.qual contains conditions that came from WHERE)
+ * ojrelids:    outer joins completed at this level
  *
  * When jointype is INNER, joinqual and plan.qual are semantically
  * interchangeable.  For OUTER jointypes, the two are *not* interchangeable;
@@ -962,6 +963,7 @@ typedef struct Join
 	bool		inner_unique;
 	/* JOIN quals (in addition to plan.qual) */
 	List	   *joinqual;
+	Bitmapset  *ojrelids;
 } Join;
 
 /* ----------------
-- 
2.39.5 (Apple Git-154)

