From 19d091c74a8a37f6572db3e13f04b222b284e906 Mon Sep 17 00:00:00 2001
From: Richard Guo <guofenglinux@gmail.com>
Date: Tue, 26 Aug 2025 12:50:27 +0900
Subject: [PATCH v4] fix variable not found in subplan target lists

---
 src/backend/optimizer/path/equivclass.c | 87 +++++++++++++++++++++++++
 src/backend/optimizer/plan/planner.c    | 49 +++++++++++---
 src/include/optimizer/paths.h           |  2 +
 src/test/regress/expected/join.out      | 62 ++++++++++++++++++
 src/test/regress/sql/join.sql           | 26 ++++++++
 5 files changed, 218 insertions(+), 8 deletions(-)

diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index 441f12f6c50..bebbc914618 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -2690,6 +2690,93 @@ exprs_known_equal(PlannerInfo *root, Node *item1, Node *item2, Oid opfamily)
 	return false;
 }
 
+/*
+ * expr_known_equal_to_constant
+ *	  Detect whether the given expression is known equal to a constant due to
+ *	  equivalence relationships.
+ *
+ * "sortop" is the OID of the ordering operator.
+ */
+bool
+expr_known_equal_to_constant(PlannerInfo *root, Expr *expr, Oid sortop)
+{
+	Oid			opfamily,
+				opcintype,
+				collation;
+	CompareType cmptype;
+	Oid			equality_op;
+	List	   *opfamilies;
+	ListCell   *lc;
+
+	/* Find the operator in pg_amop --- failure shouldn't happen */
+	if (!get_ordering_op_properties(sortop,
+									&opfamily, &opcintype, &cmptype))
+		elog(ERROR, "operator %u is not a valid ordering operator",
+			 sortop);
+
+	collation = exprCollation((Node *) expr);
+
+	/*
+	 * EquivalenceClasses need to contain opfamily lists based on the family
+	 * membership of mergejoinable equality operators, which could belong to
+	 * more than one opfamily.  So we have to look up the opfamily's equality
+	 * operator and get its membership.
+	 */
+	equality_op = get_opfamily_member_for_cmptype(opfamily,
+												  opcintype,
+												  opcintype,
+												  COMPARE_EQ);
+	if (!OidIsValid(equality_op))	/* shouldn't happen */
+		elog(ERROR, "missing operator %d(%u,%u) in opfamily %u",
+			 COMPARE_EQ, opcintype, opcintype, opfamily);
+	opfamilies = get_mergejoin_opfamilies(equality_op);
+	if (!opfamilies)			/* certainly should find some */
+		elog(ERROR, "could not find opfamilies for equality operator %u",
+			 equality_op);
+
+	/*
+	 * Ensure the expression exposes the correct type and collation.
+	 */
+	expr = canonicalize_ec_expression(expr, opcintype, collation);
+
+	/*
+	 * Scan through the existing EquivalenceClasses for a match
+	 */
+	foreach(lc, root->eq_classes)
+	{
+		EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc);
+		EquivalenceMemberIterator it;
+		EquivalenceMember *cur_em;
+
+		/* Ignore EC unless it contains pseudoconstants */
+		if (!cur_ec->ec_has_const)
+			continue;
+
+		/* Never match to a volatile EC */
+		if (cur_ec->ec_has_volatile)
+			continue;
+
+		if (collation != cur_ec->ec_collation)
+			continue;
+		if (!equal(opfamilies, cur_ec->ec_opfamilies))
+			continue;
+
+		setup_eclass_member_iterator(&it, cur_ec, NULL);
+		while ((cur_em = eclass_member_iterator_next(&it)) != NULL)
+		{
+			/* Ignore child members */
+			if (cur_em->em_is_child)
+				continue;
+
+			if (opcintype == cur_em->em_datatype &&
+				equal(expr, cur_em->em_expr))
+				return true;	/* Match! */
+		}
+	}
+
+	return false;
+}
+
 
 /*
  * match_eclasses_to_foreign_key_col
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 65f17101591..1ac9cc869f8 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -8284,8 +8284,8 @@ generate_setop_child_grouplist(SetOperationStmt *op, List *targetlist)
  *    the needs of the semijoin represented by sjinfo.  If it is not possible
  *    to identify how to make the data unique, NULL is returned.
  *
- * If used at all, this is likely to be called repeatedly on the same rel;
- * So we cache the result.
+ * If used at all, this is likely to be called repeatedly on the same rel,
+ * so we cache the result.
  */
 RelOptInfo *
 create_unique_paths(PlannerInfo *root, RelOptInfo *rel, SpecialJoinInfo *sjinfo)
@@ -8294,6 +8294,10 @@ create_unique_paths(PlannerInfo *root, RelOptInfo *rel, SpecialJoinInfo *sjinfo)
 	List	   *sortPathkeys = NIL;
 	List	   *groupClause = NIL;
 	MemoryContext oldcontext;
+	List	   *semi_rhs_exprs = NIL;
+	List	   *semi_operators = NIL;
+	ListCell   *lc1;
+	ListCell   *lc2;
 
 	/* Caller made a mistake if SpecialJoinInfo is the wrong one */
 	Assert(sjinfo->jointype == JOIN_SEMI);
@@ -8307,6 +8311,39 @@ create_unique_paths(PlannerInfo *root, RelOptInfo *rel, SpecialJoinInfo *sjinfo)
 	if (!(sjinfo->semi_can_btree || sjinfo->semi_can_hash))
 		return NULL;
 
+	/*
+	 * To complicate matters, some of the values to be unique-ified may be
+	 * known redundant by the EquivalenceClass machinery (e.g., because they
+	 * have been equated to constants).  There is no need to compare such
+	 * values during unique-ification, and indeed we had better not try
+	 * because the Vars involved may not have propagated as high as the
+	 * semijoin's level.
+	 */
+	forboth(lc1, sjinfo->semi_rhs_exprs, lc2, sjinfo->semi_operators)
+	{
+		Expr	   *uniqexpr = lfirst(lc1);
+		Oid			in_oper = lfirst_oid(lc2);
+		Oid			sortop;
+
+		sortop = get_ordering_op_for_equality_op(in_oper, false);
+
+		if (OidIsValid(sortop))
+		{
+			if (expr_known_equal_to_constant(root, uniqexpr, sortop))
+				continue;
+		}
+		else if (sjinfo->semi_can_btree)	/* shouldn't happen */
+			elog(ERROR, "could not find ordering operator for equality operator %u",
+				 in_oper);
+
+		semi_rhs_exprs = lappend(semi_rhs_exprs, uniqexpr);
+		semi_operators = lappend_oid(semi_operators, in_oper);
+	}
+
+	/* If there are no columns left to unique-ify, return NULL. */
+	if (semi_rhs_exprs == NIL)
+		return NULL;
+
 	/*
 	 * When called during GEQO join planning, we are in a short-lived memory
 	 * context.  We must make sure that the unique rel and any subsidiary data
@@ -8367,8 +8404,6 @@ create_unique_paths(PlannerInfo *root, RelOptInfo *rel, SpecialJoinInfo *sjinfo)
 		List	   *newtlist;
 		int			nextresno;
 		List	   *sortList = NIL;
-		ListCell   *lc1;
-		ListCell   *lc2;
 
 		/*
 		 * The values we are supposed to unique-ify may be expressions in the
@@ -8382,7 +8417,7 @@ create_unique_paths(PlannerInfo *root, RelOptInfo *rel, SpecialJoinInfo *sjinfo)
 		newtlist = make_tlist_from_pathtarget(rel->reltarget);
 		nextresno = list_length(newtlist) + 1;
 
-		forboth(lc1, sjinfo->semi_rhs_exprs, lc2, sjinfo->semi_operators)
+		forboth(lc1, semi_rhs_exprs, lc2, semi_operators)
 		{
 			Expr	   *uniqexpr = lfirst(lc1);
 			Oid			in_oper = lfirst_oid(lc2);
@@ -8407,9 +8442,7 @@ create_unique_paths(PlannerInfo *root, RelOptInfo *rel, SpecialJoinInfo *sjinfo)
 				SortGroupClause *sortcl;
 
 				sortop = get_ordering_op_for_equality_op(in_oper, false);
-				if (!OidIsValid(sortop))	/* shouldn't happen */
-					elog(ERROR, "could not find ordering operator for equality operator %u",
-						 in_oper);
+				Assert(OidIsValid(sortop));
 
 				/*
 				 * The Unique node will need equality operators.  Normally
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index cbade77b717..db47bb88eb1 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -159,6 +159,8 @@ extern List *generate_join_implied_equalities_for_ecs(PlannerInfo *root,
 													  RelOptInfo *inner_rel);
 extern bool exprs_known_equal(PlannerInfo *root, Node *item1, Node *item2,
 							  Oid opfamily);
+extern bool expr_known_equal_to_constant(PlannerInfo *root, Expr *expr,
+										 Oid sortop);
 extern EquivalenceClass *match_eclasses_to_foreign_key_col(PlannerInfo *root,
 														   ForeignKeyOptInfo *fkinfo,
 														   int colno);
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 98b05c94a11..054bcb20596 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -6500,6 +6500,68 @@ where t1.a = s.c;
 ----------
 (0 rows)
 
+rollback;
+-- check handling of semijoins after join removal: we must suppress
+-- unique-ification of known-constant values
+begin;
+create temp table t (a int unique, b int);
+insert into t values (1, 2);
+explain (verbose, costs off)
+select t1.a from t t1
+  left join t t2 on t1.a = t2.a
+       join t t3 on true
+where exists (select 1 from t t4
+                join t t5 on t4.b = t5.b
+                join t t6 on t5.b = t6.b
+              where t1.a = t4.a and t3.a = t5.a and t4.a = 1);
+                                     QUERY PLAN                                     
+------------------------------------------------------------------------------------
+ Nested Loop
+   Output: t1.a
+   Inner Unique: true
+   ->  Nested Loop
+         Output: t1.a, t5.a
+         ->  Index Only Scan using t_a_key on pg_temp.t t1
+               Output: t1.a
+               Index Cond: (t1.a = 1)
+         ->  HashAggregate
+               Output: t5.a
+               Group Key: t5.a
+               ->  Hash Join
+                     Output: t5.a
+                     Hash Cond: (t6.b = t4.b)
+                     ->  Seq Scan on pg_temp.t t6
+                           Output: t6.a, t6.b
+                     ->  Hash
+                           Output: t4.b, t5.b, t5.a
+                           ->  Hash Join
+                                 Output: t4.b, t5.b, t5.a
+                                 Inner Unique: true
+                                 Hash Cond: (t5.b = t4.b)
+                                 ->  Seq Scan on pg_temp.t t5
+                                       Output: t5.a, t5.b
+                                 ->  Hash
+                                       Output: t4.b, t4.a
+                                       ->  Index Scan using t_a_key on pg_temp.t t4
+                                             Output: t4.b, t4.a
+                                             Index Cond: (t4.a = 1)
+   ->  Index Only Scan using t_a_key on pg_temp.t t3
+         Output: t3.a
+         Index Cond: (t3.a = t5.a)
+(32 rows)
+
+select t1.a from t t1
+  left join t t2 on t1.a = t2.a
+       join t t3 on true
+where exists (select 1 from t t4
+                join t t5 on t4.b = t5.b
+                join t t6 on t5.b = t6.b
+              where t1.a = t4.a and t3.a = t5.a and t4.a = 1);
+ a 
+---
+ 1
+(1 row)
+
 rollback;
 -- test cases where we can remove a join, but not a PHV computed at it
 begin;
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index 5f0a475894d..817132d90de 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -2420,6 +2420,32 @@ where t1.a = s.c;
 
 rollback;
 
+-- check handling of semijoins after join removal: we must suppress
+-- unique-ification of known-constant values
+begin;
+
+create temp table t (a int unique, b int);
+insert into t values (1, 2);
+
+explain (verbose, costs off)
+select t1.a from t t1
+  left join t t2 on t1.a = t2.a
+       join t t3 on true
+where exists (select 1 from t t4
+                join t t5 on t4.b = t5.b
+                join t t6 on t5.b = t6.b
+              where t1.a = t4.a and t3.a = t5.a and t4.a = 1);
+
+select t1.a from t t1
+  left join t t2 on t1.a = t2.a
+       join t t3 on true
+where exists (select 1 from t t4
+                join t t5 on t4.b = t5.b
+                join t t6 on t5.b = t6.b
+              where t1.a = t4.a and t3.a = t5.a and t4.a = 1);
+
+rollback;
+
 -- test cases where we can remove a join, but not a PHV computed at it
 begin;
 
-- 
2.43.0

