From f1ee02c90f42dfc8a3f49d43ad2c60e6c0030e96 Mon Sep 17 00:00:00 2001
From: Tender Wang <tndrwang@gmail.com>
Date: Thu, 1 Jan 2026 13:44:18 +0800
Subject: [PATCH v4] Reduce JOIN_LEFT TO JOIN_ANTI.

Since we have collected notnull constraints, so we can detect an anti-join,
if we were to check whether Vars coming from the RHS must be non-null because of table constraints.
For example,
SELECT ... FROM a LEFT JOIN b ON (a.x = b.y) WHERE b.z IS NULL;
If the b.z has non-null table constraint, we can reduce JOIN_LEFT to JOIN_ANTI.
---
 src/backend/optimizer/prep/prepjointree.c | 90 ++++++++++++++++++-----
 src/test/regress/expected/join.out        | 40 ++++++++++
 src/test/regress/sql/join.sql             | 21 ++++++
 3 files changed, 134 insertions(+), 17 deletions(-)

diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c
index c80bfc88d82..d444637080a 100644
--- a/src/backend/optimizer/prep/prepjointree.c
+++ b/src/backend/optimizer/prep/prepjointree.c
@@ -177,6 +177,8 @@ static Node *find_jointree_node_for_rel(Node *jtnode, int relid);
 static nullingrel_info *get_nullingrels(Query *parse);
 static void get_nullingrels_recurse(Node *jtnode, Relids upper_nullingrels,
 									nullingrel_info *info);
+static bool have_var_is_notnull(PlannerInfo *root, Relids right_rels,
+								List *forced_null_vars);
 
 
 /*
@@ -3230,6 +3232,57 @@ reduce_outer_joins(PlannerInfo *root)
 	}
 }
 
+/*
+ * have_var_is_notnull
+ * check whether the vars from RHS of join have notnull constraints.
+ */
+static bool
+have_var_is_notnull(PlannerInfo *root, Relids right_rels, List *forced_null_vars)
+{
+	ListCell *lc;
+	Bitmapset *cols;
+	int i;
+	int index;
+	RangeTblEntry *rte;
+	Bitmapset *notnullattnums;
+	bool result = false;
+
+	i = 0;
+	foreach(lc, forced_null_vars)
+	{
+		cols = lfirst_node(Bitmapset, lc);
+		if (cols == NULL)
+		{
+			i++;
+			continue;
+		}
+		/* Skip if the var doesn't belong to RHS of join */
+		if (!bms_is_member(i, right_rels))
+		{
+			i++;
+			continue;
+		}
+		rte = rt_fetch(i, root->parse->rtable);
+		notnullattnums = find_relation_notnullatts(root, rte->relid);
+		index = -1;
+		while ((index = bms_next_member(cols, index)) >= 0)
+		{
+			AttrNumber attno = index + FirstLowInvalidHeapAttributeNumber;
+			if (bms_is_member(attno, notnullattnums))
+			{
+				result = true;
+				break;
+			}
+		}
+
+		if (result)
+			break;
+		i++;
+	}
+
+	return result;
+}
+
 /*
  * reduce_outer_joins_pass1 - phase 1 data collection
  *
@@ -3441,29 +3494,32 @@ reduce_outer_joins_pass2(Node *jtnode,
 		/*
 		 * See if we can reduce JOIN_LEFT to JOIN_ANTI.  This is the case if
 		 * the join's own quals are strict for any var that was forced null by
-		 * higher qual levels.  NOTE: there are other ways that we could
-		 * detect an anti-join, in particular if we were to check whether Vars
+		 * higher qual levels.  Since we have collected notnull constraints, so
+		 * we can detect an anti-join, if we were to check whether Vars
 		 * coming from the RHS must be non-null because of table constraints.
-		 * That seems complicated and expensive though (in particular, one
-		 * would have to be wary of lower outer joins). For the moment this
-		 * seems sufficient.
+		 * For example, SELECT ... FROM a LEFT JOIN b ON (a.x = b.y) WHERE b.z IS NULL;
+		 * if the b.z had non-null table constraint, we can reduce JOIN_LEFT to JOIN_ANTI.
 		 */
 		if (jointype == JOIN_LEFT)
 		{
-			List	   *nonnullable_vars;
-			Bitmapset  *overlap;
+			if(forced_null_vars != NIL) {
+				List	   *nonnullable_vars;
+				Bitmapset  *overlap;
 
-			/* Find Vars in j->quals that must be non-null in joined rows */
-			nonnullable_vars = find_nonnullable_vars(j->quals);
+				/* Find Vars in j->quals that must be non-null in joined rows */
+				nonnullable_vars = find_nonnullable_vars(j->quals);
 
-			/*
-			 * It's not sufficient to check whether nonnullable_vars and
-			 * forced_null_vars overlap: we need to know if the overlap
-			 * includes any RHS variables.
-			 */
-			overlap = mbms_overlap_sets(nonnullable_vars, forced_null_vars);
-			if (bms_overlap(overlap, right_state->relids))
-				jointype = JOIN_ANTI;
+				/*
+				 * It's not sufficient to check whether nonnullable_vars and
+				 * forced_null_vars overlap: we need to know if the overlap
+				 * includes any RHS variables.
+				 */
+				overlap = mbms_overlap_sets(nonnullable_vars, forced_null_vars);
+				if (bms_overlap(overlap, right_state->relids))
+					jointype = JOIN_ANTI;
+				else if (have_var_is_notnull(root, right_state->relids, forced_null_vars))
+					jointype = JOIN_ANTI;
+			}
 		}
 
 		/*
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index d05a0ca0373..0d138d3d3c4 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -3194,6 +3194,15 @@ reset enable_nestloop;
 --
 -- basic semijoin and antijoin recognition tests
 --
+CREATE TABLE aj1 (id integer primary key);
+CREATE TABLE aj2 (id integer primary key, aj1_id integer);
+CREATE TABLE aj3 (id integer primary key, aj2_id integer);
+INSERT INTO aj1 VALUES (1);
+INSERT INTO aj1 VALUES (2);
+INSERT INTO aj2 VALUES (1, 1);
+INSERT INTO aj2 VALUES (2, 1);
+INSERT INTO aj3 VALUES (1, 2);
+INSERT INTO aj3 VALUES (2, 2);
 explain (costs off)
 select a.* from tenk1 a
 where unique1 in (select unique2 from tenk1 b);
@@ -3254,6 +3263,37 @@ where b.unique2 is null;
          ->  Index Only Scan using tenk1_unique2 on tenk1 b
 (5 rows)
 
+explain (costs off)
+select aj1.* from aj1 left join aj2 on aj1.id = aj2.aj1_id
+where aj2.id is null;
+             QUERY PLAN             
+------------------------------------
+ Hash Right Anti Join
+   Hash Cond: (aj2.aj1_id = aj1.id)
+   ->  Seq Scan on aj2
+   ->  Hash
+         ->  Seq Scan on aj1
+(5 rows)
+
+explain (costs off)
+select aj1.* from aj1
+  left join (select aj2.id aj2_id, aj2.aj1_id, aj3.id aj3_id
+      from aj2 left join aj3 on aj2.id = aj3.aj2_id) x
+        on aj1.id = x.aj1_id
+where x.aj2_id is null;
+                QUERY PLAN                
+------------------------------------------
+ Hash Right Anti Join
+   Hash Cond: (aj2.aj1_id = aj1.id)
+   ->  Hash Right Join
+         Hash Cond: (aj3.aj2_id = aj2.id)
+         ->  Seq Scan on aj3
+         ->  Hash
+               ->  Seq Scan on aj2
+   ->  Hash
+         ->  Seq Scan on aj1
+(9 rows)
+
 -- check that we avoid de-duplicating columns redundantly
 set enable_memoize to off;
 explain (costs off)
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index b91fb7574df..c21312191fb 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -837,6 +837,16 @@ reset enable_nestloop;
 --
 -- basic semijoin and antijoin recognition tests
 --
+CREATE TABLE aj1 (id integer primary key);
+CREATE TABLE aj2 (id integer primary key, aj1_id integer);
+CREATE TABLE aj3 (id integer primary key, aj2_id integer);
+
+INSERT INTO aj1 VALUES (1);
+INSERT INTO aj1 VALUES (2);
+INSERT INTO aj2 VALUES (1, 1);
+INSERT INTO aj2 VALUES (2, 1);
+INSERT INTO aj3 VALUES (1, 2);
+INSERT INTO aj3 VALUES (2, 2);
 
 explain (costs off)
 select a.* from tenk1 a
@@ -859,6 +869,17 @@ explain (costs off)
 select a.* from tenk1 a left join tenk1 b on a.unique1 = b.unique2
 where b.unique2 is null;
 
+explain (costs off)
+select aj1.* from aj1 left join aj2 on aj1.id = aj2.aj1_id
+where aj2.id is null;
+
+explain (costs off)
+select aj1.* from aj1
+  left join (select aj2.id aj2_id, aj2.aj1_id, aj3.id aj3_id
+      from aj2 left join aj3 on aj2.id = aj3.aj2_id) x
+        on aj1.id = x.aj1_id
+where x.aj2_id is null;
+
 -- check that we avoid de-duplicating columns redundantly
 set enable_memoize to off;
 explain (costs off)
-- 
2.34.1

