Nicolas Adenis-Lamarre <[email protected]> 于2026年1月2日周五
06:28写道:

> Tender,
> sorry, you patch is perfect. I badly tested it this morning. My fault.
> There is just the case with the constant that is not handled (select *
> from a left join (select 1 const1 from b) x where x.const1 is null)
> but i don't think it worth handling it.
> I added 2 regresssion tests in case you want to add them to your patch.
> The first one is important, the second one is just to confirm that
> subqueries are working.
> I've added 3 new tables because in test_setup.sql and join.sql there
> were no table with not null constraints.
>
> Again, thanks a lot.
>

Thanks for your provided test cases. I had added it to the v3 patch.
Please have a look at the attached patches.
-- 
Thanks,
Tender Wang
From bb76e46a728339adcd19e90144c7ae5fea1bad77 Mon Sep 17 00:00:00 2001
From: Tender Wang <[email protected]>
Date: Thu, 1 Jan 2026 13:44:18 +0800
Subject: [PATCH v3 1/2] Reduce JOIN_LEFT TO JOIN_ANTI.

Since we have collected notnull constraints, so we can detect an anti-join,
if we were to check whether Vars coming from the RHS must be non-null because 
of table constraints.
For example,
SELECT ... FROM a LEFT JOIN b ON (a.x = b.y) WHERE b.z IS NULL;
If the b.z has non-null table constraint, we can reduce JOIN_LEFT to JOIN_ANTI.
---
 src/backend/optimizer/prep/prepjointree.c | 64 +++++++++++++++++++++--
 1 file changed, 59 insertions(+), 5 deletions(-)

diff --git a/src/backend/optimizer/prep/prepjointree.c 
b/src/backend/optimizer/prep/prepjointree.c
index c80bfc88d82..e346847cce0 100644
--- a/src/backend/optimizer/prep/prepjointree.c
+++ b/src/backend/optimizer/prep/prepjointree.c
@@ -177,6 +177,8 @@ static Node *find_jointree_node_for_rel(Node *jtnode, int 
relid);
 static nullingrel_info *get_nullingrels(Query *parse);
 static void get_nullingrels_recurse(Node *jtnode, Relids upper_nullingrels,
                                                                        
nullingrel_info *info);
+static bool have_var_is_notnull(PlannerInfo *root, Relids right_rels,
+                                                               List 
*forced_null_vars);
 
 
 /*
@@ -3230,6 +3232,57 @@ reduce_outer_joins(PlannerInfo *root)
        }
 }
 
+/*
+ * have_var_is_notnull
+ * check whether the vars from RHS of join have notnull constraints.
+ */
+static bool
+have_var_is_notnull(PlannerInfo *root, Relids right_rels, List 
*forced_null_vars)
+{
+       ListCell *lc;
+       Bitmapset *cols;
+       int i;
+       int index;
+       RangeTblEntry *rte;
+       Bitmapset *notnullattnums;
+       bool result = false;
+
+       i = 0;
+       foreach(lc, forced_null_vars)
+       {
+               cols = lfirst_node(Bitmapset, lc);
+               if (cols == NULL)
+               {
+                       i++;
+                       continue;
+               }
+               /* Skip if the var doesn't belong to RHS of join */
+               if (!bms_is_member(i, right_rels))
+               {
+                       i++;
+                       continue;
+               }
+               rte = rt_fetch(i, root->parse->rtable);
+               notnullattnums = find_relation_notnullatts(root, rte->relid);
+               index = -1;
+               while ((index = bms_next_member(cols, index)) >= 0)
+               {
+                       AttrNumber attno = index + 
FirstLowInvalidHeapAttributeNumber;
+                       if (bms_is_member(attno, notnullattnums))
+                       {
+                               result = true;
+                               break;
+                       }
+               }
+
+               if (result)
+                       break;
+               i++;
+       }
+
+       return result;
+}
+
 /*
  * reduce_outer_joins_pass1 - phase 1 data collection
  *
@@ -3441,12 +3494,11 @@ reduce_outer_joins_pass2(Node *jtnode,
                /*
                 * See if we can reduce JOIN_LEFT to JOIN_ANTI.  This is the 
case if
                 * the join's own quals are strict for any var that was forced 
null by
-                * higher qual levels.  NOTE: there are other ways that we could
-                * detect an anti-join, in particular if we were to check 
whether Vars
+                * higher qual levels.  Since we have collected notnull 
constraints, so
+                * we can detect an anti-join, if we were to check whether Vars
                 * coming from the RHS must be non-null because of table 
constraints.
-                * That seems complicated and expensive though (in particular, 
one
-                * would have to be wary of lower outer joins). For the moment 
this
-                * seems sufficient.
+                * For example, SELECT ... FROM a LEFT JOIN b ON (a.x = b.y) 
WHERE b.z IS NULL;
+                * if the b.z had non-null table constraint, we can reduce 
JOIN_LEFT to JOIN_ANTI.
                 */
                if (jointype == JOIN_LEFT)
                {
@@ -3464,6 +3516,8 @@ reduce_outer_joins_pass2(Node *jtnode,
                        overlap = mbms_overlap_sets(nonnullable_vars, 
forced_null_vars);
                        if (bms_overlap(overlap, right_state->relids))
                                jointype = JOIN_ANTI;
+                       else if (have_var_is_notnull(root, right_state->relids, 
forced_null_vars))
+                               jointype = JOIN_ANTI;
                }
 
                /*
-- 
2.34.1

From b81d3993e65e449d2ea74aa1dd7827cd815b7b28 Mon Sep 17 00:00:00 2001
From: Nicolas Adenis-Lamarre <[email protected]>
Date: Thu, 1 Jan 2026 23:09:54 +0100
Subject: [PATCH v3 2/2] Reduce JOIN_LEFT TO JOIN_ANTI : regression tests

---
 src/test/regress/expected/join.out | 40 ++++++++++++++++++++++++++++++
 src/test/regress/sql/join.sql      | 21 ++++++++++++++++
 2 files changed, 61 insertions(+)

diff --git a/src/test/regress/expected/join.out 
b/src/test/regress/expected/join.out
index d05a0ca0373..0d138d3d3c4 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -3194,6 +3194,15 @@ reset enable_nestloop;
 --
 -- basic semijoin and antijoin recognition tests
 --
+CREATE TABLE aj1 (id integer primary key);
+CREATE TABLE aj2 (id integer primary key, aj1_id integer);
+CREATE TABLE aj3 (id integer primary key, aj2_id integer);
+INSERT INTO aj1 VALUES (1);
+INSERT INTO aj1 VALUES (2);
+INSERT INTO aj2 VALUES (1, 1);
+INSERT INTO aj2 VALUES (2, 1);
+INSERT INTO aj3 VALUES (1, 2);
+INSERT INTO aj3 VALUES (2, 2);
 explain (costs off)
 select a.* from tenk1 a
 where unique1 in (select unique2 from tenk1 b);
@@ -3254,6 +3263,37 @@ where b.unique2 is null;
          ->  Index Only Scan using tenk1_unique2 on tenk1 b
 (5 rows)
 
+explain (costs off)
+select aj1.* from aj1 left join aj2 on aj1.id = aj2.aj1_id
+where aj2.id is null;
+             QUERY PLAN             
+------------------------------------
+ Hash Right Anti Join
+   Hash Cond: (aj2.aj1_id = aj1.id)
+   ->  Seq Scan on aj2
+   ->  Hash
+         ->  Seq Scan on aj1
+(5 rows)
+
+explain (costs off)
+select aj1.* from aj1
+  left join (select aj2.id aj2_id, aj2.aj1_id, aj3.id aj3_id
+      from aj2 left join aj3 on aj2.id = aj3.aj2_id) x
+        on aj1.id = x.aj1_id
+where x.aj2_id is null;
+                QUERY PLAN                
+------------------------------------------
+ Hash Right Anti Join
+   Hash Cond: (aj2.aj1_id = aj1.id)
+   ->  Hash Right Join
+         Hash Cond: (aj3.aj2_id = aj2.id)
+         ->  Seq Scan on aj3
+         ->  Hash
+               ->  Seq Scan on aj2
+   ->  Hash
+         ->  Seq Scan on aj1
+(9 rows)
+
 -- check that we avoid de-duplicating columns redundantly
 set enable_memoize to off;
 explain (costs off)
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index b91fb7574df..c21312191fb 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -837,6 +837,16 @@ reset enable_nestloop;
 --
 -- basic semijoin and antijoin recognition tests
 --
+CREATE TABLE aj1 (id integer primary key);
+CREATE TABLE aj2 (id integer primary key, aj1_id integer);
+CREATE TABLE aj3 (id integer primary key, aj2_id integer);
+
+INSERT INTO aj1 VALUES (1);
+INSERT INTO aj1 VALUES (2);
+INSERT INTO aj2 VALUES (1, 1);
+INSERT INTO aj2 VALUES (2, 1);
+INSERT INTO aj3 VALUES (1, 2);
+INSERT INTO aj3 VALUES (2, 2);
 
 explain (costs off)
 select a.* from tenk1 a
@@ -859,6 +869,17 @@ explain (costs off)
 select a.* from tenk1 a left join tenk1 b on a.unique1 = b.unique2
 where b.unique2 is null;
 
+explain (costs off)
+select aj1.* from aj1 left join aj2 on aj1.id = aj2.aj1_id
+where aj2.id is null;
+
+explain (costs off)
+select aj1.* from aj1
+  left join (select aj2.id aj2_id, aj2.aj1_id, aj3.id aj3_id
+      from aj2 left join aj3 on aj2.id = aj3.aj2_id) x
+        on aj1.id = x.aj1_id
+where x.aj2_id is null;
+
 -- check that we avoid de-duplicating columns redundantly
 set enable_memoize to off;
 explain (costs off)
-- 
2.34.1

Reply via email to