Nicolas Adenis-Lamarre <[email protected]> 于2026年1月2日周五 06:28写道:
> Tender, > sorry, you patch is perfect. I badly tested it this morning. My fault. > There is just the case with the constant that is not handled (select * > from a left join (select 1 const1 from b) x where x.const1 is null) > but i don't think it worth handling it. > I added 2 regresssion tests in case you want to add them to your patch. > The first one is important, the second one is just to confirm that > subqueries are working. > I've added 3 new tables because in test_setup.sql and join.sql there > were no table with not null constraints. > > Again, thanks a lot. > Thanks for your provided test cases. I had added it to the v3 patch. Please have a look at the attached patches. -- Thanks, Tender Wang
From bb76e46a728339adcd19e90144c7ae5fea1bad77 Mon Sep 17 00:00:00 2001 From: Tender Wang <[email protected]> Date: Thu, 1 Jan 2026 13:44:18 +0800 Subject: [PATCH v3 1/2] Reduce JOIN_LEFT TO JOIN_ANTI. Since we have collected notnull constraints, so we can detect an anti-join, if we were to check whether Vars coming from the RHS must be non-null because of table constraints. For example, SELECT ... FROM a LEFT JOIN b ON (a.x = b.y) WHERE b.z IS NULL; If the b.z has non-null table constraint, we can reduce JOIN_LEFT to JOIN_ANTI. --- src/backend/optimizer/prep/prepjointree.c | 64 +++++++++++++++++++++-- 1 file changed, 59 insertions(+), 5 deletions(-) diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c index c80bfc88d82..e346847cce0 100644 --- a/src/backend/optimizer/prep/prepjointree.c +++ b/src/backend/optimizer/prep/prepjointree.c @@ -177,6 +177,8 @@ static Node *find_jointree_node_for_rel(Node *jtnode, int relid); static nullingrel_info *get_nullingrels(Query *parse); static void get_nullingrels_recurse(Node *jtnode, Relids upper_nullingrels, nullingrel_info *info); +static bool have_var_is_notnull(PlannerInfo *root, Relids right_rels, + List *forced_null_vars); /* @@ -3230,6 +3232,57 @@ reduce_outer_joins(PlannerInfo *root) } } +/* + * have_var_is_notnull + * check whether the vars from RHS of join have notnull constraints. + */ +static bool +have_var_is_notnull(PlannerInfo *root, Relids right_rels, List *forced_null_vars) +{ + ListCell *lc; + Bitmapset *cols; + int i; + int index; + RangeTblEntry *rte; + Bitmapset *notnullattnums; + bool result = false; + + i = 0; + foreach(lc, forced_null_vars) + { + cols = lfirst_node(Bitmapset, lc); + if (cols == NULL) + { + i++; + continue; + } + /* Skip if the var doesn't belong to RHS of join */ + if (!bms_is_member(i, right_rels)) + { + i++; + continue; + } + rte = rt_fetch(i, root->parse->rtable); + notnullattnums = find_relation_notnullatts(root, rte->relid); + index = -1; + while ((index = bms_next_member(cols, index)) >= 0) + { + AttrNumber attno = index + FirstLowInvalidHeapAttributeNumber; + if (bms_is_member(attno, notnullattnums)) + { + result = true; + break; + } + } + + if (result) + break; + i++; + } + + return result; +} + /* * reduce_outer_joins_pass1 - phase 1 data collection * @@ -3441,12 +3494,11 @@ reduce_outer_joins_pass2(Node *jtnode, /* * See if we can reduce JOIN_LEFT to JOIN_ANTI. This is the case if * the join's own quals are strict for any var that was forced null by - * higher qual levels. NOTE: there are other ways that we could - * detect an anti-join, in particular if we were to check whether Vars + * higher qual levels. Since we have collected notnull constraints, so + * we can detect an anti-join, if we were to check whether Vars * coming from the RHS must be non-null because of table constraints. - * That seems complicated and expensive though (in particular, one - * would have to be wary of lower outer joins). For the moment this - * seems sufficient. + * For example, SELECT ... FROM a LEFT JOIN b ON (a.x = b.y) WHERE b.z IS NULL; + * if the b.z had non-null table constraint, we can reduce JOIN_LEFT to JOIN_ANTI. */ if (jointype == JOIN_LEFT) { @@ -3464,6 +3516,8 @@ reduce_outer_joins_pass2(Node *jtnode, overlap = mbms_overlap_sets(nonnullable_vars, forced_null_vars); if (bms_overlap(overlap, right_state->relids)) jointype = JOIN_ANTI; + else if (have_var_is_notnull(root, right_state->relids, forced_null_vars)) + jointype = JOIN_ANTI; } /* -- 2.34.1
From b81d3993e65e449d2ea74aa1dd7827cd815b7b28 Mon Sep 17 00:00:00 2001 From: Nicolas Adenis-Lamarre <[email protected]> Date: Thu, 1 Jan 2026 23:09:54 +0100 Subject: [PATCH v3 2/2] Reduce JOIN_LEFT TO JOIN_ANTI : regression tests --- src/test/regress/expected/join.out | 40 ++++++++++++++++++++++++++++++ src/test/regress/sql/join.sql | 21 ++++++++++++++++ 2 files changed, 61 insertions(+) diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index d05a0ca0373..0d138d3d3c4 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -3194,6 +3194,15 @@ reset enable_nestloop; -- -- basic semijoin and antijoin recognition tests -- +CREATE TABLE aj1 (id integer primary key); +CREATE TABLE aj2 (id integer primary key, aj1_id integer); +CREATE TABLE aj3 (id integer primary key, aj2_id integer); +INSERT INTO aj1 VALUES (1); +INSERT INTO aj1 VALUES (2); +INSERT INTO aj2 VALUES (1, 1); +INSERT INTO aj2 VALUES (2, 1); +INSERT INTO aj3 VALUES (1, 2); +INSERT INTO aj3 VALUES (2, 2); explain (costs off) select a.* from tenk1 a where unique1 in (select unique2 from tenk1 b); @@ -3254,6 +3263,37 @@ where b.unique2 is null; -> Index Only Scan using tenk1_unique2 on tenk1 b (5 rows) +explain (costs off) +select aj1.* from aj1 left join aj2 on aj1.id = aj2.aj1_id +where aj2.id is null; + QUERY PLAN +------------------------------------ + Hash Right Anti Join + Hash Cond: (aj2.aj1_id = aj1.id) + -> Seq Scan on aj2 + -> Hash + -> Seq Scan on aj1 +(5 rows) + +explain (costs off) +select aj1.* from aj1 + left join (select aj2.id aj2_id, aj2.aj1_id, aj3.id aj3_id + from aj2 left join aj3 on aj2.id = aj3.aj2_id) x + on aj1.id = x.aj1_id +where x.aj2_id is null; + QUERY PLAN +------------------------------------------ + Hash Right Anti Join + Hash Cond: (aj2.aj1_id = aj1.id) + -> Hash Right Join + Hash Cond: (aj3.aj2_id = aj2.id) + -> Seq Scan on aj3 + -> Hash + -> Seq Scan on aj2 + -> Hash + -> Seq Scan on aj1 +(9 rows) + -- check that we avoid de-duplicating columns redundantly set enable_memoize to off; explain (costs off) diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index b91fb7574df..c21312191fb 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -837,6 +837,16 @@ reset enable_nestloop; -- -- basic semijoin and antijoin recognition tests -- +CREATE TABLE aj1 (id integer primary key); +CREATE TABLE aj2 (id integer primary key, aj1_id integer); +CREATE TABLE aj3 (id integer primary key, aj2_id integer); + +INSERT INTO aj1 VALUES (1); +INSERT INTO aj1 VALUES (2); +INSERT INTO aj2 VALUES (1, 1); +INSERT INTO aj2 VALUES (2, 1); +INSERT INTO aj3 VALUES (1, 2); +INSERT INTO aj3 VALUES (2, 2); explain (costs off) select a.* from tenk1 a @@ -859,6 +869,17 @@ explain (costs off) select a.* from tenk1 a left join tenk1 b on a.unique1 = b.unique2 where b.unique2 is null; +explain (costs off) +select aj1.* from aj1 left join aj2 on aj1.id = aj2.aj1_id +where aj2.id is null; + +explain (costs off) +select aj1.* from aj1 + left join (select aj2.id aj2_id, aj2.aj1_id, aj3.id aj3_id + from aj2 left join aj3 on aj2.id = aj3.aj2_id) x + on aj1.id = x.aj1_id +where x.aj2_id is null; + -- check that we avoid de-duplicating columns redundantly set enable_memoize to off; explain (costs off) -- 2.34.1
