From 8001e52ebe22c775179c9432ec91220a0d089cce Mon Sep 17 00:00:00 2001
From: Richard Guo <guofenglinux@gmail.com>
Date: Thu, 3 Nov 2022 16:12:54 +0800
Subject: [PATCH v2] Check SubPlan clause for nonnullable rels/Vars

---
 src/backend/optimizer/util/clauses.c | 46 +++++++++++++++++++++++
 src/test/regress/expected/join.out   | 56 ++++++++++++++++++++++++++++
 src/test/regress/sql/join.sql        | 28 ++++++++++++++
 3 files changed, 130 insertions(+)

diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c
index 7fb32a0710..ef3d3ab617 100644
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -1534,6 +1534,29 @@ find_nonnullable_rels_walker(Node *node, bool top_level)
 			bms_membership(phv->phrels) == BMS_SINGLETON)
 			result = bms_add_members(result, phv->phrels);
 	}
+	else if (IsA(node, SubPlan))
+	{
+		/* descend through testexpr for ALL/ANY/ROWCOMPARE */
+		SubPlan *splan = (SubPlan *) node;
+
+		/*
+		 * For ALL_SUBLINK, if the subplan produces zero rows, the result is
+		 * always TRUE.  So ALL_SUBLINK is not strict.
+		 *
+		 * For ANY_SUBLINK, if the subplan produces zero rows, the result is
+		 * always FALSE.  If the subplan produces more than one rows, the
+		 * per-row results are combined using OR semantics.  So ANY_SUBLINK can
+		 * be strict only at top level.
+		 *
+		 * For ROWCOMPARE_SUBLINK, if the subplan produces zero rows, the
+		 * result is always NULL.  Otherwise, the subplan is only allowed to
+		 * produce one row, and the result for ROWCOMPARE_SUBLINK is the same
+		 * as its testexpr's.
+		 */
+		if ((top_level && splan->subLinkType == ANY_SUBLINK) ||
+			splan->subLinkType == ROWCOMPARE_SUBLINK)
+			result = find_nonnullable_rels_walker(splan->testexpr, top_level);
+	}
 	return result;
 }
 
@@ -1742,6 +1765,29 @@ find_nonnullable_vars_walker(Node *node, bool top_level)
 
 		result = find_nonnullable_vars_walker((Node *) phv->phexpr, top_level);
 	}
+	else if (IsA(node, SubPlan))
+	{
+		/* descend through testexpr for ALL/ANY/ROWCOMPARE */
+		SubPlan *splan = (SubPlan *) node;
+
+		/*
+		 * For ALL_SUBLINK, if the subplan produces zero rows, the result is
+		 * always TRUE.  So ALL_SUBLINK is not strict.
+		 *
+		 * For ANY_SUBLINK, if the subplan produces zero rows, the result is
+		 * always FALSE.  If the subplan produces more than one rows, the
+		 * per-row results are combined using OR semantics.  So ANY_SUBLINK can
+		 * be strict only at top level.
+		 *
+		 * For ROWCOMPARE_SUBLINK, if the subplan produces zero rows, the
+		 * result is always NULL.  Otherwise, the subplan is only allowed to
+		 * produce one row, and the result for ROWCOMPARE_SUBLINK is the same
+		 * as its testexpr's.
+		 */
+		if ((top_level && splan->subLinkType == ANY_SUBLINK) ||
+			splan->subLinkType == ROWCOMPARE_SUBLINK)
+			result = find_nonnullable_vars_walker(splan->testexpr, top_level);
+	}
 	return result;
 }
 
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index b901d7299f..b365af97db 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -6720,3 +6720,59 @@ where exists (select 1 from j3
 (13 rows)
 
 drop table j3;
+--
+-- test check of SubPlan clause for nonnullable rels/Vars when reducing outer joins
+--
+begin;
+create temp table a (i int, j int);
+create temp table b (i int, j int);
+create temp table c (i int, j int);
+insert into a values (1,1), (2,2), (3,3);
+insert into b values (2,2), (3,3), (4,4);
+insert into c values (3,3), (4,4), (5,5);
+analyze a;
+analyze b;
+analyze c;
+explain (costs off)
+select * from a left join b on a.i = b.i where b.i = ANY (select i from c where c.j = b.j);
+            QUERY PLAN             
+-----------------------------------
+ Hash Join
+   Hash Cond: (b.i = a.i)
+   ->  Seq Scan on b
+         Filter: (SubPlan 1)
+         SubPlan 1
+           ->  Seq Scan on c
+                 Filter: (j = b.j)
+   ->  Hash
+         ->  Seq Scan on a
+(9 rows)
+
+select * from a left join b on a.i = b.i where b.i = ANY (select i from c where c.j = b.j);
+ i | j | i | j 
+---+---+---+---
+ 3 | 3 | 3 | 3
+(1 row)
+
+explain (costs off)
+select * from a left join b on b.i = ANY (select i from c where c.j = a.j) where b.i is null;
+         QUERY PLAN          
+-----------------------------
+ Nested Loop Anti Join
+   Join Filter: (SubPlan 1)
+   ->  Seq Scan on a
+   ->  Materialize
+         ->  Seq Scan on b
+   SubPlan 1
+     ->  Seq Scan on c
+           Filter: (j = a.j)
+(8 rows)
+
+select * from a left join b on b.i = ANY (select i from c where c.j = a.j) where b.i is null;
+ i | j | i | j 
+---+---+---+---
+ 1 | 1 |   |  
+ 2 | 2 |   |  
+(2 rows)
+
+rollback;
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index ccbbe5454c..f218f92675 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -2366,3 +2366,31 @@ where exists (select 1 from j3
       and t1.unique1 < 1;
 
 drop table j3;
+
+--
+-- test check of SubPlan clause for nonnullable rels/Vars when reducing outer joins
+--
+
+begin;
+
+create temp table a (i int, j int);
+create temp table b (i int, j int);
+create temp table c (i int, j int);
+
+insert into a values (1,1), (2,2), (3,3);
+insert into b values (2,2), (3,3), (4,4);
+insert into c values (3,3), (4,4), (5,5);
+
+analyze a;
+analyze b;
+analyze c;
+
+explain (costs off)
+select * from a left join b on a.i = b.i where b.i = ANY (select i from c where c.j = b.j);
+select * from a left join b on a.i = b.i where b.i = ANY (select i from c where c.j = b.j);
+
+explain (costs off)
+select * from a left join b on b.i = ANY (select i from c where c.j = a.j) where b.i is null;
+select * from a left join b on b.i = ANY (select i from c where c.j = a.j) where b.i is null;
+
+rollback;
-- 
2.31.0

