From cd9f79c71e0bdcc8cbf62f319649d5359a74ea7b Mon Sep 17 00:00:00 2001
From: Richard Guo <guofenglinux@gmail.com>
Date: Mon, 2 Dec 2024 10:31:39 +0900
Subject: [PATCH v6] Avoid unnecessary wrapping for Vars and PHVs

When pulling up a lateral subquery that is under an outer join, the
current code always wraps a Var or PHV in the subquery's targetlist
into a new PlaceHolderVar if it is a lateral reference to something
outside the subquery.  This is necessary when the Var/PHV references
the non-nullable side of the outer join from the nullable side: we
need to ensure that it is evaluated at the right place and hence is
forced to null when the outer join should do so.  However, if the
referenced rel is under the same lowest nulling outer join, we can
actually omit the wrapping.  That's safe because if the subquery
variable is forced to NULL by the outer join, the lateral reference
variable will come out as NULL too.  It could be beneficial to get rid
of such PHVs because they imply lateral dependencies, which force us
to resort to nestloop joins.

This patch leverages the newly introduced nullingrel_info to check if
the nullingrels of the subquery RTE are a subset of those of the
laterally referenced rel, in order to determine if the referenced rel
is under the same lowest nulling outer join.

No backpatch as this could result in plan changes.

Author: Richard Guo
Reviewed-by: James Coleman, Dmitry Dolgov
Discussion: https://postgr.es/m/CAMbWs48uk6C7Z9m_FNT8_21CMCk68hrgAsz=z6zpP1PNZMkeoQ@mail.gmail.com
---
 src/backend/optimizer/prep/prepjointree.c |  39 ++++--
 src/test/regress/expected/subselect.out   | 138 ++++++++++++++++++++++
 src/test/regress/sql/subselect.sql        |  36 ++++++
 3 files changed, 204 insertions(+), 9 deletions(-)

diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c
index 2ebd938f6b..3fa4d78c3e 100644
--- a/src/backend/optimizer/prep/prepjointree.c
+++ b/src/backend/optimizer/prep/prepjointree.c
@@ -2598,26 +2598,47 @@ pullup_replace_vars_callback(Var *var,
 				/*
 				 * Simple Vars always escape being wrapped, unless they are
 				 * lateral references to something outside the subquery being
-				 * pulled up.  (Even then, we could omit the PlaceHolderVar if
-				 * the referenced rel is under the same lowest outer join, but
-				 * it doesn't seem worth the trouble to check that.)
+				 * pulled up and the referenced rel is not under the same
+				 * lowest nulling outer join.
 				 */
+				wrap = false;
 				if (rcon->target_rte->lateral &&
 					!bms_is_member(((Var *) newnode)->varno, rcon->relids))
-					wrap = true;
-				else
-					wrap = false;
+				{
+					nullingrel_info *nullinfo = rcon->nullinfo;
+					int			lvarno = ((Var *) newnode)->varno;
+
+					Assert(lvarno > 0 && lvarno <= nullinfo->rtlength);
+					if (!bms_is_subset(nullinfo->nullingrels[rcon->varno],
+									   nullinfo->nullingrels[lvarno]))
+						wrap = true;
+				}
 			}
 			else if (newnode && IsA(newnode, PlaceHolderVar) &&
 					 ((PlaceHolderVar *) newnode)->phlevelsup == 0)
 			{
 				/* The same rules apply for a PlaceHolderVar */
+				wrap = false;
 				if (rcon->target_rte->lateral &&
 					!bms_is_subset(((PlaceHolderVar *) newnode)->phrels,
 								   rcon->relids))
-					wrap = true;
-				else
-					wrap = false;
+				{
+					nullingrel_info *nullinfo = rcon->nullinfo;
+					Relids		lvarnos = ((PlaceHolderVar *) newnode)->phrels;
+					int			lvarno;
+
+					lvarno = -1;
+					while ((lvarno = bms_next_member(lvarnos, lvarno)) >= 0)
+					{
+						Assert(lvarno > 0 && lvarno <= nullinfo->rtlength);
+						if (!bms_is_subset(nullinfo->nullingrels[rcon->varno],
+										   nullinfo->nullingrels[lvarno]))
+						{
+							wrap = true;
+							break;
+						}
+					}
+				}
 			}
 			else if (rcon->wrap_non_vars)
 			{
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index 6c1fb2bfdb..566dec67fa 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -1848,6 +1848,144 @@ order by 1, 2;
  4567890123456789 | 9135780246913578
 (11 rows)
 
+-- lateral references for simple Vars can escape being wrapped if the
+-- referenced rel is under the same lowest nulling outer join
+explain (verbose, costs off)
+select t1.q1, x from
+  int8_tbl t1 left join
+  (int8_tbl t2 inner join
+   lateral (select t2.q2 as x, * from int8_tbl t3) ss on t2.q2 = ss.q1)
+  on t1.q1 = t2.q1
+order by 1, 2;
+                       QUERY PLAN                       
+--------------------------------------------------------
+ Sort
+   Output: t1.q1, t2.q2
+   Sort Key: t1.q1, t2.q2
+   ->  Hash Right Join
+         Output: t1.q1, t2.q2
+         Hash Cond: (t2.q1 = t1.q1)
+         ->  Hash Join
+               Output: t2.q2, t2.q1
+               Hash Cond: (t2.q2 = t3.q1)
+               ->  Seq Scan on public.int8_tbl t2
+                     Output: t2.q1, t2.q2
+               ->  Hash
+                     Output: t3.q1
+                     ->  Seq Scan on public.int8_tbl t3
+                           Output: t3.q1
+         ->  Hash
+               Output: t1.q1
+               ->  Seq Scan on public.int8_tbl t1
+                     Output: t1.q1
+(19 rows)
+
+select t1.q1, x from
+  int8_tbl t1 left join
+  (int8_tbl t2 inner join
+   lateral (select t2.q2 as x, * from int8_tbl t3) ss on t2.q2 = ss.q1)
+  on t1.q1 = t2.q1
+order by 1, 2;
+        q1        |        x         
+------------------+------------------
+              123 | 4567890123456789
+              123 | 4567890123456789
+              123 | 4567890123456789
+              123 | 4567890123456789
+              123 | 4567890123456789
+              123 | 4567890123456789
+ 4567890123456789 |              123
+ 4567890123456789 |              123
+ 4567890123456789 |              123
+ 4567890123456789 |              123
+ 4567890123456789 |              123
+ 4567890123456789 |              123
+ 4567890123456789 | 4567890123456789
+ 4567890123456789 | 4567890123456789
+ 4567890123456789 | 4567890123456789
+ 4567890123456789 | 4567890123456789
+ 4567890123456789 | 4567890123456789
+ 4567890123456789 | 4567890123456789
+ 4567890123456789 | 4567890123456789
+ 4567890123456789 | 4567890123456789
+ 4567890123456789 | 4567890123456789
+(21 rows)
+
+-- lateral references for PHVs can also escape being wrapped if the
+-- referenced rel is under the same lowest nulling outer join
+explain (verbose, costs off)
+select ss2.* from
+  int8_tbl t1 left join
+  (int8_tbl t2 left join
+   (select coalesce(q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 inner join
+   lateral (select ss1.x as y, * from int8_tbl t4) ss2 on t2.q2 = ss2.q1)
+  on t1.q2 = ss2.q1
+order by 1, 2, 3;
+                           QUERY PLAN                           
+----------------------------------------------------------------
+ Sort
+   Output: (COALESCE(t3.q1)), t4.q1, t4.q2
+   Sort Key: (COALESCE(t3.q1)), t4.q1, t4.q2
+   ->  Hash Right Join
+         Output: (COALESCE(t3.q1)), t4.q1, t4.q2
+         Hash Cond: (t4.q1 = t1.q2)
+         ->  Hash Join
+               Output: (COALESCE(t3.q1)), t4.q1, t4.q2
+               Hash Cond: (t2.q2 = t4.q1)
+               ->  Hash Left Join
+                     Output: t2.q2, (COALESCE(t3.q1))
+                     Hash Cond: (t2.q1 = t3.q2)
+                     ->  Seq Scan on public.int8_tbl t2
+                           Output: t2.q1, t2.q2
+                     ->  Hash
+                           Output: t3.q2, (COALESCE(t3.q1))
+                           ->  Seq Scan on public.int8_tbl t3
+                                 Output: t3.q2, COALESCE(t3.q1)
+               ->  Hash
+                     Output: t4.q1, t4.q2
+                     ->  Seq Scan on public.int8_tbl t4
+                           Output: t4.q1, t4.q2
+         ->  Hash
+               Output: t1.q2
+               ->  Seq Scan on public.int8_tbl t1
+                     Output: t1.q2
+(26 rows)
+
+select ss2.* from
+  int8_tbl t1 left join
+  (int8_tbl t2 left join
+   (select coalesce(q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 inner join
+   lateral (select ss1.x as y, * from int8_tbl t4) ss2 on t2.q2 = ss2.q1)
+  on t1.q2 = ss2.q1
+order by 1, 2, 3;
+        y         |        q1        |        q2         
+------------------+------------------+-------------------
+              123 |              123 |               456
+              123 |              123 |  4567890123456789
+              123 | 4567890123456789 | -4567890123456789
+              123 | 4567890123456789 | -4567890123456789
+              123 | 4567890123456789 |               123
+              123 | 4567890123456789 |               123
+              123 | 4567890123456789 |  4567890123456789
+              123 | 4567890123456789 |  4567890123456789
+ 4567890123456789 |              123 |               456
+ 4567890123456789 |              123 |  4567890123456789
+ 4567890123456789 | 4567890123456789 | -4567890123456789
+ 4567890123456789 | 4567890123456789 | -4567890123456789
+ 4567890123456789 | 4567890123456789 | -4567890123456789
+ 4567890123456789 | 4567890123456789 | -4567890123456789
+ 4567890123456789 | 4567890123456789 |               123
+ 4567890123456789 | 4567890123456789 |               123
+ 4567890123456789 | 4567890123456789 |               123
+ 4567890123456789 | 4567890123456789 |               123
+ 4567890123456789 | 4567890123456789 |  4567890123456789
+ 4567890123456789 | 4567890123456789 |  4567890123456789
+ 4567890123456789 | 4567890123456789 |  4567890123456789
+ 4567890123456789 | 4567890123456789 |  4567890123456789
+                  |                  |                  
+                  |                  |                  
+(24 rows)
+
 --
 -- Tests for CTE inlining behavior
 --
diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql
index e5a562c3f5..23c6350d94 100644
--- a/src/test/regress/sql/subselect.sql
+++ b/src/test/regress/sql/subselect.sql
@@ -939,6 +939,42 @@ select t1.q1, x from
   on t1.q2 = t2.q2
 order by 1, 2;
 
+-- lateral references for simple Vars can escape being wrapped if the
+-- referenced rel is under the same lowest nulling outer join
+explain (verbose, costs off)
+select t1.q1, x from
+  int8_tbl t1 left join
+  (int8_tbl t2 inner join
+   lateral (select t2.q2 as x, * from int8_tbl t3) ss on t2.q2 = ss.q1)
+  on t1.q1 = t2.q1
+order by 1, 2;
+
+select t1.q1, x from
+  int8_tbl t1 left join
+  (int8_tbl t2 inner join
+   lateral (select t2.q2 as x, * from int8_tbl t3) ss on t2.q2 = ss.q1)
+  on t1.q1 = t2.q1
+order by 1, 2;
+
+-- lateral references for PHVs can also escape being wrapped if the
+-- referenced rel is under the same lowest nulling outer join
+explain (verbose, costs off)
+select ss2.* from
+  int8_tbl t1 left join
+  (int8_tbl t2 left join
+   (select coalesce(q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 inner join
+   lateral (select ss1.x as y, * from int8_tbl t4) ss2 on t2.q2 = ss2.q1)
+  on t1.q2 = ss2.q1
+order by 1, 2, 3;
+
+select ss2.* from
+  int8_tbl t1 left join
+  (int8_tbl t2 left join
+   (select coalesce(q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 inner join
+   lateral (select ss1.x as y, * from int8_tbl t4) ss2 on t2.q2 = ss2.q1)
+  on t1.q2 = ss2.q1
+order by 1, 2, 3;
+
 --
 -- Tests for CTE inlining behavior
 --
-- 
2.43.0

