From f0812129f4925289d41d59ea9de549281bfd43bc Mon Sep 17 00:00:00 2001
From: Richard Guo <guofenglinux@gmail.com>
Date: Tue, 13 Jun 2023 10:23:33 +0800
Subject: [PATCH v3] Fix nulling bitmap for Memoize cache keys

---
 src/backend/optimizer/path/joinpath.c | 37 +++++++++++++++++++++++++++
 src/backend/optimizer/plan/setrefs.c  | 10 ++++----
 src/test/regress/expected/join.out    | 21 +++++++++++++++
 src/test/regress/sql/join.sql         |  7 +++++
 4 files changed, 70 insertions(+), 5 deletions(-)

diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index cd80e61fd7..4e585ffe99 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -427,6 +427,24 @@ have_unsafe_outer_join_ref(PlannerInfo *root,
  * Additionally we also collect the outer exprs and the hash operators for
  * each parameter to innerrel.  These set in 'param_exprs', 'operators' and
  * 'binary_mode' when we return true.
+ *
+ * An additional complication is that innerrel's lateral_vars may contain
+ * nullingrel markers that need adjustment.  This occurs if we have applied
+ * outer join identity 3,
+ *		(A leftjoin B on (Pab)) leftjoin C on (Pb*c)
+ *		= A leftjoin (B leftjoin C on (Pbc)) on (Pab)
+ * and C contains lateral references to B.  It's still safe to apply the
+ * identity, but the parser will have created those references in the form
+ * "b*" (i.e., with varnullingrels listing the A/B join), while what we will
+ * have available from the nestloop's outer side is just "b".  We deal with
+ * that here by stripping the nullingrels down to what is available from the
+ * outer side according to outerrel->relids.
+ * That fixes matters for the case of forward application of identity 3.  If
+ * the identity was applied in the reverse direction, we will have
+ * innerrel's lateral_vars containing too few nullingrel bits rather than
+ * too many.  Currently, that causes no problems because setrefs.c applies
+ * only a subset check to nullingrels in NestLoopParams, but we'd have to
+ * work harder if we ever want to tighten that check.
  */
 static bool
 paraminfo_get_equal_hashops(PlannerInfo *root, ParamPathInfo *param_info,
@@ -529,6 +547,25 @@ paraminfo_get_equal_hashops(PlannerInfo *root, ParamPathInfo *param_info,
 			return false;
 		}
 
+		/* add it after adjusting its nullingrels */
+		expr = copyObject(expr);
+		if (IsA(expr, Var))
+		{
+			Var		   *var = (Var *) expr;
+
+			var->varnullingrels = bms_intersect(var->varnullingrels,
+												outerrel->relids);
+		}
+		else if (IsA(expr, PlaceHolderVar))
+		{
+			PlaceHolderVar *phv = (PlaceHolderVar *) expr;
+
+			phv->phnullingrels = bms_intersect(phv->phnullingrels,
+											   outerrel->relids);
+		}
+		else
+			Assert(false);
+
 		*operators = lappend_oid(*operators, typentry->eq_opr);
 		*param_exprs = lappend(*param_exprs, expr);
 
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index 3585a703fb..97868ba4ac 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -2289,11 +2289,11 @@ set_join_references(PlannerInfo *root, Join *join, int rtoffset)
 			 * the outer-join level at which they are used, Vars seen in the
 			 * NestLoopParam expression may have nullingrels that are just a
 			 * subset of those in the Vars actually available from the outer
-			 * side.  Another case that can cause that to happen is explained
-			 * in the comments for process_subquery_nestloop_params.  Not
-			 * checking this exactly is a bit grotty, but the work needed to
-			 * make things match up perfectly seems well out of proportion to
-			 * the value.
+			 * side.  Another two cases that can cause that to happen is
+			 * explained in the comments for process_subquery_nestloop_params
+			 * and paraminfo_get_equal_hashops.  Not checking this exactly
+			 * is a bit grotty, but the work needed to make things match up
+			 * perfectly seems well out of proportion to the value.
 			 */
 			nlp->paramval = (Var *) fix_upper_expr(root,
 												   (Node *) nlp->paramval,
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 4999c99f3b..98b2667821 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -2607,6 +2607,27 @@ select * from int8_tbl t1
                      Filter: (q1 = t2.q1)
 (8 rows)
 
+explain (costs off)
+select * from onek t1
+    left join onek t2 on true
+    left join lateral
+      (select * from onek t3 where t3.two = t2.two offset 0) s
+      on t2.unique1 = 1;
+                    QUERY PLAN                    
+--------------------------------------------------
+ Nested Loop Left Join
+   ->  Seq Scan on onek t1
+   ->  Materialize
+         ->  Nested Loop Left Join
+               Join Filter: (t2.unique1 = 1)
+               ->  Seq Scan on onek t2
+               ->  Memoize
+                     Cache Key: t2.two
+                     Cache Mode: binary
+                     ->  Seq Scan on onek t3
+                           Filter: (two = t2.two)
+(11 rows)
+
 --
 -- check a case where we formerly got confused by conflicting sort orders
 -- in redundant merge join path keys
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index 56ca759772..7daa390b1d 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -521,6 +521,13 @@ select * from int8_tbl t1
       (select * from int8_tbl t3 where t3.q1 = t2.q1 offset 0) s
       on t2.q1 = 1;
 
+explain (costs off)
+select * from onek t1
+    left join onek t2 on true
+    left join lateral
+      (select * from onek t3 where t3.two = t2.two offset 0) s
+      on t2.unique1 = 1;
+
 --
 -- check a case where we formerly got confused by conflicting sort orders
 -- in redundant merge join path keys
-- 
2.31.0

