From e0bc064032af36d027c497aa5e4302d4fb667950 Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Wed, 9 Oct 2024 18:44:25 +0300
Subject: [PATCH v45] Allow usage of match_orclause_to_indexcol() for joins

This commit allows transformation of OR-clauses into SAOP's for index scans
within nested loop joins.  That required the following changes.

 1. Make match_orclause_to_indexcol() and group_similar_or_args() understand
    const-ness in the same way as match_opclause_to_indexcol().  This
    generally makes our approach more uniform.
 2. Make match_join_clauses_to_index() pass OR-clauses to
    match_clause_to_index().
---
 src/backend/optimizer/path/indxpath.c        | 61 +++++++++++---------
 src/test/regress/expected/join.out           |  9 +--
 src/test/regress/expected/partition_join.out | 12 ++--
 3 files changed, 43 insertions(+), 39 deletions(-)

diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index 5f428e835b0..c03547a9973 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -1255,6 +1255,7 @@ group_similar_or_args(PlannerInfo *root, RelOptInfo *rel, RestrictInfo *rinfo)
 	ListCell   *lc2;
 	List	   *orargs;
 	List	   *result = NIL;
+	Index		relid = rel->relid;
 
 	Assert(IsA(rinfo->orclause, BoolExpr));
 	orargs = ((BoolExpr *) rinfo->orclause)->args;
@@ -1319,10 +1320,13 @@ group_similar_or_args(PlannerInfo *root, RelOptInfo *rel, RestrictInfo *rinfo)
 		/*
 		 * Check for clauses of the form: (indexkey operator constant) or
 		 * (constant operator indexkey).  But we don't know a particular index
-		 * yet.  First check for a constant, which must be Const or Param.
-		 * That's cheaper than search for an index key among all indexes.
+		 * yet.  Therefore, we try to distinguish the potential index key and
+		 * constant first, then search for a matching index key among all
+		 * indexes.
 		 */
-		if (IsA(leftop, Const) || IsA(leftop, Param))
+		if (bms_is_member(relid, argrinfo->right_relids) &&
+			!bms_is_member(relid, argrinfo->left_relids) &&
+			!contain_volatile_functions(leftop))
 		{
 			opno = get_commutator(opno);
 
@@ -1333,7 +1337,9 @@ group_similar_or_args(PlannerInfo *root, RelOptInfo *rel, RestrictInfo *rinfo)
 			}
 			nonConstExpr = rightop;
 		}
-		else if (IsA(rightop, Const) || IsA(rightop, Param))
+		else if (bms_is_member(relid, argrinfo->left_relids) &&
+				 !bms_is_member(relid, argrinfo->right_relids) &&
+				 !contain_volatile_functions(rightop))
 		{
 			nonConstExpr = leftop;
 		}
@@ -2435,8 +2441,8 @@ match_join_clauses_to_index(PlannerInfo *root,
 		/* Potentially usable, so see if it matches the index or is an OR */
 		if (restriction_is_or_clause(rinfo))
 			*joinorclauses = lappend(*joinorclauses, rinfo);
-		else
-			match_clause_to_index(root, rinfo, index, clauseset);
+
+		match_clause_to_index(root, rinfo, index, clauseset);
 	}
 }
 
@@ -2585,10 +2591,7 @@ match_clause_to_index(PlannerInfo *root,
  *	  (3)  must match the collation of the index, if collation is relevant.
  *
  *	  Our definition of "const" is exceedingly liberal: we allow anything that
- *	  doesn't involve a volatile function or a Var of the index's relation
- *	  except for a boolean OR expression input: due to a trade-off between the
- *	  expected execution speedup and planning complexity, we limit or->saop
- *	  transformation by obvious cases when an index scan can get a profit.
+ *	  doesn't involve a volatile function or a Var of the index's relation.
  *	  In particular, Vars belonging to other relations of the query are
  *	  accepted here, since a clause of that form can be used in a
  *	  parameterized indexscan.  It's the responsibility of higher code levels
@@ -3246,7 +3249,8 @@ match_orclause_to_indexcol(PlannerInfo *root,
 	Oid			arraytype = InvalidOid;
 	Oid			inputcollid = InvalidOid;
 	bool		firstTime = true;
-	bool		haveParam = false;
+	bool		haveNonConst = false;
+	Index		indexRelid = index->rel->relid;
 
 	Assert(IsA(orclause, BoolExpr));
 	Assert(orclause->boolop == OR_EXPR);
@@ -3258,10 +3262,9 @@ match_orclause_to_indexcol(PlannerInfo *root,
 	/*
 	 * Try to convert a list of OR-clauses to a single SAOP expression. Each
 	 * OR entry must be in the form: (indexkey operator constant) or (constant
-	 * operator indexkey).  Operators of all the entries must match.  Constant
-	 * might be either Const or Param.  To be effective, give up on the first
-	 * non-matching entry.  Exit is implemented as a break from the loop,
-	 * which is catched afterwards.
+	 * operator indexkey).  Operators of all the entries must match.  To be
+	 * effective, give up on the first non-matching entry.  Exit is
+	 * implemented as a break from the loop, which is catched afterwards.
 	 */
 	foreach(lc, orclause->args)
 	{
@@ -3312,17 +3315,21 @@ match_orclause_to_indexcol(PlannerInfo *root,
 
 		/*
 		 * Check for clauses of the form: (indexkey operator constant) or
-		 * (constant operator indexkey).  Determine indexkey side first, check
-		 * the constant later.
+		 * (constant operator indexkey).  See match_clause_to_indexcol's notes
+		 * about const-ness.
 		 */
 		leftop = (Node *) linitial(subClause->args);
 		rightop = (Node *) lsecond(subClause->args);
-		if (match_index_to_operand(leftop, indexcol, index))
+		if (match_index_to_operand(leftop, indexcol, index) &&
+			!bms_is_member(indexRelid, rinfo->right_relids) &&
+			!contain_volatile_functions(rightop))
 		{
 			indexExpr = leftop;
 			constExpr = rightop;
 		}
-		else if (match_index_to_operand(rightop, indexcol, index))
+		else if (match_index_to_operand(rightop, indexcol, index) &&
+			!bms_is_member(indexRelid, rinfo->left_relids) &&
+			!contain_volatile_functions(leftop))
 		{
 			opno = get_commutator(opno);
 			if (!OidIsValid(opno))
@@ -3349,10 +3356,6 @@ match_orclause_to_indexcol(PlannerInfo *root,
 		if (IsA(indexExpr, RelabelType))
 			indexExpr = (Node *) ((RelabelType *) indexExpr)->arg;
 
-		/* We allow constant to be Const or Param */
-		if (!IsA(constExpr, Const) && !IsA(constExpr, Param))
-			break;
-
 		/* Forbid transformation for composite types, records. */
 		if (type_is_rowtype(exprType(constExpr)) ||
 			type_is_rowtype(exprType(indexExpr)))
@@ -3389,8 +3392,12 @@ match_orclause_to_indexcol(PlannerInfo *root,
 				break;
 		}
 
-		if (IsA(constExpr, Param))
-			haveParam = true;
+		/*
+		 * Check if our list of constants in match_clause_to_indexcol's
+		 * understanding of const-ness have something other than Const.
+		 */
+		if (!IsA(constExpr, Const))
+			haveNonConst = true;
 		consts = lappend(consts, constExpr);
 	}
 
@@ -3407,10 +3414,10 @@ match_orclause_to_indexcol(PlannerInfo *root,
 
 	/*
 	 * Assemble an array from the list of constants.  It seems more profitable
-	 * to build a const array.  But in the presence of parameters, we don't
+	 * to build a const array.  But in the presence of other nodes, we don't
 	 * have a specific value here and must employ an ArrayExpr instead.
 	 */
-	if (haveParam)
+	if (haveNonConst)
 	{
 		ArrayExpr  *arrayExpr = makeNode(ArrayExpr);
 
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 079fcf46f0d..634eff4d751 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -3849,14 +3849,11 @@ where q1 = thousand or q2 = thousand;
                ->  Seq Scan on q2
          ->  Bitmap Heap Scan on tenk1
                Recheck Cond: ((q1.q1 = thousand) OR (q2.q2 = thousand))
-               ->  BitmapOr
-                     ->  Bitmap Index Scan on tenk1_thous_tenthous
-                           Index Cond: (thousand = q1.q1)
-                     ->  Bitmap Index Scan on tenk1_thous_tenthous
-                           Index Cond: (thousand = q2.q2)
+               ->  Bitmap Index Scan on tenk1_thous_tenthous
+                     Index Cond: (thousand = ANY (ARRAY[q1.q1, q2.q2]))
    ->  Hash
          ->  Seq Scan on int4_tbl
-(15 rows)
+(12 rows)
 
 explain (costs off)
 select * from
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
index 108f9ecb445..af468682a2d 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -2533,24 +2533,24 @@ where not exists (select 1 from prtx2
          ->  Seq Scan on prtx1_1
                Filter: ((a < 20) AND (c = 91))
          ->  Bitmap Heap Scan on prtx2_1
-               Recheck Cond: ((b = (prtx1_1.b + 1)) OR (c = 99))
+               Recheck Cond: ((c = 99) OR (b = (prtx1_1.b + 1)))
                Filter: (a = prtx1_1.a)
                ->  BitmapOr
-                     ->  Bitmap Index Scan on prtx2_1_b_idx
-                           Index Cond: (b = (prtx1_1.b + 1))
                      ->  Bitmap Index Scan on prtx2_1_c_idx
                            Index Cond: (c = 99)
+                     ->  Bitmap Index Scan on prtx2_1_b_idx
+                           Index Cond: (b = (prtx1_1.b + 1))
    ->  Nested Loop Anti Join
          ->  Seq Scan on prtx1_2
                Filter: ((a < 20) AND (c = 91))
          ->  Bitmap Heap Scan on prtx2_2
-               Recheck Cond: ((b = (prtx1_2.b + 1)) OR (c = 99))
+               Recheck Cond: ((c = 99) OR (b = (prtx1_2.b + 1)))
                Filter: (a = prtx1_2.a)
                ->  BitmapOr
-                     ->  Bitmap Index Scan on prtx2_2_b_idx
-                           Index Cond: (b = (prtx1_2.b + 1))
                      ->  Bitmap Index Scan on prtx2_2_c_idx
                            Index Cond: (c = 99)
+                     ->  Bitmap Index Scan on prtx2_2_b_idx
+                           Index Cond: (b = (prtx1_2.b + 1))
 (23 rows)
 
 select * from prtx1
-- 
2.39.5 (Apple Git-154)

