Hi.

There's the following inconsistency between try_mergejoin_path() and create_mergejoin_plan(). When clause operator has no commutator, we can end up with mergejoin path. Later create_mergejoin_plan() will call get_switched_clauses(). This function can error out with

ERROR:  could not find commutator for operator XXX

The similar behavior seems to be present also for hash join.

Attaching a test case (in patch) and a possible fix.

--
Best regards,
Alexander Pyhalov,
Postgres Professional
From ea9497c9f62b3613482d5b267c7907070ba8fcd4 Mon Sep 17 00:00:00 2001
From: Alexander Pyhalov <a.pyha...@postgrespro.ru>
Date: Mon, 17 Jun 2024 10:33:10 +0300
Subject: [PATCH] Merge and hash joins need more checks with custom operators

---
 src/backend/optimizer/path/joinpath.c    | 46 +++++++++++++++++++++++-
 src/test/regress/expected/equivclass.out | 45 +++++++++++++++++++++++
 src/test/regress/sql/equivclass.sql      | 20 +++++++++++
 3 files changed, 110 insertions(+), 1 deletion(-)

diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index 5be8da9e095..dae06117569 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -95,7 +95,7 @@ static void generate_mergejoin_paths(PlannerInfo *root,
 									 Path *inner_cheapest_total,
 									 List *merge_pathkeys,
 									 bool is_partial);
-
+static bool all_clauses_compatible(Relids outerrelids, List *clauses);
 
 /*
  * add_paths_to_joinrel
@@ -972,6 +972,10 @@ try_mergejoin_path(PlannerInfo *root,
 		return;
 	}
 
+	/* Sometimes can't create merjejoin plan if we have non-commutable clauses */
+	if (!all_clauses_compatible(outer_path->parent->relids, mergeclauses))
+		return;
+
 	/*
 	 * If the given paths are already well enough ordered, we can skip doing
 	 * an explicit sort.
@@ -1036,6 +1040,10 @@ try_partial_mergejoin_path(PlannerInfo *root,
 {
 	JoinCostWorkspace workspace;
 
+	/* Sometimes can't create merjejoin plan if we have non-commutable clauses */
+	if (!all_clauses_compatible(outer_path->parent->relids, mergeclauses))
+		return;
+
 	/*
 	 * See comments in try_partial_hashjoin_path().
 	 */
@@ -1129,6 +1137,10 @@ try_hashjoin_path(PlannerInfo *root,
 		return;
 	}
 
+	/* Sometimes can't create hashjoin plan if we have non-commutable clauses */
+	if (!all_clauses_compatible(outer_path->parent->relids, hashclauses))
+		return;
+
 	/*
 	 * See comments in try_nestloop_path().  Also note that hashjoin paths
 	 * never have any output pathkeys, per comments in create_hashjoin_path.
@@ -2431,3 +2443,35 @@ select_mergejoin_clauses(PlannerInfo *root,
 
 	return result_list;
 }
+
+/*
+ * all_clauses_compatible
+ *
+ * Determine that all clauses' operators have commutator
+ * when necessary.  While constructing merge join plan or
+ * hash join plan, optimizer can call get_switched_clauses(),
+ * which can error out if clauses are not commutable.
+ * The logic here should match one in get_switched_clauses().
+ */
+static bool
+all_clauses_compatible(Relids outer_relids, List *clauses)
+{
+	ListCell   *l;
+
+	foreach(l, clauses)
+	{
+		RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
+		OpExpr	   *clause = (OpExpr *) restrictinfo->clause;
+		Oid			opoid;
+
+		if (bms_is_subset(restrictinfo->right_relids, outer_relids))
+		{
+			opoid = get_commutator(clause->opno);
+
+			if (!OidIsValid(opoid))
+				return false;
+		}
+	}
+
+	return true;
+}
diff --git a/src/test/regress/expected/equivclass.out b/src/test/regress/expected/equivclass.out
index 126f7047fed..c8c85fd6c2d 100644
--- a/src/test/regress/expected/equivclass.out
+++ b/src/test/regress/expected/equivclass.out
@@ -323,6 +323,51 @@ explain (costs off)
                            Index Cond: (ff = '42'::bigint)
 (19 rows)
 
+-- merge join should check if conditions are commutable
+explain (costs off)
+with ss1 as materialized (
+	select ff + 1 as x from
+       (select ff + 2 as ff from ec1
+        union all
+        select ff + 3 as ff from ec1) ss0
+     union all
+     select ff + 4 as x from ec1
+)
+  select * from ec1,
+    (select ff + 1 as x from
+       (select ff + 2 as ff from ec1
+        union all
+        select ff + 3 as ff from ec1) ss0
+     union all
+     select ff + 4 as x from ec1) as ss2,
+  ss1
+  where ss1.x = ec1.f1 and ss1.x = ss2.x and ec1.ff = 42::int8;
+                        QUERY PLAN                         
+-----------------------------------------------------------
+ Merge Join
+   Merge Cond: ((((ec1_1.ff + 2) + 1)) = ss1.x)
+   CTE ss1
+     ->  Append
+           ->  Seq Scan on ec1 ec1_4
+           ->  Seq Scan on ec1 ec1_5
+           ->  Seq Scan on ec1 ec1_6
+   ->  Merge Append
+         Sort Key: (((ec1_1.ff + 2) + 1))
+         ->  Index Scan using ec1_expr2 on ec1 ec1_1
+         ->  Index Scan using ec1_expr3 on ec1 ec1_2
+         ->  Index Scan using ec1_expr4 on ec1 ec1_3
+   ->  Materialize
+         ->  Merge Join
+               Merge Cond: (ss1.x = ec1.f1)
+               ->  Sort
+                     Sort Key: ss1.x
+                     ->  CTE Scan on ss1
+               ->  Sort
+                     Sort Key: ec1.f1 USING <
+                     ->  Index Scan using ec1_pkey on ec1
+                           Index Cond: (ff = '42'::bigint)
+(22 rows)
+
 -- check partially indexed scan
 set enable_nestloop = on;
 set enable_mergejoin = off;
diff --git a/src/test/regress/sql/equivclass.sql b/src/test/regress/sql/equivclass.sql
index 247b0a31055..5f74987dbdc 100644
--- a/src/test/regress/sql/equivclass.sql
+++ b/src/test/regress/sql/equivclass.sql
@@ -193,6 +193,26 @@ explain (costs off)
      select ff + 4 as x from ec1) as ss2
   where ss1.x = ec1.f1 and ss1.x = ss2.x and ec1.ff = 42::int8;
 
+-- merge join should check if conditions are commutable
+explain (costs off)
+with ss1 as materialized (
+	select ff + 1 as x from
+       (select ff + 2 as ff from ec1
+        union all
+        select ff + 3 as ff from ec1) ss0
+     union all
+     select ff + 4 as x from ec1
+)
+  select * from ec1,
+    (select ff + 1 as x from
+       (select ff + 2 as ff from ec1
+        union all
+        select ff + 3 as ff from ec1) ss0
+     union all
+     select ff + 4 as x from ec1) as ss2,
+  ss1
+  where ss1.x = ec1.f1 and ss1.x = ss2.x and ec1.ff = 42::int8;
+
 -- check partially indexed scan
 set enable_nestloop = on;
 set enable_mergejoin = off;
-- 
2.34.1

Reply via email to