Hi.There's the following inconsistency between try_mergejoin_path() and create_mergejoin_plan(). When clause operator has no commutator, we can end up with mergejoin path. Later create_mergejoin_plan() will call get_switched_clauses(). This function can error out with
ERROR: could not find commutator for operator XXX The similar behavior seems to be present also for hash join. Attaching a test case (in patch) and a possible fix. -- Best regards, Alexander Pyhalov, Postgres Professional
From ea9497c9f62b3613482d5b267c7907070ba8fcd4 Mon Sep 17 00:00:00 2001 From: Alexander Pyhalov <a.pyha...@postgrespro.ru> Date: Mon, 17 Jun 2024 10:33:10 +0300 Subject: [PATCH] Merge and hash joins need more checks with custom operators --- src/backend/optimizer/path/joinpath.c | 46 +++++++++++++++++++++++- src/test/regress/expected/equivclass.out | 45 +++++++++++++++++++++++ src/test/regress/sql/equivclass.sql | 20 +++++++++++ 3 files changed, 110 insertions(+), 1 deletion(-) diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index 5be8da9e095..dae06117569 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -95,7 +95,7 @@ static void generate_mergejoin_paths(PlannerInfo *root, Path *inner_cheapest_total, List *merge_pathkeys, bool is_partial); - +static bool all_clauses_compatible(Relids outerrelids, List *clauses); /* * add_paths_to_joinrel @@ -972,6 +972,10 @@ try_mergejoin_path(PlannerInfo *root, return; } + /* Sometimes can't create merjejoin plan if we have non-commutable clauses */ + if (!all_clauses_compatible(outer_path->parent->relids, mergeclauses)) + return; + /* * If the given paths are already well enough ordered, we can skip doing * an explicit sort. @@ -1036,6 +1040,10 @@ try_partial_mergejoin_path(PlannerInfo *root, { JoinCostWorkspace workspace; + /* Sometimes can't create merjejoin plan if we have non-commutable clauses */ + if (!all_clauses_compatible(outer_path->parent->relids, mergeclauses)) + return; + /* * See comments in try_partial_hashjoin_path(). */ @@ -1129,6 +1137,10 @@ try_hashjoin_path(PlannerInfo *root, return; } + /* Sometimes can't create hashjoin plan if we have non-commutable clauses */ + if (!all_clauses_compatible(outer_path->parent->relids, hashclauses)) + return; + /* * See comments in try_nestloop_path(). Also note that hashjoin paths * never have any output pathkeys, per comments in create_hashjoin_path. @@ -2431,3 +2443,35 @@ select_mergejoin_clauses(PlannerInfo *root, return result_list; } + +/* + * all_clauses_compatible + * + * Determine that all clauses' operators have commutator + * when necessary. While constructing merge join plan or + * hash join plan, optimizer can call get_switched_clauses(), + * which can error out if clauses are not commutable. + * The logic here should match one in get_switched_clauses(). + */ +static bool +all_clauses_compatible(Relids outer_relids, List *clauses) +{ + ListCell *l; + + foreach(l, clauses) + { + RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l); + OpExpr *clause = (OpExpr *) restrictinfo->clause; + Oid opoid; + + if (bms_is_subset(restrictinfo->right_relids, outer_relids)) + { + opoid = get_commutator(clause->opno); + + if (!OidIsValid(opoid)) + return false; + } + } + + return true; +} diff --git a/src/test/regress/expected/equivclass.out b/src/test/regress/expected/equivclass.out index 126f7047fed..c8c85fd6c2d 100644 --- a/src/test/regress/expected/equivclass.out +++ b/src/test/regress/expected/equivclass.out @@ -323,6 +323,51 @@ explain (costs off) Index Cond: (ff = '42'::bigint) (19 rows) +-- merge join should check if conditions are commutable +explain (costs off) +with ss1 as materialized ( + select ff + 1 as x from + (select ff + 2 as ff from ec1 + union all + select ff + 3 as ff from ec1) ss0 + union all + select ff + 4 as x from ec1 +) + select * from ec1, + (select ff + 1 as x from + (select ff + 2 as ff from ec1 + union all + select ff + 3 as ff from ec1) ss0 + union all + select ff + 4 as x from ec1) as ss2, + ss1 + where ss1.x = ec1.f1 and ss1.x = ss2.x and ec1.ff = 42::int8; + QUERY PLAN +----------------------------------------------------------- + Merge Join + Merge Cond: ((((ec1_1.ff + 2) + 1)) = ss1.x) + CTE ss1 + -> Append + -> Seq Scan on ec1 ec1_4 + -> Seq Scan on ec1 ec1_5 + -> Seq Scan on ec1 ec1_6 + -> Merge Append + Sort Key: (((ec1_1.ff + 2) + 1)) + -> Index Scan using ec1_expr2 on ec1 ec1_1 + -> Index Scan using ec1_expr3 on ec1 ec1_2 + -> Index Scan using ec1_expr4 on ec1 ec1_3 + -> Materialize + -> Merge Join + Merge Cond: (ss1.x = ec1.f1) + -> Sort + Sort Key: ss1.x + -> CTE Scan on ss1 + -> Sort + Sort Key: ec1.f1 USING < + -> Index Scan using ec1_pkey on ec1 + Index Cond: (ff = '42'::bigint) +(22 rows) + -- check partially indexed scan set enable_nestloop = on; set enable_mergejoin = off; diff --git a/src/test/regress/sql/equivclass.sql b/src/test/regress/sql/equivclass.sql index 247b0a31055..5f74987dbdc 100644 --- a/src/test/regress/sql/equivclass.sql +++ b/src/test/regress/sql/equivclass.sql @@ -193,6 +193,26 @@ explain (costs off) select ff + 4 as x from ec1) as ss2 where ss1.x = ec1.f1 and ss1.x = ss2.x and ec1.ff = 42::int8; +-- merge join should check if conditions are commutable +explain (costs off) +with ss1 as materialized ( + select ff + 1 as x from + (select ff + 2 as ff from ec1 + union all + select ff + 3 as ff from ec1) ss0 + union all + select ff + 4 as x from ec1 +) + select * from ec1, + (select ff + 1 as x from + (select ff + 2 as ff from ec1 + union all + select ff + 3 as ff from ec1) ss0 + union all + select ff + 4 as x from ec1) as ss2, + ss1 + where ss1.x = ec1.f1 and ss1.x = ss2.x and ec1.ff = 42::int8; + -- check partially indexed scan set enable_nestloop = on; set enable_mergejoin = off; -- 2.34.1