On 11/30/20 7:43 PM, Anastasia Lubennikova wrote:
This entry was inactive during this CF, so I've marked it as returned
with feedback. Feel free to resubmit an updated version to a future
commitfest.
Attached version is rebased on current master and fixes problems with
complex parameterized plans - 'reparameterize by child' feature.
Problems with reparameterization machinery can be demonstrated by TPC-H
benchmark.
--
regards,
Andrey Lepikhov
Postgres Professional
>From 6a15a52bfb90659c51b3a918d48037c474ffe9dd Mon Sep 17 00:00:00 2001
From: Andrey Lepikhov <a.lepik...@postgrespro.ru>
Date: Fri, 2 Apr 2021 11:02:20 +0500
Subject: [PATCH] Asymmetric partitionwise join.
Teach optimizer to consider partitionwise join of non-partitioned
table with each partition of partitioned table.
This technique cause changes of 'reparameterize by child' machinery.
---
src/backend/optimizer/path/joinpath.c | 9 +
src/backend/optimizer/path/joinrels.c | 151 ++++++++++++++
src/backend/optimizer/util/appendinfo.c | 28 ++-
src/backend/optimizer/util/pathnode.c | 9 +-
src/backend/optimizer/util/relnode.c | 14 +-
src/include/optimizer/paths.h | 7 +-
src/test/regress/expected/partition_join.out | 209 +++++++++++++++++++
src/test/regress/sql/partition_join.sql | 99 +++++++++
8 files changed, 509 insertions(+), 17 deletions(-)
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index e9b6968b1d..6ba6d32ae4 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -335,6 +335,15 @@ add_paths_to_joinrel(PlannerInfo *root,
if (set_join_pathlist_hook)
set_join_pathlist_hook(root, joinrel, outerrel, innerrel,
jointype, &extra);
+
+ /*
+ * 7. If outer relation is delivered from partition-tables, consider
+ * distributing inner relation to every partition-leaf prior to
+ * append these leafs.
+ */
+ try_asymmetric_partitionwise_join(root, joinrel,
+ outerrel, innerrel,
+ jointype, &extra);
}
/*
diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c
index 0dbe2ac726..6f900475bb 100644
--- a/src/backend/optimizer/path/joinrels.c
+++ b/src/backend/optimizer/path/joinrels.c
@@ -16,6 +16,7 @@
#include "miscadmin.h"
#include "optimizer/appendinfo.h"
+#include "optimizer/cost.h"
#include "optimizer/joininfo.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
@@ -1551,6 +1552,156 @@ try_partitionwise_join(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
}
}
+/*
+ * Build RelOptInfo on JOIN of each partition of the outer relation and the inner
+ * relation. Return List of such RelOptInfo's. Return NIL, if at least one of
+ * these JOINs are impossible to build.
+ */
+static List *
+extract_asymmetric_partitionwise_subjoin(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ AppendPath *append_path,
+ RelOptInfo *inner_rel,
+ JoinType jointype,
+ JoinPathExtraData *extra)
+{
+ List *result = NIL;
+ ListCell *lc;
+
+ foreach (lc, append_path->subpaths)
+ {
+ Path *child_path = lfirst(lc);
+ RelOptInfo *child_rel = child_path->parent;
+ Relids child_join_relids;
+ RelOptInfo *child_join_rel;
+ SpecialJoinInfo *child_sjinfo;
+ List *child_restrictlist;
+ AppendRelInfo **appinfos;
+ int nappinfos;
+
+ child_join_relids = bms_union(child_rel->relids,
+ inner_rel->relids);
+ appinfos = find_appinfos_by_relids(root, child_join_relids,
+ &nappinfos);
+ child_sjinfo = build_child_join_sjinfo(root, extra->sjinfo,
+ child_rel->relids,
+ inner_rel->relids);
+ child_restrictlist = (List *)
+ adjust_appendrel_attrs(root, (Node *)extra->restrictlist,
+ nappinfos, appinfos);
+ pfree(appinfos);
+
+ child_join_rel = find_join_rel(root, child_join_relids);
+ if (!child_join_rel)
+ {
+ child_join_rel = build_child_join_rel(root,
+ child_rel,
+ inner_rel,
+ joinrel,
+ child_restrictlist,
+ child_sjinfo,
+ jointype);
+ if (!child_join_rel)
+ {
+ /*
+ * If can't build JOIN between inner relation and one of the outer
+ * partitions - return immediately.
+ */
+ return NIL;
+ }
+ }
+ else
+ {
+ /*
+ * TODO:
+ * Can't imagine situation when join relation already exists. But in
+ * the 'partition_join' regression test it happens.
+ * It may be an indicator of possible problems.
+ */
+ }
+
+ populate_joinrel_with_paths(root,
+ child_rel,
+ inner_rel,
+ child_join_rel,
+ child_sjinfo,
+ child_restrictlist);
+
+ /* Give up if asymmetric partition-wise join is not available */
+ if (child_join_rel->pathlist == NIL)
+ return NIL;
+
+ set_cheapest(child_join_rel);
+ result = lappend(result, child_join_rel);
+ }
+ return result;
+}
+
+void
+try_asymmetric_partitionwise_join(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ RelOptInfo *outer_rel,
+ RelOptInfo *inner_rel,
+ JoinType jointype,
+ JoinPathExtraData *extra)
+{
+ ListCell *lc;
+
+ if (!enable_partitionwise_join)
+ return;
+
+ if (IS_OTHER_REL(outer_rel) || IS_OTHER_REL(inner_rel))
+ return;
+
+ if (jointype != JOIN_INNER && jointype != JOIN_LEFT)
+ return;
+
+ foreach (lc, outer_rel->pathlist)
+ {
+ AppendPath *append_path = lfirst(lc);
+
+ /*
+ * MEMO: We assume this pathlist keeps at least one AppendPath that
+ * represents partitioned table-scan, symmetric or asymmetric
+ * partition-wise join. It is not correct right now, however, a hook
+ * on add_path() to give additional decision for path removel allows
+ * to retain this kind of AppendPath, regardless of its cost.
+ */
+ if (IsA(append_path, AppendPath))
+ {
+ List **join_rel_level_saved;
+ List *live_childrels = NIL;
+
+ join_rel_level_saved = root->join_rel_level;
+ PG_TRY();
+ {
+ /* temporary disables "dynamic programming" algorithm */
+ root->join_rel_level = NULL;
+
+ live_childrels =
+ extract_asymmetric_partitionwise_subjoin(root,
+ joinrel,
+ append_path,
+ inner_rel,
+ jointype,
+ extra);
+ }
+ PG_CATCH();
+ {
+ root->join_rel_level = join_rel_level_saved;
+ PG_RE_THROW();
+ }
+ PG_END_TRY();
+ root->join_rel_level = join_rel_level_saved;
+
+ if (live_childrels != NIL)
+ add_paths_to_append_rel(root, joinrel, live_childrels);
+
+ break;
+ }
+ }
+}
+
/*
* Construct the SpecialJoinInfo for a child-join by translating
* SpecialJoinInfo for the join between parents. left_relids and right_relids
diff --git a/src/backend/optimizer/util/appendinfo.c b/src/backend/optimizer/util/appendinfo.c
index af46f581ac..be4624b619 100644
--- a/src/backend/optimizer/util/appendinfo.c
+++ b/src/backend/optimizer/util/appendinfo.c
@@ -202,7 +202,9 @@ adjust_appendrel_attrs(PlannerInfo *root, Node *node, int nappinfos,
context.appinfos = appinfos;
/* If there's nothing to adjust, don't call this function. */
- Assert(nappinfos >= 1 && appinfos != NULL);
+ /* If there's nothing to adjust, just return a duplication */
+ if (nappinfos == 0)
+ return copyObject(node);
/* Should never be translating a Query tree. */
Assert(node == NULL || !IsA(node, Query));
@@ -494,8 +496,6 @@ adjust_appendrel_attrs_multilevel(PlannerInfo *root, Node *node,
int nappinfos;
int cnt;
- Assert(bms_num_members(child_relids) == bms_num_members(top_parent_relids));
-
appinfos = find_appinfos_by_relids(root, child_relids, &nappinfos);
/* Construct relids set for the immediate parent of given child. */
@@ -506,8 +506,13 @@ adjust_appendrel_attrs_multilevel(PlannerInfo *root, Node *node,
parent_relids = bms_add_member(parent_relids, appinfo->parent_relid);
}
- /* Recurse if immediate parent is not the top parent. */
- if (!bms_equal(parent_relids, top_parent_relids))
+ /*
+ * Recurse if immediate parent is not the top parent. Keep in mind that in a
+ * case of asymmetric JOIN top_parent_relids can contain relids which aren't
+ * part of an append node.
+ */
+ if (!bms_equal(parent_relids, top_parent_relids) &&
+ !bms_is_subset(parent_relids, top_parent_relids))
node = adjust_appendrel_attrs_multilevel(root, node, parent_relids,
top_parent_relids);
@@ -565,6 +570,7 @@ adjust_child_relids_multilevel(PlannerInfo *root, Relids relids,
AppendRelInfo **appinfos;
int nappinfos;
Relids parent_relids = NULL;
+ Relids normal_relids = NULL;
Relids result;
Relids tmp_result = NULL;
int cnt;
@@ -579,12 +585,17 @@ adjust_child_relids_multilevel(PlannerInfo *root, Relids relids,
appinfos = find_appinfos_by_relids(root, child_relids, &nappinfos);
/* Construct relids set for the immediate parent of the given child. */
+ normal_relids = bms_copy(child_relids);
for (cnt = 0; cnt < nappinfos; cnt++)
{
AppendRelInfo *appinfo = appinfos[cnt];
parent_relids = bms_add_member(parent_relids, appinfo->parent_relid);
+ normal_relids = bms_del_member(normal_relids, appinfo->child_relid);
}
+ parent_relids = bms_union(parent_relids, normal_relids);
+ if (normal_relids)
+ bms_free(normal_relids);
/* Recurse if immediate parent is not the top parent. */
if (!bms_equal(parent_relids, top_parent_relids))
@@ -715,11 +726,11 @@ AppendRelInfo **
find_appinfos_by_relids(PlannerInfo *root, Relids relids, int *nappinfos)
{
AppendRelInfo **appinfos;
+ int nrooms = bms_num_members(relids);
int cnt = 0;
int i;
- *nappinfos = bms_num_members(relids);
- appinfos = (AppendRelInfo **) palloc(sizeof(AppendRelInfo *) * *nappinfos);
+ appinfos = (AppendRelInfo **) palloc(sizeof(AppendRelInfo *) * nrooms);
i = -1;
while ((i = bms_next_member(relids, i)) >= 0)
@@ -727,10 +738,11 @@ find_appinfos_by_relids(PlannerInfo *root, Relids relids, int *nappinfos)
AppendRelInfo *appinfo = root->append_rel_array[i];
if (!appinfo)
- elog(ERROR, "child rel %d not found in append_rel_array", i);
+ continue;
appinfos[cnt++] = appinfo;
}
+ *nappinfos = cnt;
return appinfos;
}
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index b248b038e0..73cfb4748d 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -4209,7 +4209,14 @@ do { \
MemoryContextSwitchTo(oldcontext);
}
- bms_free(required_outer);
+
+ /*
+ * If adjust_child_relids_multilevel don't do replacements it returns
+ * the original set, not a copy. It is possible in the case of asymmetric
+ * JOIN and child_rel->relids contains relids only of plane relations.
+ */
+ if (required_outer != old_ppi->ppi_req_outer)
+ bms_free(required_outer);
new_path->param_info = new_ppi;
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index e105a4d5f1..f98a82e725 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -790,11 +790,8 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
AppendRelInfo **appinfos;
int nappinfos;
- /* Only joins between "other" relations land here. */
- Assert(IS_OTHER_REL(outer_rel) && IS_OTHER_REL(inner_rel));
-
- /* The parent joinrel should have consider_partitionwise_join set. */
- Assert(parent_joinrel->consider_partitionwise_join);
+ /* Either of relations must be "other" relation at least. */
+ Assert(IS_OTHER_REL(outer_rel) || IS_OTHER_REL(inner_rel));
joinrel->reloptkind = RELOPT_OTHER_JOINREL;
joinrel->relids = bms_union(outer_rel->relids, inner_rel->relids);
@@ -851,8 +848,11 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
joinrel->partexprs = NULL;
joinrel->nullable_partexprs = NULL;
- joinrel->top_parent_relids = bms_union(outer_rel->top_parent_relids,
- inner_rel->top_parent_relids);
+ joinrel->top_parent_relids =
+ bms_union(IS_OTHER_REL(outer_rel) ?
+ outer_rel->top_parent_relids : outer_rel->relids,
+ IS_OTHER_REL(inner_rel) ?
+ inner_rel->top_parent_relids : inner_rel->relids);
/* Compute information relevant to foreign relations. */
set_foreign_rel_properties(joinrel, outer_rel, inner_rel);
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 035d3e1206..d64aa37f80 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -109,7 +109,12 @@ extern bool have_join_order_restriction(PlannerInfo *root,
extern bool have_dangerous_phv(PlannerInfo *root,
Relids outer_relids, Relids inner_params);
extern void mark_dummy_rel(RelOptInfo *rel);
-
+extern void try_asymmetric_partitionwise_join(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ RelOptInfo *outer_rel,
+ RelOptInfo *inner_rel,
+ JoinType jointype,
+ JoinPathExtraData *extra);
/*
* equivclass.c
* routines for managing EquivalenceClasses
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
index 27f7525b3e..2bc7ad70d9 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -2320,6 +2320,215 @@ SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_adv t1 INNER JOIN prt2_adv t2 ON (t1.a =
375 | 0375 | 375 | 0375
(8 rows)
+--
+-- For asymmetric partition-wise join
+--
+CREATE TABLE prt5 (hkey int, a int, b int) PARTITION BY HASH(hkey);
+CREATE TABLE prt5_p0 PARTITION OF prt5
+ FOR VALUES WITH (modulus 3, remainder 0);
+CREATE TABLE prt5_p1 PARTITION OF prt5
+ FOR VALUES WITH (modulus 3, remainder 1);
+CREATE TABLE prt5_p2 PARTITION OF prt5
+ FOR VALUES WITH (modulus 3, remainder 2);
+CREATE TABLE t5_1 (aid int, alabel text);
+CREATE TABLE t5_2 (bid int, blabel text);
+INSERT INTO prt5 (SELECT x, (1000.0 * random())::int,
+ (1000.0 * random())::int
+ FROM generate_series(1,1000000) x);
+INSERT INTO t5_1 (SELECT x, md5(x::text) FROM generate_series(-200, 800) x);
+INSERT INTO t5_2 (SELECT x, md5(x::text) FROM generate_series(-200, 800) x);
+VACUUM ANALYZE prt5;
+VACUUM ANALYZE t5_1;
+VACUUM ANALYZE t5_2;
+SET max_parallel_workers_per_gather = 0;
+EXPLAIN (COSTS OFF)
+SELECT *
+ FROM prt5 JOIN t5_1 ON a = aid AND alabel like '%abc%';
+ QUERY PLAN
+-------------------------------------------------------
+ Append
+ -> Hash Join
+ Hash Cond: (prt5_1.a = t5_1.aid)
+ -> Seq Scan on prt5_p0 prt5_1
+ -> Hash
+ -> Seq Scan on t5_1
+ Filter: (alabel ~~ '%abc%'::text)
+ -> Hash Join
+ Hash Cond: (prt5_2.a = t5_1.aid)
+ -> Seq Scan on prt5_p1 prt5_2
+ -> Hash
+ -> Seq Scan on t5_1
+ Filter: (alabel ~~ '%abc%'::text)
+ -> Hash Join
+ Hash Cond: (prt5_3.a = t5_1.aid)
+ -> Seq Scan on prt5_p2 prt5_3
+ -> Hash
+ -> Seq Scan on t5_1
+ Filter: (alabel ~~ '%abc%'::text)
+(19 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT *
+ FROM prt5 JOIN t5_1 ON a = aid AND alabel like '%ab%'
+ JOIN t5_2 ON b = bid AND blabel like '%cd%';
+ QUERY PLAN
+------------------------------------------------------------
+ Append
+ -> Hash Join
+ Hash Cond: (prt5_1.a = t5_1.aid)
+ -> Hash Join
+ Hash Cond: (prt5_1.b = t5_2.bid)
+ -> Seq Scan on prt5_p0 prt5_1
+ -> Hash
+ -> Seq Scan on t5_2
+ Filter: (blabel ~~ '%cd%'::text)
+ -> Hash
+ -> Seq Scan on t5_1
+ Filter: (alabel ~~ '%ab%'::text)
+ -> Hash Join
+ Hash Cond: (prt5_2.a = t5_1.aid)
+ -> Hash Join
+ Hash Cond: (prt5_2.b = t5_2.bid)
+ -> Seq Scan on prt5_p1 prt5_2
+ -> Hash
+ -> Seq Scan on t5_2
+ Filter: (blabel ~~ '%cd%'::text)
+ -> Hash
+ -> Seq Scan on t5_1
+ Filter: (alabel ~~ '%ab%'::text)
+ -> Hash Join
+ Hash Cond: (prt5_3.a = t5_1.aid)
+ -> Hash Join
+ Hash Cond: (prt5_3.b = t5_2.bid)
+ -> Seq Scan on prt5_p2 prt5_3
+ -> Hash
+ -> Seq Scan on t5_2
+ Filter: (blabel ~~ '%cd%'::text)
+ -> Hash
+ -> Seq Scan on t5_1
+ Filter: (alabel ~~ '%ab%'::text)
+(34 rows)
+
+-- unable to extract non-partitioned right relation
+EXPLAIN (COSTS OFF)
+SELECT * FROM prt5 RIGHT JOIN t5_1 ON a = aid AND alabel like '%abc%';
+ QUERY PLAN
+-----------------------------------------------
+ Hash Right Join
+ Hash Cond: (prt5.a = t5_1.aid)
+ Join Filter: (t5_1.alabel ~~ '%abc%'::text)
+ -> Append
+ -> Seq Scan on prt5_p0 prt5_1
+ -> Seq Scan on prt5_p1 prt5_2
+ -> Seq Scan on prt5_p2 prt5_3
+ -> Hash
+ -> Seq Scan on t5_1
+(9 rows)
+
+-- left side can be extracted, but no cost benefit
+EXPLAIN (COSTS OFF)
+SELECT * FROM prt5 LEFT JOIN t5_1 ON a = aid AND alabel like '%abc%';
+ QUERY PLAN
+-------------------------------------------------
+ Hash Left Join
+ Hash Cond: (prt5.a = t5_1.aid)
+ -> Append
+ -> Seq Scan on prt5_p0 prt5_1
+ -> Seq Scan on prt5_p1 prt5_2
+ -> Seq Scan on prt5_p2 prt5_3
+ -> Hash
+ -> Seq Scan on t5_1
+ Filter: (alabel ~~ '%abc%'::text)
+(9 rows)
+
+-- validation of the results with/without asymmetric partition-wise join
+SELECT * INTO pg_temp.result01a
+ FROM prt5 JOIN t5_1 ON a = aid AND alabel like '%abc%';
+SELECT * INTO pg_temp.result02a
+ FROM prt5 JOIN t5_1 ON a = aid AND alabel like '%ab%'
+ JOIN t5_2 ON b = bid AND blabel like '%cd%';
+SET enable_partitionwise_join = off;
+SELECT * INTO pg_temp.result01b
+ FROM prt5 JOIN t5_1 ON a = aid AND alabel like '%abc%';
+SELECT * INTO pg_temp.result02b
+ FROM prt5 JOIN t5_1 ON a = aid AND alabel like '%ab%'
+ JOIN t5_2 ON b = bid AND blabel like '%cd%';
+SELECT * FROM pg_temp.result01a EXCEPT SELECT * FROM pg_temp.result01b;
+ hkey | a | b | aid | alabel
+------+---+---+-----+--------
+(0 rows)
+
+SELECT * FROM pg_temp.result02a EXCEPT SELECT * FROM pg_temp.result02b;
+ hkey | a | b | aid | alabel | bid | blabel
+------+---+---+-----+--------+-----+--------
+(0 rows)
+
+RESET max_parallel_workers_per_gather;
+SET enable_partitionwise_join = true;
+-- Parameterized path examples.
+CREATE TABLE prta (id integer, payload text) PARTITION BY HASH (id);
+CREATE TABLE prta1 PARTITION OF prta FOR VALUES WITH (modulus 2, remainder 0);
+CREATE TABLE prta2 PARTITION OF prta FOR VALUES WITH (modulus 2, remainder 1);
+CREATE UNIQUE INDEX ON prta1 (id);
+CREATE UNIQUE INDEX ON prta2 (id);
+INSERT INTO prta (id, payload)
+ (SELECT *, ('abc' || id)::text AS payload
+ FROM generate_series(1,1) AS id);
+CREATE TABLE prtb (id integer, payload text) PARTITION BY HASH (id);
+CREATE TABLE prtb1 PARTITION OF prtb FOR VALUES WITH (modulus 2, remainder 0);
+CREATE TABLE prtb2 PARTITION OF prtb FOR VALUES WITH (modulus 2, remainder 1);
+CREATE UNIQUE INDEX ON prtb1 (id);
+CREATE UNIQUE INDEX ON prtb2 (id);
+INSERT INTO prtb (id, payload)
+ (SELECT *, ('def' || id)::text AS payload
+ FROM generate_series(1,1000) AS id);
+CREATE TABLE e (id integer, payload text) PARTITION BY HASH (id);
+CREATE TABLE e1 PARTITION OF e FOR VALUES WITH (modulus 2, remainder 0);
+CREATE TABLE e2 PARTITION OF e FOR VALUES WITH (modulus 2, remainder 1);
+INSERT INTO e (id, payload)
+ (SELECT *, ('ghi' || id)::text AS payload
+ FROM generate_series(1,1000) AS id);
+CREATE UNIQUE INDEX ON e1 (id);
+CREATE UNIQUE INDEX ON e2 (id);
+ANALYZE prta,prtb,e;
+EXPLAIN (COSTS OFF)
+SELECT * FROM prta,prtb WHERE prta.id=prtb.id;
+ QUERY PLAN
+-----------------------------------------------------------
+ Append
+ -> Nested Loop
+ -> Seq Scan on prta1 prta_1
+ -> Index Scan using prtb1_id_idx on prtb1 prtb_1
+ Index Cond: (id = prta_1.id)
+ -> Nested Loop
+ -> Seq Scan on prta2 prta_2
+ -> Index Scan using prtb2_id_idx on prtb2 prtb_2
+ Index Cond: (id = prta_2.id)
+(9 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM prta,prtb,e WHERE prta.id=prtb.id AND prta.id=e.id;
+ QUERY PLAN
+-----------------------------------------------------------------
+ Append
+ -> Nested Loop
+ Join Filter: (prta_1.id = e_1.id)
+ -> Nested Loop
+ -> Seq Scan on prta1 prta_1
+ -> Index Scan using prtb1_id_idx on prtb1 prtb_1
+ Index Cond: (id = prta_1.id)
+ -> Index Scan using e1_id_idx on e1 e_1
+ Index Cond: (id = prtb_1.id)
+ -> Nested Loop
+ Join Filter: (prta_2.id = e_2.id)
+ -> Nested Loop
+ -> Seq Scan on prta2 prta_2
+ -> Index Scan using prtb2_id_idx on prtb2 prtb_2
+ Index Cond: (id = prta_2.id)
+ -> Index Scan using e2_id_idx on e2 e_2
+ Index Cond: (id = prtb_2.id)
+(17 rows)
+
-- semi join
EXPLAIN (COSTS OFF)
SELECT t1.* FROM prt1_adv t1 WHERE EXISTS (SELECT 1 FROM prt2_adv t2 WHERE t1.a = t2.b) AND t1.b = 0 ORDER BY t1.a;
diff --git a/src/test/regress/sql/partition_join.sql b/src/test/regress/sql/partition_join.sql
index d97b5b69ff..c206fd736a 100644
--- a/src/test/regress/sql/partition_join.sql
+++ b/src/test/regress/sql/partition_join.sql
@@ -536,6 +536,105 @@ EXPLAIN (COSTS OFF)
SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_adv t1 INNER JOIN prt2_adv t2 ON (t1.a = t2.b) WHERE t1.b = 0 ORDER BY t1.a, t2.b;
SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_adv t1 INNER JOIN prt2_adv t2 ON (t1.a = t2.b) WHERE t1.b = 0 ORDER BY t1.a, t2.b;
+--
+-- For asymmetric partition-wise join
+--
+CREATE TABLE prt5 (hkey int, a int, b int) PARTITION BY HASH(hkey);
+CREATE TABLE prt5_p0 PARTITION OF prt5
+ FOR VALUES WITH (modulus 3, remainder 0);
+CREATE TABLE prt5_p1 PARTITION OF prt5
+ FOR VALUES WITH (modulus 3, remainder 1);
+CREATE TABLE prt5_p2 PARTITION OF prt5
+ FOR VALUES WITH (modulus 3, remainder 2);
+CREATE TABLE t5_1 (aid int, alabel text);
+CREATE TABLE t5_2 (bid int, blabel text);
+
+INSERT INTO prt5 (SELECT x, (1000.0 * random())::int,
+ (1000.0 * random())::int
+ FROM generate_series(1,1000000) x);
+INSERT INTO t5_1 (SELECT x, md5(x::text) FROM generate_series(-200, 800) x);
+INSERT INTO t5_2 (SELECT x, md5(x::text) FROM generate_series(-200, 800) x);
+
+VACUUM ANALYZE prt5;
+VACUUM ANALYZE t5_1;
+VACUUM ANALYZE t5_2;
+
+SET max_parallel_workers_per_gather = 0;
+EXPLAIN (COSTS OFF)
+SELECT *
+ FROM prt5 JOIN t5_1 ON a = aid AND alabel like '%abc%';
+
+EXPLAIN (COSTS OFF)
+SELECT *
+ FROM prt5 JOIN t5_1 ON a = aid AND alabel like '%ab%'
+ JOIN t5_2 ON b = bid AND blabel like '%cd%';
+
+-- unable to extract non-partitioned right relation
+EXPLAIN (COSTS OFF)
+SELECT * FROM prt5 RIGHT JOIN t5_1 ON a = aid AND alabel like '%abc%';
+-- left side can be extracted, but no cost benefit
+EXPLAIN (COSTS OFF)
+SELECT * FROM prt5 LEFT JOIN t5_1 ON a = aid AND alabel like '%abc%';
+
+-- validation of the results with/without asymmetric partition-wise join
+SELECT * INTO pg_temp.result01a
+ FROM prt5 JOIN t5_1 ON a = aid AND alabel like '%abc%';
+
+SELECT * INTO pg_temp.result02a
+ FROM prt5 JOIN t5_1 ON a = aid AND alabel like '%ab%'
+ JOIN t5_2 ON b = bid AND blabel like '%cd%';
+
+SET enable_partitionwise_join = off;
+
+SELECT * INTO pg_temp.result01b
+ FROM prt5 JOIN t5_1 ON a = aid AND alabel like '%abc%';
+
+SELECT * INTO pg_temp.result02b
+ FROM prt5 JOIN t5_1 ON a = aid AND alabel like '%ab%'
+ JOIN t5_2 ON b = bid AND blabel like '%cd%';
+
+SELECT * FROM pg_temp.result01a EXCEPT SELECT * FROM pg_temp.result01b;
+SELECT * FROM pg_temp.result02a EXCEPT SELECT * FROM pg_temp.result02b;
+
+RESET max_parallel_workers_per_gather;
+SET enable_partitionwise_join = true;
+
+-- Parameterized path examples.
+CREATE TABLE prta (id integer, payload text) PARTITION BY HASH (id);
+CREATE TABLE prta1 PARTITION OF prta FOR VALUES WITH (modulus 2, remainder 0);
+CREATE TABLE prta2 PARTITION OF prta FOR VALUES WITH (modulus 2, remainder 1);
+CREATE UNIQUE INDEX ON prta1 (id);
+CREATE UNIQUE INDEX ON prta2 (id);
+INSERT INTO prta (id, payload)
+ (SELECT *, ('abc' || id)::text AS payload
+ FROM generate_series(1,1) AS id);
+
+CREATE TABLE prtb (id integer, payload text) PARTITION BY HASH (id);
+CREATE TABLE prtb1 PARTITION OF prtb FOR VALUES WITH (modulus 2, remainder 0);
+CREATE TABLE prtb2 PARTITION OF prtb FOR VALUES WITH (modulus 2, remainder 1);
+CREATE UNIQUE INDEX ON prtb1 (id);
+CREATE UNIQUE INDEX ON prtb2 (id);
+INSERT INTO prtb (id, payload)
+ (SELECT *, ('def' || id)::text AS payload
+ FROM generate_series(1,1000) AS id);
+
+CREATE TABLE e (id integer, payload text) PARTITION BY HASH (id);
+CREATE TABLE e1 PARTITION OF e FOR VALUES WITH (modulus 2, remainder 0);
+CREATE TABLE e2 PARTITION OF e FOR VALUES WITH (modulus 2, remainder 1);
+INSERT INTO e (id, payload)
+ (SELECT *, ('ghi' || id)::text AS payload
+ FROM generate_series(1,1000) AS id);
+CREATE UNIQUE INDEX ON e1 (id);
+CREATE UNIQUE INDEX ON e2 (id);
+
+ANALYZE prta,prtb,e;
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM prta,prtb WHERE prta.id=prtb.id;
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM prta,prtb,e WHERE prta.id=prtb.id AND prta.id=e.id;
+
-- semi join
EXPLAIN (COSTS OFF)
SELECT t1.* FROM prt1_adv t1 WHERE EXISTS (SELECT 1 FROM prt2_adv t2 WHERE t1.a = t2.b) AND t1.b = 0 ORDER BY t1.a;
--
2.25.1