On 5/7/21 23:15, Zhihong Yu wrote:
On Mon, Jul 5, 2021 at 2:57 AM Andrey Lepikhov <a.lepik...@postgrespro.ru <mailto:a.lepik...@postgrespro.ru>> wrote: +            * Can't imagine situation when join relation already exists. But in
+            * the 'partition_join' regression test it happens.
+            * It may be an indicator of possible problems.
Should a log be added in the above case ?
I made additional analysis of this branch of code. This situation can happen in the case of one child or if we join two plane tables with partitioned. Both situations are legal and I think we don't needed to add any log message here.
Other mistakes were fixed.

--
regards,
Andrey Lepikhov
Postgres Professional
From b9d7a148ab51f297c98345127e975f864fe6ef92 Mon Sep 17 00:00:00 2001
From: Andrey Lepikhov <a.lepik...@postgrespro.ru>
Date: Fri, 2 Apr 2021 11:02:20 +0500
Subject: [PATCH] Asymmetric partitionwise join.

Teach optimizer to consider partitionwise join of non-partitioned
table with each partition of partitioned table.

Disallow asymmetric machinery for joining of two partitioned (or appended)
relations because it could cause huge consumption of CPU and memory
during reparameterization of NestLoop path.
---
 src/backend/optimizer/path/joinpath.c        |   9 +
 src/backend/optimizer/path/joinrels.c        | 172 +++++++++++
 src/backend/optimizer/plan/setrefs.c         |  17 +-
 src/backend/optimizer/util/appendinfo.c      |  37 ++-
 src/backend/optimizer/util/pathnode.c        |   9 +-
 src/backend/optimizer/util/relnode.c         |  19 +-
 src/include/optimizer/paths.h                |   7 +-
 src/test/regress/expected/partition_join.out | 306 +++++++++++++++++++
 src/test/regress/sql/partition_join.sql      | 126 ++++++++
 9 files changed, 672 insertions(+), 30 deletions(-)

diff --git a/src/backend/optimizer/path/joinpath.c 
b/src/backend/optimizer/path/joinpath.c
index b67b517770..7a1a7b2e86 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -335,6 +335,15 @@ add_paths_to_joinrel(PlannerInfo *root,
        if (set_join_pathlist_hook)
                set_join_pathlist_hook(root, joinrel, outerrel, innerrel,
                                                           jointype, &extra);
+
+       /*
+        * 7. If outer relation is delivered from partition-tables, consider
+        * distributing inner relation to every partition-leaf prior to
+        * append these leafs.
+        */
+       try_asymmetric_partitionwise_join(root, joinrel,
+                                                                         
outerrel, innerrel,
+                                                                         
jointype, &extra);
 }
 
 /*
diff --git a/src/backend/optimizer/path/joinrels.c 
b/src/backend/optimizer/path/joinrels.c
index 0dbe2ac726..e9cbff9709 100644
--- a/src/backend/optimizer/path/joinrels.c
+++ b/src/backend/optimizer/path/joinrels.c
@@ -16,6 +16,7 @@
 
 #include "miscadmin.h"
 #include "optimizer/appendinfo.h"
+#include "optimizer/cost.h"
 #include "optimizer/joininfo.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
@@ -1551,6 +1552,177 @@ try_partitionwise_join(PlannerInfo *root, RelOptInfo 
*rel1, RelOptInfo *rel2,
        }
 }
 
+/*
+ * Build RelOptInfo on JOIN of each partition of the outer relation and the 
inner
+ * relation. Return List of such RelOptInfo's. Return NIL, if at least one of
+ * these JOINs is impossible to build.
+ */
+static List *
+extract_asymmetric_partitionwise_subjoin(PlannerInfo *root,
+                                                                               
 RelOptInfo *joinrel,
+                                                                               
 AppendPath *append_path,
+                                                                               
 RelOptInfo *inner_rel,
+                                                                               
 JoinType jointype,
+                                                                               
 JoinPathExtraData *extra)
+{
+       List            *result = NIL;
+       ListCell        *lc;
+
+       foreach (lc, append_path->subpaths)
+       {
+               Path                    *child_path = lfirst(lc);
+               RelOptInfo              *child_rel = child_path->parent;
+               Relids                  child_joinrelids;
+               Relids                  parent_relids;
+               RelOptInfo              *child_joinrel;
+               SpecialJoinInfo *child_sjinfo;
+               List                    *child_restrictlist;
+
+               child_joinrelids = bms_union(child_rel->relids, 
inner_rel->relids);
+               parent_relids = bms_union(append_path->path.parent->relids,
+                                                                 
inner_rel->relids);
+
+               child_sjinfo = build_child_join_sjinfo(root, extra->sjinfo,
+                                                                               
           child_rel->relids,
+                                                                               
           inner_rel->relids);
+               child_restrictlist = (List *)
+                       adjust_appendrel_attrs_multilevel(root, (Node 
*)extra->restrictlist,
+                                                                               
          child_joinrelids, parent_relids);
+
+               child_joinrel = find_join_rel(root, child_joinrelids);
+               if (!child_joinrel)
+                       child_joinrel = build_child_join_rel(root,
+                                                                               
                 child_rel,
+                                                                               
                 inner_rel,
+                                                                               
                 joinrel,
+                                                                               
                 child_restrictlist,
+                                                                               
                 child_sjinfo,
+                                                                               
                 jointype);
+               else
+               {
+                       /*
+                        * The join relation already exists. For example, it 
could happen if
+                        * we join two plane tables with partitioned table(s). 
Do nothing.
+                        */
+               }
+
+               populate_joinrel_with_paths(root,
+                                                                       
child_rel,
+                                                                       
inner_rel,
+                                                                       
child_joinrel,
+                                                                       
child_sjinfo,
+                                                                       
child_restrictlist);
+
+               /* Give up if asymmetric partition-wise join is not available */
+               if (child_joinrel->pathlist == NIL)
+                       return NIL;
+
+               set_cheapest(child_joinrel);
+               result = lappend(result, child_joinrel);
+       }
+       return result;
+}
+
+static bool
+is_asymmetric_join_feasible(PlannerInfo *root,
+                                                  RelOptInfo *outer_rel,
+                                                  RelOptInfo *inner_rel,
+                                                  JoinType jointype)
+{
+       ListCell *lc;
+
+       if (jointype != JOIN_INNER && jointype != JOIN_LEFT)
+               return false;
+
+       if (IS_OTHER_REL(outer_rel) || IS_OTHER_REL(inner_rel))
+               return false;
+
+       /* Disallow recursive usage of asymmertic join machinery */
+       if (root->join_rel_level == NULL)
+               return false;
+
+       /*
+        * Don't allow asymmetric JOIN of two append subplans.
+        * In the case of a parameterized NL join, a reparameterization 
procedure
+        * will lead to large memory allocations and a CPU consumption:
+        * each reparameterization will induce subpath duplication, creating new
+        * ParamPathInfo instance and increasing of ppilist up to number of
+        * partitions in the inner. Also, if we have many partitions, each 
bitmapset
+        * variable will be large and many leaks of such variable (caused by 
relid
+        * replacement) will highly increase memory consumption.
+        * So, we deny such paths for now.
+        */
+       foreach(lc, inner_rel->pathlist)
+       {
+               if (IsA(lfirst(lc), AppendPath))
+                       return false;
+       }
+
+       return true;
+}
+
+void
+try_asymmetric_partitionwise_join(PlannerInfo *root,
+                                                                 RelOptInfo 
*joinrel,
+                                                                 RelOptInfo 
*outer_rel,
+                                                                 RelOptInfo 
*inner_rel,
+                                                                 JoinType 
jointype,
+                                                                 
JoinPathExtraData *extra)
+{
+       ListCell *lc;
+
+       /*
+        * Try this kind of paths if we allow complex partitionwise joins and 
we know
+        * we can build this join safely.
+        */
+       if (!enable_partitionwise_join ||
+               !is_asymmetric_join_feasible(root, outer_rel, inner_rel, 
jointype))
+               return;
+
+       foreach (lc, outer_rel->pathlist)
+       {
+               AppendPath *append_path = lfirst(lc);
+
+               /*
+                * We assume this pathlist keeps at least one AppendPath that
+                * represents partitioned table-scan, symmetric or asymmetric
+                * partition-wise join. Asymmetric join isn't needed if the 
append node
+                * has only one child.
+                */
+               if (IsA(append_path, AppendPath) &&
+                       list_length(append_path->subpaths) > 1)
+               {
+                       List **join_rel_level_saved;
+                       List *live_childrels = NIL;
+
+                       join_rel_level_saved = root->join_rel_level;
+                       PG_TRY();
+                       {
+                               /* temporary disables "dynamic programming" 
algorithm */
+                               root->join_rel_level = NULL;
+
+                               live_childrels =
+                                       
extract_asymmetric_partitionwise_subjoin(root,
+                                                                               
                                         joinrel,
+                                                                               
                                         append_path,
+                                                                               
                                         inner_rel,
+                                                                               
                                         jointype,
+                                                                               
                                         extra);
+                       }
+                       PG_FINALLY();
+                       {
+                               root->join_rel_level = join_rel_level_saved;
+                       }
+                       PG_END_TRY();
+
+                       if (live_childrels != NIL)
+                               add_paths_to_append_rel(root, joinrel, 
live_childrels);
+
+                       break;
+               }
+       }
+}
+
 /*
  * Construct the SpecialJoinInfo for a child-join by translating
  * SpecialJoinInfo for the join between parents. left_relids and right_relids
diff --git a/src/backend/optimizer/plan/setrefs.c 
b/src/backend/optimizer/plan/setrefs.c
index 61ccfd300b..834843ba33 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -281,24 +281,29 @@ set_plan_references(PlannerInfo *root, Plan *plan)
 
        /*
         * Adjust RT indexes of AppendRelInfos and add to final appendrels list.
-        * We assume the AppendRelInfos were built during planning and don't 
need
-        * to be copied.
+        * The AppendRelInfos are copied, because as a part of a subplan its 
could
+        * be visited many times in the case of asymmetric join.
         */
        foreach(lc, root->append_rel_list)
        {
                AppendRelInfo *appinfo = lfirst_node(AppendRelInfo, lc);
+               AppendRelInfo *newappinfo;
+
+               /* flat copy is enough since all valuable fields are scalars */
+               newappinfo = (AppendRelInfo *) palloc(sizeof(AppendRelInfo));
+               memcpy(newappinfo, appinfo, sizeof(AppendRelInfo));
 
                /* adjust RT indexes */
-               appinfo->parent_relid += rtoffset;
-               appinfo->child_relid += rtoffset;
+               newappinfo->parent_relid += rtoffset;
+               newappinfo->child_relid += rtoffset;
 
                /*
                 * Rather than adjust the translated_vars entries, just drop 
'em.
                 * Neither the executor nor EXPLAIN currently need that data.
                 */
-               appinfo->translated_vars = NIL;
+               newappinfo->translated_vars = NIL;
 
-               glob->appendRelations = lappend(glob->appendRelations, appinfo);
+               glob->appendRelations = lappend(glob->appendRelations, 
newappinfo);
        }
 
        /* Now fix the Plan tree */
diff --git a/src/backend/optimizer/util/appendinfo.c 
b/src/backend/optimizer/util/appendinfo.c
index af46f581ac..ec8dee99b0 100644
--- a/src/backend/optimizer/util/appendinfo.c
+++ b/src/backend/optimizer/util/appendinfo.c
@@ -202,7 +202,9 @@ adjust_appendrel_attrs(PlannerInfo *root, Node *node, int 
nappinfos,
        context.appinfos = appinfos;
 
        /* If there's nothing to adjust, don't call this function. */
-       Assert(nappinfos >= 1 && appinfos != NULL);
+       /* If there's nothing to adjust, just return a duplication */
+       if (nappinfos == 0)
+               return copyObject(node);
 
        /* Should never be translating a Query tree. */
        Assert(node == NULL || !IsA(node, Query));
@@ -489,12 +491,10 @@ adjust_appendrel_attrs_multilevel(PlannerInfo *root, Node 
*node,
                                                                  Relids 
child_relids,
                                                                  Relids 
top_parent_relids)
 {
-       AppendRelInfo **appinfos;
-       Bitmapset  *parent_relids = NULL;
-       int                     nappinfos;
-       int                     cnt;
-
-       Assert(bms_num_members(child_relids) == 
bms_num_members(top_parent_relids));
+       AppendRelInfo   **appinfos;
+       Relids                  parent_relids = NULL;
+       int                             nappinfos;
+       int                             cnt;
 
        appinfos = find_appinfos_by_relids(root, child_relids, &nappinfos);
 
@@ -506,8 +506,13 @@ adjust_appendrel_attrs_multilevel(PlannerInfo *root, Node 
*node,
                parent_relids = bms_add_member(parent_relids, 
appinfo->parent_relid);
        }
 
-       /* Recurse if immediate parent is not the top parent. */
-       if (!bms_equal(parent_relids, top_parent_relids))
+       /*
+        * Recurse if immediate parent is not the top parent. Keep in mind that 
in a
+        * case of asymmetric JOIN top_parent_relids can contain relids which 
aren't
+        * part of an append node.
+        */
+       if (!bms_equal(parent_relids, top_parent_relids) &&
+               !bms_is_subset(parent_relids, top_parent_relids))
                node = adjust_appendrel_attrs_multilevel(root, node, 
parent_relids,
                                                                                
                 top_parent_relids);
 
@@ -515,6 +520,7 @@ adjust_appendrel_attrs_multilevel(PlannerInfo *root, Node 
*node,
        node = adjust_appendrel_attrs(root, node, nappinfos, appinfos);
 
        pfree(appinfos);
+       pfree(parent_relids);
 
        return node;
 }
@@ -565,6 +571,7 @@ adjust_child_relids_multilevel(PlannerInfo *root, Relids 
relids,
        AppendRelInfo **appinfos;
        int                     nappinfos;
        Relids          parent_relids = NULL;
+       Relids          normal_relids = NULL;
        Relids          result;
        Relids          tmp_result = NULL;
        int                     cnt;
@@ -579,12 +586,17 @@ adjust_child_relids_multilevel(PlannerInfo *root, Relids 
relids,
        appinfos = find_appinfos_by_relids(root, child_relids, &nappinfos);
 
        /* Construct relids set for the immediate parent of the given child. */
+       normal_relids = bms_copy(child_relids);
        for (cnt = 0; cnt < nappinfos; cnt++)
        {
                AppendRelInfo *appinfo = appinfos[cnt];
 
                parent_relids = bms_add_member(parent_relids, 
appinfo->parent_relid);
+               normal_relids = bms_del_member(normal_relids, 
appinfo->child_relid);
        }
+       parent_relids = bms_union(parent_relids, normal_relids);
+       if (normal_relids)
+               bms_free(normal_relids);
 
        /* Recurse if immediate parent is not the top parent. */
        if (!bms_equal(parent_relids, top_parent_relids))
@@ -715,11 +727,11 @@ AppendRelInfo **
 find_appinfos_by_relids(PlannerInfo *root, Relids relids, int *nappinfos)
 {
        AppendRelInfo **appinfos;
+       int                     nrooms = bms_num_members(relids);
        int                     cnt = 0;
        int                     i;
 
-       *nappinfos = bms_num_members(relids);
-       appinfos = (AppendRelInfo **) palloc(sizeof(AppendRelInfo *) * 
*nappinfos);
+       appinfos = (AppendRelInfo **) palloc(sizeof(AppendRelInfo *) * nrooms);
 
        i = -1;
        while ((i = bms_next_member(relids, i)) >= 0)
@@ -727,10 +739,11 @@ find_appinfos_by_relids(PlannerInfo *root, Relids relids, 
int *nappinfos)
                AppendRelInfo *appinfo = root->append_rel_array[i];
 
                if (!appinfo)
-                       elog(ERROR, "child rel %d not found in 
append_rel_array", i);
+                       continue;
 
                appinfos[cnt++] = appinfo;
        }
+       *nappinfos = cnt;
        return appinfos;
 }
 
diff --git a/src/backend/optimizer/util/pathnode.c 
b/src/backend/optimizer/util/pathnode.c
index 9ce5f95e3b..03c0640473 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -4226,7 +4226,14 @@ do { \
 
                MemoryContextSwitchTo(oldcontext);
        }
-       bms_free(required_outer);
+
+       /*
+        * If adjust_child_relids_multilevel don't do replacements it returns
+        * the original set, not a copy. It is possible in the case of 
asymmetric
+        * JOIN and child_rel->relids contains relids only of plane relations.
+        */
+       if (required_outer != old_ppi->ppi_req_outer)
+               bms_free(required_outer);
 
        new_path->param_info = new_ppi;
 
diff --git a/src/backend/optimizer/util/relnode.c 
b/src/backend/optimizer/util/relnode.c
index e105a4d5f1..bb15fb2716 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -790,11 +790,8 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo 
*outer_rel,
        AppendRelInfo **appinfos;
        int                     nappinfos;
 
-       /* Only joins between "other" relations land here. */
-       Assert(IS_OTHER_REL(outer_rel) && IS_OTHER_REL(inner_rel));
-
-       /* The parent joinrel should have consider_partitionwise_join set. */
-       Assert(parent_joinrel->consider_partitionwise_join);
+       /* Either of relations must be "other" relation at least. */
+       Assert(IS_OTHER_REL(outer_rel) || IS_OTHER_REL(inner_rel));
 
        joinrel->reloptkind = RELOPT_OTHER_JOINREL;
        joinrel->relids = bms_union(outer_rel->relids, inner_rel->relids);
@@ -851,8 +848,11 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo 
*outer_rel,
        joinrel->partexprs = NULL;
        joinrel->nullable_partexprs = NULL;
 
-       joinrel->top_parent_relids = bms_union(outer_rel->top_parent_relids,
-                                                                               
   inner_rel->top_parent_relids);
+       joinrel->top_parent_relids =
+               bms_union(IS_OTHER_REL(outer_rel) ?
+                                 outer_rel->top_parent_relids : 
outer_rel->relids,
+                                 IS_OTHER_REL(inner_rel) ?
+                                 inner_rel->top_parent_relids : 
inner_rel->relids);
 
        /* Compute information relevant to foreign relations. */
        set_foreign_rel_properties(joinrel, outer_rel, inner_rel);
@@ -2033,9 +2033,8 @@ build_child_join_reltarget(PlannerInfo *root,
 {
        /* Build the targetlist */
        childrel->reltarget->exprs = (List *)
-               adjust_appendrel_attrs(root,
-                                                          (Node *) 
parentrel->reltarget->exprs,
-                                                          nappinfos, appinfos);
+               adjust_appendrel_attrs_multilevel(root, (Node 
*)parentrel->reltarget->exprs,
+                                                                               
childrel->relids, parentrel->relids);
 
        /* Set the cost and width fields */
        childrel->reltarget->cost.startup = parentrel->reltarget->cost.startup;
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index f1d111063c..a0106dc539 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -109,7 +109,12 @@ extern bool have_join_order_restriction(PlannerInfo *root,
 extern bool have_dangerous_phv(PlannerInfo *root,
                                                           Relids outer_relids, 
Relids inner_params);
 extern void mark_dummy_rel(RelOptInfo *rel);
-
+extern void try_asymmetric_partitionwise_join(PlannerInfo *root,
+                                                                               
          RelOptInfo *joinrel,
+                                                                               
          RelOptInfo *outer_rel,
+                                                                               
          RelOptInfo *inner_rel,
+                                                                               
          JoinType jointype,
+                                                                               
          JoinPathExtraData *extra);
 /*
  * equivclass.c
  *       routines for managing EquivalenceClasses
diff --git a/src/test/regress/expected/partition_join.out 
b/src/test/regress/expected/partition_join.out
index 27f7525b3e..601f08662e 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -2320,6 +2320,312 @@ SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_adv t1 INNER 
JOIN prt2_adv t2 ON (t1.a =
  375 | 0375 | 375 | 0375
 (8 rows)
 
+--
+-- For asymmetric partition-wise join
+--
+CREATE TABLE prt5 (hkey int, a int, b int) PARTITION BY HASH(hkey);
+CREATE TABLE prt5_p0 PARTITION OF prt5
+                     FOR VALUES WITH (modulus 3, remainder 0);
+CREATE TABLE prt5_p1 PARTITION OF prt5
+                     FOR VALUES WITH (modulus 3, remainder 1);
+CREATE TABLE prt5_p2 PARTITION OF prt5
+                     FOR VALUES WITH (modulus 3, remainder 2);
+CREATE TABLE prt6 (aid int, alabel text) PARTITION BY HASH(aid);
+CREATE TABLE prt6_p0 PARTITION OF prt6
+                     FOR VALUES WITH (modulus 2, remainder 0);
+CREATE TABLE prt6_p1 PARTITION OF prt6
+                     FOR VALUES WITH (modulus 2, remainder 1);
+CREATE TABLE t5_1 (aid int, alabel text);
+CREATE TABLE t5_2 (bid int, blabel text);
+INSERT INTO prt5 (SELECT x, (1000.0 * random())::int,
+                            (1000.0 * random())::int
+                    FROM generate_series(1,1000000) x);
+INSERT INTO t5_1 (SELECT x, md5(x::text) FROM generate_series(-200, 800) x);
+INSERT INTO t5_2 (SELECT x, md5(x::text) FROM generate_series(-200, 800) x);
+INSERT INTO prt6 (SELECT * FROM t5_1);
+VACUUM ANALYZE prt5,prt6,t5_1,t5_2;
+SET max_parallel_workers_per_gather = 0;
+-- Trivial asymmetric JOIN of partitioned table with a relation
+EXPLAIN (COSTS OFF)
+SELECT *
+  FROM prt5 JOIN t5_1 ON a = aid AND alabel like '%abc%';
+                      QUERY PLAN                       
+-------------------------------------------------------
+ Append
+   ->  Hash Join
+         Hash Cond: (prt5_1.a = t5_1.aid)
+         ->  Seq Scan on prt5_p0 prt5_1
+         ->  Hash
+               ->  Seq Scan on t5_1
+                     Filter: (alabel ~~ '%abc%'::text)
+   ->  Hash Join
+         Hash Cond: (prt5_2.a = t5_1.aid)
+         ->  Seq Scan on prt5_p1 prt5_2
+         ->  Hash
+               ->  Seq Scan on t5_1
+                     Filter: (alabel ~~ '%abc%'::text)
+   ->  Hash Join
+         Hash Cond: (prt5_3.a = t5_1.aid)
+         ->  Seq Scan on prt5_p2 prt5_3
+         ->  Hash
+               ->  Seq Scan on t5_1
+                     Filter: (alabel ~~ '%abc%'::text)
+(19 rows)
+
+-- The same, but appended with UNION ALL
+EXPLAIN (COSTS OFF)
+SELECT * FROM (
+       (SELECT * FROM prt5_p0)
+       UNION ALL
+       (SELECT * FROM prt5_p1)
+       UNION ALL
+       (SELECT * FROM prt5_p2)
+       ) AS sq1
+JOIN t5_1 ON a = aid AND alabel like '%abc%';
+                      QUERY PLAN                       
+-------------------------------------------------------
+ Append
+   ->  Hash Join
+         Hash Cond: (prt5_p0.a = t5_1.aid)
+         ->  Seq Scan on prt5_p0
+         ->  Hash
+               ->  Seq Scan on t5_1
+                     Filter: (alabel ~~ '%abc%'::text)
+   ->  Hash Join
+         Hash Cond: (prt5_p1.a = t5_1.aid)
+         ->  Seq Scan on prt5_p1
+         ->  Hash
+               ->  Seq Scan on t5_1
+                     Filter: (alabel ~~ '%abc%'::text)
+   ->  Hash Join
+         Hash Cond: (prt5_p2.a = t5_1.aid)
+         ->  Seq Scan on prt5_p2
+         ->  Hash
+               ->  Seq Scan on t5_1
+                     Filter: (alabel ~~ '%abc%'::text)
+(19 rows)
+
+-- Don't allow asymmetric JOIN of two partitioned tables.
+EXPLAIN (COSTS OFF)
+SELECT * FROM prt5 JOIN prt6 ON a = aid AND alabel like '%abc%';
+                      QUERY PLAN                       
+-------------------------------------------------------
+ Hash Join
+   Hash Cond: (prt5.a = prt6.aid)
+   ->  Append
+         ->  Seq Scan on prt5_p0 prt5_1
+         ->  Seq Scan on prt5_p1 prt5_2
+         ->  Seq Scan on prt5_p2 prt5_3
+   ->  Hash
+         ->  Append
+               ->  Seq Scan on prt6_p0 prt6_1
+                     Filter: (alabel ~~ '%abc%'::text)
+               ->  Seq Scan on prt6_p1 prt6_2
+                     Filter: (alabel ~~ '%abc%'::text)
+(12 rows)
+
+-- Check asymmetric JOIN with Subquery
+EXPLAIN (COSTS OFF)
+SELECT * FROM prt5 JOIN (
+       SELECT * FROM prt6 LIMIT 1000
+) AS sq1 ON a = aid AND alabel like '%abc%';
+                           QUERY PLAN                           
+----------------------------------------------------------------
+ Append
+   ->  Hash Join
+         Hash Cond: (prt5_1.a = sq1.aid)
+         ->  Seq Scan on prt5_p0 prt5_1
+         ->  Hash
+               ->  Subquery Scan on sq1
+                     Filter: (sq1.alabel ~~ '%abc%'::text)
+                     ->  Limit
+                           ->  Append
+                                 ->  Seq Scan on prt6_p0 prt6_1
+                                 ->  Seq Scan on prt6_p1 prt6_2
+   ->  Hash Join
+         Hash Cond: (prt5_2.a = sq1.aid)
+         ->  Seq Scan on prt5_p1 prt5_2
+         ->  Hash
+               ->  Subquery Scan on sq1
+                     Filter: (sq1.alabel ~~ '%abc%'::text)
+                     ->  Limit
+                           ->  Append
+                                 ->  Seq Scan on prt6_p0 prt6_4
+                                 ->  Seq Scan on prt6_p1 prt6_5
+   ->  Hash Join
+         Hash Cond: (prt5_3.a = sq1.aid)
+         ->  Seq Scan on prt5_p2 prt5_3
+         ->  Hash
+               ->  Subquery Scan on sq1
+                     Filter: (sq1.alabel ~~ '%abc%'::text)
+                     ->  Limit
+                           ->  Append
+                                 ->  Seq Scan on prt6_p0 prt6_7
+                                 ->  Seq Scan on prt6_p1 prt6_8
+(31 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT *
+  FROM prt5 JOIN t5_1 ON a = aid AND alabel like '%ab%'
+            JOIN t5_2 ON b = bid AND blabel like '%cd%';
+                         QUERY PLAN                         
+------------------------------------------------------------
+ Append
+   ->  Hash Join
+         Hash Cond: (prt5_1.a = t5_1.aid)
+         ->  Hash Join
+               Hash Cond: (prt5_1.b = t5_2.bid)
+               ->  Seq Scan on prt5_p0 prt5_1
+               ->  Hash
+                     ->  Seq Scan on t5_2
+                           Filter: (blabel ~~ '%cd%'::text)
+         ->  Hash
+               ->  Seq Scan on t5_1
+                     Filter: (alabel ~~ '%ab%'::text)
+   ->  Hash Join
+         Hash Cond: (prt5_2.a = t5_1.aid)
+         ->  Hash Join
+               Hash Cond: (prt5_2.b = t5_2.bid)
+               ->  Seq Scan on prt5_p1 prt5_2
+               ->  Hash
+                     ->  Seq Scan on t5_2
+                           Filter: (blabel ~~ '%cd%'::text)
+         ->  Hash
+               ->  Seq Scan on t5_1
+                     Filter: (alabel ~~ '%ab%'::text)
+   ->  Hash Join
+         Hash Cond: (prt5_3.a = t5_1.aid)
+         ->  Hash Join
+               Hash Cond: (prt5_3.b = t5_2.bid)
+               ->  Seq Scan on prt5_p2 prt5_3
+               ->  Hash
+                     ->  Seq Scan on t5_2
+                           Filter: (blabel ~~ '%cd%'::text)
+         ->  Hash
+               ->  Seq Scan on t5_1
+                     Filter: (alabel ~~ '%ab%'::text)
+(34 rows)
+
+-- unable to extract non-partitioned right relation
+EXPLAIN (COSTS OFF)
+SELECT * FROM prt5 RIGHT JOIN t5_1 ON a = aid AND alabel like '%abc%';
+                  QUERY PLAN                   
+-----------------------------------------------
+ Hash Right Join
+   Hash Cond: (prt5.a = t5_1.aid)
+   Join Filter: (t5_1.alabel ~~ '%abc%'::text)
+   ->  Append
+         ->  Seq Scan on prt5_p0 prt5_1
+         ->  Seq Scan on prt5_p1 prt5_2
+         ->  Seq Scan on prt5_p2 prt5_3
+   ->  Hash
+         ->  Seq Scan on t5_1
+(9 rows)
+
+-- left side can be extracted, but no cost benefit
+EXPLAIN (COSTS OFF)
+SELECT * FROM prt5 LEFT JOIN t5_1 ON a = aid AND alabel like '%abc%';
+                   QUERY PLAN                    
+-------------------------------------------------
+ Hash Left Join
+   Hash Cond: (prt5.a = t5_1.aid)
+   ->  Append
+         ->  Seq Scan on prt5_p0 prt5_1
+         ->  Seq Scan on prt5_p1 prt5_2
+         ->  Seq Scan on prt5_p2 prt5_3
+   ->  Hash
+         ->  Seq Scan on t5_1
+               Filter: (alabel ~~ '%abc%'::text)
+(9 rows)
+
+-- validation of the results with/without asymmetric partition-wise join
+SELECT * INTO pg_temp.result01a
+  FROM prt5 JOIN t5_1 ON a = aid AND alabel like '%abc%';
+SELECT * INTO pg_temp.result02a
+  FROM prt5 JOIN t5_1 ON a = aid AND alabel like '%ab%'
+            JOIN t5_2 ON b = bid AND blabel like '%cd%';
+SET enable_partitionwise_join = off;
+SELECT * INTO pg_temp.result01b
+  FROM prt5 JOIN t5_1 ON a = aid AND alabel like '%abc%';
+SELECT * INTO pg_temp.result02b
+  FROM prt5 JOIN t5_1 ON a = aid AND alabel like '%ab%'
+            JOIN t5_2 ON b = bid AND blabel like '%cd%';
+SELECT * FROM pg_temp.result01a EXCEPT SELECT * FROM pg_temp.result01b;
+ hkey | a | b | aid | alabel 
+------+---+---+-----+--------
+(0 rows)
+
+SELECT * FROM pg_temp.result02a EXCEPT SELECT * FROM pg_temp.result02b;
+ hkey | a | b | aid | alabel | bid | blabel 
+------+---+---+-----+--------+-----+--------
+(0 rows)
+
+RESET max_parallel_workers_per_gather;
+SET enable_partitionwise_join = true;
+-- Parameterized path examples.
+CREATE TABLE prta (id integer, payload text) PARTITION BY HASH (id);
+CREATE TABLE prta1 PARTITION OF prta FOR VALUES WITH (modulus 2, remainder 0);
+CREATE TABLE prta2 PARTITION OF prta FOR VALUES WITH (modulus 2, remainder 1);
+CREATE UNIQUE INDEX ON prta1 (id);
+CREATE UNIQUE INDEX ON prta2 (id);
+INSERT INTO prta (id, payload)
+       (SELECT *, ('abc' || id)::text AS payload
+               FROM generate_series(1,1) AS id);
+CREATE TABLE prtb (id integer, payload text) PARTITION BY HASH (id);
+CREATE TABLE prtb1 PARTITION OF prtb FOR VALUES WITH (modulus 2, remainder 0);
+CREATE TABLE prtb2 PARTITION OF prtb FOR VALUES WITH (modulus 2, remainder 1);
+CREATE UNIQUE INDEX ON prtb1 (id);
+CREATE UNIQUE INDEX ON prtb2 (id);
+INSERT INTO prtb (id, payload)
+    (SELECT *, ('def' || id)::text AS payload
+               FROM generate_series(1,1000) AS id);
+CREATE TABLE e (id integer, payload text) PARTITION BY HASH (id);
+CREATE TABLE e1 PARTITION OF e FOR VALUES WITH (modulus 2, remainder 0);
+CREATE TABLE e2 PARTITION OF e FOR VALUES WITH (modulus 2, remainder 1);
+INSERT INTO e (id, payload)
+       (SELECT *, ('ghi' || id)::text AS payload
+               FROM generate_series(1,1000) AS id);
+CREATE UNIQUE INDEX ON e1 (id);
+CREATE UNIQUE INDEX ON e2 (id);
+ANALYZE prta,prtb,e;
+EXPLAIN (COSTS OFF)
+SELECT * FROM prta,prtb WHERE prta.id=prtb.id;
+                        QUERY PLAN                         
+-----------------------------------------------------------
+ Append
+   ->  Nested Loop
+         ->  Seq Scan on prta1 prta_1
+         ->  Index Scan using prtb1_id_idx on prtb1 prtb_1
+               Index Cond: (id = prta_1.id)
+   ->  Nested Loop
+         ->  Seq Scan on prta2 prta_2
+         ->  Index Scan using prtb2_id_idx on prtb2 prtb_2
+               Index Cond: (id = prta_2.id)
+(9 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM prta,prtb,e WHERE prta.id=prtb.id AND prta.id=e.id;
+                           QUERY PLAN                            
+-----------------------------------------------------------------
+ Append
+   ->  Nested Loop
+         Join Filter: (prta_1.id = e_1.id)
+         ->  Nested Loop
+               ->  Seq Scan on prta1 prta_1
+               ->  Index Scan using prtb1_id_idx on prtb1 prtb_1
+                     Index Cond: (id = prta_1.id)
+         ->  Index Scan using e1_id_idx on e1 e_1
+               Index Cond: (id = prtb_1.id)
+   ->  Nested Loop
+         Join Filter: (prta_2.id = e_2.id)
+         ->  Nested Loop
+               ->  Seq Scan on prta2 prta_2
+               ->  Index Scan using prtb2_id_idx on prtb2 prtb_2
+                     Index Cond: (id = prta_2.id)
+         ->  Index Scan using e2_id_idx on e2 e_2
+               Index Cond: (id = prtb_2.id)
+(17 rows)
+
 -- semi join
 EXPLAIN (COSTS OFF)
 SELECT t1.* FROM prt1_adv t1 WHERE EXISTS (SELECT 1 FROM prt2_adv t2 WHERE 
t1.a = t2.b) AND t1.b = 0 ORDER BY t1.a;
diff --git a/src/test/regress/sql/partition_join.sql 
b/src/test/regress/sql/partition_join.sql
index d97b5b69ff..51b81ce3f8 100644
--- a/src/test/regress/sql/partition_join.sql
+++ b/src/test/regress/sql/partition_join.sql
@@ -536,6 +536,132 @@ EXPLAIN (COSTS OFF)
 SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_adv t1 INNER JOIN prt2_adv t2 ON (t1.a 
= t2.b) WHERE t1.b = 0 ORDER BY t1.a, t2.b;
 SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_adv t1 INNER JOIN prt2_adv t2 ON (t1.a 
= t2.b) WHERE t1.b = 0 ORDER BY t1.a, t2.b;
 
+--
+-- For asymmetric partition-wise join
+--
+CREATE TABLE prt5 (hkey int, a int, b int) PARTITION BY HASH(hkey);
+CREATE TABLE prt5_p0 PARTITION OF prt5
+                     FOR VALUES WITH (modulus 3, remainder 0);
+CREATE TABLE prt5_p1 PARTITION OF prt5
+                     FOR VALUES WITH (modulus 3, remainder 1);
+CREATE TABLE prt5_p2 PARTITION OF prt5
+                     FOR VALUES WITH (modulus 3, remainder 2);
+CREATE TABLE prt6 (aid int, alabel text) PARTITION BY HASH(aid);
+CREATE TABLE prt6_p0 PARTITION OF prt6
+                     FOR VALUES WITH (modulus 2, remainder 0);
+CREATE TABLE prt6_p1 PARTITION OF prt6
+                     FOR VALUES WITH (modulus 2, remainder 1);
+CREATE TABLE t5_1 (aid int, alabel text);
+CREATE TABLE t5_2 (bid int, blabel text);
+
+INSERT INTO prt5 (SELECT x, (1000.0 * random())::int,
+                            (1000.0 * random())::int
+                    FROM generate_series(1,1000000) x);
+INSERT INTO t5_1 (SELECT x, md5(x::text) FROM generate_series(-200, 800) x);
+INSERT INTO t5_2 (SELECT x, md5(x::text) FROM generate_series(-200, 800) x);
+INSERT INTO prt6 (SELECT * FROM t5_1);
+
+VACUUM ANALYZE prt5,prt6,t5_1,t5_2;
+
+SET max_parallel_workers_per_gather = 0;
+
+-- Trivial asymmetric JOIN of partitioned table with a relation
+EXPLAIN (COSTS OFF)
+SELECT *
+  FROM prt5 JOIN t5_1 ON a = aid AND alabel like '%abc%';
+
+-- The same, but appended with UNION ALL
+EXPLAIN (COSTS OFF)
+SELECT * FROM (
+       (SELECT * FROM prt5_p0)
+       UNION ALL
+       (SELECT * FROM prt5_p1)
+       UNION ALL
+       (SELECT * FROM prt5_p2)
+       ) AS sq1
+JOIN t5_1 ON a = aid AND alabel like '%abc%';
+
+-- Don't allow asymmetric JOIN of two partitioned tables.
+EXPLAIN (COSTS OFF)
+SELECT * FROM prt5 JOIN prt6 ON a = aid AND alabel like '%abc%';
+
+-- Check asymmetric JOIN with Subquery
+EXPLAIN (COSTS OFF)
+SELECT * FROM prt5 JOIN (
+       SELECT * FROM prt6 LIMIT 1000
+) AS sq1 ON a = aid AND alabel like '%abc%';
+
+EXPLAIN (COSTS OFF)
+SELECT *
+  FROM prt5 JOIN t5_1 ON a = aid AND alabel like '%ab%'
+            JOIN t5_2 ON b = bid AND blabel like '%cd%';
+
+-- unable to extract non-partitioned right relation
+EXPLAIN (COSTS OFF)
+SELECT * FROM prt5 RIGHT JOIN t5_1 ON a = aid AND alabel like '%abc%';
+-- left side can be extracted, but no cost benefit
+EXPLAIN (COSTS OFF)
+SELECT * FROM prt5 LEFT JOIN t5_1 ON a = aid AND alabel like '%abc%';
+
+-- validation of the results with/without asymmetric partition-wise join
+SELECT * INTO pg_temp.result01a
+  FROM prt5 JOIN t5_1 ON a = aid AND alabel like '%abc%';
+
+SELECT * INTO pg_temp.result02a
+  FROM prt5 JOIN t5_1 ON a = aid AND alabel like '%ab%'
+            JOIN t5_2 ON b = bid AND blabel like '%cd%';
+
+SET enable_partitionwise_join = off;
+
+SELECT * INTO pg_temp.result01b
+  FROM prt5 JOIN t5_1 ON a = aid AND alabel like '%abc%';
+
+SELECT * INTO pg_temp.result02b
+  FROM prt5 JOIN t5_1 ON a = aid AND alabel like '%ab%'
+            JOIN t5_2 ON b = bid AND blabel like '%cd%';
+
+SELECT * FROM pg_temp.result01a EXCEPT SELECT * FROM pg_temp.result01b;
+SELECT * FROM pg_temp.result02a EXCEPT SELECT * FROM pg_temp.result02b;
+
+RESET max_parallel_workers_per_gather;
+SET enable_partitionwise_join = true;
+
+-- Parameterized path examples.
+CREATE TABLE prta (id integer, payload text) PARTITION BY HASH (id);
+CREATE TABLE prta1 PARTITION OF prta FOR VALUES WITH (modulus 2, remainder 0);
+CREATE TABLE prta2 PARTITION OF prta FOR VALUES WITH (modulus 2, remainder 1);
+CREATE UNIQUE INDEX ON prta1 (id);
+CREATE UNIQUE INDEX ON prta2 (id);
+INSERT INTO prta (id, payload)
+       (SELECT *, ('abc' || id)::text AS payload
+               FROM generate_series(1,1) AS id);
+
+CREATE TABLE prtb (id integer, payload text) PARTITION BY HASH (id);
+CREATE TABLE prtb1 PARTITION OF prtb FOR VALUES WITH (modulus 2, remainder 0);
+CREATE TABLE prtb2 PARTITION OF prtb FOR VALUES WITH (modulus 2, remainder 1);
+CREATE UNIQUE INDEX ON prtb1 (id);
+CREATE UNIQUE INDEX ON prtb2 (id);
+INSERT INTO prtb (id, payload)
+    (SELECT *, ('def' || id)::text AS payload
+               FROM generate_series(1,1000) AS id);
+
+CREATE TABLE e (id integer, payload text) PARTITION BY HASH (id);
+CREATE TABLE e1 PARTITION OF e FOR VALUES WITH (modulus 2, remainder 0);
+CREATE TABLE e2 PARTITION OF e FOR VALUES WITH (modulus 2, remainder 1);
+INSERT INTO e (id, payload)
+       (SELECT *, ('ghi' || id)::text AS payload
+               FROM generate_series(1,1000) AS id);
+CREATE UNIQUE INDEX ON e1 (id);
+CREATE UNIQUE INDEX ON e2 (id);
+
+ANALYZE prta,prtb,e;
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM prta,prtb WHERE prta.id=prtb.id;
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM prta,prtb,e WHERE prta.id=prtb.id AND prta.id=e.id;
+
 -- semi join
 EXPLAIN (COSTS OFF)
 SELECT t1.* FROM prt1_adv t1 WHERE EXISTS (SELECT 1 FROM prt2_adv t2 WHERE 
t1.a = t2.b) AND t1.b = 0 ORDER BY t1.a;
-- 
2.31.1

Reply via email to