On 19 January 2017 at 11:06, David Rowley <[email protected]> wrote:
> Old patch no longer applies, so I've attached a rebased patch. This
> also re-adds a comment line which I mistakenly removed.
(meanwhile Andres commits 69f4b9c)
I should've waited a bit longer.
Here's another that fixes the new conflicts.
--
David Rowley http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index f9fb276..239f78b 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -1312,6 +1312,26 @@ ExplainNode(PlanState *planstate, List *ancestors,
if (es->verbose)
show_plan_tlist(planstate, ancestors, es);
+ /* unique join */
+ if (es->verbose || es->format != EXPLAIN_FORMAT_TEXT)
+ {
+ switch (nodeTag(plan))
+ {
+ case T_NestLoop:
+ case T_MergeJoin:
+ case T_HashJoin:
+ {
+ const char *value = ((Join *) plan)->inner_unique ? "Yes" : "No";
+ ExplainPropertyText("Inner Unique", value, es);
+ value = ((Join *) plan)->outer_unique ? "Yes" : "No";
+ ExplainPropertyText("Outer Unique", value, es);
+ break;
+ }
+ default:
+ break;
+ }
+ }
+
/* quals, sort keys, etc */
switch (nodeTag(plan))
{
diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c
index b41e4e2..5b75b64 100644
--- a/src/backend/executor/nodeHashjoin.c
+++ b/src/backend/executor/nodeHashjoin.c
@@ -306,10 +306,10 @@ ExecHashJoin(HashJoinState *node)
}
/*
- * In a semijoin, we'll consider returning the first
- * match, but after that we're done with this outer tuple.
+ * Skip to the next outer tuple if we only need one inner
+ * tuple to match.
*/
- if (node->js.jointype == JOIN_SEMI)
+ if (node->js.match_first_inner_tuple_only)
node->hj_JoinState = HJ_NEED_NEW_OUTER;
if (otherqual == NIL ||
@@ -453,6 +453,14 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
hjstate->js.ps.state = estate;
/*
+ * When the planner was able to determine that the inner side of the join
+ * will at most contain a single tuple for each outer tuple, then we can
+ * optimize the join by skipping to the next outer tuple after we find the
+ * first matching inner tuple.
+ */
+ hjstate->js.match_first_inner_tuple_only = node->join.inner_unique;
+
+ /*
* Miscellaneous initialization
*
* create expression context for node
@@ -498,8 +506,11 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
/* set up null tuples for outer joins, if needed */
switch (node->join.jointype)
{
- case JOIN_INNER:
case JOIN_SEMI:
+ /* for semi joins we match to the first inner tuple only */
+ hjstate->js.match_first_inner_tuple_only = true;
+ /* fall through */
+ case JOIN_INNER:
break;
case JOIN_LEFT:
case JOIN_ANTI:
diff --git a/src/backend/executor/nodeMergejoin.c b/src/backend/executor/nodeMergejoin.c
index 2fd1856..378706f 100644
--- a/src/backend/executor/nodeMergejoin.c
+++ b/src/backend/executor/nodeMergejoin.c
@@ -840,10 +840,10 @@ ExecMergeJoin(MergeJoinState *node)
}
/*
- * In a semijoin, we'll consider returning the first
- * match, but after that we're done with this outer tuple.
+ * Skip to the next outer tuple if we only need one inner
+ * tuple to match.
*/
- if (node->js.jointype == JOIN_SEMI)
+ if (node->js.match_first_inner_tuple_only)
node->mj_JoinState = EXEC_MJ_NEXTOUTER;
qualResult = (otherqual == NIL ||
@@ -1487,6 +1487,15 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate, int eflags)
mergestate->js.ps.state = estate;
/*
+ * When the planner was able to determine that the inner or outer side of
+ * the join will at most contain a single tuple for the opposing side of
+ * the join, then we can optimize the merge join by skipping to the next
+ * tuple since we know there are no more to match.
+ */
+ mergestate->js.match_first_inner_tuple_only = node->join.inner_unique;
+ mergestate->js.match_first_outer_tuple_only = node->join.outer_unique;
+
+ /*
* Miscellaneous initialization
*
* create expression context for node
@@ -1537,7 +1546,8 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate, int eflags)
* only if eflags doesn't specify REWIND.
*/
if (IsA(innerPlan(node), Material) &&
- (eflags & EXEC_FLAG_REWIND) == 0)
+ (eflags & EXEC_FLAG_REWIND) == 0 &&
+ !node->join.outer_unique)
mergestate->mj_ExtraMarks = true;
else
mergestate->mj_ExtraMarks = false;
@@ -1553,8 +1563,11 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate, int eflags)
switch (node->join.jointype)
{
- case JOIN_INNER:
case JOIN_SEMI:
+ /* for semi joins we match to the first inner tuple only */
+ mergestate->js.match_first_inner_tuple_only = true;
+ /* fall through */
+ case JOIN_INNER:
mergestate->mj_FillOuter = false;
mergestate->mj_FillInner = false;
break;
diff --git a/src/backend/executor/nodeNestloop.c b/src/backend/executor/nodeNestloop.c
index e058427..4345e72 100644
--- a/src/backend/executor/nodeNestloop.c
+++ b/src/backend/executor/nodeNestloop.c
@@ -247,10 +247,10 @@ ExecNestLoop(NestLoopState *node)
}
/*
- * In a semijoin, we'll consider returning the first match, but
- * after that we're done with this outer tuple.
+ * Skip to the next outer tuple if we only need 1 inner tuple to
+ * match.
*/
- if (node->js.jointype == JOIN_SEMI)
+ if (node->js.match_first_inner_tuple_only)
node->nl_NeedNewOuter = true;
if (otherqual == NIL || ExecQual(otherqual, econtext, false))
@@ -311,6 +311,14 @@ ExecInitNestLoop(NestLoop *node, EState *estate, int eflags)
nlstate->js.ps.state = estate;
/*
+ * When the planner was able to determine that the inner side of the join
+ * will at most contain a single tuple for each outer tuple, then we can
+ * optimize the join by skipping to the next outer tuple after we find the
+ * first matching inner tuple.
+ */
+ nlstate->js.match_first_inner_tuple_only = node->join.inner_unique;
+
+ /*
* Miscellaneous initialization
*
* create expression context for node
@@ -354,8 +362,11 @@ ExecInitNestLoop(NestLoop *node, EState *estate, int eflags)
switch (node->join.jointype)
{
- case JOIN_INNER:
case JOIN_SEMI:
+ /* for semi joins we match to the first inner tuple only */
+ nlstate->js.match_first_inner_tuple_only = true;
+ /* fall through */
+ case JOIN_INNER:
break;
case JOIN_LEFT:
case JOIN_ANTI:
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index f871e9d..b85b51c 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -2104,6 +2104,7 @@ _copySpecialJoinInfo(const SpecialJoinInfo *from)
COPY_SCALAR_FIELD(jointype);
COPY_SCALAR_FIELD(lhs_strict);
COPY_SCALAR_FIELD(delay_upper_joins);
+ COPY_SCALAR_FIELD(inner_unique);
COPY_SCALAR_FIELD(semi_can_btree);
COPY_SCALAR_FIELD(semi_can_hash);
COPY_NODE_FIELD(semi_operators);
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index 78ed3c7..efc3e43 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -853,6 +853,7 @@ _equalSpecialJoinInfo(const SpecialJoinInfo *a, const SpecialJoinInfo *b)
COMPARE_SCALAR_FIELD(jointype);
COMPARE_SCALAR_FIELD(lhs_strict);
COMPARE_SCALAR_FIELD(delay_upper_joins);
+ COMPARE_SCALAR_FIELD(inner_unique);
COMPARE_SCALAR_FIELD(semi_can_btree);
COMPARE_SCALAR_FIELD(semi_can_hash);
COMPARE_NODE_FIELD(semi_operators);
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 1560ac3..8dd0633 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -2327,6 +2327,7 @@ _outSpecialJoinInfo(StringInfo str, const SpecialJoinInfo *node)
WRITE_ENUM_FIELD(jointype, JoinType);
WRITE_BOOL_FIELD(lhs_strict);
WRITE_BOOL_FIELD(delay_upper_joins);
+ WRITE_BOOL_FIELD(inner_unique);
WRITE_BOOL_FIELD(semi_can_btree);
WRITE_BOOL_FIELD(semi_can_hash);
WRITE_NODE_FIELD(semi_operators);
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index 7c30ec6..49a1fb4 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -19,6 +19,7 @@
#include "executor/executor.h"
#include "foreign/fdwapi.h"
#include "optimizer/cost.h"
+#include "optimizer/planmain.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
@@ -103,6 +104,17 @@ add_paths_to_joinrel(PlannerInfo *root,
extra.param_source_rels = NULL;
/*
+ * Check for proofs which prove that this inner relation cannot cause
+ * duplicate of outer side tuples.
+ */
+ extra.inner_unique = innerrel_is_unique(root, outerrel, innerrel,
+ jointype, sjinfo, restrictlist);
+
+ /* as above but with relations swapped */
+ extra.outer_unique = innerrel_is_unique(root, innerrel, outerrel,
+ jointype, sjinfo, restrictlist);
+
+ /*
* Find potential mergejoin clauses. We can skip this if we are not
* interested in doing a mergejoin. However, mergejoin may be our only
* way of implementing a full outer join, so override enable_mergejoin if
@@ -330,11 +342,9 @@ try_nestloop_path(PlannerInfo *root,
joinrel,
jointype,
&workspace,
- extra->sjinfo,
- &extra->semifactors,
+ extra,
outer_path,
inner_path,
- extra->restrictlist,
pathkeys,
required_outer));
}
@@ -392,11 +402,9 @@ try_partial_nestloop_path(PlannerInfo *root,
joinrel,
jointype,
&workspace,
- extra->sjinfo,
- &extra->semifactors,
+ extra,
outer_path,
inner_path,
- extra->restrictlist,
pathkeys,
NULL));
}
@@ -463,10 +471,9 @@ try_mergejoin_path(PlannerInfo *root,
joinrel,
jointype,
&workspace,
- extra->sjinfo,
+ extra,
outer_path,
inner_path,
- extra->restrictlist,
pathkeys,
required_outer,
mergeclauses,
@@ -528,11 +535,9 @@ try_hashjoin_path(PlannerInfo *root,
joinrel,
jointype,
&workspace,
- extra->sjinfo,
- &extra->semifactors,
+ extra,
outer_path,
inner_path,
- extra->restrictlist,
required_outer,
hashclauses));
}
@@ -590,11 +595,9 @@ try_partial_hashjoin_path(PlannerInfo *root,
joinrel,
jointype,
&workspace,
- extra->sjinfo,
- &extra->semifactors,
+ extra,
outer_path,
inner_path,
- extra->restrictlist,
NULL,
hashclauses));
}
diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c
index 438baf1..81a0a75 100644
--- a/src/backend/optimizer/plan/analyzejoins.c
+++ b/src/backend/optimizer/plan/analyzejoins.c
@@ -34,6 +34,8 @@
/* local functions */
static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
+static bool specialjoin_is_unique_join(PlannerInfo *root,
+ SpecialJoinInfo *sjinfo);
static void remove_rel_from_query(PlannerInfo *root, int relid,
Relids joinrelids);
static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
@@ -41,6 +43,36 @@ static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel);
static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
List *clause_list);
static Oid distinct_col_search(int colno, List *colnos, List *opids);
+static bool is_innerrel_unique_for(PlannerInfo *root,
+ RelOptInfo *outerrel,
+ RelOptInfo *innerrel,
+ List *restrictlist);
+
+
+/*
+ * mark_unique_joins
+ * Check for proofs on each left joined relation which prove that the
+ * join can't cause dupliction of RHS tuples.
+ */
+void
+mark_unique_joins(PlannerInfo *root)
+{
+ ListCell *lc;
+
+ foreach(lc, root->join_info_list)
+ {
+ SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
+
+ /*
+ * Currently we're only interested in LEFT JOINs. If we can prove
+ * these to have a unique inner side, based on the join condition then
+ * this can save the executor from having to attempt fruitless
+ * searches for subsequent matching outer tuples.
+ */
+ if (sjinfo->jointype == JOIN_LEFT && !sjinfo->inner_unique)
+ sjinfo->inner_unique = specialjoin_is_unique_join(root, sjinfo);
+ }
+}
/*
@@ -95,6 +127,15 @@ restart:
root->join_info_list = list_delete_ptr(root->join_info_list, sjinfo);
/*
+ * mark_unique_joins may have been previously unable to mark a join as
+ * unique due to there being multiple relations on the RHS of the
+ * join. We may have just removed a relation so that there's now just
+ * a singleton relation, so let's try again to mark the joins as
+ * unique.
+ */
+ mark_unique_joins(root);
+
+ /*
* Restart the scan. This is necessary to ensure we find all
* removable joins independently of ordering of the join_info_list
* (note that removal of attr_needed bits may make a join appear
@@ -156,31 +197,22 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
int innerrelid;
RelOptInfo *innerrel;
Relids joinrelids;
- List *clause_list = NIL;
- ListCell *l;
int attroff;
+ ListCell *l;
/*
- * Must be a non-delaying left join to a single baserel, else we aren't
- * going to be able to do anything with it.
+ * Join must have a unique inner side and must be a non-delaying join to a
+ * single baserel, else we aren't going to be able to do anything with it.
*/
- if (sjinfo->jointype != JOIN_LEFT ||
+ if (!sjinfo->inner_unique ||
sjinfo->delay_upper_joins)
return false;
if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
- return false;
+ return false; /* should not happen */
innerrel = find_base_rel(root, innerrelid);
- /*
- * Before we go to the effort of checking whether any innerrel variables
- * are needed above the join, make a quick check to eliminate cases in
- * which we will surely be unable to prove uniqueness of the innerrel.
- */
- if (!rel_supports_distinctness(root, innerrel))
- return false;
-
/* Compute the relid set for the join we are considering */
joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
@@ -190,7 +222,8 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
*
* Note that this test only detects use of inner-rel attributes in higher
* join conditions and the target list. There might be such attributes in
- * pushed-down conditions at this join, too. We check that case below.
+ * pushed-down conditions at this join too, but in this case the join
+ * would not have been marked as unique.
*
* As a micro-optimization, it seems better to start with max_attr and
* count down rather than starting with min_attr and counting up, on the
@@ -231,6 +264,41 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
return false; /* it does reference innerrel */
}
+ return true;
+}
+
+/*
+ * specialjoin_is_unique_join
+ * True if it can be proved that this special join can only ever match at
+ * most one inner row for any single outer row. False is returned if
+ * there's insufficient evidence to prove the join is unique.
+ */
+static bool
+specialjoin_is_unique_join(PlannerInfo *root, SpecialJoinInfo *sjinfo)
+{
+ int innerrelid;
+ RelOptInfo *innerrel;
+ Relids joinrelids;
+ List *clause_list = NIL;
+ ListCell *lc;
+
+ /* if there's more than one relation involved, then punt */
+ if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
+ return false;
+
+ innerrel = find_base_rel(root, innerrelid);
+
+ /*
+ * Before we go to the effort of pulling out the join condition's columns,
+ * make a quick check to eliminate cases in which we will surely be unable
+ * to prove uniqueness of the innerrel.
+ */
+ if (!rel_supports_distinctness(root, innerrel))
+ return false;
+
+ /* Compute the relid set for the join we are considering */
+ joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
+
/*
* Search for mergejoinable clauses that constrain the inner rel against
* either the outer rel or a pseudoconstant. If an operator is
@@ -238,9 +306,9 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
* it's what we want. The mergejoinability test also eliminates clauses
* containing volatile functions, which we couldn't depend on.
*/
- foreach(l, innerrel->joininfo)
+ foreach(lc, innerrel->joininfo)
{
- RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
+ RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc);
/*
* If it's not a join clause for this outer join, we can't use it.
@@ -252,10 +320,11 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
!bms_equal(restrictinfo->required_relids, joinrelids))
{
/*
- * If such a clause actually references the inner rel then join
- * removal has to be disallowed. We have to check this despite
- * the previous attr_needed checks because of the possibility of
- * pushed-down clauses referencing the rel.
+ * If such a clause actually references the inner rel then we
+ * can't say we're unique. (XXX this is confusing conditions for
+ * join removability with conditions for uniqueness, and could
+ * probably stand to be improved. But for the moment, keep on
+ * applying the stricter condition.)
*/
if (bms_is_member(innerrelid, restrictinfo->clause_relids))
return false;
@@ -847,3 +916,168 @@ distinct_col_search(int colno, List *colnos, List *opids)
}
return InvalidOid;
}
+
+
+/*
+ * is_innerrel_unique_for
+ * Determine if this innerrel can, at most, return a single tuple for each
+ * outer tuple, based on the 'restrictlist'.
+ */
+static bool
+is_innerrel_unique_for(PlannerInfo *root,
+ RelOptInfo *outerrel,
+ RelOptInfo *innerrel,
+ List *restrictlist)
+{
+ List *clause_list = NIL;
+ ListCell *lc;
+
+ /* Fall out quickly if we certainly can't prove anything */
+ if (restrictlist == NIL ||
+ !rel_supports_distinctness(root, innerrel))
+ return false;
+
+ /*
+ * Search for mergejoinable clauses that constrain the inner rel against
+ * the outer rel. If an operator is mergejoinable then it behaves like
+ * equality for some btree opclass, so it's what we want. The
+ * mergejoinability test also eliminates clauses containing volatile
+ * functions, which we couldn't depend on.
+ */
+ foreach(lc, restrictlist)
+ {
+ RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc);
+
+ /* Ignore if it's not a mergejoinable clause */
+ if (!restrictinfo->can_join ||
+ restrictinfo->mergeopfamilies == NIL)
+ continue; /* not mergejoinable */
+
+ /*
+ * Check if clause has the form "outer op inner" or "inner op outer",
+ * and if so mark which side is inner.
+ */
+ if (!clause_sides_match_join(restrictinfo, outerrel->relids,
+ innerrel->relids))
+ continue; /* no good for these input relations */
+
+ /* OK, add to list */
+ clause_list = lappend(clause_list, restrictinfo);
+ }
+
+ /* Let rel_is_distinct_for() do the hard work */
+ return rel_is_distinct_for(root, innerrel, clause_list);
+}
+
+/*
+ * innerrel_is_unique
+ * Check for proofs which prove that 'innerrel' can, at most, match a
+ * single tuple in 'outerrel' based on the join condition in
+ * 'restrictlist'.
+ */
+bool
+innerrel_is_unique(PlannerInfo *root,
+ RelOptInfo *outerrel,
+ RelOptInfo *innerrel,
+ JoinType jointype,
+ SpecialJoinInfo *sjinfo,
+ List *restrictlist)
+{
+ int innerrelid;
+ bool unique = false;
+
+ /* left joins were already checked by mark_unique_joins */
+ if (jointype == JOIN_LEFT)
+ return sjinfo->inner_unique;
+
+ if (!bms_get_singleton_member(innerrel->relids, &innerrelid))
+ return false;
+
+ /*
+ * Any INNER JOINs which can be proven to return at most one inner tuple
+ * for each outer tuple can be said to be unique.
+ */
+ if (jointype == JOIN_INNER)
+ {
+ MemoryContext old_context;
+ ListCell *lc;
+
+ /* can't prove uniqueness for joins with an empty restrictlist */
+ if (restrictlist == NIL)
+ return false;
+
+ /*
+ * First let's query the unique and non-unique caches to see if we've
+ * managed to prove that innerrel is unique for some subset of this
+ * outerrel. We don't need an exact match, as if we have any extra
+ * outerrels than were previously cached, then they can't make the
+ * innerrel any less unique.
+ */
+ foreach(lc, root->unique_rels[innerrelid])
+ {
+ Bitmapset *unique_rels = (Bitmapset *) lfirst(lc);
+
+ if (bms_is_subset(unique_rels, outerrel->relids))
+ return true; /* Success! */
+ }
+
+ /*
+ * We may have previously determined that this outerrel, or some
+ * superset thereof, cannot prove this innerrel to be unique.
+ */
+ foreach(lc, root->non_unique_rels[innerrelid])
+ {
+ Bitmapset *unique_rels = (Bitmapset *) lfirst(lc);
+
+ if (bms_is_subset(outerrel->relids, unique_rels))
+ return false;
+ }
+
+ /* No cached information, so try to make the proof. */
+ if (is_innerrel_unique_for(root, outerrel, innerrel, restrictlist))
+ {
+ unique = true; /* Success! */
+
+ /*
+ * Cache the positive result for future probes, being sure to keep
+ * it in the planner_cxt even if we are working in GEQO.
+ *
+ * Note: one might consider trying to isolate the minimal subset
+ * of the outerrels that proved the innerrel unique. But it's not
+ * worth the trouble, because the planner builds up joinrels
+ * incrementally and so we'll see the minimally sufficient
+ * outerrels before any supersets of them anyway.
+ */
+ old_context = MemoryContextSwitchTo(root->planner_cxt);
+ root->unique_rels[innerrelid] =
+ lappend(root->unique_rels[innerrelid],
+ bms_copy(outerrel->relids));
+ MemoryContextSwitchTo(old_context);
+ }
+ else
+ {
+ /*
+ * None of the join conditions for outerrel proved innerrel
+ * unique, so we can safely reject this outerrel or any subset of
+ * it in future checks.
+ *
+ * However, in normal planning mode, caching this knowledge is
+ * totally pointless; it won't be queried again, because we build
+ * up joinrels from smaller to larger. It is useful in GEQO mode,
+ * where the knowledge can be carried across successive planning
+ * attempts; and it's likely to be useful when using join-search
+ * plugins, too. Hence cache only when join_search_private is
+ * non-NULL. (Yeah, that's a hack, but it seems reasonable.)
+ */
+ if (root->join_search_private)
+ {
+ old_context = MemoryContextSwitchTo(root->planner_cxt);
+ root->non_unique_rels[innerrelid] =
+ lappend(root->non_unique_rels[innerrelid],
+ bms_copy(outerrel->relids));
+ MemoryContextSwitchTo(old_context);
+ }
+ }
+ }
+ return unique;
+}
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index fae1f67..8904697 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -206,12 +206,12 @@ static BitmapOr *make_bitmap_or(List *bitmapplans);
static NestLoop *make_nestloop(List *tlist,
List *joinclauses, List *otherclauses, List *nestParams,
Plan *lefttree, Plan *righttree,
- JoinType jointype);
+ JoinPath *jpath);
static HashJoin *make_hashjoin(List *tlist,
List *joinclauses, List *otherclauses,
List *hashclauses,
Plan *lefttree, Plan *righttree,
- JoinType jointype);
+ JoinPath *jpath);
static Hash *make_hash(Plan *lefttree,
Oid skewTable,
AttrNumber skewColumn,
@@ -226,7 +226,7 @@ static MergeJoin *make_mergejoin(List *tlist,
int *mergestrategies,
bool *mergenullsfirst,
Plan *lefttree, Plan *righttree,
- JoinType jointype);
+ JoinPath *jpath);
static Sort *make_sort(Plan *lefttree, int numCols,
AttrNumber *sortColIdx, Oid *sortOperators,
Oid *collations, bool *nullsFirst);
@@ -3532,7 +3532,7 @@ create_nestloop_plan(PlannerInfo *root,
nestParams,
outer_plan,
inner_plan,
- best_path->jointype);
+ best_path);
copy_generic_path_info(&join_plan->join.plan, &best_path->path);
@@ -3835,7 +3835,7 @@ create_mergejoin_plan(PlannerInfo *root,
mergenullsfirst,
outer_plan,
inner_plan,
- best_path->jpath.jointype);
+ &best_path->jpath);
/* Costs of sort and material steps are included in path cost already */
copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path);
@@ -3975,7 +3975,7 @@ create_hashjoin_plan(PlannerInfo *root,
hashclauses,
outer_plan,
(Plan *) hash_plan,
- best_path->jpath.jointype);
+ &best_path->jpath);
copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path);
@@ -5112,7 +5112,7 @@ make_nestloop(List *tlist,
List *nestParams,
Plan *lefttree,
Plan *righttree,
- JoinType jointype)
+ JoinPath *jpath)
{
NestLoop *node = makeNode(NestLoop);
Plan *plan = &node->join.plan;
@@ -5121,9 +5121,11 @@ make_nestloop(List *tlist,
plan->qual = otherclauses;
plan->lefttree = lefttree;
plan->righttree = righttree;
- node->join.jointype = jointype;
+ node->join.jointype = jpath->jointype;
node->join.joinqual = joinclauses;
node->nestParams = nestParams;
+ node->join.inner_unique = jpath->inner_unique;
+ node->join.outer_unique = jpath->outer_unique;
return node;
}
@@ -5135,7 +5137,7 @@ make_hashjoin(List *tlist,
List *hashclauses,
Plan *lefttree,
Plan *righttree,
- JoinType jointype)
+ JoinPath *jpath)
{
HashJoin *node = makeNode(HashJoin);
Plan *plan = &node->join.plan;
@@ -5145,8 +5147,10 @@ make_hashjoin(List *tlist,
plan->lefttree = lefttree;
plan->righttree = righttree;
node->hashclauses = hashclauses;
- node->join.jointype = jointype;
+ node->join.jointype = jpath->jointype;
node->join.joinqual = joinclauses;
+ node->join.inner_unique = jpath->inner_unique;
+ node->join.outer_unique = jpath->outer_unique;
return node;
}
@@ -5187,7 +5191,7 @@ make_mergejoin(List *tlist,
bool *mergenullsfirst,
Plan *lefttree,
Plan *righttree,
- JoinType jointype)
+ JoinPath *jpath)
{
MergeJoin *node = makeNode(MergeJoin);
Plan *plan = &node->join.plan;
@@ -5201,8 +5205,10 @@ make_mergejoin(List *tlist,
node->mergeCollations = mergecollations;
node->mergeStrategies = mergestrategies;
node->mergeNullsFirst = mergenullsfirst;
- node->join.jointype = jointype;
+ node->join.jointype = jpath->jointype;
node->join.joinqual = joinclauses;
+ node->join.inner_unique = jpath->inner_unique;
+ node->join.outer_unique = jpath->outer_unique;
return node;
}
diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c
index c170e96..5993d0b 100644
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -1204,6 +1204,8 @@ make_outerjoininfo(PlannerInfo *root,
sjinfo->jointype = jointype;
/* this always starts out false */
sjinfo->delay_upper_joins = false;
+ /* this may be changed later */
+ sjinfo->inner_unique = false;
compute_semijoin_info(sjinfo, clause);
diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c
index e880759..46dbe76 100644
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@@ -124,6 +124,9 @@ query_planner(PlannerInfo *root, List *tlist,
*/
setup_simple_rel_arrays(root);
+ /* Allocate memory for caching which joins are unique. */
+ setup_unique_join_caches(root);
+
/*
* Construct RelOptInfo nodes for all base relations in query, and
* indirectly for all appendrel member relations ("other rels"). This
@@ -185,6 +188,9 @@ query_planner(PlannerInfo *root, List *tlist,
*/
fix_placeholder_input_needed_levels(root);
+ /* check for unique properties on outer joins */
+ mark_unique_joins(root);
+
/*
* Remove any useless outer joins. Ideally this would be done during
* jointree preprocessing, but the necessary information isn't available
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index f440875..6ebbcff 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1935,11 +1935,9 @@ calc_non_nestloop_required_outer(Path *outer_path, Path *inner_path)
* 'joinrel' is the join relation.
* 'jointype' is the type of join required
* 'workspace' is the result from initial_cost_nestloop
- * 'sjinfo' is extra info about the join for selectivity estimation
- * 'semifactors' contains valid data if jointype is SEMI or ANTI
+ * 'extra' contains various information about the join
* 'outer_path' is the outer path
* 'inner_path' is the inner path
- * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
* 'pathkeys' are the path keys of the new join path
* 'required_outer' is the set of required outer rels
*
@@ -1950,16 +1948,15 @@ create_nestloop_path(PlannerInfo *root,
RelOptInfo *joinrel,
JoinType jointype,
JoinCostWorkspace *workspace,
- SpecialJoinInfo *sjinfo,
- SemiAntiJoinFactors *semifactors,
+ JoinPathExtraData *extra,
Path *outer_path,
Path *inner_path,
- List *restrict_clauses,
List *pathkeys,
Relids required_outer)
{
NestPath *pathnode = makeNode(NestPath);
Relids inner_req_outer = PATH_REQ_OUTER(inner_path);
+ List *restrict_clauses = extra->restrictlist;
/*
* If the inner path is parameterized by the outer, we must drop any
@@ -1995,7 +1992,7 @@ create_nestloop_path(PlannerInfo *root,
joinrel,
outer_path,
inner_path,
- sjinfo,
+ extra->sjinfo,
required_outer,
&restrict_clauses);
pathnode->path.parallel_aware = false;
@@ -2008,8 +2005,10 @@ create_nestloop_path(PlannerInfo *root,
pathnode->outerjoinpath = outer_path;
pathnode->innerjoinpath = inner_path;
pathnode->joinrestrictinfo = restrict_clauses;
+ pathnode->inner_unique = extra->inner_unique;
+ pathnode->outer_unique = extra->outer_unique;
- final_cost_nestloop(root, pathnode, workspace, sjinfo, semifactors);
+ final_cost_nestloop(root, pathnode, workspace, extra->sjinfo, &extra->semifactors);
return pathnode;
}
@@ -2022,10 +2021,9 @@ create_nestloop_path(PlannerInfo *root,
* 'joinrel' is the join relation
* 'jointype' is the type of join required
* 'workspace' is the result from initial_cost_mergejoin
- * 'sjinfo' is extra info about the join for selectivity estimation
+ * 'extra' contains various information about the join
* 'outer_path' is the outer path
* 'inner_path' is the inner path
- * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
* 'pathkeys' are the path keys of the new join path
* 'required_outer' is the set of required outer rels
* 'mergeclauses' are the RestrictInfo nodes to use as merge clauses
@@ -2038,10 +2036,9 @@ create_mergejoin_path(PlannerInfo *root,
RelOptInfo *joinrel,
JoinType jointype,
JoinCostWorkspace *workspace,
- SpecialJoinInfo *sjinfo,
+ JoinPathExtraData *extra,
Path *outer_path,
Path *inner_path,
- List *restrict_clauses,
List *pathkeys,
Relids required_outer,
List *mergeclauses,
@@ -2049,6 +2046,7 @@ create_mergejoin_path(PlannerInfo *root,
List *innersortkeys)
{
MergePath *pathnode = makeNode(MergePath);
+ List *restrict_clauses = extra->restrictlist;
pathnode->jpath.path.pathtype = T_MergeJoin;
pathnode->jpath.path.parent = joinrel;
@@ -2058,7 +2056,7 @@ create_mergejoin_path(PlannerInfo *root,
joinrel,
outer_path,
inner_path,
- sjinfo,
+ extra->sjinfo,
required_outer,
&restrict_clauses);
pathnode->jpath.path.parallel_aware = false;
@@ -2071,12 +2069,14 @@ create_mergejoin_path(PlannerInfo *root,
pathnode->jpath.outerjoinpath = outer_path;
pathnode->jpath.innerjoinpath = inner_path;
pathnode->jpath.joinrestrictinfo = restrict_clauses;
+ pathnode->jpath.inner_unique = extra->inner_unique;
+ pathnode->jpath.outer_unique = extra->outer_unique;
pathnode->path_mergeclauses = mergeclauses;
pathnode->outersortkeys = outersortkeys;
pathnode->innersortkeys = innersortkeys;
/* pathnode->materialize_inner will be set by final_cost_mergejoin */
- final_cost_mergejoin(root, pathnode, workspace, sjinfo);
+ final_cost_mergejoin(root, pathnode, workspace, extra->sjinfo);
return pathnode;
}
@@ -2088,11 +2088,9 @@ create_mergejoin_path(PlannerInfo *root,
* 'joinrel' is the join relation
* 'jointype' is the type of join required
* 'workspace' is the result from initial_cost_hashjoin
- * 'sjinfo' is extra info about the join for selectivity estimation
- * 'semifactors' contains valid data if jointype is SEMI or ANTI
+ * 'extra' contains various information about the join
* 'outer_path' is the cheapest outer path
* 'inner_path' is the cheapest inner path
- * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
* 'required_outer' is the set of required outer rels
* 'hashclauses' are the RestrictInfo nodes to use as hash clauses
* (this should be a subset of the restrict_clauses list)
@@ -2102,15 +2100,14 @@ create_hashjoin_path(PlannerInfo *root,
RelOptInfo *joinrel,
JoinType jointype,
JoinCostWorkspace *workspace,
- SpecialJoinInfo *sjinfo,
- SemiAntiJoinFactors *semifactors,
+ JoinPathExtraData *extra,
Path *outer_path,
Path *inner_path,
- List *restrict_clauses,
Relids required_outer,
List *hashclauses)
{
HashPath *pathnode = makeNode(HashPath);
+ List *restrict_clauses = extra->restrictlist;
pathnode->jpath.path.pathtype = T_HashJoin;
pathnode->jpath.path.parent = joinrel;
@@ -2120,7 +2117,7 @@ create_hashjoin_path(PlannerInfo *root,
joinrel,
outer_path,
inner_path,
- sjinfo,
+ extra->sjinfo,
required_outer,
&restrict_clauses);
pathnode->jpath.path.parallel_aware = false;
@@ -2145,10 +2142,12 @@ create_hashjoin_path(PlannerInfo *root,
pathnode->jpath.outerjoinpath = outer_path;
pathnode->jpath.innerjoinpath = inner_path;
pathnode->jpath.joinrestrictinfo = restrict_clauses;
+ pathnode->jpath.inner_unique = extra->inner_unique;
+ pathnode->jpath.outer_unique = extra->outer_unique;
pathnode->path_hashclauses = hashclauses;
/* final_cost_hashjoin will fill in pathnode->num_batches */
- final_cost_hashjoin(root, pathnode, workspace, sjinfo, semifactors);
+ final_cost_hashjoin(root, pathnode, workspace, extra->sjinfo, &extra->semifactors);
return pathnode;
}
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index adc1db9..d1bda16 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -81,6 +81,22 @@ setup_simple_rel_arrays(PlannerInfo *root)
}
/*
+ * setup_unique_join_caches
+ * Prepare the arrays we use for caching which joins are proved to be
+ * unique and non-unique.
+ */
+void
+setup_unique_join_caches(PlannerInfo *root)
+{
+ int size = list_length(root->parse->rtable) + 1;
+
+ /* initialize the unique relation cache to all NULLs */
+ root->unique_rels = (List **) palloc0(size * sizeof(List *));
+
+ root->non_unique_rels = (List **) palloc0(size * sizeof(List *));
+}
+
+/*
* build_simple_rel
* Construct a new RelOptInfo for a base relation or 'other' relation.
*/
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 1da1e1f..8f46e72 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1680,6 +1680,17 @@ typedef struct JoinState
PlanState ps;
JoinType jointype;
List *joinqual; /* JOIN quals (in addition to ps.qual) */
+ bool match_first_inner_tuple_only; /* True if we can safely move
+ * to the next outer tuple
+ * after matching first inner
+ * tuple
+ */
+ bool match_first_outer_tuple_only; /* True if we can safely move
+ * to the next inner tuple
+ * after matching first outer
+ * tuple
+ */
+
} JoinState;
/* ----------------
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index f72f7a8..ce2285e 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -618,6 +618,8 @@ typedef struct Join
Plan plan;
JoinType jointype;
List *joinqual; /* JOIN quals (in addition to plan.qual) */
+ bool inner_unique;
+ bool outer_unique;
} Join;
/* ----------------
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index 643be54..845af01 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -222,6 +222,19 @@ typedef struct PlannerInfo
List **join_rel_level; /* lists of join-relation RelOptInfos */
int join_cur_level; /* index of list being extended */
+ /*
+ * During the join search we attempt to determine which joins can be
+ * proven to be unique on their inner or outer sides based on the join
+ * condition. This is a rather expensive test to perform, as it requires
+ * checking each relation's unique indexes to see if the relation can, at
+ * most, return one tuple for each opposing tuple in the join. We use this
+ * cache during the join search to record lists of the sets of relations
+ * which both prove, and disprove the uniqueness properties for the relid
+ * indexed by these arrays.
+ */
+ List **unique_rels; /* cache for proven unique rels */
+ List **non_unique_rels; /* cache for proven non-unique rels */
+
List *init_plans; /* init SubPlans for query */
List *cte_plan_ids; /* per-CTE-item list of subplan IDs */
@@ -1217,6 +1230,11 @@ typedef struct JoinPath
List *joinrestrictinfo; /* RestrictInfos to apply to join */
+ bool inner_unique; /* inner side of join matches no more than one
+ * outer side tuple */
+ bool outer_unique; /* outer side of join matches no more than one
+ * inner side tuple */
+
/*
* See the notes for RelOptInfo and ParamPathInfo to understand why
* joinrestrictinfo is needed in JoinPath, and can't be merged into the
@@ -1810,6 +1828,8 @@ typedef struct SpecialJoinInfo
JoinType jointype; /* always INNER, LEFT, FULL, SEMI, or ANTI */
bool lhs_strict; /* joinclause is strict for some LHS rel */
bool delay_upper_joins; /* can't commute with upper RHS */
+ bool inner_unique; /* inner side of join matches no more than one
+ * outer side tuple */
/* Remaining fields are set only for JOIN_SEMI jointype: */
bool semi_can_btree; /* true if semi_operators are all btree */
bool semi_can_hash; /* true if semi_operators are all hash */
@@ -2042,6 +2062,10 @@ typedef struct SemiAntiJoinFactors
* sjinfo is extra info about special joins for selectivity estimation
* semifactors is as shown above (only valid for SEMI or ANTI joins)
* param_source_rels are OK targets for parameterization of result paths
+ * inner_unique marks if the inner side of join matches no more than one outer
+ * side tuple
+ * outer_unique marks if the outer side of join matches no more than one inner
+ * side tuple
*/
typedef struct JoinPathExtraData
{
@@ -2050,6 +2074,8 @@ typedef struct JoinPathExtraData
SpecialJoinInfo *sjinfo;
SemiAntiJoinFactors semifactors;
Relids param_source_rels;
+ bool inner_unique;
+ bool outer_unique;
} JoinPathExtraData;
/*
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 7b41317..5ce410c 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -102,11 +102,9 @@ extern NestPath *create_nestloop_path(PlannerInfo *root,
RelOptInfo *joinrel,
JoinType jointype,
JoinCostWorkspace *workspace,
- SpecialJoinInfo *sjinfo,
- SemiAntiJoinFactors *semifactors,
+ JoinPathExtraData *extra,
Path *outer_path,
Path *inner_path,
- List *restrict_clauses,
List *pathkeys,
Relids required_outer);
@@ -114,10 +112,9 @@ extern MergePath *create_mergejoin_path(PlannerInfo *root,
RelOptInfo *joinrel,
JoinType jointype,
JoinCostWorkspace *workspace,
- SpecialJoinInfo *sjinfo,
+ JoinPathExtraData *extra,
Path *outer_path,
Path *inner_path,
- List *restrict_clauses,
List *pathkeys,
Relids required_outer,
List *mergeclauses,
@@ -128,11 +125,9 @@ extern HashPath *create_hashjoin_path(PlannerInfo *root,
RelOptInfo *joinrel,
JoinType jointype,
JoinCostWorkspace *workspace,
- SpecialJoinInfo *sjinfo,
- SemiAntiJoinFactors *semifactors,
+ JoinPathExtraData *extra,
Path *outer_path,
Path *inner_path,
- List *restrict_clauses,
Relids required_outer,
List *hashclauses);
@@ -238,6 +233,7 @@ extern Path *reparameterize_path(PlannerInfo *root, Path *path,
* prototypes for relnode.c
*/
extern void setup_simple_rel_arrays(PlannerInfo *root);
+extern void setup_unique_join_caches(PlannerInfo *root);
extern RelOptInfo *build_simple_rel(PlannerInfo *root, int relid,
RelOptKind reloptkind);
extern RelOptInfo *find_base_rel(PlannerInfo *root, int relid);
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index 94ef84b..128753a 100644
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -102,10 +102,15 @@ extern void match_foreign_keys_to_quals(PlannerInfo *root);
/*
* prototypes for plan/analyzejoins.c
*/
+extern void mark_unique_joins(PlannerInfo *root);
extern List *remove_useless_joins(PlannerInfo *root, List *joinlist);
extern bool query_supports_distinctness(Query *query);
extern bool query_is_distinct_for(Query *query, List *colnos, List *opids);
-
+extern bool innerrel_is_unique(PlannerInfo *root, RelOptInfo *outerrel,
+ RelOptInfo *innerrel,
+ JoinType jointype,
+ SpecialJoinInfo *sjinfo,
+ List *restrictlist);
/*
* prototypes for plan/setrefs.c
*/
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 0ff8062..7604d20 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -381,6 +381,8 @@ order by 1, 2;
Sort Key: s1.s1, s2.s2
-> Nested Loop
Output: s1.s1, s2.s2, (sum((s1.s1 + s2.s2)))
+ Inner Unique: No
+ Outer Unique: No
-> Function Scan on pg_catalog.generate_series s1
Output: s1.s1
Function Call: generate_series(1, 3)
@@ -390,7 +392,7 @@ order by 1, 2;
-> Function Scan on pg_catalog.generate_series s2
Output: s2.s2
Function Call: generate_series(1, 3)
-(14 rows)
+(16 rows)
select s1, s2, sm
from generate_series(1, 3) s1,
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index d9bbae0..233c796 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -3362,6 +3362,8 @@ using (join_key);
--------------------------------------------------------------------------
Nested Loop Left Join
Output: "*VALUES*".column1, i1.f1, (666)
+ Inner Unique: No
+ Outer Unique: No
Join Filter: ("*VALUES*".column1 = i1.f1)
-> Values Scan on "*VALUES*"
Output: "*VALUES*".column1
@@ -3369,12 +3371,14 @@ using (join_key);
Output: i1.f1, (666)
-> Nested Loop Left Join
Output: i1.f1, 666
+ Inner Unique: No
+ Outer Unique: No
-> Seq Scan on public.int4_tbl i1
Output: i1.f1
-> Index Only Scan using tenk1_unique2 on public.tenk1 i2
Output: i2.unique2
Index Cond: (i2.unique2 = i1.f1)
-(14 rows)
+(18 rows)
select foo1.join_key as foo1_id, foo3.join_key AS foo3_id, bug_field from
(values (0),(1)) foo1(join_key)
@@ -3412,9 +3416,13 @@ select t1.* from
----------------------------------------------------------------------
Hash Left Join
Output: t1.f1
+ Inner Unique: No
+ Outer Unique: No
Hash Cond: (i8.q2 = i4.f1)
-> Nested Loop Left Join
Output: t1.f1, i8.q2
+ Inner Unique: No
+ Outer Unique: No
Join Filter: (t1.f1 = '***'::text)
-> Seq Scan on public.text_tbl t1
Output: t1.f1
@@ -3422,9 +3430,13 @@ select t1.* from
Output: i8.q2
-> Hash Right Join
Output: i8.q2
+ Inner Unique: No
+ Outer Unique: No
Hash Cond: ((NULL::integer) = i8b1.q2)
-> Hash Left Join
Output: i8.q2, (NULL::integer)
+ Inner Unique: No
+ Outer Unique: No
Hash Cond: (i8.q1 = i8b2.q1)
-> Seq Scan on public.int8_tbl i8
Output: i8.q1, i8.q2
@@ -3440,7 +3452,7 @@ select t1.* from
Output: i4.f1
-> Seq Scan on public.int4_tbl i4
Output: i4.f1
-(30 rows)
+(38 rows)
select t1.* from
text_tbl t1
@@ -3473,9 +3485,13 @@ select t1.* from
----------------------------------------------------------------------------
Hash Left Join
Output: t1.f1
+ Inner Unique: No
+ Outer Unique: No
Hash Cond: (i8.q2 = i4.f1)
-> Nested Loop Left Join
Output: t1.f1, i8.q2
+ Inner Unique: No
+ Outer Unique: No
Join Filter: (t1.f1 = '***'::text)
-> Seq Scan on public.text_tbl t1
Output: t1.f1
@@ -3483,12 +3499,18 @@ select t1.* from
Output: i8.q2
-> Hash Right Join
Output: i8.q2
+ Inner Unique: No
+ Outer Unique: No
Hash Cond: ((NULL::integer) = i8b1.q2)
-> Hash Right Join
Output: i8.q2, (NULL::integer)
+ Inner Unique: No
+ Outer Unique: No
Hash Cond: (i8b2.q1 = i8.q1)
-> Nested Loop
Output: i8b2.q1, NULL::integer
+ Inner Unique: No
+ Outer Unique: No
-> Seq Scan on public.int8_tbl i8b2
Output: i8b2.q1, i8b2.q2
-> Materialize
@@ -3505,7 +3527,7 @@ select t1.* from
Output: i4.f1
-> Seq Scan on public.int4_tbl i4
Output: i4.f1
-(34 rows)
+(44 rows)
select t1.* from
text_tbl t1
@@ -3539,9 +3561,13 @@ select t1.* from
----------------------------------------------------------------------------
Hash Left Join
Output: t1.f1
+ Inner Unique: No
+ Outer Unique: No
Hash Cond: (i8.q2 = i4.f1)
-> Nested Loop Left Join
Output: t1.f1, i8.q2
+ Inner Unique: No
+ Outer Unique: No
Join Filter: (t1.f1 = '***'::text)
-> Seq Scan on public.text_tbl t1
Output: t1.f1
@@ -3549,12 +3575,18 @@ select t1.* from
Output: i8.q2
-> Hash Right Join
Output: i8.q2
+ Inner Unique: No
+ Outer Unique: No
Hash Cond: ((NULL::integer) = i8b1.q2)
-> Hash Right Join
Output: i8.q2, (NULL::integer)
+ Inner Unique: No
+ Outer Unique: No
Hash Cond: (i8b2.q1 = i8.q1)
-> Hash Join
Output: i8b2.q1, NULL::integer
+ Inner Unique: No
+ Outer Unique: No
Hash Cond: (i8b2.q1 = i4b2.f1)
-> Seq Scan on public.int8_tbl i8b2
Output: i8b2.q1, i8b2.q2
@@ -3574,7 +3606,7 @@ select t1.* from
Output: i4.f1
-> Seq Scan on public.int4_tbl i4
Output: i4.f1
-(37 rows)
+(47 rows)
select t1.* from
text_tbl t1
@@ -3606,14 +3638,20 @@ select * from
--------------------------------------------------------
Nested Loop Left Join
Output: t1.f1, i8.q1, i8.q2, t2.f1, i4.f1
+ Inner Unique: No
+ Outer Unique: No
-> Seq Scan on public.text_tbl t2
Output: t2.f1
-> Materialize
Output: i8.q1, i8.q2, i4.f1, t1.f1
-> Nested Loop
Output: i8.q1, i8.q2, i4.f1, t1.f1
+ Inner Unique: No
+ Outer Unique: No
-> Nested Loop Left Join
Output: i8.q1, i8.q2, i4.f1
+ Inner Unique: No
+ Outer Unique: No
Join Filter: (i8.q1 = i4.f1)
-> Seq Scan on public.int8_tbl i8
Output: i8.q1, i8.q2
@@ -3623,7 +3661,7 @@ select * from
-> Seq Scan on public.text_tbl t1
Output: t1.f1
Filter: (t1.f1 = 'doh!'::text)
-(19 rows)
+(25 rows)
select * from
text_tbl t1
@@ -3653,9 +3691,13 @@ where t1.f1 = ss.f1;
--------------------------------------------------
Nested Loop
Output: t1.f1, i8.q1, i8.q2, (i8.q1), t2.f1
+ Inner Unique: No
+ Outer Unique: No
Join Filter: (t1.f1 = t2.f1)
-> Nested Loop Left Join
Output: t1.f1, i8.q1, i8.q2
+ Inner Unique: No
+ Outer Unique: No
-> Seq Scan on public.text_tbl t1
Output: t1.f1
-> Materialize
@@ -3667,7 +3709,7 @@ where t1.f1 = ss.f1;
Output: (i8.q1), t2.f1
-> Seq Scan on public.text_tbl t2
Output: i8.q1, t2.f1
-(16 rows)
+(20 rows)
select * from
text_tbl t1
@@ -3692,11 +3734,17 @@ where t1.f1 = ss2.f1;
-------------------------------------------------------------------
Nested Loop
Output: t1.f1, i8.q1, i8.q2, (i8.q1), t2.f1, ((i8.q1)), (t2.f1)
+ Inner Unique: No
+ Outer Unique: No
Join Filter: (t1.f1 = (t2.f1))
-> Nested Loop
Output: t1.f1, i8.q1, i8.q2, (i8.q1), t2.f1
+ Inner Unique: No
+ Outer Unique: No
-> Nested Loop Left Join
Output: t1.f1, i8.q1, i8.q2
+ Inner Unique: No
+ Outer Unique: No
-> Seq Scan on public.text_tbl t1
Output: t1.f1
-> Materialize
@@ -3712,7 +3760,7 @@ where t1.f1 = ss2.f1;
Output: ((i8.q1)), (t2.f1)
-> Seq Scan on public.text_tbl t3
Output: (i8.q1), t2.f1
-(22 rows)
+(28 rows)
select * from
text_tbl t1
@@ -3738,10 +3786,16 @@ where tt1.f1 = ss1.c0;
----------------------------------------------------------
Nested Loop
Output: 1
+ Inner Unique: No
+ Outer Unique: No
-> Nested Loop Left Join
Output: tt1.f1, tt4.f1
+ Inner Unique: No
+ Outer Unique: No
-> Nested Loop
Output: tt1.f1
+ Inner Unique: No
+ Outer Unique: No
-> Seq Scan on public.text_tbl tt1
Output: tt1.f1
Filter: (tt1.f1 = 'foo'::text)
@@ -3751,6 +3805,8 @@ where tt1.f1 = ss1.c0;
Output: tt4.f1
-> Nested Loop Left Join
Output: tt4.f1
+ Inner Unique: No
+ Outer Unique: No
Join Filter: (tt3.f1 = tt4.f1)
-> Seq Scan on public.text_tbl tt3
Output: tt3.f1
@@ -3765,7 +3821,7 @@ where tt1.f1 = ss1.c0;
Output: (tt4.f1)
-> Seq Scan on public.text_tbl tt5
Output: tt4.f1
-(29 rows)
+(37 rows)
select 1 from
text_tbl as tt1
@@ -3795,13 +3851,21 @@ where ss1.c2 = 0;
------------------------------------------------------------------------
Nested Loop
Output: (i41.f1), (i8.q1), (i8.q2), (i42.f1), (i43.f1), ((42))
+ Inner Unique: No
+ Outer Unique: No
-> Hash Join
Output: i41.f1, i42.f1, i8.q1, i8.q2, i43.f1, 42
+ Inner Unique: No
+ Outer Unique: No
Hash Cond: (i41.f1 = i42.f1)
-> Nested Loop
Output: i8.q1, i8.q2, i43.f1, i41.f1
+ Inner Unique: No
+ Outer Unique: No
-> Nested Loop
Output: i8.q1, i8.q2, i43.f1
+ Inner Unique: No
+ Outer Unique: No
-> Seq Scan on public.int8_tbl i8
Output: i8.q1, i8.q2
Filter: (i8.q1 = 0)
@@ -3818,7 +3882,7 @@ where ss1.c2 = 0;
Output: (i41.f1), (i8.q1), (i8.q2), (i42.f1), (i43.f1), ((42))
-> Seq Scan on public.text_tbl
Output: i41.f1, i8.q1, i8.q2, i42.f1, i43.f1, (42)
-(25 rows)
+(33 rows)
select ss2.* from
int4_tbl i41
@@ -3906,6 +3970,8 @@ explain (verbose, costs off)
---------------------------------------------------------
Merge Left Join
Output: a.q2, b.q1
+ Inner Unique: No
+ Outer Unique: No
Merge Cond: (a.q2 = (COALESCE(b.q1, '1'::bigint)))
Filter: (COALESCE(b.q1, '1'::bigint) > 0)
-> Sort
@@ -3918,7 +3984,7 @@ explain (verbose, costs off)
Sort Key: (COALESCE(b.q1, '1'::bigint))
-> Seq Scan on public.int8_tbl b
Output: b.q1, COALESCE(b.q1, '1'::bigint)
-(14 rows)
+(16 rows)
select a.q2, b.q1
from int8_tbl a left join int8_tbl b on a.q2 = coalesce(b.q1, 1)
@@ -4801,12 +4867,14 @@ select * from
------------------------------------------
Nested Loop Left Join
Output: a.q1, a.q2, b.q1, b.q2, (a.q2)
+ Inner Unique: No
+ Outer Unique: No
-> Seq Scan on public.int8_tbl a
Output: a.q1, a.q2
-> Seq Scan on public.int8_tbl b
Output: b.q1, b.q2, a.q2
Filter: (a.q2 = b.q1)
-(7 rows)
+(9 rows)
select * from
int8_tbl a left join
@@ -4833,12 +4901,14 @@ select * from
------------------------------------------------------------------
Nested Loop Left Join
Output: a.q1, a.q2, b.q1, b.q2, (COALESCE(a.q2, '42'::bigint))
+ Inner Unique: No
+ Outer Unique: No
-> Seq Scan on public.int8_tbl a
Output: a.q1, a.q2
-> Seq Scan on public.int8_tbl b
Output: b.q1, b.q2, COALESCE(a.q2, '42'::bigint)
Filter: (a.q2 = b.q1)
-(7 rows)
+(9 rows)
select * from
int8_tbl a left join
@@ -4866,6 +4936,8 @@ select * from int4_tbl i left join
-------------------------------------------
Hash Left Join
Output: i.f1, j.f1
+ Inner Unique: No
+ Outer Unique: No
Hash Cond: (i.f1 = j.f1)
-> Seq Scan on public.int4_tbl i
Output: i.f1
@@ -4873,7 +4945,7 @@ select * from int4_tbl i left join
Output: j.f1
-> Seq Scan on public.int2_tbl j
Output: j.f1
-(9 rows)
+(11 rows)
select * from int4_tbl i left join
lateral (select * from int2_tbl j where i.f1 = j.f1) k on true;
@@ -4893,12 +4965,14 @@ select * from int4_tbl i left join
-------------------------------------
Nested Loop Left Join
Output: i.f1, (COALESCE(i.*))
+ Inner Unique: No
+ Outer Unique: No
-> Seq Scan on public.int4_tbl i
Output: i.f1, i.*
-> Seq Scan on public.int2_tbl j
Output: j.f1, COALESCE(i.*)
Filter: (i.f1 = j.f1)
-(7 rows)
+(9 rows)
select * from int4_tbl i left join
lateral (select coalesce(i) from int2_tbl j where i.f1 = j.f1) k on true;
@@ -4920,10 +4994,14 @@ select * from int4_tbl a,
-------------------------------------------------
Nested Loop
Output: a.f1, b.f1, c.q1, c.q2
+ Inner Unique: No
+ Outer Unique: No
-> Seq Scan on public.int4_tbl a
Output: a.f1
-> Hash Left Join
Output: b.f1, c.q1, c.q2
+ Inner Unique: No
+ Outer Unique: No
Hash Cond: (b.f1 = c.q1)
-> Seq Scan on public.int4_tbl b
Output: b.f1
@@ -4932,7 +5010,7 @@ select * from int4_tbl a,
-> Seq Scan on public.int8_tbl c
Output: c.q1, c.q2
Filter: (a.f1 = c.q2)
-(14 rows)
+(18 rows)
select * from int4_tbl a,
lateral (
@@ -4978,16 +5056,20 @@ select * from
-------------------------------------------------------------
Nested Loop Left Join
Output: a.q1, a.q2, b.q1, c.q1, (LEAST(a.q1, b.q1, c.q1))
+ Inner Unique: No
+ Outer Unique: No
-> Seq Scan on public.int8_tbl a
Output: a.q1, a.q2
-> Nested Loop
Output: b.q1, c.q1, LEAST(a.q1, b.q1, c.q1)
+ Inner Unique: No
+ Outer Unique: No
-> Seq Scan on public.int8_tbl b
Output: b.q1, b.q2
Filter: (a.q2 = b.q1)
-> Seq Scan on public.int8_tbl c
Output: c.q1, c.q2
-(11 rows)
+(15 rows)
select * from
int8_tbl a left join lateral
@@ -5054,13 +5136,21 @@ select * from
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Nested Loop
Output: c.q1, c.q2, a.q1, a.q2, b.q1, (COALESCE(b.q2, '42'::bigint)), d.q1, (COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2)), ((COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2)))
+ Inner Unique: No
+ Outer Unique: No
-> Hash Right Join
Output: c.q1, c.q2, a.q1, a.q2, b.q1, d.q1, (COALESCE(b.q2, '42'::bigint)), (COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2))
+ Inner Unique: No
+ Outer Unique: No
Hash Cond: (d.q1 = c.q2)
-> Nested Loop
Output: a.q1, a.q2, b.q1, d.q1, (COALESCE(b.q2, '42'::bigint)), (COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2))
+ Inner Unique: No
+ Outer Unique: No
-> Hash Left Join
Output: a.q1, a.q2, b.q1, (COALESCE(b.q2, '42'::bigint))
+ Inner Unique: No
+ Outer Unique: No
Hash Cond: (a.q2 = b.q1)
-> Seq Scan on public.int8_tbl a
Output: a.q1, a.q2
@@ -5076,7 +5166,7 @@ select * from
Output: c.q1, c.q2
-> Result
Output: (COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2))
-(24 rows)
+(32 rows)
-- case that breaks the old ph_may_need optimization
explain (verbose, costs off)
@@ -5094,17 +5184,27 @@ select c.*,a.*,ss1.q1,ss2.q1,ss3.* from
---------------------------------------------------------------------------------------------------------
Nested Loop
Output: c.q1, c.q2, a.q1, a.q2, b.q1, d.q1, i.f1
+ Inner Unique: No
+ Outer Unique: No
Join Filter: ((COALESCE((COALESCE(b.q2, (b2.f1)::bigint)), d.q2)) > i.f1)
-> Hash Right Join
Output: c.q1, c.q2, a.q1, a.q2, b.q1, d.q1, (COALESCE((COALESCE(b.q2, (b2.f1)::bigint)), d.q2))
+ Inner Unique: No
+ Outer Unique: No
Hash Cond: (d.q1 = c.q2)
-> Nested Loop
Output: a.q1, a.q2, b.q1, d.q1, (COALESCE((COALESCE(b.q2, (b2.f1)::bigint)), d.q2))
+ Inner Unique: No
+ Outer Unique: No
-> Hash Right Join
Output: a.q1, a.q2, b.q1, (COALESCE(b.q2, (b2.f1)::bigint))
+ Inner Unique: No
+ Outer Unique: No
Hash Cond: (b.q1 = a.q2)
-> Nested Loop
Output: b.q1, COALESCE(b.q2, (b2.f1)::bigint)
+ Inner Unique: No
+ Outer Unique: No
Join Filter: (b.q1 < b2.f1)
-> Seq Scan on public.int8_tbl b
Output: b.q1, b.q2
@@ -5126,7 +5226,7 @@ select c.*,a.*,ss1.q1,ss2.q1,ss3.* from
Output: i.f1
-> Seq Scan on public.int4_tbl i
Output: i.f1
-(34 rows)
+(44 rows)
-- check processing of postponed quals (bug #9041)
explain (verbose, costs off)
@@ -5139,16 +5239,20 @@ select * from
----------------------------------------------
Nested Loop Left Join
Output: (1), (2), (3)
+ Inner Unique: No
+ Outer Unique: No
Join Filter: (((3) = (1)) AND ((3) = (2)))
-> Nested Loop
Output: (1), (2)
+ Inner Unique: No
+ Outer Unique: No
-> Result
Output: 1
-> Result
Output: 2
-> Result
Output: 3
-(11 rows)
+(15 rows)
-- check we don't try to do a unique-ified semijoin with LATERAL
explain (verbose, costs off)
@@ -5161,10 +5265,14 @@ select * from
----------------------------------------------------------------------
Nested Loop
Output: "*VALUES*".column1, "*VALUES*".column2, int4_tbl.f1
+ Inner Unique: No
+ Outer Unique: No
-> Values Scan on "*VALUES*"
Output: "*VALUES*".column1, "*VALUES*".column2
-> Nested Loop Semi Join
Output: int4_tbl.f1
+ Inner Unique: No
+ Outer Unique: No
Join Filter: (int4_tbl.f1 = tenk1.unique1)
-> Seq Scan on public.int4_tbl
Output: int4_tbl.f1
@@ -5173,7 +5281,7 @@ select * from
-> Index Scan using tenk1_unique2 on public.tenk1
Output: tenk1.unique1
Index Cond: (tenk1.unique2 = "*VALUES*".column2)
-(14 rows)
+(18 rows)
select * from
(values (0,9998), (1,1000)) v(id,x),
@@ -5200,10 +5308,14 @@ lateral (select * from int8_tbl t1,
-----------------------------------------------------------------
Nested Loop
Output: "*VALUES*".column1, t1.q1, t1.q2, ss2.q1, ss2.q2
+ Inner Unique: No
+ Outer Unique: No
-> Seq Scan on public.int8_tbl t1
Output: t1.q1, t1.q2
-> Nested Loop
Output: "*VALUES*".column1, ss2.q1, ss2.q2
+ Inner Unique: No
+ Outer Unique: No
-> Values Scan on "*VALUES*"
Output: "*VALUES*".column1
-> Subquery Scan on ss2
@@ -5225,7 +5337,7 @@ lateral (select * from int8_tbl t1,
-> Seq Scan on public.int8_tbl t3
Output: t3.q1, t3.q2
Filter: (t3.q2 = $2)
-(27 rows)
+(31 rows)
select * from (values (0), (1)) v(id),
lateral (select * from int8_tbl t1,
@@ -5323,3 +5435,255 @@ ERROR: invalid reference to FROM-clause entry for table "xx1"
LINE 1: ...xx1 using lateral (select * from int4_tbl where f1 = x1) ss;
^
HINT: There is an entry for table "xx1", but it cannot be referenced from this part of the query.
+--
+-- test planner's ability to mark joins as unique
+--
+create table j1 (id int primary key);
+create table j2 (id int primary key);
+create table j3 (id int);
+insert into j1 values(1),(2),(3);
+insert into j2 values(1),(2),(3);
+insert into j3 values(1),(1);
+analyze j1;
+analyze j2;
+analyze j3;
+-- ensure join is changed to a semi join
+explain (verbose, costs off)
+select * from j1 inner join j2 on j1.id = j2.id;
+ QUERY PLAN
+-----------------------------------
+ Hash Join
+ Output: j1.id, j2.id
+ Inner Unique: Yes
+ Outer Unique: Yes
+ Hash Cond: (j1.id = j2.id)
+ -> Seq Scan on public.j1
+ Output: j1.id
+ -> Hash
+ Output: j2.id
+ -> Seq Scan on public.j2
+ Output: j2.id
+(11 rows)
+
+-- ensure join not changed when not an equi-join
+explain (verbose, costs off)
+select * from j1 inner join j2 on j1.id > j2.id;
+ QUERY PLAN
+-----------------------------------
+ Nested Loop
+ Output: j1.id, j2.id
+ Inner Unique: No
+ Outer Unique: No
+ Join Filter: (j1.id > j2.id)
+ -> Seq Scan on public.j1
+ Output: j1.id
+ -> Materialize
+ Output: j2.id
+ -> Seq Scan on public.j2
+ Output: j2.id
+(11 rows)
+
+-- don't change, as j3 has no unique index or pk on id
+explain (verbose, costs off)
+select * from j1 inner join j3 on j1.id = j3.id;
+ QUERY PLAN
+-----------------------------------
+ Hash Join
+ Output: j1.id, j3.id
+ Inner Unique: No
+ Outer Unique: Yes
+ Hash Cond: (j1.id = j3.id)
+ -> Seq Scan on public.j1
+ Output: j1.id
+ -> Hash
+ Output: j3.id
+ -> Seq Scan on public.j3
+ Output: j3.id
+(11 rows)
+
+-- ensure left join is converted to left semi join
+explain (verbose, costs off)
+select * from j1 left join j2 on j1.id = j2.id;
+ QUERY PLAN
+-----------------------------------
+ Hash Left Join
+ Output: j1.id, j2.id
+ Inner Unique: Yes
+ Outer Unique: Yes
+ Hash Cond: (j1.id = j2.id)
+ -> Seq Scan on public.j1
+ Output: j1.id
+ -> Hash
+ Output: j2.id
+ -> Seq Scan on public.j2
+ Output: j2.id
+(11 rows)
+
+-- ensure right join is converted too
+explain (verbose, costs off)
+select * from j1 right join j2 on j1.id = j2.id;
+ QUERY PLAN
+-----------------------------------
+ Hash Left Join
+ Output: j1.id, j2.id
+ Inner Unique: Yes
+ Outer Unique: Yes
+ Hash Cond: (j2.id = j1.id)
+ -> Seq Scan on public.j2
+ Output: j2.id
+ -> Hash
+ Output: j1.id
+ -> Seq Scan on public.j1
+ Output: j1.id
+(11 rows)
+
+-- a clauseless (cross) join can't be converted
+explain (verbose, costs off)
+select * from j1 cross join j2;
+ QUERY PLAN
+-----------------------------------
+ Nested Loop
+ Output: j1.id, j2.id
+ Inner Unique: No
+ Outer Unique: No
+ -> Seq Scan on public.j1
+ Output: j1.id
+ -> Materialize
+ Output: j2.id
+ -> Seq Scan on public.j2
+ Output: j2.id
+(10 rows)
+
+-- ensure a natural join is converted to a semi join
+explain (verbose, costs off)
+select * from j1 natural join j2;
+ QUERY PLAN
+-----------------------------------
+ Hash Join
+ Output: j1.id
+ Inner Unique: Yes
+ Outer Unique: Yes
+ Hash Cond: (j1.id = j2.id)
+ -> Seq Scan on public.j1
+ Output: j1.id
+ -> Hash
+ Output: j2.id
+ -> Seq Scan on public.j2
+ Output: j2.id
+(11 rows)
+
+-- ensure distinct clause allows the inner to become a semi join
+explain (verbose, costs off)
+select * from j1
+inner join (select distinct id from j3) j3 on j1.id = j3.id;
+ QUERY PLAN
+-----------------------------------------
+ Nested Loop
+ Output: j1.id, j3.id
+ Inner Unique: Yes
+ Outer Unique: Yes
+ Join Filter: (j1.id = j3.id)
+ -> Unique
+ Output: j3.id
+ -> Sort
+ Output: j3.id
+ Sort Key: j3.id
+ -> Seq Scan on public.j3
+ Output: j3.id
+ -> Seq Scan on public.j1
+ Output: j1.id
+(14 rows)
+
+-- ensure group by clause allows the inner to become a semi join
+explain (verbose, costs off)
+select * from j1
+inner join (select id from j3 group by id) j3 on j1.id = j3.id;
+ QUERY PLAN
+-----------------------------------------
+ Nested Loop
+ Output: j1.id, j3.id
+ Inner Unique: Yes
+ Outer Unique: Yes
+ Join Filter: (j1.id = j3.id)
+ -> Group
+ Output: j3.id
+ Group Key: j3.id
+ -> Sort
+ Output: j3.id
+ Sort Key: j3.id
+ -> Seq Scan on public.j3
+ Output: j3.id
+ -> Seq Scan on public.j1
+ Output: j1.id
+(15 rows)
+
+-- ensure a full join is not altered
+explain (verbose, costs off)
+select * from j1 full join j2 on j1.id = j2.id;
+ QUERY PLAN
+-----------------------------------
+ Hash Full Join
+ Output: j1.id, j2.id
+ Inner Unique: No
+ Outer Unique: No
+ Hash Cond: (j1.id = j2.id)
+ -> Seq Scan on public.j1
+ Output: j1.id
+ -> Hash
+ Output: j2.id
+ -> Seq Scan on public.j2
+ Output: j2.id
+(11 rows)
+
+drop table j1;
+drop table j2;
+drop table j3;
+-- test a more complex permutations of join conversions
+create table j1 (id1 int, id2 int, primary key(id1,id2));
+create table j2 (id1 int, id2 int, primary key(id1,id2));
+create table j3 (id1 int, id2 int, primary key(id1,id2));
+insert into j1 values(1,1),(2,2);
+insert into j2 values(1,1);
+insert into j3 values(1,1);
+analyze j1;
+analyze j2;
+analyze j3;
+-- ensure there's no join conversion when not all columns which are part of
+-- the unique index are part of the join clause
+explain (verbose, costs off)
+select * from j1
+inner join j2 on j1.id1 = j2.id1;
+ QUERY PLAN
+------------------------------------------
+ Nested Loop
+ Output: j1.id1, j1.id2, j2.id1, j2.id2
+ Inner Unique: No
+ Outer Unique: No
+ Join Filter: (j1.id1 = j2.id1)
+ -> Seq Scan on public.j2
+ Output: j2.id1, j2.id2
+ -> Seq Scan on public.j1
+ Output: j1.id1, j1.id2
+(9 rows)
+
+-- ensure inner is converted to semi join when there's multiple columns in the
+-- join condition
+explain (verbose, costs off)
+select * from j1
+inner join j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2;
+ QUERY PLAN
+----------------------------------------------------------
+ Nested Loop
+ Output: j1.id1, j1.id2, j2.id1, j2.id2
+ Inner Unique: Yes
+ Outer Unique: Yes
+ Join Filter: ((j1.id1 = j2.id1) AND (j1.id2 = j2.id2))
+ -> Seq Scan on public.j2
+ Output: j2.id1, j2.id2
+ -> Seq Scan on public.j1
+ Output: j1.id1, j1.id2
+(9 rows)
+
+drop table j1;
+drop table j2;
+drop table j3;
diff --git a/src/test/regress/expected/plpgsql.out b/src/test/regress/expected/plpgsql.out
index 79513e4..7e948ed 100644
--- a/src/test/regress/expected/plpgsql.out
+++ b/src/test/regress/expected/plpgsql.out
@@ -5391,12 +5391,14 @@ select i, a from
-----------------------------------------------------------------
Nested Loop
Output: i.i, (returns_rw_array(1))
+ Inner Unique: No
+ Outer Unique: No
-> Result
Output: returns_rw_array(1)
-> Function Scan on public.consumes_rw_array i
Output: i.i
Function Call: consumes_rw_array((returns_rw_array(1)))
-(7 rows)
+(9 rows)
select i, a from
(select returns_rw_array(1) as a offset 0) ss,
diff --git a/src/test/regress/expected/rangefuncs.out b/src/test/regress/expected/rangefuncs.out
index 56481de..434de62 100644
--- a/src/test/regress/expected/rangefuncs.out
+++ b/src/test/regress/expected/rangefuncs.out
@@ -2010,12 +2010,14 @@ select x from int8_tbl, extractq2(int8_tbl) f(x);
------------------------------------------
Nested Loop
Output: f.x
+ Inner Unique: No
+ Outer Unique: No
-> Seq Scan on public.int8_tbl
Output: int8_tbl.q1, int8_tbl.q2
-> Function Scan on f
Output: f.x
Function Call: int8_tbl.q2
-(7 rows)
+(9 rows)
select x from int8_tbl, extractq2(int8_tbl) f(x);
x
@@ -2036,11 +2038,13 @@ select x from int8_tbl, extractq2_2(int8_tbl) f(x);
-----------------------------------
Nested Loop
Output: ((int8_tbl.*).q2)
+ Inner Unique: No
+ Outer Unique: No
-> Seq Scan on public.int8_tbl
Output: int8_tbl.*
-> Result
Output: (int8_tbl.*).q2
-(6 rows)
+(8 rows)
select x from int8_tbl, extractq2_2(int8_tbl) f(x);
x
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index abd3217..e95c42b 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -783,6 +783,8 @@ select * from int4_tbl where
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Nested Loop Semi Join
Output: int4_tbl.f1
+ Inner Unique: No
+ Outer Unique: No
Join Filter: (CASE WHEN (hashed SubPlan 1) THEN int4_tbl.f1 ELSE NULL::integer END = b.ten)
-> Seq Scan on public.int4_tbl
Output: int4_tbl.f1
@@ -791,7 +793,7 @@ select * from int4_tbl where
SubPlan 1
-> Index Only Scan using tenk1_unique1 on public.tenk1 a
Output: a.unique1
-(10 rows)
+(12 rows)
select * from int4_tbl where
(case when f1 in (select unique1 from tenk1 a) then f1 else null end) in
@@ -811,6 +813,8 @@ select * from int4_tbl o where (f1, f1) in
-------------------------------------------------------------------
Nested Loop Semi Join
Output: o.f1
+ Inner Unique: No
+ Outer Unique: No
Join Filter: (o.f1 = "ANY_subquery".f1)
-> Seq Scan on public.int4_tbl o
Output: o.f1
@@ -828,7 +832,7 @@ select * from int4_tbl o where (f1, f1) in
Group Key: i.f1
-> Seq Scan on public.int4_tbl i
Output: i.f1
-(19 rows)
+(21 rows)
select * from int4_tbl o where (f1, f1) in
(select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1);
diff --git a/src/test/regress/expected/with.out b/src/test/regress/expected/with.out
index 02fa08e..aa012d4 100644
--- a/src/test/regress/expected/with.out
+++ b/src/test/regress/expected/with.out
@@ -2183,6 +2183,8 @@ DELETE FROM a USING wcte WHERE aa = q2;
Output: '42'::bigint, '47'::bigint
-> Nested Loop
Output: a.ctid, wcte.*
+ Inner Unique: No
+ Outer Unique: No
Join Filter: (a.aa = wcte.q2)
-> Seq Scan on public.a
Output: a.ctid, a.aa
@@ -2190,6 +2192,8 @@ DELETE FROM a USING wcte WHERE aa = q2;
Output: wcte.*, wcte.q2
-> Nested Loop
Output: b.ctid, wcte.*
+ Inner Unique: No
+ Outer Unique: No
Join Filter: (b.aa = wcte.q2)
-> Seq Scan on public.b
Output: b.ctid, b.aa
@@ -2197,6 +2201,8 @@ DELETE FROM a USING wcte WHERE aa = q2;
Output: wcte.*, wcte.q2
-> Nested Loop
Output: c.ctid, wcte.*
+ Inner Unique: No
+ Outer Unique: No
Join Filter: (c.aa = wcte.q2)
-> Seq Scan on public.c
Output: c.ctid, c.aa
@@ -2204,12 +2210,14 @@ DELETE FROM a USING wcte WHERE aa = q2;
Output: wcte.*, wcte.q2
-> Nested Loop
Output: d.ctid, wcte.*
+ Inner Unique: No
+ Outer Unique: No
Join Filter: (d.aa = wcte.q2)
-> Seq Scan on public.d
Output: d.ctid, d.aa
-> CTE Scan on wcte
Output: wcte.*, wcte.q2
-(38 rows)
+(46 rows)
-- error cases
-- data-modifying WITH tries to use its own output
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index 97bccec..69e0109 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -1730,3 +1730,95 @@ update xx1 set x2 = f1 from xx1, lateral (select * from int4_tbl where f1 = x1)
delete from xx1 using (select * from int4_tbl where f1 = x1) ss;
delete from xx1 using (select * from int4_tbl where f1 = xx1.x1) ss;
delete from xx1 using lateral (select * from int4_tbl where f1 = x1) ss;
+
+--
+-- test planner's ability to mark joins as unique
+--
+
+create table j1 (id int primary key);
+create table j2 (id int primary key);
+create table j3 (id int);
+
+insert into j1 values(1),(2),(3);
+insert into j2 values(1),(2),(3);
+insert into j3 values(1),(1);
+
+analyze j1;
+analyze j2;
+analyze j3;
+
+-- ensure join is changed to a semi join
+explain (verbose, costs off)
+select * from j1 inner join j2 on j1.id = j2.id;
+
+-- ensure join not changed when not an equi-join
+explain (verbose, costs off)
+select * from j1 inner join j2 on j1.id > j2.id;
+
+-- don't change, as j3 has no unique index or pk on id
+explain (verbose, costs off)
+select * from j1 inner join j3 on j1.id = j3.id;
+
+-- ensure left join is converted to left semi join
+explain (verbose, costs off)
+select * from j1 left join j2 on j1.id = j2.id;
+
+-- ensure right join is converted too
+explain (verbose, costs off)
+select * from j1 right join j2 on j1.id = j2.id;
+
+-- a clauseless (cross) join can't be converted
+explain (verbose, costs off)
+select * from j1 cross join j2;
+
+-- ensure a natural join is converted to a semi join
+explain (verbose, costs off)
+select * from j1 natural join j2;
+
+-- ensure distinct clause allows the inner to become a semi join
+explain (verbose, costs off)
+select * from j1
+inner join (select distinct id from j3) j3 on j1.id = j3.id;
+
+-- ensure group by clause allows the inner to become a semi join
+explain (verbose, costs off)
+select * from j1
+inner join (select id from j3 group by id) j3 on j1.id = j3.id;
+
+-- ensure a full join is not altered
+explain (verbose, costs off)
+select * from j1 full join j2 on j1.id = j2.id;
+
+drop table j1;
+drop table j2;
+drop table j3;
+
+-- test a more complex permutations of join conversions
+
+create table j1 (id1 int, id2 int, primary key(id1,id2));
+create table j2 (id1 int, id2 int, primary key(id1,id2));
+create table j3 (id1 int, id2 int, primary key(id1,id2));
+
+insert into j1 values(1,1),(2,2);
+insert into j2 values(1,1);
+insert into j3 values(1,1);
+
+analyze j1;
+analyze j2;
+analyze j3;
+
+-- ensure there's no join conversion when not all columns which are part of
+-- the unique index are part of the join clause
+explain (verbose, costs off)
+select * from j1
+inner join j2 on j1.id1 = j2.id1;
+
+-- ensure inner is converted to semi join when there's multiple columns in the
+-- join condition
+explain (verbose, costs off)
+select * from j1
+inner join j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2;
+
+drop table j1;
+drop table j2;
+drop table j3;
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers