Re: [HACKERS] Performance improvement for joins where outer side is unique

David Rowley Wed, 18 Jan 2017 15:30:06 -0800

On 19 January 2017 at 11:06, David Rowley <[email protected]> wrote:
> Old patch no longer applies, so I've attached a rebased patch. This
> also re-adds a comment line which I mistakenly removed.


(meanwhile Andres commits 69f4b9c)

I should've waited a bit longer.

Here's another that fixes the new conflicts.

-- 
 David Rowley                   http://www.2ndQuadrant.com/
 PostgreSQL Development, 24x7 Support, Training & Services

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index f9fb276..239f78b 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -1312,6 +1312,26 @@ ExplainNode(PlanState *planstate, List *ancestors,
 	if (es->verbose)
 		show_plan_tlist(planstate, ancestors, es);
 
+	/* unique join */
+	if (es->verbose || es->format != EXPLAIN_FORMAT_TEXT)
+	{
+		switch (nodeTag(plan))
+		{
+			case T_NestLoop:
+			case T_MergeJoin:
+			case T_HashJoin:
+			{
+				const char *value = ((Join *) plan)->inner_unique ? "Yes" : "No";
+				ExplainPropertyText("Inner Unique", value, es);
+				value =  ((Join *) plan)->outer_unique ? "Yes" : "No";
+				ExplainPropertyText("Outer Unique", value, es);
+				break;
+			}
+			default:
+				break;
+		}
+	}
+
 	/* quals, sort keys, etc */
 	switch (nodeTag(plan))
 	{
diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c
index b41e4e2..5b75b64 100644
--- a/src/backend/executor/nodeHashjoin.c
+++ b/src/backend/executor/nodeHashjoin.c
@@ -306,10 +306,10 @@ ExecHashJoin(HashJoinState *node)
 					}
 
 					/*
-					 * In a semijoin, we'll consider returning the first
-					 * match, but after that we're done with this outer tuple.
+					 * Skip to the next outer tuple if we only need one inner
+					 * tuple to match.
 					 */
-					if (node->js.jointype == JOIN_SEMI)
+					if (node->js.match_first_inner_tuple_only)
 						node->hj_JoinState = HJ_NEED_NEW_OUTER;
 
 					if (otherqual == NIL ||
@@ -453,6 +453,14 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
 	hjstate->js.ps.state = estate;
 
 	/*
+	 * When the planner was able to determine that the inner side of the join
+	 * will at most contain a single tuple for each outer tuple, then we can
+	 * optimize the join by skipping to the next outer tuple after we find the
+	 * first matching inner tuple.
+	 */
+	hjstate->js.match_first_inner_tuple_only = node->join.inner_unique;
+
+	/*
 	 * Miscellaneous initialization
 	 *
 	 * create expression context for node
@@ -498,8 +506,11 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
 	/* set up null tuples for outer joins, if needed */
 	switch (node->join.jointype)
 	{
-		case JOIN_INNER:
 		case JOIN_SEMI:
+			/* for semi joins we match to the first inner tuple only */
+			hjstate->js.match_first_inner_tuple_only = true;
+			/* fall through */
+		case JOIN_INNER:
 			break;
 		case JOIN_LEFT:
 		case JOIN_ANTI:
diff --git a/src/backend/executor/nodeMergejoin.c b/src/backend/executor/nodeMergejoin.c
index 2fd1856..378706f 100644
--- a/src/backend/executor/nodeMergejoin.c
+++ b/src/backend/executor/nodeMergejoin.c
@@ -840,10 +840,10 @@ ExecMergeJoin(MergeJoinState *node)
 					}
 
 					/*
-					 * In a semijoin, we'll consider returning the first
-					 * match, but after that we're done with this outer tuple.
+					 * Skip to the next outer tuple if we only need one inner
+					 * tuple to match.
 					 */
-					if (node->js.jointype == JOIN_SEMI)
+					if (node->js.match_first_inner_tuple_only)
 						node->mj_JoinState = EXEC_MJ_NEXTOUTER;
 
 					qualResult = (otherqual == NIL ||
@@ -1487,6 +1487,15 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate, int eflags)
 	mergestate->js.ps.state = estate;
 
 	/*
+	 * When the planner was able to determine that the inner or outer side of
+	 * the join will at most contain a single tuple for the opposing side of
+	 * the join, then we can optimize the merge join by skipping to the next
+	 * tuple since we know there are no more to match.
+	 */
+	mergestate->js.match_first_inner_tuple_only = node->join.inner_unique;
+	mergestate->js.match_first_outer_tuple_only = node->join.outer_unique;
+
+	/*
 	 * Miscellaneous initialization
 	 *
 	 * create expression context for node
@@ -1537,7 +1546,8 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate, int eflags)
 	 * only if eflags doesn't specify REWIND.
 	 */
 	if (IsA(innerPlan(node), Material) &&
-		(eflags & EXEC_FLAG_REWIND) == 0)
+		(eflags & EXEC_FLAG_REWIND) == 0 &&
+		!node->join.outer_unique)
 		mergestate->mj_ExtraMarks = true;
 	else
 		mergestate->mj_ExtraMarks = false;
@@ -1553,8 +1563,11 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate, int eflags)
 
 	switch (node->join.jointype)
 	{
-		case JOIN_INNER:
 		case JOIN_SEMI:
+			/* for semi joins we match to the first inner tuple only */
+			mergestate->js.match_first_inner_tuple_only = true;
+			/* fall through */
+		case JOIN_INNER:
 			mergestate->mj_FillOuter = false;
 			mergestate->mj_FillInner = false;
 			break;
diff --git a/src/backend/executor/nodeNestloop.c b/src/backend/executor/nodeNestloop.c
index e058427..4345e72 100644
--- a/src/backend/executor/nodeNestloop.c
+++ b/src/backend/executor/nodeNestloop.c
@@ -247,10 +247,10 @@ ExecNestLoop(NestLoopState *node)
 			}
 
 			/*
-			 * In a semijoin, we'll consider returning the first match, but
-			 * after that we're done with this outer tuple.
+			 * Skip to the next outer tuple if we only need 1 inner tuple to
+			 * match.
 			 */
-			if (node->js.jointype == JOIN_SEMI)
+			if (node->js.match_first_inner_tuple_only)
 				node->nl_NeedNewOuter = true;
 
 			if (otherqual == NIL || ExecQual(otherqual, econtext, false))
@@ -311,6 +311,14 @@ ExecInitNestLoop(NestLoop *node, EState *estate, int eflags)
 	nlstate->js.ps.state = estate;
 
 	/*
+	 * When the planner was able to determine that the inner side of the join
+	 * will at most contain a single tuple for each outer tuple, then we can
+	 * optimize the join by skipping to the next outer tuple after we find the
+	 * first matching inner tuple.
+	 */
+	nlstate->js.match_first_inner_tuple_only = node->join.inner_unique;
+
+	/*
 	 * Miscellaneous initialization
 	 *
 	 * create expression context for node
@@ -354,8 +362,11 @@ ExecInitNestLoop(NestLoop *node, EState *estate, int eflags)
 
 	switch (node->join.jointype)
 	{
-		case JOIN_INNER:
 		case JOIN_SEMI:
+			/* for semi joins we match to the first inner tuple only */
+			nlstate->js.match_first_inner_tuple_only = true;
+			/* fall through */
+		case JOIN_INNER:
 			break;
 		case JOIN_LEFT:
 		case JOIN_ANTI:
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index f871e9d..b85b51c 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -2104,6 +2104,7 @@ _copySpecialJoinInfo(const SpecialJoinInfo *from)
 	COPY_SCALAR_FIELD(jointype);
 	COPY_SCALAR_FIELD(lhs_strict);
 	COPY_SCALAR_FIELD(delay_upper_joins);
+	COPY_SCALAR_FIELD(inner_unique);
 	COPY_SCALAR_FIELD(semi_can_btree);
 	COPY_SCALAR_FIELD(semi_can_hash);
 	COPY_NODE_FIELD(semi_operators);
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index 78ed3c7..efc3e43 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -853,6 +853,7 @@ _equalSpecialJoinInfo(const SpecialJoinInfo *a, const SpecialJoinInfo *b)
 	COMPARE_SCALAR_FIELD(jointype);
 	COMPARE_SCALAR_FIELD(lhs_strict);
 	COMPARE_SCALAR_FIELD(delay_upper_joins);
+	COMPARE_SCALAR_FIELD(inner_unique);
 	COMPARE_SCALAR_FIELD(semi_can_btree);
 	COMPARE_SCALAR_FIELD(semi_can_hash);
 	COMPARE_NODE_FIELD(semi_operators);
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 1560ac3..8dd0633 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -2327,6 +2327,7 @@ _outSpecialJoinInfo(StringInfo str, const SpecialJoinInfo *node)
 	WRITE_ENUM_FIELD(jointype, JoinType);
 	WRITE_BOOL_FIELD(lhs_strict);
 	WRITE_BOOL_FIELD(delay_upper_joins);
+	WRITE_BOOL_FIELD(inner_unique);
 	WRITE_BOOL_FIELD(semi_can_btree);
 	WRITE_BOOL_FIELD(semi_can_hash);
 	WRITE_NODE_FIELD(semi_operators);
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index 7c30ec6..49a1fb4 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -19,6 +19,7 @@
 #include "executor/executor.h"
 #include "foreign/fdwapi.h"
 #include "optimizer/cost.h"
+#include "optimizer/planmain.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 
@@ -103,6 +104,17 @@ add_paths_to_joinrel(PlannerInfo *root,
 	extra.param_source_rels = NULL;
 
 	/*
+	 * Check for proofs which prove that this inner relation cannot cause
+	 * duplicate of outer side tuples.
+	 */
+	extra.inner_unique = innerrel_is_unique(root, outerrel, innerrel,
+											jointype, sjinfo, restrictlist);
+
+	/* as above but with relations swapped */
+	extra.outer_unique = innerrel_is_unique(root, innerrel, outerrel,
+											jointype, sjinfo, restrictlist);
+
+	/*
 	 * Find potential mergejoin clauses.  We can skip this if we are not
 	 * interested in doing a mergejoin.  However, mergejoin may be our only
 	 * way of implementing a full outer join, so override enable_mergejoin if
@@ -330,11 +342,9 @@ try_nestloop_path(PlannerInfo *root,
 									  joinrel,
 									  jointype,
 									  &workspace,
-									  extra->sjinfo,
-									  &extra->semifactors,
+									  extra,
 									  outer_path,
 									  inner_path,
-									  extra->restrictlist,
 									  pathkeys,
 									  required_outer));
 	}
@@ -392,11 +402,9 @@ try_partial_nestloop_path(PlannerInfo *root,
 										  joinrel,
 										  jointype,
 										  &workspace,
-										  extra->sjinfo,
-										  &extra->semifactors,
+										  extra,
 										  outer_path,
 										  inner_path,
-										  extra->restrictlist,
 										  pathkeys,
 										  NULL));
 }
@@ -463,10 +471,9 @@ try_mergejoin_path(PlannerInfo *root,
 									   joinrel,
 									   jointype,
 									   &workspace,
-									   extra->sjinfo,
+									   extra,
 									   outer_path,
 									   inner_path,
-									   extra->restrictlist,
 									   pathkeys,
 									   required_outer,
 									   mergeclauses,
@@ -528,11 +535,9 @@ try_hashjoin_path(PlannerInfo *root,
 									  joinrel,
 									  jointype,
 									  &workspace,
-									  extra->sjinfo,
-									  &extra->semifactors,
+									  extra,
 									  outer_path,
 									  inner_path,
-									  extra->restrictlist,
 									  required_outer,
 									  hashclauses));
 	}
@@ -590,11 +595,9 @@ try_partial_hashjoin_path(PlannerInfo *root,
 										  joinrel,
 										  jointype,
 										  &workspace,
-										  extra->sjinfo,
-										  &extra->semifactors,
+										  extra,
 										  outer_path,
 										  inner_path,
-										  extra->restrictlist,
 										  NULL,
 										  hashclauses));
 }
diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c
index 438baf1..81a0a75 100644
--- a/src/backend/optimizer/plan/analyzejoins.c
+++ b/src/backend/optimizer/plan/analyzejoins.c
@@ -34,6 +34,8 @@
 
 /* local functions */
 static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
+static bool specialjoin_is_unique_join(PlannerInfo *root,
+						   SpecialJoinInfo *sjinfo);
 static void remove_rel_from_query(PlannerInfo *root, int relid,
 					  Relids joinrelids);
 static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
@@ -41,6 +43,36 @@ static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel);
 static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
 					List *clause_list);
 static Oid	distinct_col_search(int colno, List *colnos, List *opids);
+static bool is_innerrel_unique_for(PlannerInfo *root,
+					   RelOptInfo *outerrel,
+					   RelOptInfo *innerrel,
+					   List *restrictlist);
+
+
+/*
+ * mark_unique_joins
+ *		Check for proofs on each left joined relation which prove that the
+ *		join can't cause dupliction of RHS tuples.
+ */
+void
+mark_unique_joins(PlannerInfo *root)
+{
+	ListCell   *lc;
+
+	foreach(lc, root->join_info_list)
+	{
+		SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
+
+		/*
+		 * Currently we're only interested in LEFT JOINs. If we can prove
+		 * these to have a unique inner side, based on the join condition then
+		 * this can save the executor from having to attempt fruitless
+		 * searches for subsequent matching outer tuples.
+		 */
+		if (sjinfo->jointype == JOIN_LEFT && !sjinfo->inner_unique)
+			sjinfo->inner_unique = specialjoin_is_unique_join(root, sjinfo);
+	}
+}
 
 
 /*
@@ -95,6 +127,15 @@ restart:
 		root->join_info_list = list_delete_ptr(root->join_info_list, sjinfo);
 
 		/*
+		 * mark_unique_joins may have been previously unable to mark a join as
+		 * unique due to there being multiple relations on the RHS of the
+		 * join. We may have just removed a relation so that there's now just
+		 * a singleton relation, so let's try again to mark the joins as
+		 * unique.
+		 */
+		mark_unique_joins(root);
+
+		/*
 		 * Restart the scan.  This is necessary to ensure we find all
 		 * removable joins independently of ordering of the join_info_list
 		 * (note that removal of attr_needed bits may make a join appear
@@ -156,31 +197,22 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
 	int			innerrelid;
 	RelOptInfo *innerrel;
 	Relids		joinrelids;
-	List	   *clause_list = NIL;
-	ListCell   *l;
 	int			attroff;
+	ListCell   *l;
 
 	/*
-	 * Must be a non-delaying left join to a single baserel, else we aren't
-	 * going to be able to do anything with it.
+	 * Join must have a unique inner side and must be a non-delaying join to a
+	 * single baserel, else we aren't going to be able to do anything with it.
 	 */
-	if (sjinfo->jointype != JOIN_LEFT ||
+	if (!sjinfo->inner_unique ||
 		sjinfo->delay_upper_joins)
 		return false;
 
 	if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
-		return false;
+		return false;			/* should not happen */
 
 	innerrel = find_base_rel(root, innerrelid);
 
-	/*
-	 * Before we go to the effort of checking whether any innerrel variables
-	 * are needed above the join, make a quick check to eliminate cases in
-	 * which we will surely be unable to prove uniqueness of the innerrel.
-	 */
-	if (!rel_supports_distinctness(root, innerrel))
-		return false;
-
 	/* Compute the relid set for the join we are considering */
 	joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
 
@@ -190,7 +222,8 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
 	 *
 	 * Note that this test only detects use of inner-rel attributes in higher
 	 * join conditions and the target list.  There might be such attributes in
-	 * pushed-down conditions at this join, too.  We check that case below.
+	 * pushed-down conditions at this join too, but in this case the join
+	 * would not have been marked as unique.
 	 *
 	 * As a micro-optimization, it seems better to start with max_attr and
 	 * count down rather than starting with min_attr and counting up, on the
@@ -231,6 +264,41 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
 			return false;		/* it does reference innerrel */
 	}
 
+	return true;
+}
+
+/*
+ * specialjoin_is_unique_join
+ *		True if it can be proved that this special join can only ever match at
+ *		most one inner row for any single outer row. False is returned if
+ *		there's insufficient evidence to prove the join is unique.
+ */
+static bool
+specialjoin_is_unique_join(PlannerInfo *root, SpecialJoinInfo *sjinfo)
+{
+	int			innerrelid;
+	RelOptInfo *innerrel;
+	Relids		joinrelids;
+	List	   *clause_list = NIL;
+	ListCell   *lc;
+
+	/* if there's more than one relation involved, then punt */
+	if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
+		return false;
+
+	innerrel = find_base_rel(root, innerrelid);
+
+	/*
+	 * Before we go to the effort of pulling out the join condition's columns,
+	 * make a quick check to eliminate cases in which we will surely be unable
+	 * to prove uniqueness of the innerrel.
+	 */
+	if (!rel_supports_distinctness(root, innerrel))
+		return false;
+
+	/* Compute the relid set for the join we are considering */
+	joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
+
 	/*
 	 * Search for mergejoinable clauses that constrain the inner rel against
 	 * either the outer rel or a pseudoconstant.  If an operator is
@@ -238,9 +306,9 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
 	 * it's what we want.  The mergejoinability test also eliminates clauses
 	 * containing volatile functions, which we couldn't depend on.
 	 */
-	foreach(l, innerrel->joininfo)
+	foreach(lc, innerrel->joininfo)
 	{
-		RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
+		RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc);
 
 		/*
 		 * If it's not a join clause for this outer join, we can't use it.
@@ -252,10 +320,11 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
 			!bms_equal(restrictinfo->required_relids, joinrelids))
 		{
 			/*
-			 * If such a clause actually references the inner rel then join
-			 * removal has to be disallowed.  We have to check this despite
-			 * the previous attr_needed checks because of the possibility of
-			 * pushed-down clauses referencing the rel.
+			 * If such a clause actually references the inner rel then we
+			 * can't say we're unique.  (XXX this is confusing conditions for
+			 * join removability with conditions for uniqueness, and could
+			 * probably stand to be improved.  But for the moment, keep on
+			 * applying the stricter condition.)
 			 */
 			if (bms_is_member(innerrelid, restrictinfo->clause_relids))
 				return false;
@@ -847,3 +916,168 @@ distinct_col_search(int colno, List *colnos, List *opids)
 	}
 	return InvalidOid;
 }
+
+
+/*
+ * is_innerrel_unique_for
+ *	  Determine if this innerrel can, at most, return a single tuple for each
+ *	  outer tuple, based on the 'restrictlist'.
+ */
+static bool
+is_innerrel_unique_for(PlannerInfo *root,
+					   RelOptInfo *outerrel,
+					   RelOptInfo *innerrel,
+					   List *restrictlist)
+{
+	List	   *clause_list = NIL;
+	ListCell   *lc;
+
+	/* Fall out quickly if we certainly can't prove anything */
+	if (restrictlist == NIL ||
+		!rel_supports_distinctness(root, innerrel))
+		return false;
+
+	/*
+	 * Search for mergejoinable clauses that constrain the inner rel against
+	 * the outer rel.  If an operator is mergejoinable then it behaves like
+	 * equality for some btree opclass, so it's what we want.  The
+	 * mergejoinability test also eliminates clauses containing volatile
+	 * functions, which we couldn't depend on.
+	 */
+	foreach(lc, restrictlist)
+	{
+		RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc);
+
+		/* Ignore if it's not a mergejoinable clause */
+		if (!restrictinfo->can_join ||
+			restrictinfo->mergeopfamilies == NIL)
+			continue;			/* not mergejoinable */
+
+		/*
+		 * Check if clause has the form "outer op inner" or "inner op outer",
+		 * and if so mark which side is inner.
+		 */
+		if (!clause_sides_match_join(restrictinfo, outerrel->relids,
+									 innerrel->relids))
+			continue;			/* no good for these input relations */
+
+		/* OK, add to list */
+		clause_list = lappend(clause_list, restrictinfo);
+	}
+
+	/* Let rel_is_distinct_for() do the hard work */
+	return rel_is_distinct_for(root, innerrel, clause_list);
+}
+
+/*
+ * innerrel_is_unique
+ *	  Check for proofs which prove that 'innerrel' can, at most, match a
+ *	  single tuple in 'outerrel' based on the join condition in
+ *	  'restrictlist'.
+ */
+bool
+innerrel_is_unique(PlannerInfo *root,
+				   RelOptInfo *outerrel,
+				   RelOptInfo *innerrel,
+				   JoinType jointype,
+				   SpecialJoinInfo *sjinfo,
+				   List *restrictlist)
+{
+	int			innerrelid;
+	bool		unique = false;
+
+	/* left joins were already checked by mark_unique_joins */
+	if (jointype == JOIN_LEFT)
+		return sjinfo->inner_unique;
+
+	if (!bms_get_singleton_member(innerrel->relids, &innerrelid))
+		return false;
+
+	/*
+	 * Any INNER JOINs which can be proven to return at most one inner tuple
+	 * for each outer tuple can be said to be unique.
+	 */
+	if (jointype == JOIN_INNER)
+	{
+		MemoryContext old_context;
+		ListCell   *lc;
+
+		/* can't prove uniqueness for joins with an empty restrictlist */
+		if (restrictlist == NIL)
+			return false;
+
+		/*
+		 * First let's query the unique and non-unique caches to see if we've
+		 * managed to prove that innerrel is unique for some subset of this
+		 * outerrel. We don't need an exact match, as if we have any extra
+		 * outerrels than were previously cached, then they can't make the
+		 * innerrel any less unique.
+		 */
+		foreach(lc, root->unique_rels[innerrelid])
+		{
+			Bitmapset  *unique_rels = (Bitmapset *) lfirst(lc);
+
+			if (bms_is_subset(unique_rels, outerrel->relids))
+				return true;		/* Success! */
+		}
+
+		/*
+		 * We may have previously determined that this outerrel, or some
+		 * superset thereof, cannot prove this innerrel to be unique.
+		 */
+		foreach(lc, root->non_unique_rels[innerrelid])
+		{
+			Bitmapset  *unique_rels = (Bitmapset *) lfirst(lc);
+
+			if (bms_is_subset(outerrel->relids, unique_rels))
+				return false;
+		}
+
+		/* No cached information, so try to make the proof. */
+		if (is_innerrel_unique_for(root, outerrel, innerrel, restrictlist))
+		{
+			unique = true;		/* Success! */
+
+			/*
+			 * Cache the positive result for future probes, being sure to keep
+			 * it in the planner_cxt even if we are working in GEQO.
+			 *
+			 * Note: one might consider trying to isolate the minimal subset
+			 * of the outerrels that proved the innerrel unique.  But it's not
+			 * worth the trouble, because the planner builds up joinrels
+			 * incrementally and so we'll see the minimally sufficient
+			 * outerrels before any supersets of them anyway.
+			 */
+			old_context = MemoryContextSwitchTo(root->planner_cxt);
+			root->unique_rels[innerrelid] =
+				lappend(root->unique_rels[innerrelid],
+						bms_copy(outerrel->relids));
+			MemoryContextSwitchTo(old_context);
+		}
+		else
+		{
+			/*
+			 * None of the join conditions for outerrel proved innerrel
+			 * unique, so we can safely reject this outerrel or any subset of
+			 * it in future checks.
+			 *
+			 * However, in normal planning mode, caching this knowledge is
+			 * totally pointless; it won't be queried again, because we build
+			 * up joinrels from smaller to larger.  It is useful in GEQO mode,
+			 * where the knowledge can be carried across successive planning
+			 * attempts; and it's likely to be useful when using join-search
+			 * plugins, too.  Hence cache only when join_search_private is
+			 * non-NULL.  (Yeah, that's a hack, but it seems reasonable.)
+			 */
+			if (root->join_search_private)
+			{
+				old_context = MemoryContextSwitchTo(root->planner_cxt);
+				root->non_unique_rels[innerrelid] =
+					lappend(root->non_unique_rels[innerrelid],
+							bms_copy(outerrel->relids));
+				MemoryContextSwitchTo(old_context);
+			}
+		}
+	}
+	return unique;
+}
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index fae1f67..8904697 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -206,12 +206,12 @@ static BitmapOr *make_bitmap_or(List *bitmapplans);
 static NestLoop *make_nestloop(List *tlist,
 			  List *joinclauses, List *otherclauses, List *nestParams,
 			  Plan *lefttree, Plan *righttree,
-			  JoinType jointype);
+			  JoinPath *jpath);
 static HashJoin *make_hashjoin(List *tlist,
 			  List *joinclauses, List *otherclauses,
 			  List *hashclauses,
 			  Plan *lefttree, Plan *righttree,
-			  JoinType jointype);
+			  JoinPath *jpath);
 static Hash *make_hash(Plan *lefttree,
 		  Oid skewTable,
 		  AttrNumber skewColumn,
@@ -226,7 +226,7 @@ static MergeJoin *make_mergejoin(List *tlist,
 			   int *mergestrategies,
 			   bool *mergenullsfirst,
 			   Plan *lefttree, Plan *righttree,
-			   JoinType jointype);
+			   JoinPath *jpath);
 static Sort *make_sort(Plan *lefttree, int numCols,
 		  AttrNumber *sortColIdx, Oid *sortOperators,
 		  Oid *collations, bool *nullsFirst);
@@ -3532,7 +3532,7 @@ create_nestloop_plan(PlannerInfo *root,
 							  nestParams,
 							  outer_plan,
 							  inner_plan,
-							  best_path->jointype);
+							  best_path);
 
 	copy_generic_path_info(&join_plan->join.plan, &best_path->path);
 
@@ -3835,7 +3835,7 @@ create_mergejoin_plan(PlannerInfo *root,
 							   mergenullsfirst,
 							   outer_plan,
 							   inner_plan,
-							   best_path->jpath.jointype);
+							   &best_path->jpath);
 
 	/* Costs of sort and material steps are included in path cost already */
 	copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path);
@@ -3975,7 +3975,7 @@ create_hashjoin_plan(PlannerInfo *root,
 							  hashclauses,
 							  outer_plan,
 							  (Plan *) hash_plan,
-							  best_path->jpath.jointype);
+							  &best_path->jpath);
 
 	copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path);
 
@@ -5112,7 +5112,7 @@ make_nestloop(List *tlist,
 			  List *nestParams,
 			  Plan *lefttree,
 			  Plan *righttree,
-			  JoinType jointype)
+			  JoinPath *jpath)
 {
 	NestLoop   *node = makeNode(NestLoop);
 	Plan	   *plan = &node->join.plan;
@@ -5121,9 +5121,11 @@ make_nestloop(List *tlist,
 	plan->qual = otherclauses;
 	plan->lefttree = lefttree;
 	plan->righttree = righttree;
-	node->join.jointype = jointype;
+	node->join.jointype = jpath->jointype;
 	node->join.joinqual = joinclauses;
 	node->nestParams = nestParams;
+	node->join.inner_unique = jpath->inner_unique;
+	node->join.outer_unique = jpath->outer_unique;
 
 	return node;
 }
@@ -5135,7 +5137,7 @@ make_hashjoin(List *tlist,
 			  List *hashclauses,
 			  Plan *lefttree,
 			  Plan *righttree,
-			  JoinType jointype)
+			  JoinPath *jpath)
 {
 	HashJoin   *node = makeNode(HashJoin);
 	Plan	   *plan = &node->join.plan;
@@ -5145,8 +5147,10 @@ make_hashjoin(List *tlist,
 	plan->lefttree = lefttree;
 	plan->righttree = righttree;
 	node->hashclauses = hashclauses;
-	node->join.jointype = jointype;
+	node->join.jointype = jpath->jointype;
 	node->join.joinqual = joinclauses;
+	node->join.inner_unique = jpath->inner_unique;
+	node->join.outer_unique = jpath->outer_unique;
 
 	return node;
 }
@@ -5187,7 +5191,7 @@ make_mergejoin(List *tlist,
 			   bool *mergenullsfirst,
 			   Plan *lefttree,
 			   Plan *righttree,
-			   JoinType jointype)
+			   JoinPath *jpath)
 {
 	MergeJoin  *node = makeNode(MergeJoin);
 	Plan	   *plan = &node->join.plan;
@@ -5201,8 +5205,10 @@ make_mergejoin(List *tlist,
 	node->mergeCollations = mergecollations;
 	node->mergeStrategies = mergestrategies;
 	node->mergeNullsFirst = mergenullsfirst;
-	node->join.jointype = jointype;
+	node->join.jointype = jpath->jointype;
 	node->join.joinqual = joinclauses;
+	node->join.inner_unique = jpath->inner_unique;
+	node->join.outer_unique = jpath->outer_unique;
 
 	return node;
 }
diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c
index c170e96..5993d0b 100644
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -1204,6 +1204,8 @@ make_outerjoininfo(PlannerInfo *root,
 	sjinfo->jointype = jointype;
 	/* this always starts out false */
 	sjinfo->delay_upper_joins = false;
+	/* this may be changed later */
+	sjinfo->inner_unique = false;
 
 	compute_semijoin_info(sjinfo, clause);
 
diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c
index e880759..46dbe76 100644
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@@ -124,6 +124,9 @@ query_planner(PlannerInfo *root, List *tlist,
 	 */
 	setup_simple_rel_arrays(root);
 
+	/* Allocate memory for caching which joins are unique. */
+	setup_unique_join_caches(root);
+
 	/*
 	 * Construct RelOptInfo nodes for all base relations in query, and
 	 * indirectly for all appendrel member relations ("other rels").  This
@@ -185,6 +188,9 @@ query_planner(PlannerInfo *root, List *tlist,
 	 */
 	fix_placeholder_input_needed_levels(root);
 
+	/* check for unique properties on outer joins */
+	mark_unique_joins(root);
+
 	/*
 	 * Remove any useless outer joins.  Ideally this would be done during
 	 * jointree preprocessing, but the necessary information isn't available
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index f440875..6ebbcff 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1935,11 +1935,9 @@ calc_non_nestloop_required_outer(Path *outer_path, Path *inner_path)
  * 'joinrel' is the join relation.
  * 'jointype' is the type of join required
  * 'workspace' is the result from initial_cost_nestloop
- * 'sjinfo' is extra info about the join for selectivity estimation
- * 'semifactors' contains valid data if jointype is SEMI or ANTI
+ * 'extra' contains various information about the join
  * 'outer_path' is the outer path
  * 'inner_path' is the inner path
- * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
  * 'pathkeys' are the path keys of the new join path
  * 'required_outer' is the set of required outer rels
  *
@@ -1950,16 +1948,15 @@ create_nestloop_path(PlannerInfo *root,
 					 RelOptInfo *joinrel,
 					 JoinType jointype,
 					 JoinCostWorkspace *workspace,
-					 SpecialJoinInfo *sjinfo,
-					 SemiAntiJoinFactors *semifactors,
+					 JoinPathExtraData *extra,
 					 Path *outer_path,
 					 Path *inner_path,
-					 List *restrict_clauses,
 					 List *pathkeys,
 					 Relids required_outer)
 {
 	NestPath   *pathnode = makeNode(NestPath);
 	Relids		inner_req_outer = PATH_REQ_OUTER(inner_path);
+	List	   *restrict_clauses = extra->restrictlist;
 
 	/*
 	 * If the inner path is parameterized by the outer, we must drop any
@@ -1995,7 +1992,7 @@ create_nestloop_path(PlannerInfo *root,
 								  joinrel,
 								  outer_path,
 								  inner_path,
-								  sjinfo,
+								  extra->sjinfo,
 								  required_outer,
 								  &restrict_clauses);
 	pathnode->path.parallel_aware = false;
@@ -2008,8 +2005,10 @@ create_nestloop_path(PlannerInfo *root,
 	pathnode->outerjoinpath = outer_path;
 	pathnode->innerjoinpath = inner_path;
 	pathnode->joinrestrictinfo = restrict_clauses;
+	pathnode->inner_unique = extra->inner_unique;
+	pathnode->outer_unique = extra->outer_unique;
 
-	final_cost_nestloop(root, pathnode, workspace, sjinfo, semifactors);
+	final_cost_nestloop(root, pathnode, workspace, extra->sjinfo, &extra->semifactors);
 
 	return pathnode;
 }
@@ -2022,10 +2021,9 @@ create_nestloop_path(PlannerInfo *root,
  * 'joinrel' is the join relation
  * 'jointype' is the type of join required
  * 'workspace' is the result from initial_cost_mergejoin
- * 'sjinfo' is extra info about the join for selectivity estimation
+ * 'extra' contains various information about the join
  * 'outer_path' is the outer path
  * 'inner_path' is the inner path
- * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
  * 'pathkeys' are the path keys of the new join path
  * 'required_outer' is the set of required outer rels
  * 'mergeclauses' are the RestrictInfo nodes to use as merge clauses
@@ -2038,10 +2036,9 @@ create_mergejoin_path(PlannerInfo *root,
 					  RelOptInfo *joinrel,
 					  JoinType jointype,
 					  JoinCostWorkspace *workspace,
-					  SpecialJoinInfo *sjinfo,
+					  JoinPathExtraData *extra,
 					  Path *outer_path,
 					  Path *inner_path,
-					  List *restrict_clauses,
 					  List *pathkeys,
 					  Relids required_outer,
 					  List *mergeclauses,
@@ -2049,6 +2046,7 @@ create_mergejoin_path(PlannerInfo *root,
 					  List *innersortkeys)
 {
 	MergePath  *pathnode = makeNode(MergePath);
+	List *restrict_clauses = extra->restrictlist;
 
 	pathnode->jpath.path.pathtype = T_MergeJoin;
 	pathnode->jpath.path.parent = joinrel;
@@ -2058,7 +2056,7 @@ create_mergejoin_path(PlannerInfo *root,
 								  joinrel,
 								  outer_path,
 								  inner_path,
-								  sjinfo,
+								  extra->sjinfo,
 								  required_outer,
 								  &restrict_clauses);
 	pathnode->jpath.path.parallel_aware = false;
@@ -2071,12 +2069,14 @@ create_mergejoin_path(PlannerInfo *root,
 	pathnode->jpath.outerjoinpath = outer_path;
 	pathnode->jpath.innerjoinpath = inner_path;
 	pathnode->jpath.joinrestrictinfo = restrict_clauses;
+	pathnode->jpath.inner_unique = extra->inner_unique;
+	pathnode->jpath.outer_unique = extra->outer_unique;
 	pathnode->path_mergeclauses = mergeclauses;
 	pathnode->outersortkeys = outersortkeys;
 	pathnode->innersortkeys = innersortkeys;
 	/* pathnode->materialize_inner will be set by final_cost_mergejoin */
 
-	final_cost_mergejoin(root, pathnode, workspace, sjinfo);
+	final_cost_mergejoin(root, pathnode, workspace, extra->sjinfo);
 
 	return pathnode;
 }
@@ -2088,11 +2088,9 @@ create_mergejoin_path(PlannerInfo *root,
  * 'joinrel' is the join relation
  * 'jointype' is the type of join required
  * 'workspace' is the result from initial_cost_hashjoin
- * 'sjinfo' is extra info about the join for selectivity estimation
- * 'semifactors' contains valid data if jointype is SEMI or ANTI
+ * 'extra' contains various information about the join
  * 'outer_path' is the cheapest outer path
  * 'inner_path' is the cheapest inner path
- * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
  * 'required_outer' is the set of required outer rels
  * 'hashclauses' are the RestrictInfo nodes to use as hash clauses
  *		(this should be a subset of the restrict_clauses list)
@@ -2102,15 +2100,14 @@ create_hashjoin_path(PlannerInfo *root,
 					 RelOptInfo *joinrel,
 					 JoinType jointype,
 					 JoinCostWorkspace *workspace,
-					 SpecialJoinInfo *sjinfo,
-					 SemiAntiJoinFactors *semifactors,
+					 JoinPathExtraData *extra,
 					 Path *outer_path,
 					 Path *inner_path,
-					 List *restrict_clauses,
 					 Relids required_outer,
 					 List *hashclauses)
 {
 	HashPath   *pathnode = makeNode(HashPath);
+	List *restrict_clauses = extra->restrictlist;
 
 	pathnode->jpath.path.pathtype = T_HashJoin;
 	pathnode->jpath.path.parent = joinrel;
@@ -2120,7 +2117,7 @@ create_hashjoin_path(PlannerInfo *root,
 								  joinrel,
 								  outer_path,
 								  inner_path,
-								  sjinfo,
+								  extra->sjinfo,
 								  required_outer,
 								  &restrict_clauses);
 	pathnode->jpath.path.parallel_aware = false;
@@ -2145,10 +2142,12 @@ create_hashjoin_path(PlannerInfo *root,
 	pathnode->jpath.outerjoinpath = outer_path;
 	pathnode->jpath.innerjoinpath = inner_path;
 	pathnode->jpath.joinrestrictinfo = restrict_clauses;
+	pathnode->jpath.inner_unique = extra->inner_unique;
+	pathnode->jpath.outer_unique = extra->outer_unique;
 	pathnode->path_hashclauses = hashclauses;
 	/* final_cost_hashjoin will fill in pathnode->num_batches */
 
-	final_cost_hashjoin(root, pathnode, workspace, sjinfo, semifactors);
+	final_cost_hashjoin(root, pathnode, workspace, extra->sjinfo, &extra->semifactors);
 
 	return pathnode;
 }
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index adc1db9..d1bda16 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -81,6 +81,22 @@ setup_simple_rel_arrays(PlannerInfo *root)
 }
 
 /*
+ * setup_unique_join_caches
+ *	  Prepare the arrays we use for caching which joins are proved to be
+ *	  unique and non-unique.
+ */
+void
+setup_unique_join_caches(PlannerInfo *root)
+{
+	int			size = list_length(root->parse->rtable) + 1;
+
+	/* initialize the unique relation cache to all NULLs */
+	root->unique_rels = (List **) palloc0(size * sizeof(List *));
+
+	root->non_unique_rels = (List **) palloc0(size * sizeof(List *));
+}
+
+/*
  * build_simple_rel
  *	  Construct a new RelOptInfo for a base relation or 'other' relation.
  */
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 1da1e1f..8f46e72 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1680,6 +1680,17 @@ typedef struct JoinState
 	PlanState	ps;
 	JoinType	jointype;
 	List	   *joinqual;		/* JOIN quals (in addition to ps.qual) */
+	bool		match_first_inner_tuple_only;	/* True if we can safely move
+												 * to the next outer tuple
+												 * after matching first inner
+												 * tuple
+												 */
+	bool		match_first_outer_tuple_only;	/* True if we can safely move
+												 * to the next inner tuple
+												 * after matching first outer
+												 * tuple
+												 */
+
 } JoinState;
 
 /* ----------------
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index f72f7a8..ce2285e 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -618,6 +618,8 @@ typedef struct Join
 	Plan		plan;
 	JoinType	jointype;
 	List	   *joinqual;		/* JOIN quals (in addition to plan.qual) */
+	bool		inner_unique;
+	bool		outer_unique;
 } Join;
 
 /* ----------------
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index 643be54..845af01 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -222,6 +222,19 @@ typedef struct PlannerInfo
 	List	  **join_rel_level; /* lists of join-relation RelOptInfos */
 	int			join_cur_level; /* index of list being extended */
 
+	/*
+	 * During the join search we attempt to determine which joins can be
+	 * proven to be unique on their inner or outer sides based on the join
+	 * condition. This is a rather expensive test to perform, as it requires
+	 * checking each relation's unique indexes to see if the relation can, at
+	 * most, return one tuple for each opposing tuple in the join. We use this
+	 * cache during the join search to record lists of the sets of relations
+	 * which both prove, and disprove the uniqueness properties for the relid
+	 * indexed by these arrays.
+	 */
+	List	  **unique_rels;	/* cache for proven unique rels */
+	List	  **non_unique_rels;	/* cache for proven non-unique rels */
+
 	List	   *init_plans;		/* init SubPlans for query */
 
 	List	   *cte_plan_ids;	/* per-CTE-item list of subplan IDs */
@@ -1217,6 +1230,11 @@ typedef struct JoinPath
 
 	List	   *joinrestrictinfo;		/* RestrictInfos to apply to join */
 
+	bool		inner_unique;	/* inner side of join matches no more than one
+								 * outer side tuple */
+	bool		outer_unique;	/* outer side of join matches no more than one
+								 * inner side tuple */
+
 	/*
 	 * See the notes for RelOptInfo and ParamPathInfo to understand why
 	 * joinrestrictinfo is needed in JoinPath, and can't be merged into the
@@ -1810,6 +1828,8 @@ typedef struct SpecialJoinInfo
 	JoinType	jointype;		/* always INNER, LEFT, FULL, SEMI, or ANTI */
 	bool		lhs_strict;		/* joinclause is strict for some LHS rel */
 	bool		delay_upper_joins;		/* can't commute with upper RHS */
+	bool		inner_unique;	/* inner side of join matches no more than one
+								 * outer side tuple */
 	/* Remaining fields are set only for JOIN_SEMI jointype: */
 	bool		semi_can_btree; /* true if semi_operators are all btree */
 	bool		semi_can_hash;	/* true if semi_operators are all hash */
@@ -2042,6 +2062,10 @@ typedef struct SemiAntiJoinFactors
  * sjinfo is extra info about special joins for selectivity estimation
  * semifactors is as shown above (only valid for SEMI or ANTI joins)
  * param_source_rels are OK targets for parameterization of result paths
+ * inner_unique marks if the inner side of join matches no more than one outer
+ * side tuple
+ * outer_unique marks if the outer side of join matches no more than one inner
+ * side tuple
  */
 typedef struct JoinPathExtraData
 {
@@ -2050,6 +2074,8 @@ typedef struct JoinPathExtraData
 	SpecialJoinInfo *sjinfo;
 	SemiAntiJoinFactors semifactors;
 	Relids		param_source_rels;
+	bool		inner_unique;
+	bool		outer_unique;
 } JoinPathExtraData;
 
 /*
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 7b41317..5ce410c 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -102,11 +102,9 @@ extern NestPath *create_nestloop_path(PlannerInfo *root,
 					 RelOptInfo *joinrel,
 					 JoinType jointype,
 					 JoinCostWorkspace *workspace,
-					 SpecialJoinInfo *sjinfo,
-					 SemiAntiJoinFactors *semifactors,
+					 JoinPathExtraData *extra,
 					 Path *outer_path,
 					 Path *inner_path,
-					 List *restrict_clauses,
 					 List *pathkeys,
 					 Relids required_outer);
 
@@ -114,10 +112,9 @@ extern MergePath *create_mergejoin_path(PlannerInfo *root,
 					  RelOptInfo *joinrel,
 					  JoinType jointype,
 					  JoinCostWorkspace *workspace,
-					  SpecialJoinInfo *sjinfo,
+					  JoinPathExtraData *extra,
 					  Path *outer_path,
 					  Path *inner_path,
-					  List *restrict_clauses,
 					  List *pathkeys,
 					  Relids required_outer,
 					  List *mergeclauses,
@@ -128,11 +125,9 @@ extern HashPath *create_hashjoin_path(PlannerInfo *root,
 					 RelOptInfo *joinrel,
 					 JoinType jointype,
 					 JoinCostWorkspace *workspace,
-					 SpecialJoinInfo *sjinfo,
-					 SemiAntiJoinFactors *semifactors,
+					  JoinPathExtraData *extra,
 					 Path *outer_path,
 					 Path *inner_path,
-					 List *restrict_clauses,
 					 Relids required_outer,
 					 List *hashclauses);
 
@@ -238,6 +233,7 @@ extern Path *reparameterize_path(PlannerInfo *root, Path *path,
  * prototypes for relnode.c
  */
 extern void setup_simple_rel_arrays(PlannerInfo *root);
+extern void setup_unique_join_caches(PlannerInfo *root);
 extern RelOptInfo *build_simple_rel(PlannerInfo *root, int relid,
 				 RelOptKind reloptkind);
 extern RelOptInfo *find_base_rel(PlannerInfo *root, int relid);
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index 94ef84b..128753a 100644
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -102,10 +102,15 @@ extern void match_foreign_keys_to_quals(PlannerInfo *root);
 /*
  * prototypes for plan/analyzejoins.c
  */
+extern void mark_unique_joins(PlannerInfo *root);
 extern List *remove_useless_joins(PlannerInfo *root, List *joinlist);
 extern bool query_supports_distinctness(Query *query);
 extern bool query_is_distinct_for(Query *query, List *colnos, List *opids);
-
+extern bool innerrel_is_unique(PlannerInfo *root, RelOptInfo *outerrel,
+				   RelOptInfo *innerrel,
+				   JoinType jointype,
+				   SpecialJoinInfo *sjinfo,
+				   List *restrictlist);
 /*
  * prototypes for plan/setrefs.c
  */
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 0ff8062..7604d20 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -381,6 +381,8 @@ order by 1, 2;
    Sort Key: s1.s1, s2.s2
    ->  Nested Loop
          Output: s1.s1, s2.s2, (sum((s1.s1 + s2.s2)))
+         Inner Unique: No
+         Outer Unique: No
          ->  Function Scan on pg_catalog.generate_series s1
                Output: s1.s1
                Function Call: generate_series(1, 3)
@@ -390,7 +392,7 @@ order by 1, 2;
                ->  Function Scan on pg_catalog.generate_series s2
                      Output: s2.s2
                      Function Call: generate_series(1, 3)
-(14 rows)
+(16 rows)
 
 select s1, s2, sm
 from generate_series(1, 3) s1,
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index d9bbae0..233c796 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -3362,6 +3362,8 @@ using (join_key);
 --------------------------------------------------------------------------
  Nested Loop Left Join
    Output: "*VALUES*".column1, i1.f1, (666)
+   Inner Unique: No
+   Outer Unique: No
    Join Filter: ("*VALUES*".column1 = i1.f1)
    ->  Values Scan on "*VALUES*"
          Output: "*VALUES*".column1
@@ -3369,12 +3371,14 @@ using (join_key);
          Output: i1.f1, (666)
          ->  Nested Loop Left Join
                Output: i1.f1, 666
+               Inner Unique: No
+               Outer Unique: No
                ->  Seq Scan on public.int4_tbl i1
                      Output: i1.f1
                ->  Index Only Scan using tenk1_unique2 on public.tenk1 i2
                      Output: i2.unique2
                      Index Cond: (i2.unique2 = i1.f1)
-(14 rows)
+(18 rows)
 
 select foo1.join_key as foo1_id, foo3.join_key AS foo3_id, bug_field from
   (values (0),(1)) foo1(join_key)
@@ -3412,9 +3416,13 @@ select t1.* from
 ----------------------------------------------------------------------
  Hash Left Join
    Output: t1.f1
+   Inner Unique: No
+   Outer Unique: No
    Hash Cond: (i8.q2 = i4.f1)
    ->  Nested Loop Left Join
          Output: t1.f1, i8.q2
+         Inner Unique: No
+         Outer Unique: No
          Join Filter: (t1.f1 = '***'::text)
          ->  Seq Scan on public.text_tbl t1
                Output: t1.f1
@@ -3422,9 +3430,13 @@ select t1.* from
                Output: i8.q2
                ->  Hash Right Join
                      Output: i8.q2
+                     Inner Unique: No
+                     Outer Unique: No
                      Hash Cond: ((NULL::integer) = i8b1.q2)
                      ->  Hash Left Join
                            Output: i8.q2, (NULL::integer)
+                           Inner Unique: No
+                           Outer Unique: No
                            Hash Cond: (i8.q1 = i8b2.q1)
                            ->  Seq Scan on public.int8_tbl i8
                                  Output: i8.q1, i8.q2
@@ -3440,7 +3452,7 @@ select t1.* from
          Output: i4.f1
          ->  Seq Scan on public.int4_tbl i4
                Output: i4.f1
-(30 rows)
+(38 rows)
 
 select t1.* from
   text_tbl t1
@@ -3473,9 +3485,13 @@ select t1.* from
 ----------------------------------------------------------------------------
  Hash Left Join
    Output: t1.f1
+   Inner Unique: No
+   Outer Unique: No
    Hash Cond: (i8.q2 = i4.f1)
    ->  Nested Loop Left Join
          Output: t1.f1, i8.q2
+         Inner Unique: No
+         Outer Unique: No
          Join Filter: (t1.f1 = '***'::text)
          ->  Seq Scan on public.text_tbl t1
                Output: t1.f1
@@ -3483,12 +3499,18 @@ select t1.* from
                Output: i8.q2
                ->  Hash Right Join
                      Output: i8.q2
+                     Inner Unique: No
+                     Outer Unique: No
                      Hash Cond: ((NULL::integer) = i8b1.q2)
                      ->  Hash Right Join
                            Output: i8.q2, (NULL::integer)
+                           Inner Unique: No
+                           Outer Unique: No
                            Hash Cond: (i8b2.q1 = i8.q1)
                            ->  Nested Loop
                                  Output: i8b2.q1, NULL::integer
+                                 Inner Unique: No
+                                 Outer Unique: No
                                  ->  Seq Scan on public.int8_tbl i8b2
                                        Output: i8b2.q1, i8b2.q2
                                  ->  Materialize
@@ -3505,7 +3527,7 @@ select t1.* from
          Output: i4.f1
          ->  Seq Scan on public.int4_tbl i4
                Output: i4.f1
-(34 rows)
+(44 rows)
 
 select t1.* from
   text_tbl t1
@@ -3539,9 +3561,13 @@ select t1.* from
 ----------------------------------------------------------------------------
  Hash Left Join
    Output: t1.f1
+   Inner Unique: No
+   Outer Unique: No
    Hash Cond: (i8.q2 = i4.f1)
    ->  Nested Loop Left Join
          Output: t1.f1, i8.q2
+         Inner Unique: No
+         Outer Unique: No
          Join Filter: (t1.f1 = '***'::text)
          ->  Seq Scan on public.text_tbl t1
                Output: t1.f1
@@ -3549,12 +3575,18 @@ select t1.* from
                Output: i8.q2
                ->  Hash Right Join
                      Output: i8.q2
+                     Inner Unique: No
+                     Outer Unique: No
                      Hash Cond: ((NULL::integer) = i8b1.q2)
                      ->  Hash Right Join
                            Output: i8.q2, (NULL::integer)
+                           Inner Unique: No
+                           Outer Unique: No
                            Hash Cond: (i8b2.q1 = i8.q1)
                            ->  Hash Join
                                  Output: i8b2.q1, NULL::integer
+                                 Inner Unique: No
+                                 Outer Unique: No
                                  Hash Cond: (i8b2.q1 = i4b2.f1)
                                  ->  Seq Scan on public.int8_tbl i8b2
                                        Output: i8b2.q1, i8b2.q2
@@ -3574,7 +3606,7 @@ select t1.* from
          Output: i4.f1
          ->  Seq Scan on public.int4_tbl i4
                Output: i4.f1
-(37 rows)
+(47 rows)
 
 select t1.* from
   text_tbl t1
@@ -3606,14 +3638,20 @@ select * from
 --------------------------------------------------------
  Nested Loop Left Join
    Output: t1.f1, i8.q1, i8.q2, t2.f1, i4.f1
+   Inner Unique: No
+   Outer Unique: No
    ->  Seq Scan on public.text_tbl t2
          Output: t2.f1
    ->  Materialize
          Output: i8.q1, i8.q2, i4.f1, t1.f1
          ->  Nested Loop
                Output: i8.q1, i8.q2, i4.f1, t1.f1
+               Inner Unique: No
+               Outer Unique: No
                ->  Nested Loop Left Join
                      Output: i8.q1, i8.q2, i4.f1
+                     Inner Unique: No
+                     Outer Unique: No
                      Join Filter: (i8.q1 = i4.f1)
                      ->  Seq Scan on public.int8_tbl i8
                            Output: i8.q1, i8.q2
@@ -3623,7 +3661,7 @@ select * from
                ->  Seq Scan on public.text_tbl t1
                      Output: t1.f1
                      Filter: (t1.f1 = 'doh!'::text)
-(19 rows)
+(25 rows)
 
 select * from
   text_tbl t1
@@ -3653,9 +3691,13 @@ where t1.f1 = ss.f1;
 --------------------------------------------------
  Nested Loop
    Output: t1.f1, i8.q1, i8.q2, (i8.q1), t2.f1
+   Inner Unique: No
+   Outer Unique: No
    Join Filter: (t1.f1 = t2.f1)
    ->  Nested Loop Left Join
          Output: t1.f1, i8.q1, i8.q2
+         Inner Unique: No
+         Outer Unique: No
          ->  Seq Scan on public.text_tbl t1
                Output: t1.f1
          ->  Materialize
@@ -3667,7 +3709,7 @@ where t1.f1 = ss.f1;
          Output: (i8.q1), t2.f1
          ->  Seq Scan on public.text_tbl t2
                Output: i8.q1, t2.f1
-(16 rows)
+(20 rows)
 
 select * from
   text_tbl t1
@@ -3692,11 +3734,17 @@ where t1.f1 = ss2.f1;
 -------------------------------------------------------------------
  Nested Loop
    Output: t1.f1, i8.q1, i8.q2, (i8.q1), t2.f1, ((i8.q1)), (t2.f1)
+   Inner Unique: No
+   Outer Unique: No
    Join Filter: (t1.f1 = (t2.f1))
    ->  Nested Loop
          Output: t1.f1, i8.q1, i8.q2, (i8.q1), t2.f1
+         Inner Unique: No
+         Outer Unique: No
          ->  Nested Loop Left Join
                Output: t1.f1, i8.q1, i8.q2
+               Inner Unique: No
+               Outer Unique: No
                ->  Seq Scan on public.text_tbl t1
                      Output: t1.f1
                ->  Materialize
@@ -3712,7 +3760,7 @@ where t1.f1 = ss2.f1;
          Output: ((i8.q1)), (t2.f1)
          ->  Seq Scan on public.text_tbl t3
                Output: (i8.q1), t2.f1
-(22 rows)
+(28 rows)
 
 select * from
   text_tbl t1
@@ -3738,10 +3786,16 @@ where tt1.f1 = ss1.c0;
 ----------------------------------------------------------
  Nested Loop
    Output: 1
+   Inner Unique: No
+   Outer Unique: No
    ->  Nested Loop Left Join
          Output: tt1.f1, tt4.f1
+         Inner Unique: No
+         Outer Unique: No
          ->  Nested Loop
                Output: tt1.f1
+               Inner Unique: No
+               Outer Unique: No
                ->  Seq Scan on public.text_tbl tt1
                      Output: tt1.f1
                      Filter: (tt1.f1 = 'foo'::text)
@@ -3751,6 +3805,8 @@ where tt1.f1 = ss1.c0;
                Output: tt4.f1
                ->  Nested Loop Left Join
                      Output: tt4.f1
+                     Inner Unique: No
+                     Outer Unique: No
                      Join Filter: (tt3.f1 = tt4.f1)
                      ->  Seq Scan on public.text_tbl tt3
                            Output: tt3.f1
@@ -3765,7 +3821,7 @@ where tt1.f1 = ss1.c0;
                Output: (tt4.f1)
                ->  Seq Scan on public.text_tbl tt5
                      Output: tt4.f1
-(29 rows)
+(37 rows)
 
 select 1 from
   text_tbl as tt1
@@ -3795,13 +3851,21 @@ where ss1.c2 = 0;
 ------------------------------------------------------------------------
  Nested Loop
    Output: (i41.f1), (i8.q1), (i8.q2), (i42.f1), (i43.f1), ((42))
+   Inner Unique: No
+   Outer Unique: No
    ->  Hash Join
          Output: i41.f1, i42.f1, i8.q1, i8.q2, i43.f1, 42
+         Inner Unique: No
+         Outer Unique: No
          Hash Cond: (i41.f1 = i42.f1)
          ->  Nested Loop
                Output: i8.q1, i8.q2, i43.f1, i41.f1
+               Inner Unique: No
+               Outer Unique: No
                ->  Nested Loop
                      Output: i8.q1, i8.q2, i43.f1
+                     Inner Unique: No
+                     Outer Unique: No
                      ->  Seq Scan on public.int8_tbl i8
                            Output: i8.q1, i8.q2
                            Filter: (i8.q1 = 0)
@@ -3818,7 +3882,7 @@ where ss1.c2 = 0;
          Output: (i41.f1), (i8.q1), (i8.q2), (i42.f1), (i43.f1), ((42))
          ->  Seq Scan on public.text_tbl
                Output: i41.f1, i8.q1, i8.q2, i42.f1, i43.f1, (42)
-(25 rows)
+(33 rows)
 
 select ss2.* from
   int4_tbl i41
@@ -3906,6 +3970,8 @@ explain (verbose, costs off)
 ---------------------------------------------------------
  Merge Left Join
    Output: a.q2, b.q1
+   Inner Unique: No
+   Outer Unique: No
    Merge Cond: (a.q2 = (COALESCE(b.q1, '1'::bigint)))
    Filter: (COALESCE(b.q1, '1'::bigint) > 0)
    ->  Sort
@@ -3918,7 +3984,7 @@ explain (verbose, costs off)
          Sort Key: (COALESCE(b.q1, '1'::bigint))
          ->  Seq Scan on public.int8_tbl b
                Output: b.q1, COALESCE(b.q1, '1'::bigint)
-(14 rows)
+(16 rows)
 
 select a.q2, b.q1
   from int8_tbl a left join int8_tbl b on a.q2 = coalesce(b.q1, 1)
@@ -4801,12 +4867,14 @@ select * from
 ------------------------------------------
  Nested Loop Left Join
    Output: a.q1, a.q2, b.q1, b.q2, (a.q2)
+   Inner Unique: No
+   Outer Unique: No
    ->  Seq Scan on public.int8_tbl a
          Output: a.q1, a.q2
    ->  Seq Scan on public.int8_tbl b
          Output: b.q1, b.q2, a.q2
          Filter: (a.q2 = b.q1)
-(7 rows)
+(9 rows)
 
 select * from
   int8_tbl a left join
@@ -4833,12 +4901,14 @@ select * from
 ------------------------------------------------------------------
  Nested Loop Left Join
    Output: a.q1, a.q2, b.q1, b.q2, (COALESCE(a.q2, '42'::bigint))
+   Inner Unique: No
+   Outer Unique: No
    ->  Seq Scan on public.int8_tbl a
          Output: a.q1, a.q2
    ->  Seq Scan on public.int8_tbl b
          Output: b.q1, b.q2, COALESCE(a.q2, '42'::bigint)
          Filter: (a.q2 = b.q1)
-(7 rows)
+(9 rows)
 
 select * from
   int8_tbl a left join
@@ -4866,6 +4936,8 @@ select * from int4_tbl i left join
 -------------------------------------------
  Hash Left Join
    Output: i.f1, j.f1
+   Inner Unique: No
+   Outer Unique: No
    Hash Cond: (i.f1 = j.f1)
    ->  Seq Scan on public.int4_tbl i
          Output: i.f1
@@ -4873,7 +4945,7 @@ select * from int4_tbl i left join
          Output: j.f1
          ->  Seq Scan on public.int2_tbl j
                Output: j.f1
-(9 rows)
+(11 rows)
 
 select * from int4_tbl i left join
   lateral (select * from int2_tbl j where i.f1 = j.f1) k on true;
@@ -4893,12 +4965,14 @@ select * from int4_tbl i left join
 -------------------------------------
  Nested Loop Left Join
    Output: i.f1, (COALESCE(i.*))
+   Inner Unique: No
+   Outer Unique: No
    ->  Seq Scan on public.int4_tbl i
          Output: i.f1, i.*
    ->  Seq Scan on public.int2_tbl j
          Output: j.f1, COALESCE(i.*)
          Filter: (i.f1 = j.f1)
-(7 rows)
+(9 rows)
 
 select * from int4_tbl i left join
   lateral (select coalesce(i) from int2_tbl j where i.f1 = j.f1) k on true;
@@ -4920,10 +4994,14 @@ select * from int4_tbl a,
 -------------------------------------------------
  Nested Loop
    Output: a.f1, b.f1, c.q1, c.q2
+   Inner Unique: No
+   Outer Unique: No
    ->  Seq Scan on public.int4_tbl a
          Output: a.f1
    ->  Hash Left Join
          Output: b.f1, c.q1, c.q2
+         Inner Unique: No
+         Outer Unique: No
          Hash Cond: (b.f1 = c.q1)
          ->  Seq Scan on public.int4_tbl b
                Output: b.f1
@@ -4932,7 +5010,7 @@ select * from int4_tbl a,
                ->  Seq Scan on public.int8_tbl c
                      Output: c.q1, c.q2
                      Filter: (a.f1 = c.q2)
-(14 rows)
+(18 rows)
 
 select * from int4_tbl a,
   lateral (
@@ -4978,16 +5056,20 @@ select * from
 -------------------------------------------------------------
  Nested Loop Left Join
    Output: a.q1, a.q2, b.q1, c.q1, (LEAST(a.q1, b.q1, c.q1))
+   Inner Unique: No
+   Outer Unique: No
    ->  Seq Scan on public.int8_tbl a
          Output: a.q1, a.q2
    ->  Nested Loop
          Output: b.q1, c.q1, LEAST(a.q1, b.q1, c.q1)
+         Inner Unique: No
+         Outer Unique: No
          ->  Seq Scan on public.int8_tbl b
                Output: b.q1, b.q2
                Filter: (a.q2 = b.q1)
          ->  Seq Scan on public.int8_tbl c
                Output: c.q1, c.q2
-(11 rows)
+(15 rows)
 
 select * from
   int8_tbl a left join lateral
@@ -5054,13 +5136,21 @@ select * from
 ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  Nested Loop
    Output: c.q1, c.q2, a.q1, a.q2, b.q1, (COALESCE(b.q2, '42'::bigint)), d.q1, (COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2)), ((COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2)))
+   Inner Unique: No
+   Outer Unique: No
    ->  Hash Right Join
          Output: c.q1, c.q2, a.q1, a.q2, b.q1, d.q1, (COALESCE(b.q2, '42'::bigint)), (COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2))
+         Inner Unique: No
+         Outer Unique: No
          Hash Cond: (d.q1 = c.q2)
          ->  Nested Loop
                Output: a.q1, a.q2, b.q1, d.q1, (COALESCE(b.q2, '42'::bigint)), (COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2))
+               Inner Unique: No
+               Outer Unique: No
                ->  Hash Left Join
                      Output: a.q1, a.q2, b.q1, (COALESCE(b.q2, '42'::bigint))
+                     Inner Unique: No
+                     Outer Unique: No
                      Hash Cond: (a.q2 = b.q1)
                      ->  Seq Scan on public.int8_tbl a
                            Output: a.q1, a.q2
@@ -5076,7 +5166,7 @@ select * from
                      Output: c.q1, c.q2
    ->  Result
          Output: (COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2))
-(24 rows)
+(32 rows)
 
 -- case that breaks the old ph_may_need optimization
 explain (verbose, costs off)
@@ -5094,17 +5184,27 @@ select c.*,a.*,ss1.q1,ss2.q1,ss3.* from
 ---------------------------------------------------------------------------------------------------------
  Nested Loop
    Output: c.q1, c.q2, a.q1, a.q2, b.q1, d.q1, i.f1
+   Inner Unique: No
+   Outer Unique: No
    Join Filter: ((COALESCE((COALESCE(b.q2, (b2.f1)::bigint)), d.q2)) > i.f1)
    ->  Hash Right Join
          Output: c.q1, c.q2, a.q1, a.q2, b.q1, d.q1, (COALESCE((COALESCE(b.q2, (b2.f1)::bigint)), d.q2))
+         Inner Unique: No
+         Outer Unique: No
          Hash Cond: (d.q1 = c.q2)
          ->  Nested Loop
                Output: a.q1, a.q2, b.q1, d.q1, (COALESCE((COALESCE(b.q2, (b2.f1)::bigint)), d.q2))
+               Inner Unique: No
+               Outer Unique: No
                ->  Hash Right Join
                      Output: a.q1, a.q2, b.q1, (COALESCE(b.q2, (b2.f1)::bigint))
+                     Inner Unique: No
+                     Outer Unique: No
                      Hash Cond: (b.q1 = a.q2)
                      ->  Nested Loop
                            Output: b.q1, COALESCE(b.q2, (b2.f1)::bigint)
+                           Inner Unique: No
+                           Outer Unique: No
                            Join Filter: (b.q1 < b2.f1)
                            ->  Seq Scan on public.int8_tbl b
                                  Output: b.q1, b.q2
@@ -5126,7 +5226,7 @@ select c.*,a.*,ss1.q1,ss2.q1,ss3.* from
          Output: i.f1
          ->  Seq Scan on public.int4_tbl i
                Output: i.f1
-(34 rows)
+(44 rows)
 
 -- check processing of postponed quals (bug #9041)
 explain (verbose, costs off)
@@ -5139,16 +5239,20 @@ select * from
 ----------------------------------------------
  Nested Loop Left Join
    Output: (1), (2), (3)
+   Inner Unique: No
+   Outer Unique: No
    Join Filter: (((3) = (1)) AND ((3) = (2)))
    ->  Nested Loop
          Output: (1), (2)
+         Inner Unique: No
+         Outer Unique: No
          ->  Result
                Output: 1
          ->  Result
                Output: 2
    ->  Result
          Output: 3
-(11 rows)
+(15 rows)
 
 -- check we don't try to do a unique-ified semijoin with LATERAL
 explain (verbose, costs off)
@@ -5161,10 +5265,14 @@ select * from
 ----------------------------------------------------------------------
  Nested Loop
    Output: "*VALUES*".column1, "*VALUES*".column2, int4_tbl.f1
+   Inner Unique: No
+   Outer Unique: No
    ->  Values Scan on "*VALUES*"
          Output: "*VALUES*".column1, "*VALUES*".column2
    ->  Nested Loop Semi Join
          Output: int4_tbl.f1
+         Inner Unique: No
+         Outer Unique: No
          Join Filter: (int4_tbl.f1 = tenk1.unique1)
          ->  Seq Scan on public.int4_tbl
                Output: int4_tbl.f1
@@ -5173,7 +5281,7 @@ select * from
                ->  Index Scan using tenk1_unique2 on public.tenk1
                      Output: tenk1.unique1
                      Index Cond: (tenk1.unique2 = "*VALUES*".column2)
-(14 rows)
+(18 rows)
 
 select * from
   (values (0,9998), (1,1000)) v(id,x),
@@ -5200,10 +5308,14 @@ lateral (select * from int8_tbl t1,
 -----------------------------------------------------------------
  Nested Loop
    Output: "*VALUES*".column1, t1.q1, t1.q2, ss2.q1, ss2.q2
+   Inner Unique: No
+   Outer Unique: No
    ->  Seq Scan on public.int8_tbl t1
          Output: t1.q1, t1.q2
    ->  Nested Loop
          Output: "*VALUES*".column1, ss2.q1, ss2.q2
+         Inner Unique: No
+         Outer Unique: No
          ->  Values Scan on "*VALUES*"
                Output: "*VALUES*".column1
          ->  Subquery Scan on ss2
@@ -5225,7 +5337,7 @@ lateral (select * from int8_tbl t1,
                              ->  Seq Scan on public.int8_tbl t3
                                    Output: t3.q1, t3.q2
                                    Filter: (t3.q2 = $2)
-(27 rows)
+(31 rows)
 
 select * from (values (0), (1)) v(id),
 lateral (select * from int8_tbl t1,
@@ -5323,3 +5435,255 @@ ERROR:  invalid reference to FROM-clause entry for table "xx1"
 LINE 1: ...xx1 using lateral (select * from int4_tbl where f1 = x1) ss;
                                                                 ^
 HINT:  There is an entry for table "xx1", but it cannot be referenced from this part of the query.
+--
+-- test planner's ability to mark joins as unique
+--
+create table j1 (id int primary key);
+create table j2 (id int primary key);
+create table j3 (id int);
+insert into j1 values(1),(2),(3);
+insert into j2 values(1),(2),(3);
+insert into j3 values(1),(1);
+analyze j1;
+analyze j2;
+analyze j3;
+-- ensure join is changed to a semi join
+explain (verbose, costs off)
+select * from j1 inner join j2 on j1.id = j2.id;
+            QUERY PLAN             
+-----------------------------------
+ Hash Join
+   Output: j1.id, j2.id
+   Inner Unique: Yes
+   Outer Unique: Yes
+   Hash Cond: (j1.id = j2.id)
+   ->  Seq Scan on public.j1
+         Output: j1.id
+   ->  Hash
+         Output: j2.id
+         ->  Seq Scan on public.j2
+               Output: j2.id
+(11 rows)
+
+-- ensure join not changed when not an equi-join
+explain (verbose, costs off)
+select * from j1 inner join j2 on j1.id > j2.id;
+            QUERY PLAN             
+-----------------------------------
+ Nested Loop
+   Output: j1.id, j2.id
+   Inner Unique: No
+   Outer Unique: No
+   Join Filter: (j1.id > j2.id)
+   ->  Seq Scan on public.j1
+         Output: j1.id
+   ->  Materialize
+         Output: j2.id
+         ->  Seq Scan on public.j2
+               Output: j2.id
+(11 rows)
+
+-- don't change, as j3 has no unique index or pk on id
+explain (verbose, costs off)
+select * from j1 inner join j3 on j1.id = j3.id;
+            QUERY PLAN             
+-----------------------------------
+ Hash Join
+   Output: j1.id, j3.id
+   Inner Unique: No
+   Outer Unique: Yes
+   Hash Cond: (j1.id = j3.id)
+   ->  Seq Scan on public.j1
+         Output: j1.id
+   ->  Hash
+         Output: j3.id
+         ->  Seq Scan on public.j3
+               Output: j3.id
+(11 rows)
+
+-- ensure left join is converted to left semi join
+explain (verbose, costs off)
+select * from j1 left join j2 on j1.id = j2.id;
+            QUERY PLAN             
+-----------------------------------
+ Hash Left Join
+   Output: j1.id, j2.id
+   Inner Unique: Yes
+   Outer Unique: Yes
+   Hash Cond: (j1.id = j2.id)
+   ->  Seq Scan on public.j1
+         Output: j1.id
+   ->  Hash
+         Output: j2.id
+         ->  Seq Scan on public.j2
+               Output: j2.id
+(11 rows)
+
+-- ensure right join is converted too
+explain (verbose, costs off)
+select * from j1 right join j2 on j1.id = j2.id;
+            QUERY PLAN             
+-----------------------------------
+ Hash Left Join
+   Output: j1.id, j2.id
+   Inner Unique: Yes
+   Outer Unique: Yes
+   Hash Cond: (j2.id = j1.id)
+   ->  Seq Scan on public.j2
+         Output: j2.id
+   ->  Hash
+         Output: j1.id
+         ->  Seq Scan on public.j1
+               Output: j1.id
+(11 rows)
+
+-- a clauseless (cross) join can't be converted
+explain (verbose, costs off)
+select * from j1 cross join j2;
+            QUERY PLAN             
+-----------------------------------
+ Nested Loop
+   Output: j1.id, j2.id
+   Inner Unique: No
+   Outer Unique: No
+   ->  Seq Scan on public.j1
+         Output: j1.id
+   ->  Materialize
+         Output: j2.id
+         ->  Seq Scan on public.j2
+               Output: j2.id
+(10 rows)
+
+-- ensure a natural join is converted to a semi join
+explain (verbose, costs off)
+select * from j1 natural join j2;
+            QUERY PLAN             
+-----------------------------------
+ Hash Join
+   Output: j1.id
+   Inner Unique: Yes
+   Outer Unique: Yes
+   Hash Cond: (j1.id = j2.id)
+   ->  Seq Scan on public.j1
+         Output: j1.id
+   ->  Hash
+         Output: j2.id
+         ->  Seq Scan on public.j2
+               Output: j2.id
+(11 rows)
+
+-- ensure distinct clause allows the inner to become a semi join
+explain (verbose, costs off)
+select * from j1
+inner join (select distinct id from j3) j3 on j1.id = j3.id;
+               QUERY PLAN                
+-----------------------------------------
+ Nested Loop
+   Output: j1.id, j3.id
+   Inner Unique: Yes
+   Outer Unique: Yes
+   Join Filter: (j1.id = j3.id)
+   ->  Unique
+         Output: j3.id
+         ->  Sort
+               Output: j3.id
+               Sort Key: j3.id
+               ->  Seq Scan on public.j3
+                     Output: j3.id
+   ->  Seq Scan on public.j1
+         Output: j1.id
+(14 rows)
+
+-- ensure group by clause allows the inner to become a semi join
+explain (verbose, costs off)
+select * from j1
+inner join (select id from j3 group by id) j3 on j1.id = j3.id;
+               QUERY PLAN                
+-----------------------------------------
+ Nested Loop
+   Output: j1.id, j3.id
+   Inner Unique: Yes
+   Outer Unique: Yes
+   Join Filter: (j1.id = j3.id)
+   ->  Group
+         Output: j3.id
+         Group Key: j3.id
+         ->  Sort
+               Output: j3.id
+               Sort Key: j3.id
+               ->  Seq Scan on public.j3
+                     Output: j3.id
+   ->  Seq Scan on public.j1
+         Output: j1.id
+(15 rows)
+
+-- ensure a full join is not altered
+explain (verbose, costs off)
+select * from j1 full join j2 on j1.id = j2.id;
+            QUERY PLAN             
+-----------------------------------
+ Hash Full Join
+   Output: j1.id, j2.id
+   Inner Unique: No
+   Outer Unique: No
+   Hash Cond: (j1.id = j2.id)
+   ->  Seq Scan on public.j1
+         Output: j1.id
+   ->  Hash
+         Output: j2.id
+         ->  Seq Scan on public.j2
+               Output: j2.id
+(11 rows)
+
+drop table j1;
+drop table j2;
+drop table j3;
+-- test a more complex permutations of join conversions
+create table j1 (id1 int, id2 int, primary key(id1,id2));
+create table j2 (id1 int, id2 int, primary key(id1,id2));
+create table j3 (id1 int, id2 int, primary key(id1,id2));
+insert into j1 values(1,1),(2,2);
+insert into j2 values(1,1);
+insert into j3 values(1,1);
+analyze j1;
+analyze j2;
+analyze j3;
+-- ensure there's no join conversion when not all columns which are part of
+-- the unique index are part of the join clause
+explain (verbose, costs off)
+select * from j1
+inner join j2 on j1.id1 = j2.id1;
+                QUERY PLAN                
+------------------------------------------
+ Nested Loop
+   Output: j1.id1, j1.id2, j2.id1, j2.id2
+   Inner Unique: No
+   Outer Unique: No
+   Join Filter: (j1.id1 = j2.id1)
+   ->  Seq Scan on public.j2
+         Output: j2.id1, j2.id2
+   ->  Seq Scan on public.j1
+         Output: j1.id1, j1.id2
+(9 rows)
+
+-- ensure inner is converted to semi join when there's multiple columns in the
+-- join condition
+explain (verbose, costs off)
+select * from j1
+inner join j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2;
+                        QUERY PLAN                        
+----------------------------------------------------------
+ Nested Loop
+   Output: j1.id1, j1.id2, j2.id1, j2.id2
+   Inner Unique: Yes
+   Outer Unique: Yes
+   Join Filter: ((j1.id1 = j2.id1) AND (j1.id2 = j2.id2))
+   ->  Seq Scan on public.j2
+         Output: j2.id1, j2.id2
+   ->  Seq Scan on public.j1
+         Output: j1.id1, j1.id2
+(9 rows)
+
+drop table j1;
+drop table j2;
+drop table j3;
diff --git a/src/test/regress/expected/plpgsql.out b/src/test/regress/expected/plpgsql.out
index 79513e4..7e948ed 100644
--- a/src/test/regress/expected/plpgsql.out
+++ b/src/test/regress/expected/plpgsql.out
@@ -5391,12 +5391,14 @@ select i, a from
 -----------------------------------------------------------------
  Nested Loop
    Output: i.i, (returns_rw_array(1))
+   Inner Unique: No
+   Outer Unique: No
    ->  Result
          Output: returns_rw_array(1)
    ->  Function Scan on public.consumes_rw_array i
          Output: i.i
          Function Call: consumes_rw_array((returns_rw_array(1)))
-(7 rows)
+(9 rows)
 
 select i, a from
   (select returns_rw_array(1) as a offset 0) ss,
diff --git a/src/test/regress/expected/rangefuncs.out b/src/test/regress/expected/rangefuncs.out
index 56481de..434de62 100644
--- a/src/test/regress/expected/rangefuncs.out
+++ b/src/test/regress/expected/rangefuncs.out
@@ -2010,12 +2010,14 @@ select x from int8_tbl, extractq2(int8_tbl) f(x);
 ------------------------------------------
  Nested Loop
    Output: f.x
+   Inner Unique: No
+   Outer Unique: No
    ->  Seq Scan on public.int8_tbl
          Output: int8_tbl.q1, int8_tbl.q2
    ->  Function Scan on f
          Output: f.x
          Function Call: int8_tbl.q2
-(7 rows)
+(9 rows)
 
 select x from int8_tbl, extractq2(int8_tbl) f(x);
          x         
@@ -2036,11 +2038,13 @@ select x from int8_tbl, extractq2_2(int8_tbl) f(x);
 -----------------------------------
  Nested Loop
    Output: ((int8_tbl.*).q2)
+   Inner Unique: No
+   Outer Unique: No
    ->  Seq Scan on public.int8_tbl
          Output: int8_tbl.*
    ->  Result
          Output: (int8_tbl.*).q2
-(6 rows)
+(8 rows)
 
 select x from int8_tbl, extractq2_2(int8_tbl) f(x);
          x         
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index abd3217..e95c42b 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -783,6 +783,8 @@ select * from int4_tbl where
 ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  Nested Loop Semi Join
    Output: int4_tbl.f1
+   Inner Unique: No
+   Outer Unique: No
    Join Filter: (CASE WHEN (hashed SubPlan 1) THEN int4_tbl.f1 ELSE NULL::integer END = b.ten)
    ->  Seq Scan on public.int4_tbl
          Output: int4_tbl.f1
@@ -791,7 +793,7 @@ select * from int4_tbl where
    SubPlan 1
      ->  Index Only Scan using tenk1_unique1 on public.tenk1 a
            Output: a.unique1
-(10 rows)
+(12 rows)
 
 select * from int4_tbl where
   (case when f1 in (select unique1 from tenk1 a) then f1 else null end) in
@@ -811,6 +813,8 @@ select * from int4_tbl o where (f1, f1) in
 -------------------------------------------------------------------
  Nested Loop Semi Join
    Output: o.f1
+   Inner Unique: No
+   Outer Unique: No
    Join Filter: (o.f1 = "ANY_subquery".f1)
    ->  Seq Scan on public.int4_tbl o
          Output: o.f1
@@ -828,7 +832,7 @@ select * from int4_tbl o where (f1, f1) in
                                  Group Key: i.f1
                                  ->  Seq Scan on public.int4_tbl i
                                        Output: i.f1
-(19 rows)
+(21 rows)
 
 select * from int4_tbl o where (f1, f1) in
   (select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1);
diff --git a/src/test/regress/expected/with.out b/src/test/regress/expected/with.out
index 02fa08e..aa012d4 100644
--- a/src/test/regress/expected/with.out
+++ b/src/test/regress/expected/with.out
@@ -2183,6 +2183,8 @@ DELETE FROM a USING wcte WHERE aa = q2;
                  Output: '42'::bigint, '47'::bigint
    ->  Nested Loop
          Output: a.ctid, wcte.*
+         Inner Unique: No
+         Outer Unique: No
          Join Filter: (a.aa = wcte.q2)
          ->  Seq Scan on public.a
                Output: a.ctid, a.aa
@@ -2190,6 +2192,8 @@ DELETE FROM a USING wcte WHERE aa = q2;
                Output: wcte.*, wcte.q2
    ->  Nested Loop
          Output: b.ctid, wcte.*
+         Inner Unique: No
+         Outer Unique: No
          Join Filter: (b.aa = wcte.q2)
          ->  Seq Scan on public.b
                Output: b.ctid, b.aa
@@ -2197,6 +2201,8 @@ DELETE FROM a USING wcte WHERE aa = q2;
                Output: wcte.*, wcte.q2
    ->  Nested Loop
          Output: c.ctid, wcte.*
+         Inner Unique: No
+         Outer Unique: No
          Join Filter: (c.aa = wcte.q2)
          ->  Seq Scan on public.c
                Output: c.ctid, c.aa
@@ -2204,12 +2210,14 @@ DELETE FROM a USING wcte WHERE aa = q2;
                Output: wcte.*, wcte.q2
    ->  Nested Loop
          Output: d.ctid, wcte.*
+         Inner Unique: No
+         Outer Unique: No
          Join Filter: (d.aa = wcte.q2)
          ->  Seq Scan on public.d
                Output: d.ctid, d.aa
          ->  CTE Scan on wcte
                Output: wcte.*, wcte.q2
-(38 rows)
+(46 rows)
 
 -- error cases
 -- data-modifying WITH tries to use its own output
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index 97bccec..69e0109 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -1730,3 +1730,95 @@ update xx1 set x2 = f1 from xx1, lateral (select * from int4_tbl where f1 = x1)
 delete from xx1 using (select * from int4_tbl where f1 = x1) ss;
 delete from xx1 using (select * from int4_tbl where f1 = xx1.x1) ss;
 delete from xx1 using lateral (select * from int4_tbl where f1 = x1) ss;
+
+--
+-- test planner's ability to mark joins as unique
+--
+
+create table j1 (id int primary key);
+create table j2 (id int primary key);
+create table j3 (id int);
+
+insert into j1 values(1),(2),(3);
+insert into j2 values(1),(2),(3);
+insert into j3 values(1),(1);
+
+analyze j1;
+analyze j2;
+analyze j3;
+
+-- ensure join is changed to a semi join
+explain (verbose, costs off)
+select * from j1 inner join j2 on j1.id = j2.id;
+
+-- ensure join not changed when not an equi-join
+explain (verbose, costs off)
+select * from j1 inner join j2 on j1.id > j2.id;
+
+-- don't change, as j3 has no unique index or pk on id
+explain (verbose, costs off)
+select * from j1 inner join j3 on j1.id = j3.id;
+
+-- ensure left join is converted to left semi join
+explain (verbose, costs off)
+select * from j1 left join j2 on j1.id = j2.id;
+
+-- ensure right join is converted too
+explain (verbose, costs off)
+select * from j1 right join j2 on j1.id = j2.id;
+
+-- a clauseless (cross) join can't be converted
+explain (verbose, costs off)
+select * from j1 cross join j2;
+
+-- ensure a natural join is converted to a semi join
+explain (verbose, costs off)
+select * from j1 natural join j2;
+
+-- ensure distinct clause allows the inner to become a semi join
+explain (verbose, costs off)
+select * from j1
+inner join (select distinct id from j3) j3 on j1.id = j3.id;
+
+-- ensure group by clause allows the inner to become a semi join
+explain (verbose, costs off)
+select * from j1
+inner join (select id from j3 group by id) j3 on j1.id = j3.id;
+
+-- ensure a full join is not altered
+explain (verbose, costs off)
+select * from j1 full join j2 on j1.id = j2.id;
+
+drop table j1;
+drop table j2;
+drop table j3;
+
+-- test a more complex permutations of join conversions
+
+create table j1 (id1 int, id2 int, primary key(id1,id2));
+create table j2 (id1 int, id2 int, primary key(id1,id2));
+create table j3 (id1 int, id2 int, primary key(id1,id2));
+
+insert into j1 values(1,1),(2,2);
+insert into j2 values(1,1);
+insert into j3 values(1,1);
+
+analyze j1;
+analyze j2;
+analyze j3;
+
+-- ensure there's no join conversion when not all columns which are part of
+-- the unique index are part of the join clause
+explain (verbose, costs off)
+select * from j1
+inner join j2 on j1.id1 = j2.id1;
+
+-- ensure inner is converted to semi join when there's multiple columns in the
+-- join condition
+explain (verbose, costs off)
+select * from j1
+inner join j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2;
+
+drop table j1;
+drop table j2;
+drop table j3;

-- 
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Re: [HACKERS] Performance improvement for joins where outer side is unique

Reply via email to