Hello,

> Hello, The attached is non-search version of unique join.

This is a quite bucket-brigade soye makes many functions got
changes to have new parameter only to convey the new bool
information. I know it's very bad.

After some consideration, I removed almost all of such parameters
from path creation function and confine the main work within
add_paths_to_joinrel. After all the patch shrinked and looks
better. Addition to that, somehow, some additional regtests found
to be inner-unique.

I think this satisfies your wish and implemented in non
exhaustive-seearch-in-jointree manner. It still don't have
regressions for itself but I don't see siginificance in adding
it so much...

regards,

-- 
Kyotaro Horiguchi
NTT Open Source Software Center
>From e1d593a6dd3154d7299b4995c4affa50476df15f Mon Sep 17 00:00:00 2001
From: Kyotaro Horiguchi <horiguchi.kyot...@lab.ntt.co.jp>
Date: Tue, 17 Mar 2015 15:37:47 +0900
Subject: [PATCH] Boost inner-unique joins.

Inner-unique joins is accelerated by omitting to fetch the second
inner rows on execution. By this patch allow executor to do so
by detecting inner-unique joins on join path creation.
---
 src/backend/commands/explain.c               |  7 ++++
 src/backend/executor/nodeHashjoin.c          | 12 ++++---
 src/backend/executor/nodeMergejoin.c         | 13 +++++---
 src/backend/executor/nodeNestloop.c          | 12 ++++---
 src/backend/optimizer/path/joinpath.c        | 48 +++++++++++++++++++++++++++-
 src/backend/optimizer/plan/createplan.c      | 28 ++++++++--------
 src/include/nodes/execnodes.h                |  1 +
 src/include/nodes/plannodes.h                |  1 +
 src/include/nodes/relation.h                 |  1 +
 src/test/regress/expected/equivclass.out     |  6 ++--
 src/test/regress/expected/join.out           |  4 +--
 src/test/regress/expected/rowsecurity.out    |  2 +-
 src/test/regress/expected/select_views_1.out |  8 ++---
 13 files changed, 107 insertions(+), 36 deletions(-)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index a951c55..b8a68b5 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -1151,9 +1151,16 @@ ExplainNode(PlanState *planstate, List *ancestors,
 						appendStringInfo(es->str, " %s Join", jointype);
 					else if (!IsA(plan, NestLoop))
 						appendStringInfoString(es->str, " Join");
+					if (((Join *)plan)->inner_unique)
+						appendStringInfoString(es->str, "(inner unique)");
+
 				}
 				else
+				{
 					ExplainPropertyText("Join Type", jointype, es);
+					ExplainPropertyText("Inner unique", 
+							((Join *)plan)->inner_unique?"true":"false", es);
+				}
 			}
 			break;
 		case T_SetOp:
diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c
index 1d78cdf..d3b14e5 100644
--- a/src/backend/executor/nodeHashjoin.c
+++ b/src/backend/executor/nodeHashjoin.c
@@ -306,10 +306,11 @@ ExecHashJoin(HashJoinState *node)
 					}
 
 					/*
-					 * In a semijoin, we'll consider returning the first
-					 * match, but after that we're done with this outer tuple.
+					 * We'll consider returning the first match if the inner
+					 * is unique, but after that we're done with this outer
+					 * tuple.
 					 */
-					if (node->js.jointype == JOIN_SEMI)
+					if (node->js.inner_unique)
 						node->hj_JoinState = HJ_NEED_NEW_OUTER;
 
 					if (otherqual == NIL ||
@@ -451,6 +452,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
 	hjstate = makeNode(HashJoinState);
 	hjstate->js.ps.plan = (Plan *) node;
 	hjstate->js.ps.state = estate;
+	hjstate->js.inner_unique = node->join.inner_unique;
 
 	/*
 	 * Miscellaneous initialization
@@ -498,8 +500,10 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
 	/* set up null tuples for outer joins, if needed */
 	switch (node->join.jointype)
 	{
-		case JOIN_INNER:
 		case JOIN_SEMI:
+			hjstate->js.inner_unique = true;
+			/* fall through */
+		case JOIN_INNER:
 			break;
 		case JOIN_LEFT:
 		case JOIN_ANTI:
diff --git a/src/backend/executor/nodeMergejoin.c b/src/backend/executor/nodeMergejoin.c
index 15742c5..3c21ffe 100644
--- a/src/backend/executor/nodeMergejoin.c
+++ b/src/backend/executor/nodeMergejoin.c
@@ -840,10 +840,11 @@ ExecMergeJoin(MergeJoinState *node)
 					}
 
 					/*
-					 * In a semijoin, we'll consider returning the first
-					 * match, but after that we're done with this outer tuple.
+					 * We'll consider returning the first match if the inner
+					 * is unique, but after that we're done with this outer
+					 * tuple.
 					 */
-					if (node->js.jointype == JOIN_SEMI)
+					if (node->js.inner_unique)
 						node->mj_JoinState = EXEC_MJ_NEXTOUTER;
 
 					qualResult = (otherqual == NIL ||
@@ -1486,6 +1487,8 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate, int eflags)
 	mergestate->js.ps.plan = (Plan *) node;
 	mergestate->js.ps.state = estate;
 
+	mergestate->js.inner_unique = node->join.inner_unique;
+
 	/*
 	 * Miscellaneous initialization
 	 *
@@ -1553,8 +1556,10 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate, int eflags)
 
 	switch (node->join.jointype)
 	{
-		case JOIN_INNER:
 		case JOIN_SEMI:
+			mergestate->js.inner_unique = true;
+			/* fall through */
+		case JOIN_INNER:
 			mergestate->mj_FillOuter = false;
 			mergestate->mj_FillInner = false;
 			break;
diff --git a/src/backend/executor/nodeNestloop.c b/src/backend/executor/nodeNestloop.c
index e66bcda..342c448 100644
--- a/src/backend/executor/nodeNestloop.c
+++ b/src/backend/executor/nodeNestloop.c
@@ -247,10 +247,10 @@ ExecNestLoop(NestLoopState *node)
 			}
 
 			/*
-			 * In a semijoin, we'll consider returning the first match, but
-			 * after that we're done with this outer tuple.
+			 * We'll consider returning the first match if the inner is
+			 * unique, but after that we're done with this outer tuple.
 			 */
-			if (node->js.jointype == JOIN_SEMI)
+			if (node->js.inner_unique)
 				node->nl_NeedNewOuter = true;
 
 			if (otherqual == NIL || ExecQual(otherqual, econtext, false))
@@ -310,6 +310,8 @@ ExecInitNestLoop(NestLoop *node, EState *estate, int eflags)
 	nlstate->js.ps.plan = (Plan *) node;
 	nlstate->js.ps.state = estate;
 
+	nlstate->js.inner_unique = node->join.inner_unique;
+
 	/*
 	 * Miscellaneous initialization
 	 *
@@ -354,8 +356,10 @@ ExecInitNestLoop(NestLoop *node, EState *estate, int eflags)
 
 	switch (node->join.jointype)
 	{
-		case JOIN_INNER:
 		case JOIN_SEMI:
+			nlstate->js.inner_unique = true;
+			/* fall through */
+		case JOIN_INNER:
 			break;
 		case JOIN_LEFT:
 		case JOIN_ANTI:
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index 1da953f..52e8693 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -49,7 +49,8 @@ static List *select_mergejoin_clauses(PlannerInfo *root,
 						 List *restrictlist,
 						 JoinType jointype,
 						 bool *mergejoin_allowed);
-
+static inline bool clause_sides_match_join(RestrictInfo *rinfo, RelOptInfo *outerrel,
+										   RelOptInfo *innerrel);
 
 /*
  * add_paths_to_joinrel
@@ -260,6 +261,51 @@ add_paths_to_joinrel(PlannerInfo *root,
 							 restrictlist, jointype,
 							 sjinfo, &semifactors,
 							 param_source_rels, extra_lateral_rels);
+
+	/*
+	 * We can optimize inner loop execution for joins on which the inner rel
+	 * is unique on the restrictlist.
+	 */
+	if (jointype == JOIN_INNER &&
+ 		innerrel->rtekind == RTE_RELATION &&
+		restrictlist)
+	{
+		/* relation_has_unique_index_for may add some restrictions */
+		int org_len = list_length(restrictlist);
+		ListCell *lc;
+
+		/*
+		 * relation_has_unique_index_for requires restrict infos to have
+		 * correct clause direction info
+		 */
+		foreach (lc, restrictlist)
+		{
+			clause_sides_match_join((RestrictInfo *) lfirst(lc),
+									outerrel, innerrel);
+		}
+		if (relation_has_unique_index_for(root, innerrel, restrictlist,
+										  NIL, NIL))
+		{
+			/*
+			 * OK, this join has the unique inner rel, so mark the paths added
+			 * now that the inner is unique
+			 */
+			foreach (lc, joinrel->pathlist)
+			{
+				JoinPath *jp = (JoinPath *) lfirst(lc);
+
+				/*
+				 * This relies on that add_paths_to_joinrel won't be called
+				 * with same outer/inner rels for dirrerent restrictlist.
+				 */
+				if (jp->outerjoinpath->parent == outerrel &&
+					jp->innerjoinpath->parent == innerrel)
+					jp->inner_unique = true;
+			}
+		}
+		/* Remove restirictions added by the function */
+		list_truncate(restrictlist, org_len);
+	}
 }
 
 /*
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index cb69c03..ea08695 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -131,13 +131,12 @@ static BitmapAnd *make_bitmap_and(List *bitmapplans);
 static BitmapOr *make_bitmap_or(List *bitmapplans);
 static NestLoop *make_nestloop(List *tlist,
 			  List *joinclauses, List *otherclauses, List *nestParams,
-			  Plan *lefttree, Plan *righttree,
-			  JoinType jointype);
+			  Plan *lefttree, Plan *righttree, JoinPath *jpath);
 static HashJoin *make_hashjoin(List *tlist,
 			  List *joinclauses, List *otherclauses,
 			  List *hashclauses,
 			  Plan *lefttree, Plan *righttree,
-			  JoinType jointype);
+			  JoinPath *jpath);
 static Hash *make_hash(Plan *lefttree,
 		  Oid skewTable,
 		  AttrNumber skewColumn,
@@ -152,7 +151,7 @@ static MergeJoin *make_mergejoin(List *tlist,
 			   int *mergestrategies,
 			   bool *mergenullsfirst,
 			   Plan *lefttree, Plan *righttree,
-			   JoinType jointype);
+			   JoinPath *jpath);
 static Sort *make_sort(PlannerInfo *root, Plan *lefttree, int numCols,
 		  AttrNumber *sortColIdx, Oid *sortOperators,
 		  Oid *collations, bool *nullsFirst,
@@ -2192,7 +2191,7 @@ create_nestloop_plan(PlannerInfo *root,
 							  nestParams,
 							  outer_plan,
 							  inner_plan,
-							  best_path->jointype);
+							  best_path);
 
 	copy_path_costsize(&join_plan->join.plan, &best_path->path);
 
@@ -2486,7 +2485,7 @@ create_mergejoin_plan(PlannerInfo *root,
 							   mergenullsfirst,
 							   outer_plan,
 							   inner_plan,
-							   best_path->jpath.jointype);
+							   &best_path->jpath);
 
 	/* Costs of sort and material steps are included in path cost already */
 	copy_path_costsize(&join_plan->join.plan, &best_path->jpath.path);
@@ -2612,7 +2611,7 @@ create_hashjoin_plan(PlannerInfo *root,
 							  hashclauses,
 							  outer_plan,
 							  (Plan *) hash_plan,
-							  best_path->jpath.jointype);
+							  &best_path->jpath);
 
 	copy_path_costsize(&join_plan->join.plan, &best_path->jpath.path);
 
@@ -3717,7 +3716,7 @@ make_nestloop(List *tlist,
 			  List *nestParams,
 			  Plan *lefttree,
 			  Plan *righttree,
-			  JoinType jointype)
+			  JoinPath *jpath)
 {
 	NestLoop   *node = makeNode(NestLoop);
 	Plan	   *plan = &node->join.plan;
@@ -3727,8 +3726,9 @@ make_nestloop(List *tlist,
 	plan->qual = otherclauses;
 	plan->lefttree = lefttree;
 	plan->righttree = righttree;
-	node->join.jointype = jointype;
+	node->join.jointype = jpath->jointype;
 	node->join.joinqual = joinclauses;
+	node->join.inner_unique = jpath->inner_unique;
 	node->nestParams = nestParams;
 
 	return node;
@@ -3741,7 +3741,7 @@ make_hashjoin(List *tlist,
 			  List *hashclauses,
 			  Plan *lefttree,
 			  Plan *righttree,
-			  JoinType jointype)
+			  JoinPath *jpath)
 {
 	HashJoin   *node = makeNode(HashJoin);
 	Plan	   *plan = &node->join.plan;
@@ -3752,8 +3752,9 @@ make_hashjoin(List *tlist,
 	plan->lefttree = lefttree;
 	plan->righttree = righttree;
 	node->hashclauses = hashclauses;
-	node->join.jointype = jointype;
+	node->join.jointype = jpath->jointype;
 	node->join.joinqual = joinclauses;
+	node->join.inner_unique = jpath->inner_unique;
 
 	return node;
 }
@@ -3801,7 +3802,7 @@ make_mergejoin(List *tlist,
 			   bool *mergenullsfirst,
 			   Plan *lefttree,
 			   Plan *righttree,
-			   JoinType jointype)
+			   JoinPath *jpath)
 {
 	MergeJoin  *node = makeNode(MergeJoin);
 	Plan	   *plan = &node->join.plan;
@@ -3816,8 +3817,9 @@ make_mergejoin(List *tlist,
 	node->mergeCollations = mergecollations;
 	node->mergeStrategies = mergestrategies;
 	node->mergeNullsFirst = mergenullsfirst;
-	node->join.jointype = jointype;
+	node->join.jointype = jpath->jointype;
 	node->join.joinqual = joinclauses;
+	node->join.inner_unique = jpath->inner_unique;
 
 	return node;
 }
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 59b17f3..f86f806 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1562,6 +1562,7 @@ typedef struct JoinState
 	PlanState	ps;
 	JoinType	jointype;
 	List	   *joinqual;		/* JOIN quals (in addition to ps.qual) */
+	bool		inner_unique;
 } JoinState;
 
 /* ----------------
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 21cbfa8..122f2f4 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -543,6 +543,7 @@ typedef struct Join
 	Plan		plan;
 	JoinType	jointype;
 	List	   *joinqual;		/* JOIN quals (in addition to plan.qual) */
+	bool		inner_unique;
 } Join;
 
 /* ----------------
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index 334cf51..c1ebfdb 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -1030,6 +1030,7 @@ typedef struct JoinPath
 
 	Path	   *outerjoinpath;	/* path for the outer side of the join */
 	Path	   *innerjoinpath;	/* path for the inner side of the join */
+	bool		inner_unique;
 
 	List	   *joinrestrictinfo;		/* RestrictInfos to apply to join */
 
diff --git a/src/test/regress/expected/equivclass.out b/src/test/regress/expected/equivclass.out
index dfae84e..ad1d673 100644
--- a/src/test/regress/expected/equivclass.out
+++ b/src/test/regress/expected/equivclass.out
@@ -186,7 +186,7 @@ explain (costs off)
   select * from ec1, ec2 where ff = x1 and x1 = '42'::int8alias2;
                QUERY PLAN                
 -----------------------------------------
- Nested Loop
+ Nested Loop(inner unique)
    ->  Seq Scan on ec2
          Filter: (x1 = '42'::int8alias2)
    ->  Index Scan using ec1_pkey on ec1
@@ -310,7 +310,7 @@ explain (costs off)
          ->  Index Scan using ec1_expr3 on ec1 ec1_5
          ->  Index Scan using ec1_expr4 on ec1 ec1_6
    ->  Materialize
-         ->  Merge Join
+         ->  Merge Join(inner unique)
                Merge Cond: ((((ec1_1.ff + 2) + 1)) = ec1.f1)
                ->  Merge Append
                      Sort Key: (((ec1_1.ff + 2) + 1))
@@ -365,7 +365,7 @@ explain (costs off)
   where ss1.x = ec1.f1 and ec1.ff = 42::int8;
                      QUERY PLAN                      
 -----------------------------------------------------
- Merge Join
+ Merge Join(inner unique)
    Merge Cond: ((((ec1_1.ff + 2) + 1)) = ec1.f1)
    ->  Merge Append
          Sort Key: (((ec1_1.ff + 2) + 1))
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 57fc910..b6e3024 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -2614,8 +2614,8 @@ from nt3 as nt3
 where nt3.id = 1 and ss2.b3;
                   QUERY PLAN                   
 -----------------------------------------------
- Nested Loop
-   ->  Nested Loop
+ Nested Loop(inner unique)
+   ->  Nested Loop(inner unique)
          ->  Index Scan using nt3_pkey on nt3
                Index Cond: (id = 1)
          ->  Index Scan using nt2_pkey on nt2
diff --git a/src/test/regress/expected/rowsecurity.out b/src/test/regress/expected/rowsecurity.out
index f41bef1..aaf585d 100644
--- a/src/test/regress/expected/rowsecurity.out
+++ b/src/test/regress/expected/rowsecurity.out
@@ -248,7 +248,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM document WHERE f_leak(dtitle);
 EXPLAIN (COSTS OFF) SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle);
                      QUERY PLAN                     
 ----------------------------------------------------
- Nested Loop
+ Nested Loop(inner unique)
    ->  Subquery Scan on document
          Filter: f_leak(document.dtitle)
          ->  Seq Scan on document document_1
diff --git a/src/test/regress/expected/select_views_1.out b/src/test/regress/expected/select_views_1.out
index ce22bfa..a37bde4 100644
--- a/src/test/regress/expected/select_views_1.out
+++ b/src/test/regress/expected/select_views_1.out
@@ -1365,7 +1365,7 @@ NOTICE:  f_leak => 9801-2345-6789-0123
 EXPLAIN (COSTS OFF) SELECT * FROM my_credit_card_normal WHERE f_leak(cnum);
                        QUERY PLAN                        
 ---------------------------------------------------------
- Hash Join
+ Hash Join(inner unique)
    Hash Cond: (r.cid = l.cid)
    ->  Seq Scan on credit_card r
          Filter: f_leak(cnum)
@@ -1386,7 +1386,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM my_credit_card_secure WHERE f_leak(cnum);
 ---------------------------------------------------------------
  Subquery Scan on my_credit_card_secure
    Filter: f_leak(my_credit_card_secure.cnum)
-   ->  Hash Join
+   ->  Hash Join(inner unique)
          Hash Cond: (r.cid = l.cid)
          ->  Seq Scan on credit_card r
          ->  Hash
@@ -1420,7 +1420,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM my_credit_card_usage_normal
    ->  Materialize
          ->  Subquery Scan on l
                Filter: f_leak(l.cnum)
-               ->  Hash Join
+               ->  Hash Join(inner unique)
                      Hash Cond: (r_1.cid = l_1.cid)
                      ->  Seq Scan on credit_card r_1
                      ->  Hash
@@ -1451,7 +1451,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM my_credit_card_usage_secure
          ->  Seq Scan on credit_usage r
                Filter: ((ymd >= '10-01-2011'::date) AND (ymd < '11-01-2011'::date))
          ->  Materialize
-               ->  Hash Join
+               ->  Hash Join(inner unique)
                      Hash Cond: (r_1.cid = l.cid)
                      ->  Seq Scan on credit_card r_1
                      ->  Hash
-- 
2.1.0.GIT

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to