Andreas Seltenreich <seltenre...@gmx.de> writes:
> Tom Lane writes:
>> [2. transitive-lateral-fixes-1.patch]

> I was about to write that sqlsmith likes the patch, but after more than
> 10^8 ok queries the attached ones were generated.

Ah-hah --- the new check I added in join_is_legal understood about
chains of LATERAL references, but it forgot that we could also have chains
of outer-join ordering constraints.  When we're looking to see if joining
on the basis of a LATERAL reference would break some later outer join, we
have to look at outer joins to the outer joins' inner relations, too.

Fixed in the attached.  I also stuck all of join_is_legal's
lateral-related checks inside an "if (root->hasLateralRTEs)" block,
which will save some time in typical queries with no LATERAL.  That
makes that section of the patch a bit bigger than before, but it's
mostly just reindentation.

Many thanks for the work you've done with this tool!  Who knows how
long it would've taken us to find these problems otherwise ...

                        regards, tom lane

diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index 0f04033..53d8fdd 100644
*** a/src/backend/optimizer/path/joinpath.c
--- b/src/backend/optimizer/path/joinpath.c
*************** allow_star_schema_join(PlannerInfo *root
*** 255,308 ****
  }
  
  /*
-  * There's a pitfall for creating parameterized nestloops: suppose the inner
-  * rel (call it A) has a parameter that is a PlaceHolderVar, and that PHV's
-  * minimum eval_at set includes the outer rel (B) and some third rel (C).
-  * We might think we could create a B/A nestloop join that's parameterized by
-  * C.  But we would end up with a plan in which the PHV's expression has to be
-  * evaluated as a nestloop parameter at the B/A join; and the executor is only
-  * set up to handle simple Vars as NestLoopParams.  Rather than add complexity
-  * and overhead to the executor for such corner cases, it seems better to
-  * forbid the join.  (Note that existence of such a PHV probably means there
-  * is a join order constraint that will cause us to consider joining B and C
-  * directly; so we can still make use of A's parameterized path with B+C.)
-  * So we check whether any PHVs used in the query could pose such a hazard.
-  * We don't have any simple way of checking whether a risky PHV would actually
-  * be used in the inner plan, and the case is so unusual that it doesn't seem
-  * worth working very hard on it.
-  *
-  * This case can occur whether or not the join's remaining parameterization
-  * overlaps param_source_rels, so we have to check for it separately from
-  * allow_star_schema_join, even though it looks much like a star-schema case.
-  */
- static inline bool
- check_hazardous_phv(PlannerInfo *root,
- 					Path *outer_path,
- 					Path *inner_path)
- {
- 	Relids		innerparams = PATH_REQ_OUTER(inner_path);
- 	Relids		outerrelids = outer_path->parent->relids;
- 	ListCell   *lc;
- 
- 	foreach(lc, root->placeholder_list)
- 	{
- 		PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc);
- 
- 		if (!bms_is_subset(phinfo->ph_eval_at, innerparams))
- 			continue;			/* ignore, could not be a nestloop param */
- 		if (!bms_overlap(phinfo->ph_eval_at, outerrelids))
- 			continue;			/* ignore, not relevant to this join */
- 		if (bms_is_subset(phinfo->ph_eval_at, outerrelids))
- 			continue;			/* safe, it can be eval'd within outerrel */
- 		/* Otherwise, it's potentially unsafe, so reject the join */
- 		return false;
- 	}
- 
- 	/* OK to perform the join */
- 	return true;
- }
- 
- /*
   * try_nestloop_path
   *	  Consider a nestloop join path; if it appears useful, push it into
   *	  the joinrel's pathlist via add_path().
--- 255,260 ----
*************** try_nestloop_path(PlannerInfo *root,
*** 322,336 ****
  	/*
  	 * Check to see if proposed path is still parameterized, and reject if the
  	 * parameterization wouldn't be sensible --- unless allow_star_schema_join
! 	 * says to allow it anyway.  Also, we must reject if check_hazardous_phv
! 	 * doesn't like the look of it.
  	 */
  	required_outer = calc_nestloop_required_outer(outer_path,
  												  inner_path);
  	if (required_outer &&
  		((!bms_overlap(required_outer, extra->param_source_rels) &&
  		  !allow_star_schema_join(root, outer_path, inner_path)) ||
! 		 !check_hazardous_phv(root, outer_path, inner_path)))
  	{
  		/* Waste no memory when we reject a path here */
  		bms_free(required_outer);
--- 274,291 ----
  	/*
  	 * Check to see if proposed path is still parameterized, and reject if the
  	 * parameterization wouldn't be sensible --- unless allow_star_schema_join
! 	 * says to allow it anyway.  Also, we must reject if have_dangerous_phv
! 	 * doesn't like the look of it, which could only happen if the nestloop is
! 	 * still parameterized.
  	 */
  	required_outer = calc_nestloop_required_outer(outer_path,
  												  inner_path);
  	if (required_outer &&
  		((!bms_overlap(required_outer, extra->param_source_rels) &&
  		  !allow_star_schema_join(root, outer_path, inner_path)) ||
! 		 have_dangerous_phv(root,
! 							outer_path->parent->relids,
! 							PATH_REQ_OUTER(inner_path))))
  	{
  		/* Waste no memory when we reject a path here */
  		bms_free(required_outer);
*************** try_nestloop_path(PlannerInfo *root,
*** 338,353 ****
  	}
  
  	/*
- 	 * Independently of that, add parameterization needed for any
- 	 * PlaceHolderVars that need to be computed at the join.  We can handle
- 	 * that just by adding joinrel->lateral_relids; that might include some
- 	 * rels that are already in required_outer, but no harm done.  (Note that
- 	 * lateral_relids is exactly NULL if empty, so this will not break the
- 	 * property that required_outer is too.)
- 	 */
- 	required_outer = bms_add_members(required_outer, joinrel->lateral_relids);
- 
- 	/*
  	 * Do a precheck to quickly eliminate obviously-inferior paths.  We
  	 * calculate a cheap lower bound on the path's cost and then use
  	 * add_path_precheck() to see if the path is clearly going to be dominated
--- 293,298 ----
*************** try_mergejoin_path(PlannerInfo *root,
*** 419,430 ****
  	}
  
  	/*
- 	 * Independently of that, add parameterization needed for any
- 	 * PlaceHolderVars that need to be computed at the join.
- 	 */
- 	required_outer = bms_add_members(required_outer, joinrel->lateral_relids);
- 
- 	/*
  	 * If the given paths are already well enough ordered, we can skip doing
  	 * an explicit sort.
  	 */
--- 364,369 ----
*************** try_hashjoin_path(PlannerInfo *root,
*** 501,512 ****
  	}
  
  	/*
- 	 * Independently of that, add parameterization needed for any
- 	 * PlaceHolderVars that need to be computed at the join.
- 	 */
- 	required_outer = bms_add_members(required_outer, joinrel->lateral_relids);
- 
- 	/*
  	 * See comments in try_nestloop_path().  Also note that hashjoin paths
  	 * never have any output pathkeys, per comments in create_hashjoin_path.
  	 */
--- 440,445 ----
diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c
index 9f0212f..e57b8e2 100644
*** a/src/backend/optimizer/path/joinrels.c
--- b/src/backend/optimizer/path/joinrels.c
*************** join_is_legal(PlannerInfo *root, RelOptI
*** 332,340 ****
  	bool		reversed;
  	bool		unique_ified;
  	bool		must_be_leftjoin;
- 	bool		lateral_fwd;
- 	bool		lateral_rev;
- 	Relids		join_lateral_rels;
  	ListCell   *l;
  
  	/*
--- 332,337 ----
*************** join_is_legal(PlannerInfo *root, RelOptI
*** 527,601 ****
  
  	/*
  	 * We also have to check for constraints imposed by LATERAL references.
- 	 * The proposed rels could each contain lateral references to the other,
- 	 * in which case the join is impossible.  If there are lateral references
- 	 * in just one direction, then the join has to be done with a nestloop
- 	 * with the lateral referencer on the inside.  If the join matches an SJ
- 	 * that cannot be implemented by such a nestloop, the join is impossible.
  	 */
! 	lateral_fwd = lateral_rev = false;
! 	foreach(l, root->lateral_info_list)
  	{
! 		LateralJoinInfo *ljinfo = (LateralJoinInfo *) lfirst(l);
  
! 		if (bms_is_subset(ljinfo->lateral_rhs, rel2->relids) &&
! 			bms_overlap(ljinfo->lateral_lhs, rel1->relids))
  		{
  			/* has to be implemented as nestloop with rel1 on left */
- 			if (lateral_rev)
- 				return false;	/* have lateral refs in both directions */
- 			lateral_fwd = true;
- 			if (!bms_is_subset(ljinfo->lateral_lhs, rel1->relids))
- 				return false;	/* rel1 can't compute the required parameter */
  			if (match_sjinfo &&
  				(reversed ||
  				 unique_ified ||
  				 match_sjinfo->jointype == JOIN_FULL))
  				return false;	/* not implementable as nestloop */
  		}
! 		if (bms_is_subset(ljinfo->lateral_rhs, rel1->relids) &&
! 			bms_overlap(ljinfo->lateral_lhs, rel2->relids))
  		{
  			/* has to be implemented as nestloop with rel2 on left */
- 			if (lateral_fwd)
- 				return false;	/* have lateral refs in both directions */
- 			lateral_rev = true;
- 			if (!bms_is_subset(ljinfo->lateral_lhs, rel2->relids))
- 				return false;	/* rel2 can't compute the required parameter */
  			if (match_sjinfo &&
  				(!reversed ||
  				 unique_ified ||
  				 match_sjinfo->jointype == JOIN_FULL))
  				return false;	/* not implementable as nestloop */
  		}
- 	}
  
! 	/*
! 	 * LATERAL references could also cause problems later on if we accept this
! 	 * join: if the join's minimum parameterization includes any rels that
! 	 * would have to be on the inside of an outer join with this join rel,
! 	 * then it's never going to be possible to build the complete query using
! 	 * this join.  We should reject this join not only because it'll save
! 	 * work, but because if we don't, the clauseless-join heuristics might
! 	 * think that legality of this join means that some other join rel need
! 	 * not be formed, and that could lead to failure to find any plan at all.
! 	 * It seems best not to merge this check into the main loop above, because
! 	 * it is concerned with SJs that are not otherwise relevant to this join.
! 	 */
! 	join_lateral_rels = min_join_parameterization(root, joinrelids);
! 	if (join_lateral_rels)
! 	{
! 		foreach(l, root->join_info_list)
  		{
! 			SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
  
! 			if (bms_overlap(sjinfo->min_righthand, join_lateral_rels) &&
! 				bms_overlap(sjinfo->min_lefthand, joinrelids))
! 				return false;	/* will not be able to join to min_righthand */
! 			if (sjinfo->jointype == JOIN_FULL &&
! 				bms_overlap(sjinfo->min_lefthand, join_lateral_rels) &&
! 				bms_overlap(sjinfo->min_righthand, joinrelids))
! 				return false;	/* will not be able to join to min_lefthand */
  		}
  	}
  
--- 524,644 ----
  
  	/*
  	 * We also have to check for constraints imposed by LATERAL references.
  	 */
! 	if (root->hasLateralRTEs)
  	{
! 		bool		lateral_fwd;
! 		bool		lateral_rev;
! 		Relids		join_lateral_rels;
  
! 		/*
! 		 * The proposed rels could each contain lateral references to the
! 		 * other, in which case the join is impossible.  If there are lateral
! 		 * references in just one direction, then the join has to be done with
! 		 * a nestloop with the lateral referencer on the inside.  If the join
! 		 * matches an SJ that cannot be implemented by such a nestloop, the
! 		 * join is impossible.  Another case that might keep us from building
! 		 * a valid plan is the implementation restriction described by
! 		 * have_dangerous_phv().
! 		 */
! 		lateral_fwd = bms_overlap(rel1->relids, rel2->lateral_relids);
! 		lateral_rev = bms_overlap(rel2->relids, rel1->lateral_relids);
! 		if (lateral_fwd && lateral_rev)
! 			return false;		/* have lateral refs in both directions */
! 		if (lateral_fwd)
  		{
  			/* has to be implemented as nestloop with rel1 on left */
  			if (match_sjinfo &&
  				(reversed ||
  				 unique_ified ||
  				 match_sjinfo->jointype == JOIN_FULL))
  				return false;	/* not implementable as nestloop */
+ 			/* check there is a direct reference from rel2 to rel1 */
+ 			foreach(l, root->lateral_info_list)
+ 			{
+ 				LateralJoinInfo *ljinfo = (LateralJoinInfo *) lfirst(l);
+ 
+ 				if (bms_is_subset(ljinfo->lateral_rhs, rel2->relids) &&
+ 					bms_is_subset(ljinfo->lateral_lhs, rel1->relids))
+ 					break;
+ 			}
+ 			if (l == NULL)
+ 				return false;	/* only indirect refs, so reject */
+ 			/* check we won't have a dangerous PHV */
+ 			if (have_dangerous_phv(root, rel1->relids, rel2->lateral_relids))
+ 				return false;	/* might be unable to handle required PHV */
  		}
! 		else if (lateral_rev)
  		{
  			/* has to be implemented as nestloop with rel2 on left */
  			if (match_sjinfo &&
  				(!reversed ||
  				 unique_ified ||
  				 match_sjinfo->jointype == JOIN_FULL))
  				return false;	/* not implementable as nestloop */
+ 			/* check there is a direct reference from rel1 to rel2 */
+ 			foreach(l, root->lateral_info_list)
+ 			{
+ 				LateralJoinInfo *ljinfo = (LateralJoinInfo *) lfirst(l);
+ 
+ 				if (bms_is_subset(ljinfo->lateral_rhs, rel1->relids) &&
+ 					bms_is_subset(ljinfo->lateral_lhs, rel2->relids))
+ 					break;
+ 			}
+ 			if (l == NULL)
+ 				return false;	/* only indirect refs, so reject */
+ 			/* check we won't have a dangerous PHV */
+ 			if (have_dangerous_phv(root, rel2->relids, rel1->lateral_relids))
+ 				return false;	/* might be unable to handle required PHV */
  		}
  
! 		/*
! 		 * LATERAL references could also cause problems later on if we accept
! 		 * this join: if the join's minimum parameterization includes any rels
! 		 * that would have to be on the inside of an outer join with this join
! 		 * rel, then it's never going to be possible to build the complete
! 		 * query using this join.  We should reject this join not only because
! 		 * it'll save work, but because if we don't, the clauseless-join
! 		 * heuristics might think that legality of this join means that some
! 		 * other join rel need not be formed, and that could lead to failure
! 		 * to find any plan at all.  We have to consider not only rels that
! 		 * are directly on the inner side of an OJ with the joinrel, but also
! 		 * ones that are indirectly so, so search to find all such rels.
! 		 */
! 		join_lateral_rels = min_join_parameterization(root, joinrelids,
! 													  rel1, rel2);
! 		if (join_lateral_rels)
  		{
! 			Relids		join_plus_rhs = bms_copy(joinrelids);
! 			bool		more;
  
! 			do
! 			{
! 				more = false;
! 				foreach(l, root->join_info_list)
! 				{
! 					SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
! 
! 					if (bms_overlap(sjinfo->min_lefthand, join_plus_rhs) &&
! 						!bms_is_subset(sjinfo->min_righthand, join_plus_rhs))
! 					{
! 						join_plus_rhs = bms_add_members(join_plus_rhs,
! 													  sjinfo->min_righthand);
! 						more = true;
! 					}
! 					/* full joins constrain both sides symmetrically */
! 					if (sjinfo->jointype == JOIN_FULL &&
! 						bms_overlap(sjinfo->min_righthand, join_plus_rhs) &&
! 						!bms_is_subset(sjinfo->min_lefthand, join_plus_rhs))
! 					{
! 						join_plus_rhs = bms_add_members(join_plus_rhs,
! 														sjinfo->min_lefthand);
! 						more = true;
! 					}
! 				}
! 			} while (more);
! 			if (bms_overlap(join_plus_rhs, join_lateral_rels))
! 				return false;	/* will not be able to join to some RHS rel */
  		}
  	}
  
*************** make_join_rel(PlannerInfo *root, RelOptI
*** 852,859 ****
   * could be merged with that function, but it seems clearer to separate the
   * two concerns.  We need this test because there are degenerate cases where
   * a clauseless join must be performed to satisfy join-order restrictions.
!  * Also, if one rel has a lateral reference to the other, we should consider
!  * joining them even if the join would be clauseless.
   *
   * Note: this is only a problem if one side of a degenerate outer join
   * contains multiple rels, or a clauseless join is required within an
--- 895,903 ----
   * could be merged with that function, but it seems clearer to separate the
   * two concerns.  We need this test because there are degenerate cases where
   * a clauseless join must be performed to satisfy join-order restrictions.
!  * Also, if one rel has a lateral reference to the other, or both are needed
!  * to compute some PHV, we should consider joining them even if the join would
!  * be clauseless.
   *
   * Note: this is only a problem if one side of a degenerate outer join
   * contains multiple rels, or a clauseless join is required within an
*************** have_join_order_restriction(PlannerInfo 
*** 870,877 ****
  	ListCell   *l;
  
  	/*
! 	 * If either side has a lateral reference to the other, attempt the join
! 	 * regardless of outer-join considerations.
  	 */
  	foreach(l, root->lateral_info_list)
  	{
--- 914,921 ----
  	ListCell   *l;
  
  	/*
! 	 * If either side has a direct lateral reference to the other, attempt the
! 	 * join regardless of outer-join considerations.
  	 */
  	foreach(l, root->lateral_info_list)
  	{
*************** have_join_order_restriction(PlannerInfo 
*** 886,891 ****
--- 930,951 ----
  	}
  
  	/*
+ 	 * Likewise, if both rels are needed to compute some PlaceHolderVar,
+ 	 * attempt the join regardless of outer-join considerations.  (This is not
+ 	 * very desirable, because a PHV with a large eval_at set will cause a lot
+ 	 * of probably-useless joins to be considered, but failing to do this can
+ 	 * cause us to fail to construct a plan at all.)
+ 	 */
+ 	foreach(l, root->placeholder_list)
+ 	{
+ 		PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
+ 
+ 		if (bms_is_subset(rel1->relids, phinfo->ph_eval_at) &&
+ 			bms_is_subset(rel2->relids, phinfo->ph_eval_at))
+ 			return true;
+ 	}
+ 
+ 	/*
  	 * It's possible that the rels correspond to the left and right sides of a
  	 * degenerate outer join, that is, one with no joinclause mentioning the
  	 * non-nullable side; in which case we should force the join to occur.
*************** have_join_order_restriction(PlannerInfo 
*** 960,966 ****
   * has_join_restriction
   *		Detect whether the specified relation has join-order restrictions,
   *		due to being inside an outer join or an IN (sub-SELECT),
!  *		or participating in any LATERAL references.
   *
   * Essentially, this tests whether have_join_order_restriction() could
   * succeed with this rel and some other one.  It's OK if we sometimes
--- 1020,1026 ----
   * has_join_restriction
   *		Detect whether the specified relation has join-order restrictions,
   *		due to being inside an outer join or an IN (sub-SELECT),
!  *		or participating in any LATERAL references or multi-rel PHVs.
   *
   * Essentially, this tests whether have_join_order_restriction() could
   * succeed with this rel and some other one.  It's OK if we sometimes
*************** has_join_restriction(PlannerInfo *root, 
*** 972,983 ****
  {
  	ListCell   *l;
  
! 	foreach(l, root->lateral_info_list)
  	{
! 		LateralJoinInfo *ljinfo = (LateralJoinInfo *) lfirst(l);
  
! 		if (bms_is_subset(ljinfo->lateral_rhs, rel->relids) ||
! 			bms_overlap(ljinfo->lateral_lhs, rel->relids))
  			return true;
  	}
  
--- 1032,1046 ----
  {
  	ListCell   *l;
  
! 	if (rel->lateral_relids != NULL || rel->lateral_referencers != NULL)
! 		return true;
! 
! 	foreach(l, root->placeholder_list)
  	{
! 		PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
  
! 		if (bms_is_subset(rel->relids, phinfo->ph_eval_at) &&
! 			!bms_equal(rel->relids, phinfo->ph_eval_at))
  			return true;
  	}
  
*************** has_legal_joinclause(PlannerInfo *root, 
*** 1059,1064 ****
--- 1122,1178 ----
  
  
  /*
+  * There's a pitfall for creating parameterized nestloops: suppose the inner
+  * rel (call it A) has a parameter that is a PlaceHolderVar, and that PHV's
+  * minimum eval_at set includes the outer rel (B) and some third rel (C).
+  * We might think we could create a B/A nestloop join that's parameterized by
+  * C.  But we would end up with a plan in which the PHV's expression has to be
+  * evaluated as a nestloop parameter at the B/A join; and the executor is only
+  * set up to handle simple Vars as NestLoopParams.  Rather than add complexity
+  * and overhead to the executor for such corner cases, it seems better to
+  * forbid the join.  (Note that we can still make use of A's parameterized
+  * path with pre-joined B+C as the outer rel.  have_join_order_restriction()
+  * ensures that we will consider making such a join even if there are not
+  * other reasons to do so.)
+  *
+  * So we check whether any PHVs used in the query could pose such a hazard.
+  * We don't have any simple way of checking whether a risky PHV would actually
+  * be used in the inner plan, and the case is so unusual that it doesn't seem
+  * worth working very hard on it.
+  *
+  * This needs to be checked in two places.  If the inner rel's minimum
+  * parameterization would trigger the restriction, then join_is_legal() should
+  * reject the join altogether, because there will be no workable paths for it.
+  * But joinpath.c has to check again for every proposed nestloop path, because
+  * the inner path might have more than the minimum parameterization, causing
+  * some PHV to be dangerous for it that otherwise wouldn't be.
+  */
+ bool
+ have_dangerous_phv(PlannerInfo *root,
+ 				   Relids outer_relids, Relids inner_params)
+ {
+ 	ListCell   *lc;
+ 
+ 	foreach(lc, root->placeholder_list)
+ 	{
+ 		PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc);
+ 
+ 		if (!bms_is_subset(phinfo->ph_eval_at, inner_params))
+ 			continue;			/* ignore, could not be a nestloop param */
+ 		if (!bms_overlap(phinfo->ph_eval_at, outer_relids))
+ 			continue;			/* ignore, not relevant to this join */
+ 		if (bms_is_subset(phinfo->ph_eval_at, outer_relids))
+ 			continue;			/* safe, it can be eval'd within outerrel */
+ 		/* Otherwise, it's potentially unsafe, so reject the join */
+ 		return true;
+ 	}
+ 
+ 	/* OK to perform the join */
+ 	return false;
+ }
+ 
+ 
+ /*
   * is_dummy_rel --- has relation been proven empty?
   */
  static bool
diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c
index 2f4e818..e5688ef 100644
*** a/src/backend/optimizer/plan/initsplan.c
--- b/src/backend/optimizer/plan/initsplan.c
*************** create_lateral_join_info(PlannerInfo *ro
*** 449,457 ****
  				Assert(false);
  		}
  
! 		/* We now know all the relids needed for lateral refs in this rel */
! 		if (bms_is_empty(lateral_relids))
! 			continue;			/* ensure lateral_relids is NULL if empty */
  		brel->lateral_relids = lateral_relids;
  	}
  
--- 449,455 ----
  				Assert(false);
  		}
  
! 		/* We now have all the direct lateral refs from this rel */
  		brel->lateral_relids = lateral_relids;
  	}
  
*************** create_lateral_join_info(PlannerInfo *ro
*** 459,464 ****
--- 457,470 ----
  	 * Now check for lateral references within PlaceHolderVars, and make
  	 * LateralJoinInfos describing each such reference.  Unlike references in
  	 * unflattened LATERAL RTEs, the referencing location could be a join.
+ 	 *
+ 	 * For a PHV that is due to be evaluated at a join, we mark each of the
+ 	 * join's member baserels as having the PHV's lateral references too. Even
+ 	 * though the baserels could be scanned without considering those lateral
+ 	 * refs, we will never be able to form the join except as a path
+ 	 * parameterized by the lateral refs, so there is no point in considering
+ 	 * unparameterized paths for the baserels; and we mustn't try to join any
+ 	 * of those baserels to the lateral refs too soon, either.
  	 */
  	foreach(lc, root->placeholder_list)
  	{
*************** create_lateral_join_info(PlannerInfo *ro
*** 471,476 ****
--- 477,483 ----
  											   PVC_RECURSE_AGGREGATES,
  											   PVC_INCLUDE_PLACEHOLDERS);
  			ListCell   *lc2;
+ 			int			ev_at;
  
  			foreach(lc2, vars)
  			{
*************** create_lateral_join_info(PlannerInfo *ro
*** 500,505 ****
--- 507,521 ----
  			}
  
  			list_free(vars);
+ 
+ 			ev_at = -1;
+ 			while ((ev_at = bms_next_member(eval_at, ev_at)) >= 0)
+ 			{
+ 				RelOptInfo *brel = find_base_rel(root, ev_at);
+ 
+ 				brel->lateral_relids = bms_add_members(brel->lateral_relids,
+ 													   phinfo->ph_lateral);
+ 			}
  		}
  	}
  
*************** create_lateral_join_info(PlannerInfo *ro
*** 508,519 ****
  		return;
  
  	/*
! 	 * Now that we've identified all lateral references, make a second pass in
! 	 * which we mark each baserel with the set of relids of rels that
! 	 * reference it laterally (essentially, the inverse mapping of
! 	 * lateral_relids).  We'll need this for join_clause_is_movable_to().
  	 *
! 	 * Also, propagate lateral_relids and lateral_referencers from appendrel
  	 * parent rels to their child rels.  We intentionally give each child rel
  	 * the same minimum parameterization, even though it's quite possible that
  	 * some don't reference all the lateral rels.  This is because any append
--- 524,613 ----
  		return;
  
  	/*
! 	 * At this point the lateral_relids sets represent only direct lateral
! 	 * references.  Replace them by their transitive closure, so that they
! 	 * describe both direct and indirect lateral references.  If relation X
! 	 * references Y laterally, and Y references Z laterally, then we will have
! 	 * to scan X on the inside of a nestloop with Z, so for all intents and
! 	 * purposes X is laterally dependent on Z too.
  	 *
! 	 * This code is essentially Warshall's algorithm for transitive closure.
! 	 * The outer loop considers each baserel, and propagates its lateral
! 	 * dependencies to those baserels that have a lateral dependency on it.
! 	 */
! 	for (rti = 1; rti < root->simple_rel_array_size; rti++)
! 	{
! 		RelOptInfo *brel = root->simple_rel_array[rti];
! 		Relids		outer_lateral_relids;
! 		Index		rti2;
! 
! 		if (brel == NULL || brel->reloptkind != RELOPT_BASEREL)
! 			continue;
! 
! 		/* need not consider baserel further if it has no lateral refs */
! 		outer_lateral_relids = brel->lateral_relids;
! 		if (outer_lateral_relids == NULL)
! 			continue;
! 
! 		/* else scan all baserels */
! 		for (rti2 = 1; rti2 < root->simple_rel_array_size; rti2++)
! 		{
! 			RelOptInfo *brel2 = root->simple_rel_array[rti2];
! 
! 			if (brel2 == NULL || brel2->reloptkind != RELOPT_BASEREL)
! 				continue;
! 
! 			/* if brel2 has lateral ref to brel, propagate brel's refs */
! 			if (bms_is_member(rti, brel2->lateral_relids))
! 				brel2->lateral_relids = bms_add_members(brel2->lateral_relids,
! 														outer_lateral_relids);
! 		}
! 	}
! 
! 	/*
! 	 * Now that we've identified all lateral references, mark each baserel
! 	 * with the set of relids of rels that reference it laterally (possibly
! 	 * indirectly) --- that is, the inverse mapping of lateral_relids.
! 	 */
! 	for (rti = 1; rti < root->simple_rel_array_size; rti++)
! 	{
! 		RelOptInfo *brel = root->simple_rel_array[rti];
! 		Relids		lateral_relids;
! 		Index		rti2;
! 
! 		if (brel == NULL || brel->reloptkind != RELOPT_BASEREL)
! 			continue;
! 
! 		/* Nothing to do at rels with no lateral refs */
! 		lateral_relids = brel->lateral_relids;
! 		if (lateral_relids == NULL)
! 			continue;
! 
! 		/*
! 		 * We should not have broken the invariant that lateral_relids is
! 		 * exactly NULL if empty.
! 		 */
! 		Assert(!bms_is_empty(lateral_relids));
! 
! 		/* Also, no rel should have a lateral dependency on itself */
! 		Assert(!bms_is_member(rti, lateral_relids));
! 
! 		/* Mark this rel's referencees */
! 		for (rti2 = 1; rti2 < root->simple_rel_array_size; rti2++)
! 		{
! 			if (bms_is_member(rti2, lateral_relids))
! 			{
! 				RelOptInfo *brel2 = root->simple_rel_array[rti2];
! 
! 				Assert(brel2 != NULL && brel2->reloptkind == RELOPT_BASEREL);
! 				brel2->lateral_referencers =
! 					bms_add_member(brel2->lateral_referencers, rti);
! 			}
! 		}
! 	}
! 
! 	/*
! 	 * Last, propagate lateral_relids and lateral_referencers from appendrel
  	 * parent rels to their child rels.  We intentionally give each child rel
  	 * the same minimum parameterization, even though it's quite possible that
  	 * some don't reference all the lateral rels.  This is because any append
*************** create_lateral_join_info(PlannerInfo *ro
*** 525,549 ****
  	for (rti = 1; rti < root->simple_rel_array_size; rti++)
  	{
  		RelOptInfo *brel = root->simple_rel_array[rti];
- 		Relids		lateral_referencers;
  
! 		if (brel == NULL)
! 			continue;
! 		if (brel->reloptkind != RELOPT_BASEREL)
  			continue;
  
- 		/* Compute lateral_referencers using the finished lateral_info_list */
- 		lateral_referencers = NULL;
- 		foreach(lc, root->lateral_info_list)
- 		{
- 			LateralJoinInfo *ljinfo = (LateralJoinInfo *) lfirst(lc);
- 
- 			if (bms_is_member(brel->relid, ljinfo->lateral_lhs))
- 				lateral_referencers = bms_add_members(lateral_referencers,
- 													  ljinfo->lateral_rhs);
- 		}
- 		brel->lateral_referencers = lateral_referencers;
- 
  		/*
  		 * If it's an appendrel parent, copy its lateral_relids and
  		 * lateral_referencers to each child rel.
--- 619,628 ----
  	for (rti = 1; rti < root->simple_rel_array_size; rti++)
  	{
  		RelOptInfo *brel = root->simple_rel_array[rti];
  
! 		if (brel == NULL || brel->reloptkind != RELOPT_BASEREL)
  			continue;
  
  		/*
  		 * If it's an appendrel parent, copy its lateral_relids and
  		 * lateral_referencers to each child rel.
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index b197f14..a2be2ed 100644
*** a/src/backend/optimizer/util/relnode.c
--- b/src/backend/optimizer/util/relnode.c
*************** build_join_rel(PlannerInfo *root,
*** 373,379 ****
  	joinrel->cheapest_total_path = NULL;
  	joinrel->cheapest_unique_path = NULL;
  	joinrel->cheapest_parameterized_paths = NIL;
! 	joinrel->lateral_relids = min_join_parameterization(root, joinrel->relids);
  	joinrel->relid = 0;			/* indicates not a baserel */
  	joinrel->rtekind = RTE_JOIN;
  	joinrel->min_attr = 0;
--- 373,380 ----
  	joinrel->cheapest_total_path = NULL;
  	joinrel->cheapest_unique_path = NULL;
  	joinrel->cheapest_parameterized_paths = NIL;
! 	joinrel->lateral_relids = min_join_parameterization(root, joinrel->relids,
! 														outer_rel, inner_rel);
  	joinrel->relid = 0;			/* indicates not a baserel */
  	joinrel->rtekind = RTE_JOIN;
  	joinrel->min_attr = 0;
*************** build_join_rel(PlannerInfo *root,
*** 508,536 ****
   * because join_is_legal() needs the value to check a prospective join.
   */
  Relids
! min_join_parameterization(PlannerInfo *root, Relids joinrelids)
  {
  	Relids		result;
- 	ListCell   *lc;
- 
- 	/* Easy if there are no lateral references */
- 	if (root->lateral_info_list == NIL)
- 		return NULL;
  
  	/*
! 	 * Scan lateral_info_list to find all the lateral references occurring in
! 	 * or below this join.
  	 */
! 	result = NULL;
! 	foreach(lc, root->lateral_info_list)
! 	{
! 		LateralJoinInfo *ljinfo = (LateralJoinInfo *) lfirst(lc);
! 
! 		if (bms_is_subset(ljinfo->lateral_rhs, joinrelids))
! 			result = bms_add_members(result, ljinfo->lateral_lhs);
! 	}
! 
! 	/* Remove any rels that are already included in the join */
  	result = bms_del_members(result, joinrelids);
  
  	/* Maintain invariant that result is exactly NULL if empty */
--- 509,535 ----
   * because join_is_legal() needs the value to check a prospective join.
   */
  Relids
! min_join_parameterization(PlannerInfo *root,
! 						  Relids joinrelids,
! 						  RelOptInfo *outer_rel,
! 						  RelOptInfo *inner_rel)
  {
  	Relids		result;
  
  	/*
! 	 * Basically we just need the union of the inputs' lateral_relids, less
! 	 * whatever is already in the join.
! 	 *
! 	 * It's not immediately obvious that this is a valid way to compute the
! 	 * result, because it might seem that we're ignoring possible lateral refs
! 	 * of PlaceHolderVars that are due to be computed at the join but not in
! 	 * either input.  However, because create_lateral_join_info() already
! 	 * charged all such PHV refs to each member baserel of the join, they'll
! 	 * be accounted for already in the inputs' lateral_relids.  Likewise, we
! 	 * do not need to worry about doing transitive closure here, because that
! 	 * was already accounted for in the original baserel lateral_relids.
  	 */
! 	result = bms_union(outer_rel->lateral_relids, inner_rel->lateral_relids);
  	result = bms_del_members(result, joinrelids);
  
  	/* Maintain invariant that result is exactly NULL if empty */
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index 3232123..74f4daf 100644
*** a/src/include/nodes/relation.h
--- b/src/include/nodes/relation.h
*************** typedef struct PlannerInfo
*** 358,363 ****
--- 358,364 ----
   *		cheapest_parameterized_paths - best paths for their parameterizations;
   *			always includes cheapest_total_path, even if that's unparameterized
   *		lateral_relids - required outer rels for LATERAL, as a Relids set
+  *			(includes both direct and indirect lateral references)
   *
   * If the relation is a base relation it will have these fields set:
   *
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index f41847f..8fb9eda 100644
*** a/src/include/optimizer/pathnode.h
--- b/src/include/optimizer/pathnode.h
*************** extern RelOptInfo *build_join_rel(Planne
*** 148,154 ****
  			   RelOptInfo *inner_rel,
  			   SpecialJoinInfo *sjinfo,
  			   List **restrictlist_ptr);
! extern Relids min_join_parameterization(PlannerInfo *root, Relids joinrelids);
  extern RelOptInfo *build_empty_join_rel(PlannerInfo *root);
  extern AppendRelInfo *find_childrel_appendrelinfo(PlannerInfo *root,
  							RelOptInfo *rel);
--- 148,157 ----
  			   RelOptInfo *inner_rel,
  			   SpecialJoinInfo *sjinfo,
  			   List **restrictlist_ptr);
! extern Relids min_join_parameterization(PlannerInfo *root,
! 						  Relids joinrelids,
! 						  RelOptInfo *outer_rel,
! 						  RelOptInfo *inner_rel);
  extern RelOptInfo *build_empty_join_rel(PlannerInfo *root);
  extern AppendRelInfo *find_childrel_appendrelinfo(PlannerInfo *root,
  							RelOptInfo *rel);
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 87123a5..7757741 100644
*** a/src/include/optimizer/paths.h
--- b/src/include/optimizer/paths.h
*************** extern RelOptInfo *make_join_rel(Planner
*** 98,103 ****
--- 98,105 ----
  			  RelOptInfo *rel1, RelOptInfo *rel2);
  extern bool have_join_order_restriction(PlannerInfo *root,
  							RelOptInfo *rel1, RelOptInfo *rel2);
+ extern bool have_dangerous_phv(PlannerInfo *root,
+ 				   Relids outer_relids, Relids inner_params);
  
  /*
   * equivclass.c
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index dba5d2d..64f046e 100644
*** a/src/test/regress/expected/join.out
--- b/src/test/regress/expected/join.out
*************** where t1.f1 = ss.f1;
*** 3620,3625 ****
--- 3620,3778 ----
   doh! | 4567890123456789 | 123 | 4567890123456789 | doh!
  (1 row)
  
+ explain (verbose, costs off)
+ select * from
+   text_tbl t1
+   left join int8_tbl i8
+   on i8.q2 = 123,
+   lateral (select i8.q1, t2.f1 from text_tbl t2 limit 1) as ss1,
+   lateral (select ss1.* from text_tbl t3 limit 1) as ss2
+ where t1.f1 = ss2.f1;
+                             QUERY PLAN                             
+ -------------------------------------------------------------------
+  Nested Loop
+    Output: t1.f1, i8.q1, i8.q2, (i8.q1), t2.f1, ((i8.q1)), (t2.f1)
+    Join Filter: (t1.f1 = (t2.f1))
+    ->  Nested Loop
+          Output: t1.f1, i8.q1, i8.q2, (i8.q1), t2.f1
+          ->  Nested Loop Left Join
+                Output: t1.f1, i8.q1, i8.q2
+                ->  Seq Scan on public.text_tbl t1
+                      Output: t1.f1
+                ->  Materialize
+                      Output: i8.q1, i8.q2
+                      ->  Seq Scan on public.int8_tbl i8
+                            Output: i8.q1, i8.q2
+                            Filter: (i8.q2 = 123)
+          ->  Limit
+                Output: (i8.q1), t2.f1
+                ->  Seq Scan on public.text_tbl t2
+                      Output: i8.q1, t2.f1
+    ->  Limit
+          Output: ((i8.q1)), (t2.f1)
+          ->  Seq Scan on public.text_tbl t3
+                Output: (i8.q1), t2.f1
+ (22 rows)
+ 
+ select * from
+   text_tbl t1
+   left join int8_tbl i8
+   on i8.q2 = 123,
+   lateral (select i8.q1, t2.f1 from text_tbl t2 limit 1) as ss1,
+   lateral (select ss1.* from text_tbl t3 limit 1) as ss2
+ where t1.f1 = ss2.f1;
+   f1  |        q1        | q2  |        q1        |  f1  |        q1        |  f1  
+ ------+------------------+-----+------------------+------+------------------+------
+  doh! | 4567890123456789 | 123 | 4567890123456789 | doh! | 4567890123456789 | doh!
+ (1 row)
+ 
+ explain (verbose, costs off)
+ select 1 from
+   text_tbl as tt1
+   inner join text_tbl as tt2 on (tt1.f1 = 'foo')
+   left join text_tbl as tt3 on (tt3.f1 = 'foo')
+   left join text_tbl as tt4 on (tt3.f1 = tt4.f1),
+   lateral (select tt4.f1 as c0 from text_tbl as tt5 limit 1) as ss1
+ where tt1.f1 = ss1.c0;
+                         QUERY PLAN                        
+ ----------------------------------------------------------
+  Nested Loop
+    Output: 1
+    ->  Nested Loop Left Join
+          Output: tt1.f1, tt4.f1
+          ->  Nested Loop
+                Output: tt1.f1
+                ->  Seq Scan on public.text_tbl tt1
+                      Output: tt1.f1
+                      Filter: (tt1.f1 = 'foo'::text)
+                ->  Seq Scan on public.text_tbl tt2
+                      Output: tt2.f1
+          ->  Materialize
+                Output: tt4.f1
+                ->  Nested Loop Left Join
+                      Output: tt4.f1
+                      Join Filter: (tt3.f1 = tt4.f1)
+                      ->  Seq Scan on public.text_tbl tt3
+                            Output: tt3.f1
+                            Filter: (tt3.f1 = 'foo'::text)
+                      ->  Seq Scan on public.text_tbl tt4
+                            Output: tt4.f1
+                            Filter: (tt4.f1 = 'foo'::text)
+    ->  Subquery Scan on ss1
+          Output: ss1.c0
+          Filter: (ss1.c0 = 'foo'::text)
+          ->  Limit
+                Output: (tt4.f1)
+                ->  Seq Scan on public.text_tbl tt5
+                      Output: tt4.f1
+ (29 rows)
+ 
+ select 1 from
+   text_tbl as tt1
+   inner join text_tbl as tt2 on (tt1.f1 = 'foo')
+   left join text_tbl as tt3 on (tt3.f1 = 'foo')
+   left join text_tbl as tt4 on (tt3.f1 = tt4.f1),
+   lateral (select tt4.f1 as c0 from text_tbl as tt5 limit 1) as ss1
+ where tt1.f1 = ss1.c0;
+  ?column? 
+ ----------
+ (0 rows)
+ 
+ --
+ -- check a case in which a PlaceHolderVar forces join order
+ --
+ explain (verbose, costs off)
+ select ss2.* from
+   int4_tbl i41
+   left join int8_tbl i8
+     join (select i42.f1 as c1, i43.f1 as c2, 42 as c3
+           from int4_tbl i42, int4_tbl i43) ss1
+     on i8.q1 = ss1.c2
+   on i41.f1 = ss1.c1,
+   lateral (select i41.*, i8.*, ss1.* from text_tbl limit 1) ss2
+ where ss1.c2 = 0;
+                                QUERY PLAN                               
+ ------------------------------------------------------------------------
+  Nested Loop
+    Output: (i41.f1), (i8.q1), (i8.q2), (i42.f1), (i43.f1), ((42))
+    ->  Hash Join
+          Output: i41.f1, i42.f1, i8.q1, i8.q2, i43.f1, 42
+          Hash Cond: (i41.f1 = i42.f1)
+          ->  Nested Loop
+                Output: i8.q1, i8.q2, i43.f1, i41.f1
+                ->  Nested Loop
+                      Output: i8.q1, i8.q2, i43.f1
+                      ->  Seq Scan on public.int8_tbl i8
+                            Output: i8.q1, i8.q2
+                            Filter: (i8.q1 = 0)
+                      ->  Seq Scan on public.int4_tbl i43
+                            Output: i43.f1
+                            Filter: (i43.f1 = 0)
+                ->  Seq Scan on public.int4_tbl i41
+                      Output: i41.f1
+          ->  Hash
+                Output: i42.f1
+                ->  Seq Scan on public.int4_tbl i42
+                      Output: i42.f1
+    ->  Limit
+          Output: (i41.f1), (i8.q1), (i8.q2), (i42.f1), (i43.f1), ((42))
+          ->  Seq Scan on public.text_tbl
+                Output: i41.f1, i8.q1, i8.q2, i42.f1, i43.f1, (42)
+ (25 rows)
+ 
+ select ss2.* from
+   int4_tbl i41
+   left join int8_tbl i8
+     join (select i42.f1 as c1, i43.f1 as c2, 42 as c3
+           from int4_tbl i42, int4_tbl i43) ss1
+     on i8.q1 = ss1.c2
+   on i41.f1 = ss1.c1,
+   lateral (select i41.*, i8.*, ss1.* from text_tbl limit 1) ss2
+ where ss1.c2 = 0;
+  f1 | q1 | q2 | c1 | c2 | c3 
+ ----+----+----+----+----+----
+ (0 rows)
+ 
  --
  -- test ability to push constants through outer join clauses
  --
*************** select * from
*** 4741,4754 ****
           Output: a.q1, a.q2
     ->  Nested Loop
           Output: b.q1, c.q1, LEAST(a.q1, b.q1, c.q1)
-          Join Filter: (a.q2 = b.q1)
           ->  Seq Scan on public.int8_tbl b
                 Output: b.q1, b.q2
!          ->  Materialize
!                Output: c.q1
!                ->  Seq Scan on public.int8_tbl c
!                      Output: c.q1
! (13 rows)
  
  select * from
    int8_tbl a left join lateral
--- 4894,4905 ----
           Output: a.q1, a.q2
     ->  Nested Loop
           Output: b.q1, c.q1, LEAST(a.q1, b.q1, c.q1)
           ->  Seq Scan on public.int8_tbl b
                 Output: b.q1, b.q2
!                Filter: (a.q2 = b.q1)
!          ->  Seq Scan on public.int8_tbl c
!                Output: c.q1, c.q2
! (11 rows)
  
  select * from
    int8_tbl a left join lateral
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index fdd4e78..0358d00 100644
*** a/src/test/regress/sql/join.sql
--- b/src/test/regress/sql/join.sql
*************** select * from
*** 1134,1139 ****
--- 1134,1198 ----
    lateral (select i8.q1, t2.f1 from text_tbl t2 limit 1) as ss
  where t1.f1 = ss.f1;
  
+ explain (verbose, costs off)
+ select * from
+   text_tbl t1
+   left join int8_tbl i8
+   on i8.q2 = 123,
+   lateral (select i8.q1, t2.f1 from text_tbl t2 limit 1) as ss1,
+   lateral (select ss1.* from text_tbl t3 limit 1) as ss2
+ where t1.f1 = ss2.f1;
+ 
+ select * from
+   text_tbl t1
+   left join int8_tbl i8
+   on i8.q2 = 123,
+   lateral (select i8.q1, t2.f1 from text_tbl t2 limit 1) as ss1,
+   lateral (select ss1.* from text_tbl t3 limit 1) as ss2
+ where t1.f1 = ss2.f1;
+ 
+ explain (verbose, costs off)
+ select 1 from
+   text_tbl as tt1
+   inner join text_tbl as tt2 on (tt1.f1 = 'foo')
+   left join text_tbl as tt3 on (tt3.f1 = 'foo')
+   left join text_tbl as tt4 on (tt3.f1 = tt4.f1),
+   lateral (select tt4.f1 as c0 from text_tbl as tt5 limit 1) as ss1
+ where tt1.f1 = ss1.c0;
+ 
+ select 1 from
+   text_tbl as tt1
+   inner join text_tbl as tt2 on (tt1.f1 = 'foo')
+   left join text_tbl as tt3 on (tt3.f1 = 'foo')
+   left join text_tbl as tt4 on (tt3.f1 = tt4.f1),
+   lateral (select tt4.f1 as c0 from text_tbl as tt5 limit 1) as ss1
+ where tt1.f1 = ss1.c0;
+ 
+ --
+ -- check a case in which a PlaceHolderVar forces join order
+ --
+ 
+ explain (verbose, costs off)
+ select ss2.* from
+   int4_tbl i41
+   left join int8_tbl i8
+     join (select i42.f1 as c1, i43.f1 as c2, 42 as c3
+           from int4_tbl i42, int4_tbl i43) ss1
+     on i8.q1 = ss1.c2
+   on i41.f1 = ss1.c1,
+   lateral (select i41.*, i8.*, ss1.* from text_tbl limit 1) ss2
+ where ss1.c2 = 0;
+ 
+ select ss2.* from
+   int4_tbl i41
+   left join int8_tbl i8
+     join (select i42.f1 as c1, i43.f1 as c2, 42 as c3
+           from int4_tbl i42, int4_tbl i43) ss1
+     on i8.q1 = ss1.c2
+   on i41.f1 = ss1.c1,
+   lateral (select i41.*, i8.*, ss1.* from text_tbl limit 1) ss2
+ where ss1.c2 = 0;
+ 
  --
  -- test ability to push constants through outer join clauses
  --
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to