From 7a2a460f80c61909b3f6ab7b39f3ecf2c2fba2b0 Mon Sep 17 00:00:00 2001
From: Andy Fan <zhihui.fan1213@gmail.com>
Date: Tue, 14 Apr 2020 11:27:41 +0800
Subject: [PATCH v2] tiny changes for init_grouping_targets

---
 src/backend/optimizer/util/relnode.c | 274 +++++++++++----------------
 1 file changed, 107 insertions(+), 167 deletions(-)

diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index 3ee38bf1f6..68a347e2d9 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -73,6 +73,7 @@ static void build_child_join_reltarget(PlannerInfo *root,
 									   RelOptInfo *childrel,
 									   int nappinfos,
 									   AppendRelInfo **appinfos);
+static bool aggref_used_var(PlannerInfo *root, Var *var);
 static bool init_grouping_targets(PlannerInfo *root, RelOptInfo *rel,
 								  PathTarget *target, PathTarget *agg_input,
 								  List *gvis, List **group_exprs_extra_p);
@@ -2066,8 +2067,6 @@ create_rel_agg_info(PlannerInfo *root, RelOptInfo *rel)
 	 */
 	Assert(root->grouped_var_list != NIL);
 
-	result = makeNode(RelAggInfo);
-
 	/*
 	 * The current implementation of aggregation push-down cannot handle
 	 * PlaceHolderVar (PHV).
@@ -2334,6 +2333,8 @@ create_rel_agg_info(PlannerInfo *root, RelOptInfo *rel)
 	 * SortGroupClauses.
 	 */
 	i = 0;
+
+	result = makeNode(RelAggInfo);
 	foreach(lc, target->exprs)
 	{
 		Index		sortgroupref = 0;
@@ -2429,10 +2430,15 @@ init_grouping_targets(PlannerInfo *root, RelOptInfo *rel,
 					  PathTarget *target, PathTarget *agg_input,
 					  List *gvis, List **group_exprs_extra_p)
 {
-	ListCell   *lc1,
-			   *lc2;
-	List	   *unresolved = NIL;
-	List	   *unresolved_sortgrouprefs = NIL;
+	ListCell   *lc1;
+	List	*grouping_columns = NIL;
+
+	foreach(lc1, root->grouped_var_list)
+	{
+		GroupedVarInfo *gvar = lfirst_node(GroupedVarInfo, lc1);
+		if (IsA(gvar->gvexpr, Var))
+			grouping_columns = lappend(grouping_columns, gvar->gvexpr);
+	}
 
 	foreach(lc1, rel->reltarget->exprs)
 	{
@@ -2440,7 +2446,8 @@ init_grouping_targets(PlannerInfo *root, RelOptInfo *rel,
 		bool		is_grouping;
 		Index		sortgroupref = 0;
 		bool		derived = false;
-		bool		needed_by_aggregate;
+		RangeTblEntry	*rte;
+		List	   *deps = NIL;
 
 		/*
 		 * Given that PlaceHolderVar currently prevents us from doing
@@ -2452,6 +2459,8 @@ init_grouping_targets(PlannerInfo *root, RelOptInfo *rel,
 		is_grouping = is_grouping_expression(gvis, (Expr *) tvar,
 											 &sortgroupref, &derived);
 
+		rte = root->simple_rte_array[tvar->varno];
+
 		/*
 		 * Derived grouping expressions should not be referenced by the query
 		 * targetlist, so let them fall into vars_unresolved. It'll be checked
@@ -2475,190 +2484,121 @@ init_grouping_targets(PlannerInfo *root, RelOptInfo *rel,
 			 * sortgroupref in addition.
 			 */
 			add_column_to_pathtarget(agg_input, (Expr *) tvar, sortgroupref);
-
-			/* Process the next expression. */
-			continue;
 		}
-
-		/*
-		 * Is this Var needed in the query targetlist for anything else than
-		 * aggregate input?
-		 */
-		needed_by_aggregate = false;
-		foreach(lc2, root->grouped_var_list)
+		else if (aggref_used_var(root, tvar) &&
+				 tlist_member((Expr*) tvar, root->processed_tlist) == NULL)
 		{
-			GroupedVarInfo *gvi = lfirst_node(GroupedVarInfo, lc2);
-			ListCell   *lc3;
-			List	   *vars;
-
-			if (!IsA(gvi->gvexpr, Aggref))
-				continue;
-
-			if (!bms_is_member(tvar->varno, gvi->gv_eval_at))
-				continue;
-
-			/*
-			 * XXX Consider some sort of caching.
-			 */
-			vars = pull_var_clause((Node *) gvi->gvexpr, PVC_RECURSE_AGGREGATES);
-			foreach(lc3, vars)
-			{
-				Var		   *var = lfirst_node(Var, lc3);
-
-				if (equal(var, tvar))
-				{
-					needed_by_aggregate = true;
-					break;
-				}
-			}
-			list_free(vars);
-			if (needed_by_aggregate)
-				break;
+			add_new_column_to_pathtarget(agg_input, (Expr *) tvar);
 		}
-
-		if (needed_by_aggregate)
-		{
-			bool		found = false;
-
-			foreach(lc2, root->processed_tlist)
-			{
-				TargetEntry *te = lfirst_node(TargetEntry, lc2);
-
-				if (IsA(te->expr, Aggref))
-					continue;
-
-				if (equal(te->expr, tvar))
-				{
-					found = true;
-					break;
-				}
-			}
-
-			/*
-			 * If it's only Aggref input, add it to the aggregation input
-			 * target and that's it.
-			 */
-			if (!found)
-			{
-				add_new_column_to_pathtarget(agg_input, (Expr *) tvar);
-				continue;
-			}
-		}
-
-		/*
-		 * Further investigation involves dependency check, for which we need
-		 * to have all the (plain-var) grouping expressions gathered.
-		 */
-		unresolved = lappend(unresolved, tvar);
-		unresolved_sortgrouprefs = lappend_int(unresolved_sortgrouprefs,
-											   sortgroupref);
-	}
-
-	/*
-	 * Check for other possible reasons for the var to be in the plain target.
-	 */
-	forboth(lc1, unresolved, lc2, unresolved_sortgrouprefs)
-	{
-		Var		   *var = lfirst_node(Var, lc1);
-		Index		sortgroupref = lfirst_int(lc2);
-		RangeTblEntry *rte;
-		List	   *deps = NIL;
-		Relids		relids_subtract;
-		int			ndx;
-		RelOptInfo *baserel;
-
-		rte = root->simple_rte_array[var->varno];
-
-		/*
-		 * Check if the Var can be in the grouping key even though it's not
-		 * mentioned by the GROUP BY clause (and could not be derived using
-		 * ECs).
-		 */
-		if (sortgroupref == 0 &&
-			check_functional_grouping(rte->relid, var->varno,
-									  var->varlevelsup,
-									  target->exprs, &deps))
+		else if (sortgroupref == 0 &&
+				 check_functional_grouping(rte->relid, tvar->varno,
+										   tvar->varlevelsup, grouping_columns, &deps))
 		{
 			/*
 			 * The var shouldn't be actually used as a grouping key (instead,
 			 * the one this depends on will be), so sortgroupref should not be
 			 * important.
 			 */
-			add_new_column_to_pathtarget(target, (Expr *) var);
-			add_new_column_to_pathtarget(agg_input, (Expr *) var);
-
-			/*
-			 * The var may or may not be present in generic grouping
-			 * expression(s) in addition, but this is handled elsewhere.
-			 */
-			continue;
+			add_new_column_to_pathtarget(target, (Expr *) tvar);
+			add_new_column_to_pathtarget(agg_input, (Expr *) tvar);
 		}
-
-		/*
-		 * Isn't the expression needed by joins above the current rel?
-		 *
-		 * The relids we're not interested in do include 0, which is the
-		 * top-level targetlist. The only reason for relids to contain 0
-		 * should be that arg_var is referenced either by aggregate or by
-		 * grouping expression, but right now we're interested in the *other*
-		 * reasons. (As soon aggregation is pushed down, the aggregates in the
-		 * query targetlist no longer need direct reference to arg_var
-		 * anyway.)
-		 */
-		relids_subtract = bms_copy(rel->relids);
-		bms_add_member(relids_subtract, 0);
-
-		baserel = find_base_rel(root, var->varno);
-		ndx = var->varattno - baserel->min_attr;
-		if (bms_nonempty_difference(baserel->attr_needed[ndx],
-									relids_subtract))
+		else
 		{
+			Relids		relids_subtract;
+			int			ndx;
+			RelOptInfo *baserel;
 			/*
-			 * The variable is needed by a join involving this relation. That
-			 * case includes variable that is referenced by a generic grouping
-			 * expression.
+			 * Isn't the expression needed by joins above the current rel?
 			 *
-			 * The only way to bring this var to the aggregation output is to
-			 * add it to the grouping expressions too.
+			 * The relids we're not interested in do include 0, which is the
+			 * top-level targetlist. The only reason for relids to contain 0
+			 * should be that arg_var is referenced either by aggregate or by
+			 * grouping expression, but right now we're interested in the *other*
+			 * reasons. (As soon aggregation is pushed down, the aggregates in the
+			 * query targetlist no longer need direct reference to arg_var
+			 * anyway.)
 			 */
-			if (sortgroupref > 0)
+			relids_subtract = bms_copy(rel->relids);
+			bms_add_member(relids_subtract, 0);
+
+			baserel = find_base_rel(root, tvar->varno);
+			ndx = tvar->varattno - baserel->min_attr;
+			if (bms_nonempty_difference(baserel->attr_needed[ndx],
+										relids_subtract))
 			{
 				/*
-				 * The var could be recognized as a potentially useful
-				 * grouping expression at the top of the loop, so we can add
-				 * it to the grouping target, as well as to the agg_input.
+				 * The variable is needed by a join involving this relation. That
+				 * case includes variable that is referenced by a generic grouping
+				 * expression.
+				 *
+				 * The only way to bring this var to the aggregation output is to
+				 * add it to the grouping expressions too.
 				 */
-				add_column_to_pathtarget(target, (Expr *) var, sortgroupref);
-				add_column_to_pathtarget(agg_input, (Expr *) var, sortgroupref);
+				if (sortgroupref > 0)
+				{
+					/*
+					 * The var could be recognized as a potentially useful
+					 * grouping expression at the top of the loop, so we can add
+					 * it to the grouping target, as well as to the agg_input.
+					 */
+					add_column_to_pathtarget(target, (Expr *) tvar, sortgroupref);
+					add_column_to_pathtarget(agg_input, (Expr *) tvar, sortgroupref);
+				}
+				else
+				{
+					/*
+					 * Since root->parse->groupClause is not supposed to contain
+					 * this expression, we need to construct special
+					 * SortGroupClause. Its tleSortGroupRef needs to be unique
+					 * within target_agg, so postpone creation of the
+					 * SortGroupRefs until we're done with the iteration of
+					 * rel->reltarget->exprs.
+					 */
+					*group_exprs_extra_p = lappend(*group_exprs_extra_p, tvar);
+				}
 			}
 			else
 			{
 				/*
-				 * Since root->parse->groupClause is not supposed to contain
-				 * this expression, we need to construct special
-				 * SortGroupClause. Its tleSortGroupRef needs to be unique
-				 * within target_agg, so postpone creation of the
-				 * SortGroupRefs until we're done with the iteration of
-				 * rel->reltarget->exprs.
+				 * As long as the query is semantically correct, arriving here
+				 * means that the var is referenced by a generic grouping
+				 * expression but not referenced by any join.
+				 *
+				 * create_rel_agg_info() should add this variable to "agg_input"
+				 * target and also add the whole generic expression to "target",
+				 * but that's subject to future enhancement of the aggregate
+				 * push-down feature.
 				 */
-				*group_exprs_extra_p = lappend(*group_exprs_extra_p, var);
+				return false;
 			}
 		}
-		else
-		{
-			/*
-			 * As long as the query is semantically correct, arriving here
-			 * means that the var is referenced by a generic grouping
-			 * expression but not referenced by any join.
-			 *
-			 * create_rel_agg_info() should add this variable to "agg_input"
-			 * target and also add the whole generic expression to "target",
-			 * but that's subject to future enhancement of the aggregate
-			 * push-down feature.
-			 */
-			return false;
-		}
 	}
-
 	return true;
 }
+
+static bool
+aggref_used_var(PlannerInfo *root, Var *var)
+{
+
+	ListCell 	*lc;
+	foreach(lc, root->grouped_var_list)
+	{
+		GroupedVarInfo *gvi = lfirst_node(GroupedVarInfo, lc);
+		List	   *vars;
+
+		if (!IsA(gvi->gvexpr, Aggref))
+			continue;
+
+		if (!bms_is_member(var->varno, gvi->gv_eval_at))
+			continue;
+
+		/*
+		 * XXX Consider some sort of caching.
+		 */
+		vars = pull_var_clause((Node *) gvi->gvexpr, PVC_RECURSE_AGGREGATES);
+		if (list_member(vars, var))
+			return true;
+	}
+
+	return false;
+}
-- 
2.21.0

