Hi Jeevan,
I am back reviewing this. Here are some comments.

@@ -1415,7 +1413,8 @@ add_paths_to_append_rel(PlannerInfo *root,
RelOptInfo *rel,
          * the unparameterized Append path we are constructing for the parent.
          * If not, there's no workable unparameterized path.
          */
-        if (childrel->cheapest_total_path->param_info == NULL)
+        if (childrel->pathlist != NIL &&
+            childrel->cheapest_total_path->param_info == NULL)
             accumulate_append_subpath(childrel->cheapest_total_path,
                                       &subpaths, NULL);
         else
@@ -1683,6 +1682,13 @@ add_paths_to_append_rel(PlannerInfo *root,
RelOptInfo *rel,
             RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr);
             Path       *subpath;

+            if (childrel->pathlist == NIL)
+            {
+                /* failed to make a suitable path for this child */
+                subpaths_valid = false;
+                break;
+            }
+
When can childrel->pathlist be NIL?

diff --git a/src/backend/optimizer/plan/createplan.c
b/src/backend/optimizer/plan/createplan.c
index 9ae1bf3..f90626c 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -1670,7 +1670,15 @@ create_sort_plan(PlannerInfo *root, SortPath
*best_path, int flags)
     subplan = create_plan_recurse(root, best_path->subpath,
                                   flags | CP_SMALL_TLIST);

-    plan = make_sort_from_pathkeys(subplan, best_path->path.pathkeys, NULL);
+    /*
+     * In find_ec_member_for_tle(), child EC members are ignored if they don't
+     * belong to the given relids. Thus, if this sort path is based on a child
+     * relation, we must pass the relids of it. Otherwise, we will end-up into
+     * an error requiring pathkey item.
+     */
+    plan = make_sort_from_pathkeys(subplan, best_path->path.pathkeys,
+                                   IS_OTHER_REL(best_path->subpath->parent) ?
+                                   best_path->path.parent->relids : NULL);

     copy_generic_path_info(&plan->plan, (Path *) best_path);

Please separate this small adjustment in a patch of its own, with some
explanation of why we need it i.e. now this function can see SortPaths from
child (other) relations.

+    if (child_data)
+    {
+        /* Must be other rel as all child relations are marked OTHER_RELs */
+        Assert(IS_OTHER_REL(input_rel));

I think we should check IS_OTHER_REL() and Assert(child_data). That way we know
that the code in the if block is executed for OTHER relation.

-    if ((root->hasHavingQual || parse->groupingSets) &&
+    if (!child_data && (root->hasHavingQual || parse->groupingSets) &&

Degenerate grouping will never see child relations, so instead of checking for
child_data, Assert (!IS_OTHER_REL()) inside this block. Add a comment there
explaining the assertion.

+     *
+     * If we are performing grouping for a child relation, fetch can_sort from
+     * the child_data to avoid re-calculating same.
      */
-    can_sort = (gd && gd->rollups != NIL)
-        || grouping_is_sortable(parse->groupClause);
+    can_sort = child_data ? child_data->can_sort : ((gd &&
gd->rollups != NIL) ||
+
grouping_is_sortable(parse->groupClause));

Instead of adding a conditional here, we can compute these values before
create_grouping_paths() is called from grouping_planner() and then pass them
down to try_partitionwise_grouping(). I have attached a patch which refactors
this code. Please see if this refactoring is useful. In the attached patch, I
have handled can_sort, can_hash and partial aggregation costs. More on the last
component below.


     /*
      * Figure out whether a PartialAggregate/Finalize Aggregate execution
@@ -3740,10 +3789,8 @@ create_grouping_paths(PlannerInfo *root,
      * partial paths for partially_grouped_rel; that way, later code can
      * easily consider both parallel and non-parallel approaches to grouping.
      */
-    if (try_parallel_aggregation)
+    if (!child_data && !(agg_costs->hasNonPartial || agg_costs->hasNonSerial))
     {
-        PathTarget *partial_grouping_target;
-
[... clipped ...]
+            get_agg_clause_costs(root, havingQual,
                                  AGGSPLIT_FINAL_DESERIAL,
-                                 &agg_final_costs);
+                                 agg_final_costs);
         }
+    }

With this change, we are computing partial aggregation costs even in
the cases when
those will not be used e.g. when there are no children and parallel paths can
not be created. In the attached patch, I have refactored the code such that
they are computed when they are needed the first time and re-used later.

+    if (child_data)
+    {
+        partial_grouping_target = child_data->partialRelTarget;
+        partially_grouped_rel->reltarget = partial_grouping_target;
+        agg_partial_costs = child_data->agg_partial_costs;
+        agg_final_costs = child_data->agg_final_costs;
+    }

I think, with the refactoring, we can get rid of the last two lines here. I
think we can get rid of this block entirely, but I have not reviewed the entire
code to confirm that.

 static PathTarget *
-make_partial_grouping_target(PlannerInfo *root, PathTarget *grouping_target)
+make_partial_grouping_target(PlannerInfo *root,
+                             PathTarget *grouping_target,
+                             Node *havingQual)
This looks like a refactoring change. Should go to one of the refactoring
patches or in a patch of its own.

This isn't full review. I will continue reviewing this further.

-- 
Best Wishes,
Ashutosh Bapat
EnterpriseDB Corporation
The Postgres Database Company
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index de1257d..8460134 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -109,6 +109,28 @@ typedef struct
 	int		   *tleref_to_colnum_map;
 } grouping_sets_data;
 
+/*
+ * Struct for extra information passed to create_grouping_paths
+ *
+ * can_hash is true if hash-based grouping is possible, false otherwise.
+ * can_sort is true if sort-based grouping is possible, false otherwise.
+ * can_partial_agg is true if partial aggregation is possible, false otherwise.
+ * partial_costs_set indicates whether agg_partial_costs and agg_final_costs
+ *		have valid costs set. Both of those are computed only when partial
+ *		aggregation is required.
+ * agg_partial_costs gives partial aggregation costs.
+ * agg_final_costs gives finalization costs.
+ */
+typedef struct
+{
+	bool		can_hash;
+	bool		can_sort;
+	bool		can_partial_agg;
+	bool		partial_costs_set;
+	AggClauseCosts agg_partial_costs;
+	AggClauseCosts agg_final_costs;
+} GroupPathExtraData;
+
 /* Local functions */
 static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
 static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
@@ -138,7 +160,8 @@ static RelOptInfo *create_grouping_paths(PlannerInfo *root,
 					  RelOptInfo *input_rel,
 					  PathTarget *target,
 					  const AggClauseCosts *agg_costs,
-					  grouping_sets_data *gd);
+					  grouping_sets_data *gd,
+					  GroupPathExtraData *extra);
 static void consider_groupingsets_paths(PlannerInfo *root,
 							RelOptInfo *grouped_rel,
 							Path *path,
@@ -201,7 +224,11 @@ static void add_paths_to_partial_grouping_rel(PlannerInfo *root,
 								  bool can_sort,
 								  bool can_hash);
 static bool can_parallel_agg(PlannerInfo *root, RelOptInfo *input_rel,
-				 RelOptInfo *grouped_rel, const AggClauseCosts *agg_costs);
+				 RelOptInfo *grouped_rel, GroupPathExtraData *extra);
+static void compute_group_path_extra_data(PlannerInfo *root,
+							  GroupPathExtraData *extra,
+							  grouping_sets_data *gd,
+							  const AggClauseCosts *agg_costs);
 
 
 /*****************************************************************************
@@ -1981,11 +2008,17 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 		 */
 		if (have_grouping)
 		{
+			GroupPathExtraData group_extra;
+
+			compute_group_path_extra_data(root, &group_extra, gset_data,
+										  &agg_costs);
+
 			current_rel = create_grouping_paths(root,
 												current_rel,
 												grouping_target,
 												&agg_costs,
-												gset_data);
+												gset_data,
+												&group_extra);
 			/* Fix things up if grouping_target contains SRFs */
 			if (parse->hasTargetSRFs)
 				adjust_paths_for_srfs(root, current_rel,
@@ -3595,6 +3628,67 @@ estimate_hashagg_tablesize(Path *path, const AggClauseCosts *agg_costs,
 }
 
 /*
+ * compute_group_path_extra_data
+ *	  Compute extra information required for grouping operation specified in
+ *	  the query.
+ */
+static void
+compute_group_path_extra_data(PlannerInfo *root, GroupPathExtraData *extra,
+							  grouping_sets_data *gd,
+							  const AggClauseCosts *agg_costs)
+{
+	Query	   *parse = root->parse;
+
+	/*
+	 * Determine whether it's possible to perform sort-based implementations
+	 * of grouping.  (Note that if groupClause is empty,
+	 * grouping_is_sortable() is trivially true, and all the
+	 * pathkeys_contained_in() tests will succeed too, so that we'll consider
+	 * every surviving input path.)
+	 *
+	 * If we have grouping sets, we might be able to sort some but not all of
+	 * them; in this case, we need can_sort to be true as long as we must
+	 * consider any sorted-input plan.
+	 */
+	extra->can_sort = (gd && gd->rollups != NIL) ||
+					  grouping_is_sortable(parse->groupClause);
+
+	/*
+	 * Determine whether we should consider hash-based implementations of
+	 * grouping.
+	 *
+	 * Hashed aggregation only applies if we're grouping. If we have grouping
+	 * sets, some groups might be hashable but others not; in this case we set
+	 * can_hash true as long as there is nothing globally preventing us from
+	 * hashing (and we should therefore consider plans with hashes).
+	 *
+	 * Executor doesn't support hashed aggregation with DISTINCT or ORDER BY
+	 * aggregates.  (Doing so would imply storing *all* the input values in
+	 * the hash table, and/or running many sorts in parallel, either of which
+	 * seems like a certain loser.)  We similarly don't support ordered-set
+	 * aggregates in hashed aggregation, but that case is also included in the
+	 * numOrderedAggs count.
+	 *
+	 * Note: grouping_is_hashable() is much more expensive to check than the
+	 * other gating conditions, so we want to do it last.
+	 */
+	extra->can_hash = (parse->groupClause != NIL &&
+					   agg_costs->numOrderedAggs == 0 &&
+					   (gd ? gd->any_hashable :
+							 grouping_is_hashable(parse->groupClause)));
+
+	/*
+	 * Set partial aggregation costs if we are going to calculate partial
+	 * aggregates in create_grouping_paths().
+	 */
+	extra->partial_costs_set = false;
+
+	/* Is partial aggregation possible? */
+	extra->can_partial_agg = (agg_costs->hasNonPartial ||
+							  agg_costs->hasNonSerial);
+}
+
+/*
  * create_grouping_paths
  *
  * Build a new upperrel containing Paths for grouping and/or aggregation.
@@ -3624,17 +3718,18 @@ create_grouping_paths(PlannerInfo *root,
 					  RelOptInfo *input_rel,
 					  PathTarget *target,
 					  const AggClauseCosts *agg_costs,
-					  grouping_sets_data *gd)
+					  grouping_sets_data *gd,
+					  GroupPathExtraData *extra)
 {
 	Query	   *parse = root->parse;
 	Path	   *cheapest_path = input_rel->cheapest_total_path;
 	RelOptInfo *grouped_rel;
 	RelOptInfo *partially_grouped_rel;
-	AggClauseCosts agg_partial_costs;	/* parallel only */
-	AggClauseCosts agg_final_costs; /* parallel only */
+	AggClauseCosts *agg_partial_costs = &extra->agg_partial_costs;
+	AggClauseCosts *agg_final_costs = &extra->agg_final_costs;
 	double		dNumGroups;
-	bool		can_hash;
-	bool		can_sort;
+	bool		can_hash = extra->can_hash;
+	bool		can_sort = extra->can_sort;
 	bool		try_parallel_aggregation;
 
 	/*
@@ -3750,48 +3845,11 @@ create_grouping_paths(PlannerInfo *root,
 									  gd);
 
 	/*
-	 * Determine whether it's possible to perform sort-based implementations
-	 * of grouping.  (Note that if groupClause is empty,
-	 * grouping_is_sortable() is trivially true, and all the
-	 * pathkeys_contained_in() tests will succeed too, so that we'll consider
-	 * every surviving input path.)
-	 *
-	 * If we have grouping sets, we might be able to sort some but not all of
-	 * them; in this case, we need can_sort to be true as long as we must
-	 * consider any sorted-input plan.
-	 */
-	can_sort = (gd && gd->rollups != NIL)
-		|| grouping_is_sortable(parse->groupClause);
-
-	/*
-	 * Determine whether we should consider hash-based implementations of
-	 * grouping.
-	 *
-	 * Hashed aggregation only applies if we're grouping. If we have grouping
-	 * sets, some groups might be hashable but others not; in this case we set
-	 * can_hash true as long as there is nothing globally preventing us from
-	 * hashing (and we should therefore consider plans with hashes).
-	 *
-	 * Executor doesn't support hashed aggregation with DISTINCT or ORDER BY
-	 * aggregates.  (Doing so would imply storing *all* the input values in
-	 * the hash table, and/or running many sorts in parallel, either of which
-	 * seems like a certain loser.)  We similarly don't support ordered-set
-	 * aggregates in hashed aggregation, but that case is also included in the
-	 * numOrderedAggs count.
-	 *
-	 * Note: grouping_is_hashable() is much more expensive to check than the
-	 * other gating conditions, so we want to do it last.
-	 */
-	can_hash = (parse->groupClause != NIL &&
-				agg_costs->numOrderedAggs == 0 &&
-				(gd ? gd->any_hashable : grouping_is_hashable(parse->groupClause)));
-
-	/*
 	 * Figure out whether a PartialAggregate/Finalize Aggregate execution
 	 * strategy is viable.
 	 */
 	try_parallel_aggregation = can_parallel_agg(root, input_rel, grouped_rel,
-												agg_costs);
+												extra);
 
 	/*
 	 * Before generating paths for grouped_rel, we first generate any possible
@@ -3812,38 +3870,45 @@ create_grouping_paths(PlannerInfo *root,
 		partial_grouping_target = make_partial_grouping_target(root, target);
 		partially_grouped_rel->reltarget = partial_grouping_target;
 
-		/*
-		 * Collect statistics about aggregates for estimating costs of
-		 * performing aggregation in parallel.
-		 */
-		MemSet(&agg_partial_costs, 0, sizeof(AggClauseCosts));
-		MemSet(&agg_final_costs, 0, sizeof(AggClauseCosts));
-		if (parse->hasAggs)
+		/* Set partial aggregation costs, if not already computed. */
+		if (!extra->partial_costs_set)
 		{
-			/* partial phase */
-			get_agg_clause_costs(root, (Node *) partial_grouping_target->exprs,
-								 AGGSPLIT_INITIAL_SERIAL,
-								 &agg_partial_costs);
-
-			/* final phase */
-			get_agg_clause_costs(root, (Node *) target->exprs,
-								 AGGSPLIT_FINAL_DESERIAL,
-								 &agg_final_costs);
-			get_agg_clause_costs(root, parse->havingQual,
-								 AGGSPLIT_FINAL_DESERIAL,
-								 &agg_final_costs);
+			/*
+			 * Collect statistics about aggregates for estimating costs of
+			 * performing aggregation in parallel.
+			 */
+			MemSet(agg_partial_costs, 0, sizeof(AggClauseCosts));
+			MemSet(agg_final_costs, 0, sizeof(AggClauseCosts));
+			if (parse->hasAggs)
+			{
+				/* partial phase */
+				get_agg_clause_costs(root,
+									 (Node *) partial_grouping_target->exprs,
+									 AGGSPLIT_INITIAL_SERIAL,
+									 agg_partial_costs);
+
+				/* final phase */
+				get_agg_clause_costs(root, (Node *) target->exprs,
+									 AGGSPLIT_FINAL_DESERIAL,
+									 agg_final_costs);
+				get_agg_clause_costs(root, parse->havingQual,
+									 AGGSPLIT_FINAL_DESERIAL,
+									 agg_final_costs);
+			}
+
+			extra->partial_costs_set = true;
 		}
 
 		add_paths_to_partial_grouping_rel(root, input_rel,
 										  partially_grouped_rel,
-										  &agg_partial_costs,
+										  agg_partial_costs,
 										  gd, can_sort, can_hash);
 	}
 
 	/* Build final grouping paths */
 	add_paths_to_grouping_rel(root, input_rel, grouped_rel, target,
 							  partially_grouped_rel, agg_costs,
-							  &agg_final_costs, gd, can_sort, can_hash,
+							  agg_final_costs, gd, can_sort, can_hash,
 							  dNumGroups, (List *) parse->havingQual);
 
 	/* Give a helpful error if we failed to find any implementation */
@@ -6380,7 +6445,7 @@ add_paths_to_partial_grouping_rel(PlannerInfo *root,
  */
 static bool
 can_parallel_agg(PlannerInfo *root, RelOptInfo *input_rel,
-				 RelOptInfo *grouped_rel, const AggClauseCosts *agg_costs)
+				 RelOptInfo *grouped_rel, GroupPathExtraData *extra)
 {
 	Query	   *parse = root->parse;
 
@@ -6407,7 +6472,7 @@ can_parallel_agg(PlannerInfo *root, RelOptInfo *input_rel,
 		/* We don't know how to do grouping sets in parallel. */
 		return false;
 	}
-	else if (agg_costs->hasNonPartial || agg_costs->hasNonSerial)
+	else if (extra->can_partial_agg)
 	{
 		/* Insufficient support for partial mode. */
 		return false;

Reply via email to