From 1d14a85d90857b5aaca8751bd92eadd0c8e1f2a9 Mon Sep 17 00:00:00 2001
From: Pengzhou Tang <ptang@pivotal.io>
Date: Tue, 24 Sep 2019 04:22:42 -0400
Subject: [PATCH] Support for parallel grouping sets

Grouping sets used to be executed in a single worker only; this patch
adds support for executing grouping sets in parallel across multiple
workers.

In the first stage, partial aggregates are performed by multiple
workers; each worker performs the aggregates for all grouping sets and
attaches a grouping set id to every tuple it emits, identifying which
grouping set the tuple belongs to. In the final stage, the gathered
tuples are dispatched to the corresponding grouping set according to
that id, and the combine aggregates are then performed per grouping
set. We cannot use the GROUPING() function to identify the grouping
set, because a grouping sets clause may contain duplicate grouping
sets.

The executor is also changed for the final stage:

For the AGG_HASHED strategy, all grouping sets still perform combine
aggregates in phase 0; the only difference is that each tuple selects
exactly one group in the final stage, so the non-selected groups must
be skipped.

For the AGG_MIXED strategy, phase 0 now needs to perform its own
aggregation as well.

For the AGG_SORTED strategy, the rollup is expanded, e.g.
rollup(<c1, c2>, <c1>, <>) is expanded into three rollups:
rollup(<c1, c2>), rollup(<c1>) and rollup(<>), so that tuples can be
dispatched to those three phases and aggregated there.
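
As an illustrative sketch only (the table, query, and exact EXPLAIN
text below are hypothetical, not output captured from this patch), a
query such as

    SELECT c1, c2, count(*)
    FROM t
    GROUP BY GROUPING SETS ((c1, c2), (c1), ());

could now be planned in two stages, roughly of this shape:

    Finalize MixedAggregate
      Dispatched by: <grouping set id of the input tuple>
      Hash Key: c1, c2
      Group Key: c1
      Group Key: ()
      ->  Gather
            ->  Partial MixedAggregate
                  Hash Key: c1, c2
                  Group Key: c1
                  Group Key: ()
                  ->  Parallel Seq Scan on t

Each worker computes partial aggregates for every grouping set and
tags its output tuples with a grouping set id; the finalize step uses
that id (shown as "Dispatched by" in EXPLAIN) to route each tuple to
its grouping set and combines the partial results there.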
---
 src/backend/commands/explain.c          |  10 +-
 src/backend/executor/execExpr.c         |  42 +++-
 src/backend/executor/execExprInterp.c   |  34 +++
 src/backend/executor/nodeAgg.c          | 319 ++++++++++++++++++++++++---
 src/backend/nodes/copyfuncs.c           |  55 ++++-
 src/backend/nodes/equalfuncs.c          |   3 +
 src/backend/nodes/nodeFuncs.c           |   8 +
 src/backend/nodes/outfuncs.c            |  13 +-
 src/backend/nodes/readfuncs.c           |  52 ++++-
 src/backend/optimizer/path/allpaths.c   |   3 +
 src/backend/optimizer/plan/createplan.c |  16 +-
 src/backend/optimizer/plan/planner.c    | 376 +++++++++++++++++++++++---------
 src/backend/optimizer/plan/setrefs.c    |  16 ++
 src/backend/optimizer/util/pathnode.c   |   4 +-
 src/backend/utils/adt/ruleutils.c       |   6 +
 src/include/executor/execExpr.h         |  19 ++
 src/include/executor/nodeAgg.h          |   9 +-
 src/include/nodes/execnodes.h           |  14 ++
 src/include/nodes/nodes.h               |   1 +
 src/include/nodes/pathnodes.h           |   2 +
 src/include/nodes/plannodes.h           |   4 +-
 src/include/nodes/primnodes.h           |   6 +
 src/include/optimizer/pathnode.h        |   3 +-
 src/include/optimizer/planmain.h        |   2 +-
 24 files changed, 857 insertions(+), 160 deletions(-)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 62fb343..f1a2e21 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -2206,12 +2206,16 @@ show_agg_keys(AggState *astate, List *ancestors,
 {
 	Agg		   *plan = (Agg *) astate->ss.ps.plan;
 
-	if (plan->numCols > 0 || plan->groupingSets)
+	if (plan->grpSetIdFilter)
+		show_expression(plan->grpSetIdFilter, "Dispatched by",
+						astate, ancestors, true, es);
+
+	if (plan->numCols > 0 || plan->rollup)
 	{
 		/* The key columns refer to the tlist of the child plan */
 		ancestors = lcons(astate, ancestors);
 
-		if (plan->groupingSets)
+		if (plan->rollup)
 			show_grouping_sets(outerPlanState(astate), plan, ancestors, es);
 		else
 			show_sort_group_keys(outerPlanState(astate), "Group Key",
@@ -2263,7 +2267,7 @@ show_grouping_set_keys(PlanState *planstate,
 	Plan	   *plan = planstate->plan;
 	char	   *exprstr;
 	ListCell   *lc;
-	List	   *gsets = aggnode->groupingSets;
+	List	   *gsets = aggnode->rollup->gsets;
 	AttrNumber *keycols = aggnode->grpColIdx;
 	const char *keyname;
 	const char *keysetname;
diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c
index 6d09f2a..27c8cd9 100644
--- a/src/backend/executor/execExpr.c
+++ b/src/backend/executor/execExpr.c
@@ -813,7 +813,7 @@ ExecInitExprRec(Expr *node, ExprState *state,
 
 				agg = (Agg *) (state->parent->plan);
 
-				if (agg->groupingSets)
+				if (agg->rollup)
 					scratch.d.grouping_func.clauses = grp_node->cols;
 				else
 					scratch.d.grouping_func.clauses = NIL;
@@ -822,6 +822,15 @@ ExecInitExprRec(Expr *node, ExprState *state,
 				break;
 			}
 
+		case T_GroupingSetId:
+			{
+				scratch.opcode = EEOP_GROUPING_SET_ID;
+				scratch.d.grouping_set_id.parent = (AggState *) state->parent;
+
+				ExprEvalPushStep(state, &scratch);
+				break;
+			}
+
 		case T_WindowFunc:
 			{
 				WindowFunc *wfunc = (WindowFunc *) node;
@@ -3214,6 +3223,7 @@ ExecBuildAggTransCall(ExprState *state, AggState *aggstate,
 {
 	int			adjust_init_jumpnull = -1;
 	int			adjust_strict_jumpnull = -1;
+	int			adjust_perhash_jumpnull = -1;
 	ExprContext *aggcontext;
 
 	if (ishash)
@@ -3246,6 +3256,30 @@ ExecBuildAggTransCall(ExprState *state, AggState *aggstate,
 		adjust_init_jumpnull = state->steps_len - 1;
 	}
 
+	/*
+	 * All grouping sets that use AGG_HASHED are handled in phase 0.
+	 * When combining the partial aggregate results, each input tuple
+	 * selects a group in only one of those sets, so we need to add a
+	 * check step that skips the transition for the sets that were not
+	 * selected.
+	 */
+	if (ishash && aggstate->grpsetid_filter &&
+		DO_AGGSPLIT_COMBINE(aggstate->aggsplit))
+	{
+		scratch->opcode = EEOP_AGG_PERHASH_NULL_CHECK;
+		scratch->d.agg_perhash_null_check.aggstate = aggstate;
+		scratch->d.agg_perhash_null_check.setno = setno;
+		scratch->d.agg_perhash_null_check.setoff = setoff;
+		scratch->d.agg_perhash_null_check.transno = transno;
+		scratch->d.agg_perhash_null_check.jumpnull = -1;	/* adjust later */
+		ExprEvalPushStep(state, scratch);
+
+		/*
+		 * Note, we don't push this into adjust_bailout here - those steps
+		 * jump to the very end, but we only want to skip this transition.
+		 */
+		adjust_perhash_jumpnull = state->steps_len - 1;
+	}
+
 	if (pertrans->numSortCols == 0 &&
 		fcinfo->flinfo->fn_strict)
 	{
@@ -3291,6 +3325,12 @@ ExecBuildAggTransCall(ExprState *state, AggState *aggstate,
 		Assert(as->d.agg_init_trans.jumpnull == -1);
 		as->d.agg_init_trans.jumpnull = state->steps_len;
 	}
+	if (adjust_perhash_jumpnull != -1)
+	{
+		ExprEvalStep *as = &state->steps[adjust_perhash_jumpnull];
+		Assert(as->d.agg_perhash_null_check.jumpnull == -1);
+		as->d.agg_perhash_null_check.jumpnull = state->steps_len;
+	}
 	if (adjust_strict_jumpnull != -1)
 	{
 		ExprEvalStep *as = &state->steps[adjust_strict_jumpnull];
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index 66a67c7..0895ad7 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -382,6 +382,7 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
 		&&CASE_EEOP_XMLEXPR,
 		&&CASE_EEOP_AGGREF,
 		&&CASE_EEOP_GROUPING_FUNC,
+		&&CASE_EEOP_GROUPING_SET_ID,
 		&&CASE_EEOP_WINDOW_FUNC,
 		&&CASE_EEOP_SUBPLAN,
 		&&CASE_EEOP_ALTERNATIVE_SUBPLAN,
@@ -390,6 +391,7 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
 		&&CASE_EEOP_AGG_STRICT_INPUT_CHECK_ARGS,
 		&&CASE_EEOP_AGG_STRICT_INPUT_CHECK_NULLS,
 		&&CASE_EEOP_AGG_INIT_TRANS,
+		&&CASE_EEOP_AGG_PERHASH_NULL_CHECK,
 		&&CASE_EEOP_AGG_STRICT_TRANS_CHECK,
 		&&CASE_EEOP_AGG_PLAIN_TRANS_BYVAL,
 		&&CASE_EEOP_AGG_PLAIN_TRANS,
@@ -1463,6 +1465,21 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
 			EEO_NEXT();
 		}
 
+		EEO_CASE(EEOP_GROUPING_SET_ID)
+		{
+			int			grpsetid;
+			AggState   *aggstate = (AggState *) op->d.grouping_set_id.parent;
+
+			if (aggstate->current_phase == 0)
+				grpsetid = aggstate->perhash[aggstate->current_set].grpsetid;
+			else
+				grpsetid = aggstate->phase->grpsetids[aggstate->current_set];
+
+			*op->resvalue = Int32GetDatum(grpsetid);
+			*op->resnull = false;
+			EEO_NEXT();
+		}
+
 		EEO_CASE(EEOP_WINDOW_FUNC)
 		{
 			/*
@@ -1586,6 +1603,23 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
 			EEO_NEXT();
 		}
 
+		EEO_CASE(EEOP_AGG_PERHASH_NULL_CHECK)
+		{
+			AggState   *aggstate;
+			AggStatePerGroup pergroup;
+
+			aggstate = op->d.agg_perhash_null_check.aggstate;
+			pergroup = aggstate->all_pergroups
+				[op->d.agg_perhash_null_check.setoff];
+
+			/*
+			 * Skip this transition if the group's grouping set was not
+			 * selected for the current input tuple.
+			 */
+			if (pergroup == NULL)
+				EEO_JUMP(op->d.agg_perhash_null_check.jumpnull);
+
+			EEO_NEXT();
+		}
+
 		/* check that a strict aggregate's input isn't NULL */
 		EEO_CASE(EEOP_AGG_STRICT_TRANS_CHECK)
 		{
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index a9a1fd0..ba9b3a3 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -226,6 +226,7 @@
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
+#include "nodes/pathnodes.h"
 #include "optimizer/optimizer.h"
 #include "parser/parse_agg.h"
 #include "parser/parse_coerce.h"
@@ -275,6 +276,7 @@ static void build_hash_table(AggState *aggstate);
 static TupleHashEntryData *lookup_hash_entry(AggState *aggstate);
 static void lookup_hash_entries(AggState *aggstate);
 static TupleTableSlot *agg_retrieve_direct(AggState *aggstate);
+static void agg_dispatch_input_tuples(AggState *aggstate);
 static void agg_fill_hash_table(AggState *aggstate);
 static TupleTableSlot *agg_retrieve_hash_table(AggState *aggstate);
 static Datum GetAggInitVal(Datum textInitVal, Oid transtype);
@@ -313,9 +315,6 @@ select_current_set(AggState *aggstate, int setno, bool is_hash)
 /*
  * Switch to phase "newphase", which must either be 0 or 1 (to reset) or
  * current_phase + 1. Juggle the tuplesorts accordingly.
- *
- * Phase 0 is for hashing, which we currently handle last in the AGG_MIXED
- * case, so when entering phase 0, all we need to do is drop open sorts.
  */
 static void
 initialize_phase(AggState *aggstate, int newphase)
@@ -332,6 +331,12 @@ initialize_phase(AggState *aggstate, int newphase)
 		aggstate->sort_in = NULL;
 	}
 
+	if (aggstate->store_in)
+	{
+		tuplestore_end(aggstate->store_in);
+		aggstate->store_in = NULL;	
+	}
+
 	if (newphase <= 1)
 	{
 		/*
@@ -345,21 +350,36 @@ initialize_phase(AggState *aggstate, int newphase)
 	}
 	else
 	{
-		/*
-		 * The old output tuplesort becomes the new input one, and this is the
-		 * right time to actually sort it.
+		/*
+		 * When combining partial grouping sets aggregate results, use the
+		 * sort_in or store_in that contains the dispatched tuples as the
+		 * input. Otherwise, use the sort_out of the previous phase.
 		 */
-		aggstate->sort_in = aggstate->sort_out;
+		if (DO_AGGSPLIT_COMBINE(aggstate->aggsplit))
+		{
+			aggstate->sort_in = aggstate->phases[newphase].sort_in;
+			aggstate->store_in = aggstate->phases[newphase].store_in;
+		}
+		else
+		{
+			aggstate->sort_in = aggstate->sort_out;
+			aggstate->store_in = NULL;
+		}
+
 		aggstate->sort_out = NULL;
-		Assert(aggstate->sort_in);
-		tuplesort_performsort(aggstate->sort_in);
+		Assert(aggstate->sort_in || aggstate->store_in);
+
+		/* This is the right time to actually sort it. */
+		if (aggstate->sort_in)
+			tuplesort_performsort(aggstate->sort_in);
 	}
 
 	/*
 	 * If this isn't the last phase, we need to sort appropriately for the
 	 * next phase in sequence.
 	 */
-	if (newphase > 0 && newphase < aggstate->numphases - 1)
+	if (aggstate->aggsplit != AGGSPLIT_FINAL_DESERIAL &&
+		newphase > 0 && newphase < aggstate->numphases - 1)
 	{
 		Sort	   *sortnode = aggstate->phases[newphase + 1].sortnode;
 		PlanState  *outerNode = outerPlanState(aggstate);
@@ -401,6 +421,15 @@ fetch_input_tuple(AggState *aggstate)
 			return NULL;
 		slot = aggstate->sort_slot;
 	}
+	else if (aggstate->store_in)
+	{
+		/* make sure we check for interrupts in either path through here */
+		CHECK_FOR_INTERRUPTS();
+		if (!tuplestore_gettupleslot(aggstate->store_in, true, false,
+									 aggstate->sort_slot))
+			return NULL;
+		slot = aggstate->sort_slot;
+	}
 	else
 		slot = ExecProcNode(outerPlanState(aggstate));
 
@@ -1527,6 +1556,22 @@ lookup_hash_entries(AggState *aggstate)
 	AggStatePerGroup *pergroup = aggstate->hash_pergroup;
 	int			setno;
 
+	if (aggstate->grpsetid_filter)
+	{
+		bool		dummynull;
+		int			grpsetid = DatumGetInt32(
+			ExecEvalExprSwitchContext(aggstate->grpsetid_filter,
+									  aggstate->tmpcontext, &dummynull));
+		GrpSetMapping *mapping = &aggstate->grpSetMappings[grpsetid];
+
+		/* Ignore tuples whose grouping set is not handled by a hash. */
+		if (!mapping->is_hashed)
+			return;
+
+		select_current_set(aggstate, mapping->index, true);
+		pergroup[mapping->index] = lookup_hash_entry(aggstate)->additional;
+		return;
+	}
+
 	for (setno = 0; setno < numHashes; setno++)
 	{
 		select_current_set(aggstate, setno, true);
@@ -1569,6 +1614,9 @@ ExecAgg(PlanState *pstate)
 				break;
 			case AGG_PLAIN:
 			case AGG_SORTED:
+				if (node->grpsetid_filter && !node->input_dispatched)
+					agg_dispatch_input_tuples(node);
+
 				result = agg_retrieve_direct(node);
 				break;
 		}
@@ -1680,10 +1728,20 @@ agg_retrieve_direct(AggState *aggstate)
 			else if (aggstate->aggstrategy == AGG_MIXED)
 			{
 				/*
-				 * Mixed mode; we've output all the grouped stuff and have
-				 * full hashtables, so switch to outputting those.
+				 * Mixed mode; in the non-combine case we've output all the
+				 * grouped stuff and have full hashtables, so switch to
+				 * outputting those.  In the combine case phase one did not
+				 * fill the hashtables, so we do that ourselves here.
 				 */
 				initialize_phase(aggstate, 0);
+
+				if (DO_AGGSPLIT_COMBINE(aggstate->aggsplit))
+				{
+					/* use the store_in which contains the dispatched tuples */
+					aggstate->store_in = aggstate->phase->store_in;
+					agg_fill_hash_table(aggstate);
+				}
+
 				aggstate->table_filled = true;
 				ResetTupleHashIterator(aggstate->perhash[0].hashtable,
 									   &aggstate->perhash[0].hashiter);
@@ -1838,7 +1896,8 @@ agg_retrieve_direct(AggState *aggstate)
 					 * hashtables as well in advance_aggregates.
 					 */
 					if (aggstate->aggstrategy == AGG_MIXED &&
-						aggstate->current_phase == 1)
+						aggstate->current_phase == 1 &&
+						!aggstate->grpsetid_filter)
 					{
 						lookup_hash_entries(aggstate);
 					}
@@ -1921,6 +1980,122 @@ agg_retrieve_direct(AggState *aggstate)
 }
 
 /*
+ * ExecAgg for parallel grouping sets:
+ *
+ * When combining the partial grouping sets aggregate results from workers,
+ * the input contains tuples from different grouping sets mixed together. To
+ * avoid unnecessary work, the tuples are pre-dispatched to their
+ * corresponding phases first.
+ *
+ * This function must be called in phase one, which is an AGG_SORTED or
+ * AGG_PLAIN phase.
+ */
+static void
+agg_dispatch_input_tuples(AggState *aggstate)
+{
+	int	grpsetid;
+	int phase;
+	bool isNull;
+	PlanState *saved_sort;
+	ExprContext *tmpcontext = aggstate->tmpcontext;
+	GrpSetMapping *mapping;
+	TupleTableSlot *outerslot;
+	AggStatePerPhase perphase;
+
+	/* prepare tuplestore or tuplesort for each phase */
+	for (phase = 0; phase < aggstate->numphases; phase++)
+	{
+		perphase = &aggstate->phases[phase];
+
+		if (!perphase->aggnode)
+			continue;
+
+		if (perphase->aggstrategy == AGG_SORTED)
+		{
+			PlanState *outerNode = outerPlanState(aggstate);
+			TupleDesc tupDesc = ExecGetResultType(outerNode);
+			Sort *sortnode = (Sort *) outerNode->plan;
+
+			Assert(perphase->aggstrategy == AGG_SORTED);
+
+			perphase->sort_in = tuplesort_begin_heap(tupDesc,
+													 sortnode->numCols,
+													 sortnode->sortColIdx,
+													 sortnode->sortOperators,
+													 sortnode->collations,
+													 sortnode->nullsFirst,
+													 work_mem,
+													 NULL, false);
+		}
+		else
+			perphase->store_in = tuplestore_begin_heap(false, false, work_mem);
+	}
+
+	/*
+	 * If phase one is AGG_SORTED, we cannot run the Sort node beneath it
+	 * directly, because its input contains tuples from different grouping
+	 * sets; we need to dispatch the tuples first and sort them afterwards.
+	 *
+	 * To do this, we temporarily replace the outer plan of the current Agg
+	 * node with the child of the Sort node.
+	 *
+	 * This is unnecessary for AGG_PLAIN.
+	 */
+	if (aggstate->phase->aggstrategy == AGG_SORTED)
+	{
+		saved_sort = outerPlanState(aggstate);
+		outerPlanState(aggstate) = outerPlanState(outerPlanState(aggstate));
+	}
+
+	for (;;)
+	{
+		outerslot = fetch_input_tuple(aggstate);
+		if (TupIsNull(outerslot))
+			break;
+
+		/* set up for advance_aggregates */
+		tmpcontext->ecxt_outertuple = outerslot;
+		grpsetid = ExecEvalExprSwitchContext(aggstate->grpsetid_filter,
+											 tmpcontext,
+											 &isNull);
+
+		/* route the tuple to its corresponding phase using the grouping set id */
+		mapping = &aggstate->grpSetMappings[grpsetid];
+		if (!mapping->is_hashed)
+		{
+			perphase = &aggstate->phases[mapping->index];
+
+			if (perphase->aggstrategy == AGG_SORTED)
+				tuplesort_puttupleslot(perphase->sort_in, outerslot);
+			else
+				tuplestore_puttupleslot(perphase->store_in, outerslot);
+		}
+		else
+			tuplestore_puttupleslot(aggstate->phases[0].store_in, outerslot);
+
+		ResetExprContext(aggstate->tmpcontext);
+	}
+
+	/* Restore the outer plan and perform the sorting here. */
+	if (aggstate->phase->aggstrategy == AGG_SORTED)
+	{
+		outerPlanState(aggstate) = saved_sort;
+		tuplesort_performsort(aggstate->phase->sort_in);
+	}
+
+	/*
+	 * Reinitialize phase one to use the sort_in or store_in that
+	 * contains the dispatched tuples.
+	 */
+	aggstate->sort_in = aggstate->phase->sort_in;
+	aggstate->store_in = aggstate->phase->store_in;
+	select_current_set(aggstate, 0, false);
+
+	/* mark the input dispatched */
+	aggstate->input_dispatched = true;
+}
+
+/*
  * ExecAgg for hashed case: read input and build hash table
  */
 static void
@@ -2146,6 +2321,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 	aggstate->grp_firstTuple = NULL;
 	aggstate->sort_in = NULL;
 	aggstate->sort_out = NULL;
+	aggstate->input_dispatched = false;
 
 	/*
 	 * phases[0] always exists, but is dummy in sorted/plain mode
@@ -2158,16 +2334,16 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 	 * determines the size of some allocations.  Also calculate the number of
 	 * phases, since all hashed/mixed nodes contribute to only a single phase.
 	 */
-	if (node->groupingSets)
+	if (node->rollup)
 	{
-		numGroupingSets = list_length(node->groupingSets);
+		numGroupingSets = list_length(node->rollup->gsets);
 
 		foreach(l, node->chain)
 		{
 			Agg		   *agg = lfirst(l);
 
 			numGroupingSets = Max(numGroupingSets,
-								  list_length(agg->groupingSets));
+								  list_length(agg->rollup->gsets));
 
 			/*
 			 * additional AGG_HASHED aggs become part of phase 0, but all
@@ -2186,6 +2362,15 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 	aggstate->aggcontexts = (ExprContext **)
 		palloc0(sizeof(ExprContext *) * numGroupingSets);
 
+	/*
+	 * When combining the partial grouping sets aggregate results, we
+	 * need a grpsetid mapping to find the corresponding perhash or
+	 * perphase data.
+	 */
+	if (DO_AGGSPLIT_COMBINE(node->aggsplit) && node->rollup)
+		aggstate->grpSetMappings = (GrpSetMapping *)
+			palloc0(sizeof(GrpSetMapping) * (numPhases + numHashes));
+
 	/*
 	 * Create expression contexts.  We need three or more, one for
 	 * per-input-tuple processing, one for per-output-tuple processing, one
@@ -2243,8 +2428,13 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 	/*
 	 * If there are more than two phases (including a potential dummy phase
 	 * 0), input will be resorted using tuplesort. Need a slot for that.
+	 *
+	 * Likewise, when combining the partial grouping sets aggregate results,
+	 * input for an AGG_HASHED rollup uses a tuplestore. Need a slot for that.
 	 */
-	if (numPhases > 2)
+	if (numPhases > 2 ||
+		(DO_AGGSPLIT_COMBINE(node->aggsplit) &&
+		 node->aggstrategy == AGG_MIXED))
 	{
 		aggstate->sort_slot = ExecInitExtraTupleSlot(estate, scanDesc,
 													 &TTSOpsMinimalTuple);
@@ -2291,6 +2481,14 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 		ExecInitQual(node->plan.qual, (PlanState *) aggstate);
 
 	/*
+	 * Initialize the grouping set id expression, which identifies the
+	 * grouping set an input tuple belongs to when combining partial
+	 * grouping sets aggregate results.
+	 */
+	aggstate->grpsetid_filter = ExecInitExpr((Expr *) node->grpSetIdFilter,
+											 (PlanState *)aggstate);
+
+	/*
 	 * We should now have found all Aggrefs in the targetlist and quals.
 	 */
 	numaggs = aggstate->numaggs;
@@ -2348,6 +2546,21 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 			/* but the actual Agg node representing this hash is saved here */
 			perhash->aggnode = aggnode;
 
+			if (aggnode->rollup)
+			{
+				GroupingSetData *gs =
+					linitial_node(GroupingSetData, aggnode->rollup->gsets_data);
+
+				perhash->grpsetid = gs->grpsetId;
+
+				/* add a mapping when combining */
+				if (DO_AGGSPLIT_COMBINE(aggnode->aggsplit))
+				{
+					aggstate->grpSetMappings[perhash->grpsetid].is_hashed = true;
+					aggstate->grpSetMappings[perhash->grpsetid].index = i;
+				}
+			}
+
 			phasedata->gset_lengths[i] = perhash->numCols = aggnode->numCols;
 
 			for (j = 0; j < aggnode->numCols; ++j)
@@ -2363,18 +2576,21 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 			AggStatePerPhase phasedata = &aggstate->phases[++phase];
 			int			num_sets;
 
-			phasedata->numsets = num_sets = list_length(aggnode->groupingSets);
+			phasedata->numsets = num_sets = aggnode->rollup ?
+										list_length(aggnode->rollup->gsets) : 0;
 
 			if (num_sets)
 			{
 				phasedata->gset_lengths = palloc(num_sets * sizeof(int));
 				phasedata->grouped_cols = palloc(num_sets * sizeof(Bitmapset *));
+				phasedata->grpsetids = palloc(num_sets * sizeof(int));
 
 				i = 0;
-				foreach(l, aggnode->groupingSets)
+				foreach(l, aggnode->rollup->gsets_data)
 				{
-					int			current_length = list_length(lfirst(l));
 					Bitmapset  *cols = NULL;
+					GroupingSetData *gs = lfirst_node(GroupingSetData, l);
+					int	current_length = list_length(gs->set);
 
 					/* planner forces this to be correct */
 					for (j = 0; j < current_length; ++j)
@@ -2382,12 +2598,19 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 
 					phasedata->grouped_cols[i] = cols;
 					phasedata->gset_lengths[i] = current_length;
-
+					phasedata->grpsetids[i] = gs->grpsetId;
 					++i;
 				}
 
 				all_grouped_cols = bms_add_members(all_grouped_cols,
 												   phasedata->grouped_cols[0]);
+
+				/* add a mapping when combining */
+				if (DO_AGGSPLIT_COMBINE(node->aggsplit))
+				{
+					aggstate->grpSetMappings[phasedata->grpsetids[0]].is_hashed = false;
+					aggstate->grpSetMappings[phasedata->grpsetids[0]].index = phase;
+				}
 			}
 			else
 			{
@@ -2871,23 +3094,50 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 		if (!phase->aggnode)
 			continue;
 
-		if (aggstate->aggstrategy == AGG_MIXED && phaseidx == 1)
+		if (aggstate->aggstrategy == AGG_MIXED &&
+			phaseidx == 1)
 		{
-			/*
-			 * Phase one, and only phase one, in a mixed agg performs both
-			 * sorting and aggregation.
-			 */
-			dohash = true;
-			dosort = true;
+			if (!DO_AGGSPLIT_COMBINE(aggstate->aggsplit))
+			{
+				/*
+				 * Phase one, and only phase one, in a mixed agg performs both
+				 * sorting and aggregation.
+				 */
+				dohash = true;
+				dosort = true;
+			}
+			else
+			{
+				/*
+				 * When combining partial grouping sets aggregate results,
+				 * input is dispatched according to the grouping set id, so
+				 * we cannot perform both sorted and hashed aggregation in
+				 * one phase; perform only the sorted aggregation here.
+				 */
+				dohash = false;
+				dosort = true;
+			}
 		}
 		else if (aggstate->aggstrategy == AGG_MIXED && phaseidx == 0)
 		{
-			/*
-			 * No need to compute a transition function for an AGG_MIXED phase
-			 * 0 - the contents of the hashtables will have been computed
-			 * during phase 1.
-			 */
-			continue;
+			if (!DO_AGGSPLIT_COMBINE(aggstate->aggsplit))
+			{
+				/*
+				 * No need to compute a transition function for an AGG_MIXED phase
+				 * 0 - the contents of the hashtables will have been computed
+				 * during phase 1.
+				 */
+				continue;
+			}
+			else
+			{
+				/*
+				 * When combining partial grouping sets aggregate results,
+				 * phase 0 needs to do its own hashed aggregation.
+				 */
+				dohash = true;
+				dosort = false;
+			}
 		}
 		else if (phase->aggstrategy == AGG_PLAIN ||
 				 phase->aggstrategy == AGG_SORTED)
@@ -3440,6 +3690,7 @@ ExecReScanAgg(AggState *node)
 	int			setno;
 
 	node->agg_done = false;
+	node->input_dispatched = false;
 
 	if (node->aggstrategy == AGG_HASHED)
 	{
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index a2617c7..d3ec4b5 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -986,7 +986,7 @@ _copyAgg(const Agg *from)
 	}
 	COPY_SCALAR_FIELD(numGroups);
 	COPY_BITMAPSET_FIELD(aggParams);
-	COPY_NODE_FIELD(groupingSets);
+	COPY_NODE_FIELD(rollup);
 	COPY_NODE_FIELD(chain);
 
 	return newnode;
@@ -1474,6 +1474,50 @@ _copyGroupingFunc(const GroupingFunc *from)
 }
 
 /*
+ * _copyGroupingSetId
+ */
+static GroupingSetId *
+_copyGroupingSetId(const GroupingSetId *from)
+{
+	GroupingSetId *newnode = makeNode(GroupingSetId);
+
+	return newnode;
+}
+
+/*
+ * _copyRollupData
+ */
+static RollupData *
+_copyRollupData(const RollupData *from)
+{
+	RollupData *newnode = makeNode(RollupData);
+
+	COPY_NODE_FIELD(groupClause);
+	COPY_NODE_FIELD(gsets);
+	COPY_NODE_FIELD(gsets_data);
+	COPY_SCALAR_FIELD(numGroups);
+	COPY_SCALAR_FIELD(hashable);
+	COPY_SCALAR_FIELD(is_hashed);
+
+	return newnode;
+}
+
+/*
+ * _copyGroupingSetData
+ */
+static GroupingSetData *
+_copyGroupingSetData(const GroupingSetData *from)
+{
+	GroupingSetData *newnode = makeNode(GroupingSetData);
+
+	COPY_NODE_FIELD(set);
+	COPY_SCALAR_FIELD(grpsetId);
+	COPY_SCALAR_FIELD(numGroups);
+
+	return newnode;
+}
+
+/*
  * _copyWindowFunc
  */
 static WindowFunc *
@@ -4938,6 +4982,9 @@ copyObjectImpl(const void *from)
 		case T_GroupingFunc:
 			retval = _copyGroupingFunc(from);
 			break;
+		case T_GroupingSetId:
+			retval = _copyGroupingSetId(from);
+			break;
 		case T_WindowFunc:
 			retval = _copyWindowFunc(from);
 			break;
@@ -5568,6 +5615,12 @@ copyObjectImpl(const void *from)
 		case T_SortGroupClause:
 			retval = _copySortGroupClause(from);
 			break;
+		case T_RollupData:
+			retval = _copyRollupData(from);
+			break;
+		case T_GroupingSetData:
+			retval = _copyGroupingSetData(from);
+			break;
 		case T_GroupingSet:
 			retval = _copyGroupingSet(from);
 			break;
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index 4f2ebe5..dec6d4f 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -3049,6 +3049,9 @@ equal(const void *a, const void *b)
 		case T_GroupingFunc:
 			retval = _equalGroupingFunc(a, b);
 			break;
+		case T_GroupingSetId:
+			retval = true;
+			break;
 		case T_WindowFunc:
 			retval = _equalWindowFunc(a, b);
 			break;
diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c
index 18bd5ac..8dc702f 100644
--- a/src/backend/nodes/nodeFuncs.c
+++ b/src/backend/nodes/nodeFuncs.c
@@ -63,6 +63,9 @@ exprType(const Node *expr)
 		case T_GroupingFunc:
 			type = INT4OID;
 			break;
+		case T_GroupingSetId:
+			type = INT4OID;
+			break;
 		case T_WindowFunc:
 			type = ((const WindowFunc *) expr)->wintype;
 			break;
@@ -741,6 +744,9 @@ exprCollation(const Node *expr)
 		case T_GroupingFunc:
 			coll = InvalidOid;
 			break;
+		case T_GroupingSetId:
+			coll = InvalidOid;
+			break;
 		case T_WindowFunc:
 			coll = ((const WindowFunc *) expr)->wincollid;
 			break;
@@ -1870,6 +1876,7 @@ expression_tree_walker(Node *node,
 		case T_NextValueExpr:
 		case T_RangeTblRef:
 		case T_SortGroupClause:
+		case T_GroupingSetId:
 			/* primitive node types with no expression subnodes */
 			break;
 		case T_WithCheckOption:
@@ -2506,6 +2513,7 @@ expression_tree_mutator(Node *node,
 		case T_NextValueExpr:
 		case T_RangeTblRef:
 		case T_SortGroupClause:
+		case T_GroupingSetId:
 			return (Node *) copyObject(node);
 		case T_WithCheckOption:
 			{
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index e6ce8e2..b3ff513 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -781,7 +781,7 @@ _outAgg(StringInfo str, const Agg *node)
 	WRITE_OID_ARRAY(grpCollations, node->numCols);
 	WRITE_LONG_FIELD(numGroups);
 	WRITE_BITMAPSET_FIELD(aggParams);
-	WRITE_NODE_FIELD(groupingSets);
+	WRITE_NODE_FIELD(rollup);
 	WRITE_NODE_FIELD(chain);
 }
 
@@ -1146,6 +1146,13 @@ _outGroupingFunc(StringInfo str, const GroupingFunc *node)
 }
 
 static void
+_outGroupingSetId(StringInfo str,
+				  const GroupingSetId *node pg_attribute_unused())
+{
+	WRITE_NODE_TYPE("GROUPINGSETID");
+}
+
+static void
 _outWindowFunc(StringInfo str, const WindowFunc *node)
 {
 	WRITE_NODE_TYPE("WINDOWFUNC");
@@ -1996,6 +2003,7 @@ _outGroupingSetData(StringInfo str, const GroupingSetData *node)
 	WRITE_NODE_TYPE("GSDATA");
 
 	WRITE_NODE_FIELD(set);
+	WRITE_INT_FIELD(grpsetId);
 	WRITE_FLOAT_FIELD(numGroups, "%.0f");
 }
 
@@ -3824,6 +3832,9 @@ outNode(StringInfo str, const void *obj)
 			case T_GroupingFunc:
 				_outGroupingFunc(str, obj);
 				break;
+			case T_GroupingSetId:
+				_outGroupingSetId(str, obj);
+				break;
 			case T_WindowFunc:
 				_outWindowFunc(str, obj);
 				break;
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
index 764e3bb..4f76957 100644
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -637,6 +637,50 @@ _readGroupingFunc(void)
 }
 
 /*
+ * _readGroupingSetId
+ */
+static GroupingSetId *
+_readGroupingSetId(void)
+{
+	READ_LOCALS_NO_FIELDS(GroupingSetId);
+
+	READ_DONE();
+}
+
+/*
+ * _readRollupData
+ */
+static RollupData *
+_readRollupData(void)
+{
+	READ_LOCALS(RollupData);
+
+	READ_NODE_FIELD(groupClause);
+	READ_NODE_FIELD(gsets);
+	READ_NODE_FIELD(gsets_data);
+	READ_FLOAT_FIELD(numGroups);
+	READ_BOOL_FIELD(hashable);
+	READ_BOOL_FIELD(is_hashed);
+
+	READ_DONE();
+}
+
+/*
+ * _readGroupingSetData
+ */
+static GroupingSetData *
+_readGroupingSetData(void)
+{
+	READ_LOCALS(GroupingSetData);
+
+	READ_NODE_FIELD(set);
+	READ_INT_FIELD(grpsetId);
+	READ_FLOAT_FIELD(numGroups);
+
+	READ_DONE();
+}
+
+/*
  * _readWindowFunc
  */
 static WindowFunc *
@@ -2171,7 +2215,7 @@ _readAgg(void)
 	READ_OID_ARRAY(grpCollations, local_node->numCols);
 	READ_LONG_FIELD(numGroups);
 	READ_BITMAPSET_FIELD(aggParams);
-	READ_NODE_FIELD(groupingSets);
+	READ_NODE_FIELD(rollup);
 	READ_NODE_FIELD(chain);
 
 	READ_DONE();
@@ -2607,6 +2651,12 @@ parseNodeString(void)
 		return_value = _readAggref();
 	else if (MATCH("GROUPINGFUNC", 12))
 		return_value = _readGroupingFunc();
+	else if (MATCH("GROUPINGSETID", 13))
+		return_value = _readGroupingSetId();
+	else if (MATCH("ROLLUP", 6))
+		return_value = _readRollupData();
+	else if (MATCH("GSDATA", 6))
+		return_value = _readGroupingSetData();
 	else if (MATCH("WINDOWFUNC", 10))
 		return_value = _readWindowFunc();
 	else if (MATCH("SUBSCRIPTINGREF", 15))
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index db3a68a..a357f37 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -2708,6 +2708,9 @@ generate_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows)
 						   NULL, rowsp);
 	add_path(rel, simple_gather_path);
 
+	if (root->parse->groupingSets)
+		return;
+
 	/*
 	 * For each useful ordering, we can consider an order-preserving Gather
 	 * Merge.
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 0c03620..6fb1a98 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -1639,7 +1639,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags)
 								 groupColIdx,
 								 groupOperators,
 								 groupCollations,
-								 NIL,
+								 NULL,
 								 NIL,
 								 best_path->path.rows,
 								 subplan);
@@ -2091,7 +2091,7 @@ create_agg_plan(PlannerInfo *root, AggPath *best_path)
 					extract_grouping_ops(best_path->groupClause),
 					extract_grouping_collations(best_path->groupClause,
 												subplan->targetlist),
-					NIL,
+					NULL,
 					NIL,
 					best_path->numGroups,
 					subplan);
@@ -2247,12 +2247,12 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path)
 			agg_plan = (Plan *) make_agg(NIL,
 										 NIL,
 										 strat,
-										 AGGSPLIT_SIMPLE,
+										 best_path->aggsplit,
 										 list_length((List *) linitial(rollup->gsets)),
 										 new_grpColIdx,
 										 extract_grouping_ops(rollup->groupClause),
 										 extract_grouping_collations(rollup->groupClause, subplan->targetlist),
-										 rollup->gsets,
+										 rollup,
 										 NIL,
 										 rollup->numGroups,
 										 sort_plan);
@@ -2285,12 +2285,12 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path)
 		plan = make_agg(build_path_tlist(root, &best_path->path),
 						best_path->qual,
 						best_path->aggstrategy,
-						AGGSPLIT_SIMPLE,
+						best_path->aggsplit,
 						numGroupCols,
 						top_grpColIdx,
 						extract_grouping_ops(rollup->groupClause),
 						extract_grouping_collations(rollup->groupClause, subplan->targetlist),
-						rollup->gsets,
+						rollup,
 						chain,
 						rollup->numGroups,
 						subplan);
@@ -6189,7 +6189,7 @@ Agg *
 make_agg(List *tlist, List *qual,
 		 AggStrategy aggstrategy, AggSplit aggsplit,
 		 int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations,
-		 List *groupingSets, List *chain,
+		 RollupData *rollup, List *chain,
 		 double dNumGroups, Plan *lefttree)
 {
 	Agg		   *node = makeNode(Agg);
@@ -6207,7 +6207,7 @@ make_agg(List *tlist, List *qual,
 	node->grpCollations = grpCollations;
 	node->numGroups = numGroups;
 	node->aggParams = NULL;		/* SS_finalize_plan() will fill this */
-	node->groupingSets = groupingSets;
+	node->rollup = rollup;
 	node->chain = chain;
 
 	plan->qual = qual;
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 17c5f08..f147cac 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -107,6 +107,7 @@ typedef struct
 typedef struct
 {
 	List	   *rollups;
+	List	   *final_rollups;
 	List	   *hash_sets_idx;
 	double		dNumHashGroups;
 	bool		any_hashable;
@@ -114,6 +115,7 @@ typedef struct
 	Bitmapset  *unhashable_refs;
 	List	   *unsortable_sets;
 	int		   *tleref_to_colnum_map;
+	int		   numGroupingSets;
 } grouping_sets_data;
 
 /*
@@ -127,6 +129,8 @@ typedef struct
 								 * clauses per Window */
 } WindowClauseSortData;
 
+typedef void (*add_path_callback) (RelOptInfo *parent_rel, Path *new_path);
+
 /* Local functions */
 static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
 static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
@@ -143,7 +147,8 @@ static double preprocess_limit(PlannerInfo *root,
 static void remove_useless_groupby_columns(PlannerInfo *root);
 static List *preprocess_groupclause(PlannerInfo *root, List *force);
 static List *extract_rollup_sets(List *groupingSets);
-static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
+static List *reorder_grouping_sets(grouping_sets_data *gd,
+								   List *groupingSets, List *sortclause);
 static void standard_qp_callback(PlannerInfo *root, void *extra);
 static double get_number_of_groups(PlannerInfo *root,
 								   double path_rows,
@@ -176,7 +181,10 @@ static void consider_groupingsets_paths(PlannerInfo *root,
 										bool can_hash,
 										grouping_sets_data *gd,
 										const AggClauseCosts *agg_costs,
-										double dNumGroups);
+										double dNumGroups,
+										List *havingQual,
+										AggSplit aggsplit);
+
 static RelOptInfo *create_window_paths(PlannerInfo *root,
 									   RelOptInfo *input_rel,
 									   PathTarget *input_target,
@@ -2437,6 +2445,8 @@ preprocess_grouping_sets(PlannerInfo *root)
 	int			maxref = 0;
 	ListCell   *lc;
 	ListCell   *lc_set;
+	ListCell   *lc_rollup;
+	RollupData *rollup;
 	grouping_sets_data *gd = palloc0(sizeof(grouping_sets_data));
 
 	parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1);
@@ -2488,6 +2498,7 @@ preprocess_grouping_sets(PlannerInfo *root)
 				GroupingSetData *gs = makeNode(GroupingSetData);
 
 				gs->set = gset;
+				gs->grpsetId = gd->numGroupingSets++;
 				gd->unsortable_sets = lappend(gd->unsortable_sets, gs);
 
 				/*
@@ -2519,8 +2530,8 @@ preprocess_grouping_sets(PlannerInfo *root)
 	foreach(lc_set, sets)
 	{
 		List	   *current_sets = (List *) lfirst(lc_set);
-		RollupData *rollup = makeNode(RollupData);
 		GroupingSetData *gs;
+		rollup = makeNode(RollupData);
 
 		/*
 		 * Reorder the current list of grouping sets into correct prefix
@@ -2532,7 +2543,7 @@ preprocess_grouping_sets(PlannerInfo *root)
 		 * largest-member-first, and applies the GroupingSetData annotations,
 		 * though the data will be filled in later.
 		 */
-		current_sets = reorder_grouping_sets(current_sets,
+		current_sets = reorder_grouping_sets(gd, current_sets,
 											 (list_length(sets) == 1
 											  ? parse->sortClause
 											  : NIL));
@@ -2584,6 +2595,33 @@ preprocess_grouping_sets(PlannerInfo *root)
 		gd->rollups = lappend(gd->rollups, rollup);
 	}
 
+	/* Build final_rollups: one single-grouping-set rollup per grouping set. */
+	foreach(lc_rollup, gd->rollups)
+	{
+		RollupData *initial_rollup = lfirst(lc_rollup);
+
+		foreach(lc, initial_rollup->gsets_data)
+		{
+			GroupingSetData *gs = lfirst(lc);
+			rollup = makeNode(RollupData);
+
+			if (gs->set == NIL)
+				rollup->groupClause = NIL;	
+			else
+				rollup->groupClause = preprocess_groupclause(root, gs->set);
+			rollup->gsets_data = list_make1(gs);
+			rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
+													 rollup->gsets_data,
+													 gd->tleref_to_colnum_map);
+
+			rollup->numGroups = gs->numGroups;
+			rollup->hashable = initial_rollup->hashable;
+			rollup->is_hashed = initial_rollup->is_hashed;
+
+			gd->final_rollups = lappend(gd->final_rollups, rollup);
+		}
+	}
+
 	if (gd->unsortable_sets)
 	{
 		/*
@@ -3541,7 +3579,7 @@ extract_rollup_sets(List *groupingSets)
  * gets implemented in one pass.)
  */
 static List *
-reorder_grouping_sets(List *groupingsets, List *sortclause)
+reorder_grouping_sets(grouping_sets_data *gd, List *groupingsets, List *sortclause)
 {
 	ListCell   *lc;
 	List	   *previous = NIL;
@@ -3575,6 +3613,7 @@ reorder_grouping_sets(List *groupingsets, List *sortclause)
 		previous = list_concat(previous, new_elems);
 
 		gs->set = list_copy(previous);
+		gs->grpsetId = gd->numGroupingSets++;
 		result = lcons(gs, result);
 	}
 
@@ -3725,6 +3764,30 @@ get_number_of_groups(PlannerInfo *root,
 				dNumGroups += rollup->numGroups;
 			}
 
+			foreach(lc, gd->final_rollups)
+			{
+				RollupData *rollup = lfirst_node(RollupData, lc);
+				ListCell   *lc;
+
+				groupExprs = get_sortgrouplist_exprs(rollup->groupClause,
+													 target_list);
+
+				rollup->numGroups = 0.0;
+
+				forboth(lc, rollup->gsets, lc2, rollup->gsets_data)
+				{
+					List	   *gset = (List *) lfirst(lc);
+					GroupingSetData *gs = lfirst_node(GroupingSetData, lc2);
+					double		numGroups = estimate_num_groups(root,
+																groupExprs,
+																path_rows,
+																&gset);
+
+					gs->numGroups = numGroups;
+					rollup->numGroups += numGroups;
+				}
+			}
+
 			if (gd->hash_sets_idx)
 			{
 				ListCell   *lc;
@@ -4190,9 +4253,26 @@ consider_groupingsets_paths(PlannerInfo *root,
 							bool can_hash,
 							grouping_sets_data *gd,
 							const AggClauseCosts *agg_costs,
-							double dNumGroups)
+							double dNumGroups,
+							List *havingQual,
+							AggSplit aggsplit)
 {
-	Query	   *parse = root->parse;
+	/* Partial paths are added to the partial_pathlist instead of the pathlist. */
+	add_path_callback add_path_cb =
+		(aggsplit == AGGSPLIT_INITIAL_SERIAL) ? add_partial_path : add_path;
+
+	/*
+	 * If we are combining the partial grouping sets aggregation, the input
+	 * contains tuples from different grouping sets mixed together; the
+	 * executor dispatches each tuple to a rollup (phase) by grouping set id.
+	 *
+	 * We cannot reuse the rollups of the initial stage, in which each tuple
+	 * is processed by one or more grouping sets within one rollup, because
+	 * in the combining stage each tuple belongs to exactly one grouping set.
+	 * So we use final_rollups instead, in which each rollup contains only
+	 * one grouping set.
+	 */
+	List *rollups = DO_AGGSPLIT_COMBINE(aggsplit) ? gd->final_rollups : gd->rollups;
 
 	/*
 	 * If we're not being offered sorted input, then only consider plans that
@@ -4213,7 +4293,7 @@ consider_groupingsets_paths(PlannerInfo *root,
 		List	   *empty_sets_data = NIL;
 		List	   *empty_sets = NIL;
 		ListCell   *lc;
-		ListCell   *l_start = list_head(gd->rollups);
+		ListCell   *l_start = list_head(rollups);
 		AggStrategy strat = AGG_HASHED;
 		double		hashsize;
 		double		exclude_groups = 0.0;
@@ -4245,7 +4325,7 @@ consider_groupingsets_paths(PlannerInfo *root,
 		{
 			unhashed_rollup = lfirst_node(RollupData, l_start);
 			exclude_groups = unhashed_rollup->numGroups;
-			l_start = lnext(gd->rollups, l_start);
+			l_start = lnext(rollups, l_start);
 		}
 
 		hashsize = estimate_hashagg_tablesize(path,
@@ -4253,11 +4333,11 @@ consider_groupingsets_paths(PlannerInfo *root,
 											  dNumGroups - exclude_groups);
 
 		/*
-		 * gd->rollups is empty if we have only unsortable columns to work
+		 * rollups is empty if we have only unsortable columns to work
 		 * with.  Override work_mem in that case; otherwise, we'll rely on the
 		 * sorted-input case to generate usable mixed paths.
 		 */
-		if (hashsize > work_mem * 1024L && gd->rollups)
+		if (hashsize > work_mem * 1024L && rollups)
 			return;				/* nope, won't fit */
 
 		/*
@@ -4266,7 +4346,7 @@ consider_groupingsets_paths(PlannerInfo *root,
 		 */
 		sets_data = list_copy(gd->unsortable_sets);
 
-		for_each_cell(lc, gd->rollups, l_start)
+		for_each_cell(lc, rollups, l_start)
 		{
 			RollupData *rollup = lfirst_node(RollupData, lc);
 
@@ -4334,34 +4414,60 @@ consider_groupingsets_paths(PlannerInfo *root,
 		}
 		else if (empty_sets)
 		{
-			RollupData *rollup = makeNode(RollupData);
+			/*
+			 * When combining, each empty set becomes its own rollup;
+			 * otherwise, all the empty sets are put into a single rollup.
+			 */
+			if (DO_AGGSPLIT_COMBINE(aggsplit))
+			{
+				ListCell *lc2;
+				forboth(lc, empty_sets, lc2, empty_sets_data)
+				{
+					GroupingSetData *gs = lfirst_node(GroupingSetData, lc2);
+					RollupData *rollup = makeNode(RollupData);
+
+					rollup->groupClause = NIL;
+					rollup->gsets_data = list_make1(gs); 
+					rollup->gsets = list_make1(NIL);
+					rollup->numGroups = 1;
+					rollup->hashable = false;
+					rollup->is_hashed = false;
+					new_rollups = lappend(new_rollups, rollup);
+				}
+			}
+			else
+			{
+				RollupData *rollup = makeNode(RollupData);
+
+				rollup->groupClause = NIL;
+				rollup->gsets_data = empty_sets_data;
+				rollup->gsets = empty_sets;
+				rollup->numGroups = list_length(empty_sets);
+				rollup->hashable = false;
+				rollup->is_hashed = false;
+				new_rollups = lappend(new_rollups, rollup);
+			}
 
-			rollup->groupClause = NIL;
-			rollup->gsets_data = empty_sets_data;
-			rollup->gsets = empty_sets;
-			rollup->numGroups = list_length(empty_sets);
-			rollup->hashable = false;
-			rollup->is_hashed = false;
-			new_rollups = lappend(new_rollups, rollup);
 			strat = AGG_MIXED;
 		}
 
-		add_path(grouped_rel, (Path *)
-				 create_groupingsets_path(root,
-										  grouped_rel,
-										  path,
-										  (List *) parse->havingQual,
-										  strat,
-										  new_rollups,
-										  agg_costs,
-										  dNumGroups));
+		add_path_cb(grouped_rel, (Path *)
+					  create_groupingsets_path(root,
+											   grouped_rel,
+											   path,
+											   havingQual,
+											   strat,
+											   new_rollups,
+											   agg_costs,
+											   dNumGroups,
+											   aggsplit));
 		return;
 	}
 
 	/*
 	 * If we have sorted input but nothing we can do with it, bail.
 	 */
-	if (list_length(gd->rollups) == 0)
+	if (list_length(rollups) == 0)
 		return;
 
 	/*
@@ -4374,7 +4480,7 @@ consider_groupingsets_paths(PlannerInfo *root,
 	 */
 	if (can_hash && gd->any_hashable)
 	{
-		List	   *rollups = NIL;
+		List	   *mixed_rollups = NIL;
 		List	   *hash_sets = list_copy(gd->unsortable_sets);
 		double		availspace = (work_mem * 1024.0);
 		ListCell   *lc;
@@ -4386,10 +4492,10 @@ consider_groupingsets_paths(PlannerInfo *root,
 												 agg_costs,
 												 gd->dNumHashGroups);
 
-		if (availspace > 0 && list_length(gd->rollups) > 1)
+		if (availspace > 0 && list_length(rollups) > 1)
 		{
 			double		scale;
-			int			num_rollups = list_length(gd->rollups);
+			int			num_rollups = list_length(rollups);
 			int			k_capacity;
 			int		   *k_weights = palloc(num_rollups * sizeof(int));
 			Bitmapset  *hash_items = NULL;
@@ -4427,11 +4533,13 @@ consider_groupingsets_paths(PlannerInfo *root,
 			 * below, must use the same condition.
 			 */
 			i = 0;
-			for_each_cell(lc, gd->rollups, list_second_cell(gd->rollups))
+			for_each_cell(lc, rollups, list_second_cell(rollups))
 			{
 				RollupData *rollup = lfirst_node(RollupData, lc);
 
-				if (rollup->hashable)
+				/* Empty set cannot be hashed either */
+				if (rollup->hashable &&
+					list_length(linitial(rollup->gsets)) != 0)
 				{
 					double		sz = estimate_hashagg_tablesize(path,
 																agg_costs,
@@ -4458,30 +4566,31 @@ consider_groupingsets_paths(PlannerInfo *root,
 
 			if (!bms_is_empty(hash_items))
 			{
-				rollups = list_make1(linitial(gd->rollups));
+				mixed_rollups = list_make1(linitial(rollups));
 
 				i = 0;
-				for_each_cell(lc, gd->rollups, list_second_cell(gd->rollups))
+				for_each_cell(lc, rollups, list_second_cell(rollups))
 				{
 					RollupData *rollup = lfirst_node(RollupData, lc);
 
-					if (rollup->hashable)
+					if (rollup->hashable &&
+						list_length(linitial(rollup->gsets)) != 0)
 					{
 						if (bms_is_member(i, hash_items))
 							hash_sets = list_concat(hash_sets,
 													rollup->gsets_data);
 						else
-							rollups = lappend(rollups, rollup);
+							mixed_rollups = lappend(mixed_rollups, rollup);
 						++i;
 					}
 					else
-						rollups = lappend(rollups, rollup);
+						mixed_rollups = lappend(mixed_rollups, rollup);
 				}
 			}
 		}
 
-		if (!rollups && hash_sets)
-			rollups = list_copy(gd->rollups);
+		if (!mixed_rollups && hash_sets)
+			mixed_rollups = list_copy(rollups);
 
 		foreach(lc, hash_sets)
 		{
@@ -4498,20 +4607,21 @@ consider_groupingsets_paths(PlannerInfo *root,
 			rollup->numGroups = gs->numGroups;
 			rollup->hashable = true;
 			rollup->is_hashed = true;
-			rollups = lcons(rollup, rollups);
+			mixed_rollups = lcons(rollup, mixed_rollups);
 		}
 
-		if (rollups)
+		if (mixed_rollups)
 		{
-			add_path(grouped_rel, (Path *)
-					 create_groupingsets_path(root,
-											  grouped_rel,
-											  path,
-											  (List *) parse->havingQual,
-											  AGG_MIXED,
-											  rollups,
-											  agg_costs,
-											  dNumGroups));
+			add_path_cb(grouped_rel, (Path *)
+						  create_groupingsets_path(root,
+												   grouped_rel,
+												   path,
+												   havingQual,
+												   AGG_MIXED,
+												   mixed_rollups,
+												   agg_costs,
+												   dNumGroups,
+												   aggsplit));
 		}
 	}
 
@@ -4519,15 +4629,16 @@ consider_groupingsets_paths(PlannerInfo *root,
 	 * Now try the simple sorted case.
 	 */
 	if (!gd->unsortable_sets)
-		add_path(grouped_rel, (Path *)
-				 create_groupingsets_path(root,
-										  grouped_rel,
-										  path,
-										  (List *) parse->havingQual,
-										  AGG_SORTED,
-										  gd->rollups,
-										  agg_costs,
-										  dNumGroups));
+		add_path_cb(grouped_rel, (Path *)
+					  create_groupingsets_path(root,
+											   grouped_rel,
+											   path,
+											   havingQual,
+											   AGG_SORTED,
+											   rollups,
+											   agg_costs,
+											   dNumGroups,
+											   aggsplit));
 }
 
 /*
@@ -5242,6 +5353,13 @@ make_partial_grouping_target(PlannerInfo *root,
 
 	add_new_columns_to_pathtarget(partial_target, non_group_exprs);
 
+	/* Add a GroupingSetId expression so the partial stage emits the set id. */
+	if (parse->groupingSets)
+	{
+		GroupingSetId *expr = makeNode(GroupingSetId);
+		add_new_column_to_pathtarget(partial_target, (Expr *)expr);
+	}
+
 	/*
 	 * Adjust Aggrefs to put them in partial mode.  At this point all Aggrefs
 	 * are at the top level of the target list, so we can just scan the list
@@ -6412,7 +6530,9 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 				{
 					consider_groupingsets_paths(root, grouped_rel,
 												path, true, can_hash,
-												gd, agg_costs, dNumGroups);
+												gd, agg_costs, dNumGroups,
+												havingQual,
+												AGGSPLIT_SIMPLE);
 				}
 				else if (parse->hasAggs)
 				{
@@ -6479,7 +6599,15 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 													 -1.0);
 				}
 
-				if (parse->hasAggs)
+				if (parse->groupingSets)
+				{
+					consider_groupingsets_paths(root, grouped_rel,
+												path, true, can_hash,
+												gd, agg_final_costs, dNumGroups,
+												havingQual,
+												AGGSPLIT_FINAL_DESERIAL);
+				}
+				else if (parse->hasAggs)
 					add_path(grouped_rel, (Path *)
 							 create_agg_path(root,
 											 grouped_rel,
@@ -6514,7 +6642,9 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 			 */
 			consider_groupingsets_paths(root, grouped_rel,
 										cheapest_path, false, true,
-										gd, agg_costs, dNumGroups);
+										gd, agg_costs, dNumGroups,
+										havingQual,
+										AGGSPLIT_SIMPLE);
 		}
 		else
 		{
@@ -6557,22 +6687,37 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		{
 			Path	   *path = partially_grouped_rel->cheapest_total_path;
 
-			hashaggtablesize = estimate_hashagg_tablesize(path,
-														  agg_final_costs,
-														  dNumGroups);
+			if (parse->groupingSets)
+			{
+				/*
+				 * Try for a hash-only groupingsets path over unsorted input.
+				 */
+				consider_groupingsets_paths(root, grouped_rel,
+											path, false, true,
+											gd, agg_final_costs, dNumGroups,
+											havingQual,
+											AGGSPLIT_FINAL_DESERIAL);
+			}
+			else
+			{
 
-			if (hashaggtablesize < work_mem * 1024L)
-				add_path(grouped_rel, (Path *)
-						 create_agg_path(root,
-										 grouped_rel,
-										 path,
-										 grouped_rel->reltarget,
-										 AGG_HASHED,
-										 AGGSPLIT_FINAL_DESERIAL,
-										 parse->groupClause,
-										 havingQual,
-										 agg_final_costs,
-										 dNumGroups));
+				hashaggtablesize = estimate_hashagg_tablesize(path,
+															  agg_final_costs,
+															  dNumGroups);
+
+				if (hashaggtablesize < work_mem * 1024L)
+					add_path(grouped_rel, (Path *)
+							 create_agg_path(root,
+											 grouped_rel,
+											 path,
+											 grouped_rel->reltarget,
+											 AGG_HASHED,
+											 AGGSPLIT_FINAL_DESERIAL,
+											 parse->groupClause,
+											 havingQual,
+											 agg_final_costs,
+											 dNumGroups));
+			}
 		}
 	}
 
@@ -6789,8 +6934,16 @@ create_partial_grouping_paths(PlannerInfo *root,
 													 path,
 													 root->group_pathkeys,
 													 -1.0);
-
-				if (parse->hasAggs)
+				if (parse->groupingSets)
+				{
+					consider_groupingsets_paths(root, partially_grouped_rel,
+												path, true, can_hash,
+												gd, agg_partial_costs,
+												dNumPartialPartialGroups,
+												NIL,
+												AGGSPLIT_INITIAL_SERIAL);
+				}
+				else if (parse->hasAggs)
 					add_partial_path(partially_grouped_rel, (Path *)
 									 create_agg_path(root,
 													 partially_grouped_rel,
@@ -6851,26 +7004,39 @@ create_partial_grouping_paths(PlannerInfo *root,
 	{
 		double		hashaggtablesize;
 
-		hashaggtablesize =
-			estimate_hashagg_tablesize(cheapest_partial_path,
-									   agg_partial_costs,
-									   dNumPartialPartialGroups);
-
-		/* Do the same for partial paths. */
-		if (hashaggtablesize < work_mem * 1024L &&
-			cheapest_partial_path != NULL)
+		if (parse->groupingSets)
 		{
-			add_partial_path(partially_grouped_rel, (Path *)
-							 create_agg_path(root,
-											 partially_grouped_rel,
-											 cheapest_partial_path,
-											 partially_grouped_rel->reltarget,
-											 AGG_HASHED,
-											 AGGSPLIT_INITIAL_SERIAL,
-											 parse->groupClause,
-											 NIL,
-											 agg_partial_costs,
-											 dNumPartialPartialGroups));
+			consider_groupingsets_paths(root, partially_grouped_rel,
+										cheapest_partial_path,
+										false, true,
+										gd, agg_partial_costs,
+										dNumPartialPartialGroups,
+										NIL,
+										AGGSPLIT_INITIAL_SERIAL);
+		}
+		else 
+		{
+			hashaggtablesize =
+				estimate_hashagg_tablesize(cheapest_partial_path,
+										   agg_partial_costs,
+										   dNumPartialPartialGroups);
+
+			/* Do the same for partial paths. */
+			if (hashaggtablesize < work_mem * 1024L &&
+				cheapest_partial_path != NULL)
+			{
+				add_partial_path(partially_grouped_rel, (Path *)
+								 create_agg_path(root,
+												 partially_grouped_rel,
+												 cheapest_partial_path,
+												 partially_grouped_rel->reltarget,
+												 AGG_HASHED,
+												 AGGSPLIT_INITIAL_SERIAL,
+												 parse->groupClause,
+												 NIL,
+												 agg_partial_costs,
+												 dNumPartialPartialGroups));
+			}
 		}
 	}
 
@@ -6913,6 +7079,9 @@ gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel)
 	/* Try Gather for unordered paths and Gather Merge for ordered ones. */
 	generate_gather_paths(root, rel, true);
 
+	if (root->parse->groupingSets)
+		return;
+
 	/* Try cheapest partial path + explicit Sort + Gather Merge. */
 	cheapest_partial_path = linitial(rel->partial_pathlist);
 	if (!pathkeys_contained_in(root->group_pathkeys,
@@ -6958,11 +7127,6 @@ can_partial_agg(PlannerInfo *root, const AggClauseCosts *agg_costs)
 		 */
 		return false;
 	}
-	else if (parse->groupingSets)
-	{
-		/* We don't know how to do grouping sets in parallel. */
-		return false;
-	}
 	else if (agg_costs->hasNonPartial || agg_costs->hasNonSerial)
 	{
 		/* Insufficient support for partial mode. */
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index 566ee96..d8723b9 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -728,6 +728,22 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset)
 					plan->qual = (List *)
 						convert_combining_aggrefs((Node *) plan->qual,
 												  NULL);
+
+					/*
+					 * If this node combines partial grouping sets aggregation,
+					 * we must add a reference to the GroupingSetId expression
+					 * in the targetlist of the child plan node.
+					 */
+					if (agg->rollup)
+					{
+						GroupingSetId	*expr = makeNode(GroupingSetId);
+						indexed_tlist	*subplan_itlist = build_tlist_index(plan->lefttree->targetlist);
+
+						agg->grpSetIdFilter = fix_upper_expr(root, (Node *)expr,
+															 subplan_itlist,
+															 OUTER_VAR,
+															 rtoffset);
+					}
 				}
 
 				set_upper_references(root, plan, rtoffset);
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 34acb73..578ad60 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -2992,7 +2992,8 @@ create_groupingsets_path(PlannerInfo *root,
 						 AggStrategy aggstrategy,
 						 List *rollups,
 						 const AggClauseCosts *agg_costs,
-						 double numGroups)
+						 double numGroups,
+						 AggSplit aggsplit)
 {
 	GroupingSetsPath *pathnode = makeNode(GroupingSetsPath);
 	PathTarget *target = rel->reltarget;
@@ -3010,6 +3011,7 @@ create_groupingsets_path(PlannerInfo *root,
 		subpath->parallel_safe;
 	pathnode->path.parallel_workers = subpath->parallel_workers;
 	pathnode->subpath = subpath;
+	pathnode->aggsplit = aggsplit;
 
 	/*
 	 * Simplify callers by downgrading AGG_SORTED to AGG_PLAIN, and AGG_MIXED
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index 3e64390..f3e5766 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -7874,6 +7874,12 @@ get_rule_expr(Node *node, deparse_context *context,
 			}
 			break;
 
+		case T_GroupingSetId:
+			{
+				appendStringInfoString(buf, "GROUPINGSETID()");
+			}
+			break;
+
 		case T_WindowFunc:
 			get_windowfunc_expr((WindowFunc *) node, context);
 			break;
diff --git a/src/include/executor/execExpr.h b/src/include/executor/execExpr.h
index d21dbead..1361955 100644
--- a/src/include/executor/execExpr.h
+++ b/src/include/executor/execExpr.h
@@ -216,6 +216,7 @@ typedef enum ExprEvalOp
 	EEOP_XMLEXPR,
 	EEOP_AGGREF,
 	EEOP_GROUPING_FUNC,
+	EEOP_GROUPING_SET_ID,
 	EEOP_WINDOW_FUNC,
 	EEOP_SUBPLAN,
 	EEOP_ALTERNATIVE_SUBPLAN,
@@ -226,6 +227,7 @@ typedef enum ExprEvalOp
 	EEOP_AGG_STRICT_INPUT_CHECK_ARGS,
 	EEOP_AGG_STRICT_INPUT_CHECK_NULLS,
 	EEOP_AGG_INIT_TRANS,
+	EEOP_AGG_PERHASH_NULL_CHECK,
 	EEOP_AGG_STRICT_TRANS_CHECK,
 	EEOP_AGG_PLAIN_TRANS_BYVAL,
 	EEOP_AGG_PLAIN_TRANS,
@@ -573,6 +575,12 @@ typedef struct ExprEvalStep
 			List	   *clauses;	/* integer list of column numbers */
 		}			grouping_func;
 
+		/* for EEOP_GROUPING_SET_ID */
+		struct
+		{
+			AggState   *parent; /* parent Agg */
+		}			grouping_set_id;
+
 		/* for EEOP_WINDOW_FUNC */
 		struct
 		{
@@ -634,6 +642,17 @@ typedef struct ExprEvalStep
 			int			jumpnull;
 		}			agg_init_trans;
 
+		/* for EEOP_AGG_PERHASH_NULL_CHECK */
+		struct
+		{
+			AggState   *aggstate;
+			AggStatePerTrans pertrans;
+			int			setno;
+			int			transno;
+			int			setoff;
+			int			jumpnull;
+		}			agg_perhash_null_check;
+
 		/* for EEOP_AGG_STRICT_TRANS_CHECK */
 		struct
 		{
diff --git a/src/include/executor/nodeAgg.h b/src/include/executor/nodeAgg.h
index 1a8ca98..4e5ec06 100644
--- a/src/include/executor/nodeAgg.h
+++ b/src/include/executor/nodeAgg.h
@@ -280,6 +280,11 @@ typedef struct AggStatePerPhaseData
 	Sort	   *sortnode;		/* Sort node for input ordering for phase */
 
 	ExprState  *evaltrans;		/* evaluation of transition functions  */
+
+	/* fields for parallel grouping sets */
+	int		   *grpsetids;		/* grouping set ids for the sets in this phase */
+	Tuplesortstate *sort_in;	/* sorted input to phases > 1 */
+	Tuplestorestate *store_in;	/* buffered input to phases > 1 */
 }			AggStatePerPhaseData;
 
 /*
@@ -302,8 +307,10 @@ typedef struct AggStatePerHashData
 	AttrNumber *hashGrpColIdxInput; /* hash col indices in input slot */
 	AttrNumber *hashGrpColIdxHash;	/* indices in hash table tuples */
 	Agg		   *aggnode;		/* original Agg node, for numGroups etc. */
-}			AggStatePerHashData;
 
+	/* field for parallel grouping sets */
+	int			grpsetid;		/* grouping set id of this hash table */
+}			AggStatePerHashData;
 
 extern AggState *ExecInitAgg(Agg *node, EState *estate, int eflags);
 extern void ExecEndAgg(AggState *node);
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 063b490..0ba408e 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1972,6 +1972,13 @@ typedef struct GroupState
  *	expressions and run the aggregate transition functions.
  * ---------------------
  */
+/* mapping from grouping set id to perphase or perhash data */
+typedef struct GrpSetMapping
+{
+	bool		is_hashed;	/* true if this set uses a hash table */
+	int			index;		/* index into aggstate->perhash[] or aggstate->phases[] */
+} GrpSetMapping;
+
 /* these structs are private in nodeAgg.c: */
 typedef struct AggStatePerAggData *AggStatePerAgg;
 typedef struct AggStatePerTransData *AggStatePerTrans;
@@ -2013,6 +2020,7 @@ typedef struct AggState
 	Tuplesortstate *sort_in;	/* sorted input to phases > 1 */
 	Tuplesortstate *sort_out;	/* input is copied here for next phase */
 	TupleTableSlot *sort_slot;	/* slot for sort results */
+	Tuplestorestate *store_in;	/* buffered input to phases > 1 */
 	/* these fields are used in AGG_PLAIN and AGG_SORTED modes: */
 	AggStatePerGroup *pergroups;	/* grouping set indexed array of per-group
 									 * pointers */
@@ -2029,6 +2037,12 @@ typedef struct AggState
 	AggStatePerGroup *all_pergroups;	/* array of first ->pergroups, than
 										 * ->hash_pergroup */
 	ProjectionInfo *combinedproj;	/* projection machinery */
+
+	/* support for parallel grouping sets */
+	bool		input_dispatched;	/* has the input been dispatched to its
+									 * grouping sets yet? */
+	ExprState  *grpsetid_filter;	/* expr to fetch grouping set id from
+									 * child targetlist */
+	struct GrpSetMapping *grpSetMappings;	/* grpsetid -> perhash or perphase data */
 } AggState;
 
 /* ----------------
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index 3cbb08d..9594201 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -153,6 +153,7 @@ typedef enum NodeTag
 	T_Param,
 	T_Aggref,
 	T_GroupingFunc,
+	T_GroupingSetId,
 	T_WindowFunc,
 	T_SubscriptingRef,
 	T_FuncExpr,
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 13b147d..6d6fc55 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1674,6 +1674,7 @@ typedef struct GroupingSetData
 {
 	NodeTag		type;
 	List	   *set;			/* grouping set as list of sortgrouprefs */
+	int			grpsetId;			/* unique grouping set identifier */
 	double		numGroups;		/* est. number of result groups */
 } GroupingSetData;
 
@@ -1699,6 +1700,7 @@ typedef struct GroupingSetsPath
 	AggStrategy aggstrategy;	/* basic strategy */
 	List	   *rollups;		/* list of RollupData */
 	List	   *qual;			/* quals (HAVING quals), if any */
+	AggSplit	aggsplit;		/* agg-splitting mode, see nodes.h */
 } GroupingSetsPath;
 
 /*
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 8e6594e..74e8fb5 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -20,6 +20,7 @@
 #include "nodes/bitmapset.h"
 #include "nodes/lockoptions.h"
 #include "nodes/primnodes.h"
+#include "nodes/pathnodes.h"
 
 
 /* ----------------------------------------------------------------
@@ -811,8 +812,9 @@ typedef struct Agg
 	long		numGroups;		/* estimated number of groups in input */
 	Bitmapset  *aggParams;		/* IDs of Params used in Aggref inputs */
 	/* Note: planner provides numGroups & aggParams only in HASHED/MIXED case */
-	List	   *groupingSets;	/* grouping sets to use */
+	RollupData *rollup;			/* grouping sets to use */
 	List	   *chain;			/* chained Agg/Sort nodes */
+	Node	   *grpSetIdFilter;	/* expr returning the input's grouping set id */
 } Agg;
 
 /* ----------------
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 860a84d..e96cbfc 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -350,6 +350,12 @@ typedef struct GroupingFunc
 	int			location;		/* token location */
 } GroupingFunc;
 
+/* GroupingSetId - expression yielding the input tuple's grouping set id */
+typedef struct GroupingSetId
+{
+	Expr		xpr;
+} GroupingSetId;
+
 /*
  * WindowFunc
  */
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index a12af54..900070b 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -217,7 +217,8 @@ extern GroupingSetsPath *create_groupingsets_path(PlannerInfo *root,
 												  AggStrategy aggstrategy,
 												  List *rollups,
 												  const AggClauseCosts *agg_costs,
-												  double numGroups);
+												  double numGroups,
+												  AggSplit aggsplit);
 extern MinMaxAggPath *create_minmaxagg_path(PlannerInfo *root,
 											RelOptInfo *rel,
 											PathTarget *target,
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index e7aaddd..b28476f 100644
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -54,7 +54,7 @@ extern Sort *make_sort_from_sortclauses(List *sortcls, Plan *lefttree);
 extern Agg *make_agg(List *tlist, List *qual,
 					 AggStrategy aggstrategy, AggSplit aggsplit,
 					 int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations,
-					 List *groupingSets, List *chain,
+					 RollupData *rollup, List *chain,
 					 double dNumGroups, Plan *lefttree);
 extern Limit *make_limit(Plan *lefttree, Node *limitOffset, Node *limitCount);
 
-- 
1.8.3.1

