>>>>> "Thom" == Thom Brown <t...@linux.com> writes:
Thom> This doesn't apply cleanly to latest master. Could you please Thom> post a rebased patch? Sure. -- Andrew (irc:RhodiumToad)
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index c9e0a3e..480a07e 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -996,6 +996,10 @@ ExplainNode(PlanState *planstate, List *ancestors, pname = "HashAggregate"; strategy = "Hashed"; break; + case AGG_MIXED: + pname = "MixedAggregate"; + strategy = "Mixed"; + break; default: pname = "Aggregate ???"; strategy = "???"; @@ -1924,6 +1928,19 @@ show_grouping_set_keys(PlanState *planstate, ListCell *lc; List *gsets = aggnode->groupingSets; AttrNumber *keycols = aggnode->grpColIdx; + const char *keyname; + const char *keysetname; + + if (aggnode->aggstrategy == AGG_HASHED || aggnode->aggstrategy == AGG_MIXED) + { + keyname = "Hash Key"; + keysetname = "Hash Keys"; + } + else + { + keyname = "Group Key"; + keysetname = "Group Keys"; + } ExplainOpenGroup("Grouping Set", NULL, true, es); @@ -1938,7 +1955,7 @@ show_grouping_set_keys(PlanState *planstate, es->indent++; } - ExplainOpenGroup("Group Keys", "Group Keys", false, es); + ExplainOpenGroup(keysetname, keysetname, false, es); foreach(lc, gsets) { @@ -1962,12 +1979,12 @@ show_grouping_set_keys(PlanState *planstate, } if (!result && es->format == EXPLAIN_FORMAT_TEXT) - ExplainPropertyText("Group Key", "()", es); + ExplainPropertyText(keyname, "()", es); else - ExplainPropertyListNested("Group Key", result, es); + ExplainPropertyListNested(keyname, result, es); } - ExplainCloseGroup("Group Keys", "Group Keys", false, es); + ExplainCloseGroup(keysetname, keysetname, false, es); if (sortnode && es->format == EXPLAIN_FORMAT_TEXT) es->indent--; diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index aa08152..f059560 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -122,12 +122,19 @@ * specific). * * Where more complex grouping sets are used, we break them down into - * "phases", where each phase has a different sort order. During each - * phase but the last, the input tuples are additionally stored in a - * tuplesort which is keyed to the next phase's sort order; during each - * phase but the first, the input tuples are drawn from the previously - * sorted data. (The sorting of the data for the first phase is handled by - * the planner, as it might be satisfied by underlying nodes.) + * "phases", where each phase has a different sort order (except phase 0 + * which is reserved for hashing). During each phase but the last, the + * input tuples are additionally stored in a tuplesort which is keyed to the + * next phase's sort order; during each phase but the first, the input + * tuples are drawn from the previously sorted data. (The sorting of the + * data for the first phase is handled by the planner, as it might be + * satisfied by underlying nodes.) + * + * Hashing can be mixed with sorted grouping. To do this, we have an + * AGG_MIXED strategy that populates the hashtables during the first sorted + * phase, and switches to reading them out after completing all sort phases. + * We can also support AGG_HASHED with multiple hash tables and no sorting + * at all. * * From the perspective of aggregate transition and final functions, the * only issue regarding grouping sets is this: a single call site (flinfo) @@ -139,8 +146,6 @@ * sensitive to the grouping set for which the aggregate function is * currently being called. * - * TODO: AGG_HASHED doesn't support multiple grouping sets yet. - * * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * @@ -432,6 +437,7 @@ typedef struct AggStatePerGroupData */ typedef struct AggStatePerPhaseData { + AggStrategy aggstrategy; /* strategy for this phase */ int numsets; /* number of grouping sets (or 0) */ int *gset_lengths; /* lengths of grouping sets */ Bitmapset **grouped_cols; /* column groupings for rollup */ @@ -440,7 +446,31 @@ typedef struct AggStatePerPhaseData Sort *sortnode; /* Sort node for input ordering for phase */ } AggStatePerPhaseData; +/* + * AggStatePerHashData - per-hashtable state + * + * When doing grouping sets with hashing, we have one of these for each + * grouping set. (When doing hashing without grouping sets, we have just one of + * them.) + */ + +typedef struct AggStatePerHashData +{ + TupleHashTable hashtable; /* hash table with one entry per group */ + TupleHashIterator hashiter; /* for iterating through hash table */ + TupleTableSlot *hashslot; /* slot for loading hash table */ + FmgrInfo *hashfunctions; /* per-grouping-field hash fns */ + FmgrInfo *eqfunctions; /* per-grouping-field equality fns */ + int numCols; /* number of hash key columns */ + int numhashGrpCols; /* number of columns in hash table */ + int largestGrpColIdx; /* largest column required for hashing */ + AttrNumber *hashGrpColIdxInput; /* and their indices in input slot */ + AttrNumber *hashGrpColIdxHash; /* indices for execGrouping in hashtbl */ + Agg *aggnode; /* original Agg node, for numGroups etc. */ +} AggStatePerHashData; + +static void select_current_set(AggState *aggstate, int setno, bool is_hash); static void initialize_phase(AggState *aggstate, int newphase); static TupleTableSlot *fetch_input_tuple(AggState *aggstate); static void initialize_aggregates(AggState *aggstate, @@ -449,7 +479,7 @@ static void initialize_aggregates(AggState *aggstate, static void advance_transition_function(AggState *aggstate, AggStatePerTrans pertrans, AggStatePerGroup pergroupstate); -static void advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup); +static void advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup, AggStatePerGroup *pergroups); static void advance_combine_function(AggState *aggstate, AggStatePerTrans pertrans, AggStatePerGroup pergroupstate); @@ -479,8 +509,8 @@ static TupleTableSlot *project_aggregates(AggState *aggstate); static Bitmapset *find_unaggregated_cols(AggState *aggstate); static bool find_unaggregated_cols_walker(Node *node, Bitmapset **colnos); static void build_hash_table(AggState *aggstate); -static TupleHashEntryData *lookup_hash_entry(AggState *aggstate, - TupleTableSlot *inputslot); +static TupleHashEntryData *lookup_hash_entry(AggState *aggstate); +static AggStatePerGroup *lookup_hash_entries(AggState *aggstate); static TupleTableSlot *agg_retrieve_direct(AggState *aggstate); static void agg_fill_hash_table(AggState *aggstate); static TupleTableSlot *agg_retrieve_hash_table(AggState *aggstate); @@ -501,13 +531,29 @@ static int find_compatible_pertrans(AggState *aggstate, Aggref *newagg, /* - * Switch to phase "newphase", which must either be 0 (to reset) or + * Select the current grouping set; affects current_set and + * curaggcontext. + */ + +static void +select_current_set(AggState *aggstate, int setno, bool is_hash) +{ + if (is_hash) + aggstate->curaggcontext = aggstate->hashcontext; + else + aggstate->curaggcontext = aggstate->aggcontexts[setno]; + + aggstate->current_set = setno; +} + +/* + * Switch to phase "newphase", which must either be 0 or 1 (to reset) or * current_phase + 1. Juggle the tuplesorts accordingly. */ static void initialize_phase(AggState *aggstate, int newphase) { - Assert(newphase == 0 || newphase == aggstate->current_phase + 1); + Assert(newphase <= 1 || newphase == aggstate->current_phase + 1); /* * Whatever the previous state, we're now done with whatever input @@ -519,7 +565,7 @@ initialize_phase(AggState *aggstate, int newphase) aggstate->sort_in = NULL; } - if (newphase == 0) + if (newphase <= 1) { /* * Discard any existing output tuplesort. @@ -546,7 +592,7 @@ initialize_phase(AggState *aggstate, int newphase) * If this isn't the last phase, we need to sort appropriately for the * next phase in sequence. */ - if (newphase < aggstate->numphases - 1) + if (newphase > 0 && newphase < aggstate->numphases - 1) { Sort *sortnode = aggstate->phases[newphase + 1].sortnode; PlanState *outerNode = outerPlanState(aggstate); @@ -567,7 +613,7 @@ initialize_phase(AggState *aggstate, int newphase) } /* - * Fetch a tuple from either the outer plan (for phase 0) or from the sorter + * Fetch a tuple from either the outer plan (for phase 1) or from the sorter * populated by the previous phase. Copy it to the sorter for the next phase * if any. */ @@ -595,8 +641,8 @@ fetch_input_tuple(AggState *aggstate) /* * (Re)Initialize an individual aggregate. * - * This function handles only one grouping set (already set in - * aggstate->current_set). + * This function handles only one grouping set, already set in + * aggstate->current_set. * * When called, CurrentMemoryContext should be the per-query context. */ @@ -652,8 +698,7 @@ initialize_aggregate(AggState *aggstate, AggStatePerTrans pertrans, { MemoryContext oldContext; - oldContext = MemoryContextSwitchTo( - aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory); + oldContext = MemoryContextSwitchTo(aggstate->curaggcontext->ecxt_per_tuple_memory); pergroupstate->transValue = datumCopy(pertrans->initValue, pertrans->transtypeByVal, pertrans->transtypeLen); @@ -676,8 +721,9 @@ initialize_aggregate(AggState *aggstate, AggStatePerTrans pertrans, * * If there are multiple grouping sets, we initialize only the first numReset * of them (the grouping sets are ordered so that the most specific one, which - * is reset most often, is first). As a convenience, if numReset is < 1, we - * reinitialize all sets. + * is reset most often, is first). As a convenience, if numReset is 0, we + * reinitialize all sets. numReset is -1 to initialize a hashtable entry, in + * which case the caller must have used select_current_set appropriately. * * When called, CurrentMemoryContext should be the per-query context. */ @@ -691,23 +737,34 @@ initialize_aggregates(AggState *aggstate, int setno = 0; AggStatePerTrans transstates = aggstate->pertrans; - if (numReset < 1) + if (numReset == 0) numReset = numGroupingSets; for (transno = 0; transno < aggstate->numtrans; transno++) { AggStatePerTrans pertrans = &transstates[transno]; - for (setno = 0; setno < numReset; setno++) + if (numReset < 0) { AggStatePerGroup pergroupstate; - pergroupstate = &pergroup[transno + (setno * (aggstate->numtrans))]; - - aggstate->current_set = setno; + pergroupstate = &pergroup[transno]; initialize_aggregate(aggstate, pertrans, pergroupstate); } + else + { + for (setno = 0; setno < numReset; setno++) + { + AggStatePerGroup pergroupstate; + + pergroupstate = &pergroup[transno + (setno * (aggstate->numtrans))]; + + select_current_set(aggstate, setno, false); + + initialize_aggregate(aggstate, pertrans, pergroupstate); + } + } } } @@ -757,7 +814,7 @@ advance_transition_function(AggState *aggstate, * do not need to pfree the old transValue, since it's NULL. */ oldContext = MemoryContextSwitchTo( - aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory); + aggstate->curaggcontext->ecxt_per_tuple_memory); pergroupstate->transValue = datumCopy(fcinfo->arg[1], pertrans->transtypeByVal, pertrans->transtypeLen); @@ -807,7 +864,7 @@ advance_transition_function(AggState *aggstate, { if (!fcinfo->isnull) { - MemoryContextSwitchTo(aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory); + MemoryContextSwitchTo(aggstate->curaggcontext->ecxt_per_tuple_memory); if (DatumIsReadWriteExpandedObject(newVal, false, pertrans->transtypeLen) && @@ -838,17 +895,21 @@ advance_transition_function(AggState *aggstate, /* * Advance each aggregate transition state for one input tuple. The input * tuple has been stored in tmpcontext->ecxt_outertuple, so that it is - * accessible to ExecEvalExpr. pergroup is the array of per-group structs to - * use (this might be in a hashtable entry). + * accessible to ExecEvalExpr. + * + * We have two sets of transition states to handle: one for sorted aggregation + * and one for hashed; we do them both here, to avoid multiple evaluation of + * the inputs. * * When called, CurrentMemoryContext should be the per-query context. */ static void -advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup) +advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup, AggStatePerGroup *pergroups) { int transno; int setno = 0; int numGroupingSets = Max(aggstate->phase->numsets, 1); + int numHashes = aggstate->num_hashes; int numTrans = aggstate->numtrans; TupleTableSlot *slot = aggstate->evalslot; @@ -880,6 +941,7 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup) { /* DISTINCT and/or ORDER BY case */ Assert(slot->tts_nvalid >= (pertrans->numInputs + inputoff)); + Assert(!pergroups); /* * If the transfn is strict, we want to check for nullity before @@ -940,13 +1002,36 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup) fcinfo->argnull[i + 1] = slot->tts_isnull[i + inputoff]; } - for (setno = 0; setno < numGroupingSets; setno++) + if (pergroup) + { + /* advance transition states for ordered grouping */ + + for (setno = 0; setno < numGroupingSets; setno++) + { + AggStatePerGroup pergroupstate; + + select_current_set(aggstate, setno, false); + + pergroupstate = &pergroup[transno + (setno * numTrans)]; + + advance_transition_function(aggstate, pertrans, pergroupstate); + } + } + + if (pergroups) { - AggStatePerGroup pergroupstate = &pergroup[transno + (setno * numTrans)]; + /* advance transition states for hashed grouping */ + + for (setno = 0; setno < numHashes; setno++) + { + AggStatePerGroup pergroupstate; + + select_current_set(aggstate, setno, true); - aggstate->current_set = setno; + pergroupstate = &pergroups[setno][transno]; - advance_transition_function(aggstate, pertrans, pergroupstate); + advance_transition_function(aggstate, pertrans, pergroupstate); + } } } } @@ -967,7 +1052,7 @@ combine_aggregates(AggState *aggstate, AggStatePerGroup pergroup) TupleTableSlot *slot; /* combine not supported with grouping sets */ - Assert(aggstate->phase->numsets == 0); + Assert(aggstate->phase->numsets <= 1); /* compute input for all aggregates */ slot = ExecProject(aggstate->evalproj); @@ -1060,7 +1145,7 @@ advance_combine_function(AggState *aggstate, if (!pertrans->transtypeByVal) { oldContext = MemoryContextSwitchTo( - aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory); + aggstate->curaggcontext->ecxt_per_tuple_memory); pergroupstate->transValue = datumCopy(fcinfo->arg[1], pertrans->transtypeByVal, pertrans->transtypeLen); @@ -1105,7 +1190,7 @@ advance_combine_function(AggState *aggstate, { if (!fcinfo->isnull) { - MemoryContextSwitchTo(aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory); + MemoryContextSwitchTo(aggstate->curaggcontext->ecxt_per_tuple_memory); if (DatumIsReadWriteExpandedObject(newVal, false, pertrans->transtypeLen) && @@ -1559,7 +1644,9 @@ prepare_projection_slot(AggState *aggstate, TupleTableSlot *slot, int currentSet /* * Compute the final value of all aggregates for one group. * - * This function handles only one grouping set at a time. + * This function handles only one grouping set at a time. But in the hash + * case, it's the caller's responsibility to have selected the set already, and + * we pass in -1 here to flag that and to control the indexing into pertrans. * * Results are stored in the output econtext aggvalues/aggnulls. */ @@ -1575,10 +1662,11 @@ finalize_aggregates(AggState *aggstate, int aggno; int transno; - Assert(currentSet == 0 || - ((Agg *) aggstate->ss.ps.plan)->aggstrategy != AGG_HASHED); - - aggstate->current_set = currentSet; + /* ugly hack for hash */ + if (currentSet >= 0) + select_current_set(aggstate, currentSet, false); + else + currentSet = 0; /* * If there were any DISTINCT and/or ORDER BY aggregates, sort their @@ -1593,7 +1681,8 @@ finalize_aggregates(AggState *aggstate, if (pertrans->numSortCols > 0) { - Assert(((Agg *) aggstate->ss.ps.plan)->aggstrategy != AGG_HASHED); + Assert(aggstate->aggstrategy != AGG_HASHED && + aggstate->aggstrategy != AGG_MIXED); if (pertrans->numInputs == 1) process_ordered_aggregate_single(aggstate, @@ -1697,7 +1786,7 @@ find_unaggregated_cols_walker(Node *node, Bitmapset **colnos) } /* - * Initialize the hash table to empty. + * Initialize the hash table(s) to empty. * * To implement hashed aggregation, we need a hashtable that stores a * representative tuple and an array of AggStatePerGroup structs for each @@ -1705,29 +1794,40 @@ find_unaggregated_cols_walker(Node *node, Bitmapset **colnos) * GROUP BY columns. The per-group data is allocated in lookup_hash_entry(), * for each entry. * - * The hash table always lives in the aggcontext memory context. + * We have a separate hashtable and associated perhash data structure for each + * grouping set for which we're doing hashing. + * + * The hash tables always live in the hashcontext's per-tuple memory context + * (there is only one of these for all tables together, since they are all + * reset at the same time). */ static void build_hash_table(AggState *aggstate) { - Agg *node = (Agg *) aggstate->ss.ps.plan; MemoryContext tmpmem = aggstate->tmpcontext->ecxt_per_tuple_memory; Size additionalsize; + int i; - Assert(node->aggstrategy == AGG_HASHED); - Assert(node->numGroups > 0); + Assert(aggstate->aggstrategy == AGG_HASHED || aggstate->aggstrategy == AGG_MIXED); - additionalsize = aggstate->numaggs * sizeof(AggStatePerGroupData); + additionalsize = aggstate->numtrans * sizeof(AggStatePerGroupData); - aggstate->hashtable = BuildTupleHashTable(node->numCols, - aggstate->hashGrpColIdxHash, - aggstate->phase->eqfunctions, - aggstate->hashfunctions, - node->numGroups, - additionalsize, - aggstate->aggcontexts[0]->ecxt_per_tuple_memory, - tmpmem, + for (i = 0; i < aggstate->num_hashes; ++i) + { + AggStatePerHash perhash = &aggstate->perhash[i]; + + Assert(perhash->aggnode->numGroups > 0); + + perhash->hashtable = BuildTupleHashTable(perhash->numCols, + perhash->hashGrpColIdxHash, + perhash->eqfunctions, + perhash->hashfunctions, + perhash->aggnode->numGroups, + additionalsize, + aggstate->hashcontext->ecxt_per_tuple_memory, + tmpmem, DO_AGGSPLIT_SKIPFINAL(aggstate->aggsplit)); + } } /* @@ -1750,72 +1850,98 @@ build_hash_table(AggState *aggstate) * the array is preserved over ExecReScanAgg, so we allocate it in the * per-query context (unlike the hash table itself). */ -static List * +static void find_hash_columns(AggState *aggstate) { - Agg *node = (Agg *) aggstate->ss.ps.plan; - Bitmapset *colnos; - List *collist; - TupleDesc hashDesc; + Bitmapset *base_colnos; List *outerTlist = outerPlanState(aggstate)->plan->targetlist; - List *hashTlist = NIL; - int i; - - aggstate->largestGrpColIdx = 0; + int numHashes = aggstate->num_hashes; + int j; /* Find Vars that will be needed in tlist and qual */ - colnos = find_unaggregated_cols(aggstate); - /* Add in all the grouping columns */ - for (i = 0; i < node->numCols; i++) - colnos = bms_add_member(colnos, node->grpColIdx[i]); - /* Convert to list, using lcons so largest element ends up first */ - collist = NIL; - - aggstate->hashGrpColIdxInput = - palloc(bms_num_members(colnos) * sizeof(AttrNumber)); - aggstate->hashGrpColIdxHash = - palloc(node->numCols * sizeof(AttrNumber)); + base_colnos = find_unaggregated_cols(aggstate); - /* - * First build mapping for columns directly hashed. These are the first, - * because they'll be accessed when computing hash values and comparing - * tuples for exact matches. We also build simple mapping for - * execGrouping, so it knows where to find the to-be-hashed / compared - * columns in the input. - */ - for (i = 0; i < node->numCols; i++) + for (j = 0; j < numHashes; ++j) { - aggstate->hashGrpColIdxInput[i] = node->grpColIdx[i]; - aggstate->hashGrpColIdxHash[i] = i + 1; - aggstate->numhashGrpCols++; - /* delete already mapped columns */ - bms_del_member(colnos, node->grpColIdx[i]); - } + AggStatePerHash perhash = &aggstate->perhash[j]; + Bitmapset *colnos = bms_copy(base_colnos); + AttrNumber *grpColIdx = perhash->aggnode->grpColIdx; + List *hashTlist = NIL; + TupleDesc hashDesc; + int i; - /* and add the remaining columns */ - while ((i = bms_first_member(colnos)) >= 0) - { - aggstate->hashGrpColIdxInput[aggstate->numhashGrpCols] = i; - aggstate->numhashGrpCols++; - } + perhash->largestGrpColIdx = 0; - /* and build a tuple descriptor for the hashtable */ - for (i = 0; i < aggstate->numhashGrpCols; i++) - { - int varNumber = aggstate->hashGrpColIdxInput[i] - 1; + /* + * If we're doing grouping sets, then some Vars might be referenced in + * tlist/qual for the benefit of other grouping sets, but not needed + * when hashing; i.e. prepare_projection_slot will null them out, so + * there'd be no point storing them. Use prepare_projection_slot's + * logic to determine which. + */ + if (aggstate->phases[0].grouped_cols) + { + Bitmapset *grouped_cols = aggstate->phases[0].grouped_cols[j]; + ListCell *lc; - hashTlist = lappend(hashTlist, list_nth(outerTlist, varNumber)); - aggstate->largestGrpColIdx = - Max(varNumber + 1, aggstate->largestGrpColIdx); - } + foreach(lc, aggstate->all_grouped_cols) + { + int attnum = lfirst_int(lc); - hashDesc = ExecTypeFromTL(hashTlist, false); - ExecSetSlotDescriptor(aggstate->hashslot, hashDesc); + if (!bms_is_member(attnum, grouped_cols)) + colnos = bms_del_member(colnos, attnum); + } + } + /* Add in all the grouping columns */ + for (i = 0; i < perhash->numCols; i++) + colnos = bms_add_member(colnos, grpColIdx[i]); + + perhash->hashGrpColIdxInput = + palloc(bms_num_members(colnos) * sizeof(AttrNumber)); + perhash->hashGrpColIdxHash = + palloc(perhash->numCols * sizeof(AttrNumber)); + + /* + * First build mapping for columns directly hashed. These are the + * first, because they'll be accessed when computing hash values and + * comparing tuples for exact matches. We also build simple mapping for + * execGrouping, so it knows where to find the to-be-hashed / compared + * columns in the input. + */ + for (i = 0; i < perhash->numCols; i++) + { + perhash->hashGrpColIdxInput[i] = grpColIdx[i]; + perhash->hashGrpColIdxHash[i] = i + 1; + perhash->numhashGrpCols++; + /* delete already mapped columns */ + bms_del_member(colnos, grpColIdx[i]); + } - list_free(hashTlist); - bms_free(colnos); + /* and add the remaining columns */ + while ((i = bms_first_member(colnos)) >= 0) + { + perhash->hashGrpColIdxInput[perhash->numhashGrpCols] = i; + perhash->numhashGrpCols++; + } - return collist; + /* and build a tuple descriptor for the hashtable */ + for (i = 0; i < perhash->numhashGrpCols; i++) + { + int varNumber = perhash->hashGrpColIdxInput[i] - 1; + + hashTlist = lappend(hashTlist, list_nth(outerTlist, varNumber)); + perhash->largestGrpColIdx = + Max(varNumber + 1, perhash->largestGrpColIdx); + } + + hashDesc = ExecTypeFromTL(hashTlist, false); + ExecSetSlotDescriptor(perhash->hashslot, hashDesc); + + list_free(hashTlist); + bms_free(colnos); + } + + bms_free(base_colnos); } /* @@ -1840,26 +1966,30 @@ hash_agg_entry_size(int numAggs) } /* - * Find or create a hashtable entry for the tuple group containing the - * given tuple. + * Find or create a hashtable entry for the tuple group containing the current + * tuple (already set in tmpcontext's outertuple slot), in the current grouping + * set (which the caller must have selected - note that initialize_aggregate + * depends on this). * * When called, CurrentMemoryContext should be the per-query context. */ static TupleHashEntryData * -lookup_hash_entry(AggState *aggstate, TupleTableSlot *inputslot) +lookup_hash_entry(AggState *aggstate) { - TupleTableSlot *hashslot = aggstate->hashslot; + TupleTableSlot *inputslot = aggstate->tmpcontext->ecxt_outertuple; + AggStatePerHash perhash = &aggstate->perhash[aggstate->current_set]; + TupleTableSlot *hashslot = perhash->hashslot; TupleHashEntryData *entry; bool isnew; int i; /* transfer just the needed columns into hashslot */ - slot_getsomeattrs(inputslot, aggstate->largestGrpColIdx); + slot_getsomeattrs(inputslot, perhash->largestGrpColIdx); ExecClearTuple(hashslot); - for (i = 0; i < aggstate->numhashGrpCols; i++) + for (i = 0; i < perhash->numhashGrpCols; i++) { - int varNumber = aggstate->hashGrpColIdxInput[i] - 1; + int varNumber = perhash->hashGrpColIdxInput[i] - 1; hashslot->tts_values[i] = inputslot->tts_values[varNumber]; hashslot->tts_isnull[i] = inputslot->tts_isnull[varNumber]; @@ -1867,22 +1997,44 @@ lookup_hash_entry(AggState *aggstate, TupleTableSlot *inputslot) ExecStoreVirtualTuple(hashslot); /* find or create the hashtable entry using the filtered tuple */ - entry = LookupTupleHashEntry(aggstate->hashtable, hashslot, &isnew); + entry = LookupTupleHashEntry(perhash->hashtable, hashslot, &isnew); if (isnew) { entry->additional = (AggStatePerGroup) - MemoryContextAlloc(aggstate->hashtable->tablecxt, + MemoryContextAlloc(perhash->hashtable->tablecxt, sizeof(AggStatePerGroupData) * aggstate->numtrans); /* initialize aggregates for new tuple group */ initialize_aggregates(aggstate, (AggStatePerGroup) entry->additional, - 0); + -1); } return entry; } /* + * Look up hash entries for the current tuple in all hashed grouping sets, + * returning an array of pergroup pointers suitable for advance_aggregates. + * + * Be aware that lookup_hash_entry can reset the tmpcontext. + */ +static AggStatePerGroup * +lookup_hash_entries(AggState *aggstate) +{ + int numHashes = aggstate->num_hashes; + AggStatePerGroup *pergroup = aggstate->hash_pergroup; + int setno; + + for (setno = 0; setno < numHashes; setno++) + { + select_current_set(aggstate, setno, true); + pergroup[setno] = lookup_hash_entry(aggstate)->additional; + } + + return pergroup; +} + +/* * ExecAgg - * * ExecAgg receives tuples from its outer subplan and aggregates over @@ -1903,14 +2055,17 @@ ExecAgg(AggState *node) if (!node->agg_done) { /* Dispatch based on strategy */ - switch (node->phase->aggnode->aggstrategy) + switch (node->phase->aggstrategy) { case AGG_HASHED: if (!node->table_filled) agg_fill_hash_table(node); + /*FALLTHROUGH*/ + case AGG_MIXED: result = agg_retrieve_hash_table(node); break; - default: + case AGG_PLAIN: + case AGG_SORTED: result = agg_retrieve_direct(node); break; } @@ -1933,6 +2088,7 @@ agg_retrieve_direct(AggState *aggstate) ExprContext *tmpcontext; AggStatePerAgg peragg; AggStatePerGroup pergroup; + AggStatePerGroup *hash_pergroups = NULL; TupleTableSlot *outerslot; TupleTableSlot *firstSlot; TupleTableSlot *result; @@ -2019,6 +2175,18 @@ agg_retrieve_direct(AggState *aggstate) node = aggstate->phase->aggnode; numReset = numGroupingSets; } + else if (aggstate->aggstrategy == AGG_MIXED) + { + /* + * Mixed mode; we've output all the grouped stuff and have + * full hashtables, so switch to outputting those. + */ + initialize_phase(aggstate, 0); + aggstate->table_filled = true; + ResetTupleHashIterator(aggstate->perhash[0].hashtable, &aggstate->perhash[0].hashiter); + select_current_set(aggstate, 0, true); + return agg_retrieve_hash_table(aggstate); + } else { aggstate->agg_done = true; @@ -2055,7 +2223,7 @@ agg_retrieve_direct(AggState *aggstate) *---------- */ if (aggstate->input_done || - (node->aggstrategy == AGG_SORTED && + (node->aggstrategy != AGG_PLAIN && aggstate->projected_set != -1 && aggstate->projected_set < (numGroupingSets - 1) && nextSetSize > 0 && @@ -2168,10 +2336,21 @@ agg_retrieve_direct(AggState *aggstate) */ for (;;) { + /* + * During phase 1 only of a mixed agg, we need to update + * hashtables as well in advance_aggregates. + */ + if (aggstate->aggstrategy == AGG_MIXED && aggstate->current_phase == 1) + { + hash_pergroups = lookup_hash_entries(aggstate); + } + else + hash_pergroups = NULL; + if (DO_AGGSPLIT_COMBINE(aggstate->aggsplit)) combine_aggregates(aggstate, pergroup); else - advance_aggregates(aggstate, pergroup); + advance_aggregates(aggstate, pergroup, hash_pergroups); /* Reset per-input-tuple context after each tuple */ ResetExprContext(tmpcontext); @@ -2198,7 +2377,7 @@ agg_retrieve_direct(AggState *aggstate) * If we are grouping, check whether we've crossed a group * boundary. */ - if (node->aggstrategy == AGG_SORTED) + if (node->aggstrategy != AGG_PLAIN) { if (!execTuplesMatch(firstSlot, outerslot, @@ -2247,21 +2426,13 @@ agg_retrieve_direct(AggState *aggstate) } /* - * ExecAgg for hashed case: phase 1, read input and build hash table + * ExecAgg for hashed case: read input and build hash table */ static void agg_fill_hash_table(AggState *aggstate) { - ExprContext *tmpcontext; - TupleHashEntryData *entry; TupleTableSlot *outerslot; - - /* - * get state info from node - * - * tmpcontext is the per-input-tuple expression context - */ - tmpcontext = aggstate->tmpcontext; + ExprContext *tmpcontext = aggstate->tmpcontext; /* * Process each outer-plan tuple, and then fetch the next one, until we @@ -2269,32 +2440,39 @@ agg_fill_hash_table(AggState *aggstate) */ for (;;) { + AggStatePerGroup *pergroups; + outerslot = fetch_input_tuple(aggstate); if (TupIsNull(outerslot)) break; - /* set up for advance_aggregates call */ + + /* set up for lookup_hash_entries and advance_aggregates */ tmpcontext->ecxt_outertuple = outerslot; - /* Find or build hashtable entry for this tuple's group */ - entry = lookup_hash_entry(aggstate, outerslot); + /* Find or build hashtable entries */ + pergroups = lookup_hash_entries(aggstate); /* Advance the aggregates */ if (DO_AGGSPLIT_COMBINE(aggstate->aggsplit)) - combine_aggregates(aggstate, (AggStatePerGroup) entry->additional); + combine_aggregates(aggstate, pergroups[0]); else - advance_aggregates(aggstate, (AggStatePerGroup) entry->additional); + advance_aggregates(aggstate, NULL, pergroups); - /* Reset per-input-tuple context after each tuple */ - ResetExprContext(tmpcontext); + /* + * Reset per-input-tuple context after each tuple, but note that the + * hash lookups do this too + */ + ResetExprContext(aggstate->tmpcontext); } aggstate->table_filled = true; - /* Initialize to walk the hash table */ - ResetTupleHashIterator(aggstate->hashtable, &aggstate->hashiter); + /* Initialize to walk the first hash table */ + select_current_set(aggstate, 0, true); + ResetTupleHashIterator(aggstate->perhash[0].hashtable, &aggstate->perhash[0].hashiter); } /* - * ExecAgg for hashed case: phase 2, retrieving groups from hash table + * ExecAgg for hashed case: retrieving groups from hash table */ static TupleTableSlot * agg_retrieve_hash_table(AggState *aggstate) @@ -2305,17 +2483,22 @@ agg_retrieve_hash_table(AggState *aggstate) TupleHashEntryData *entry; TupleTableSlot *firstSlot; TupleTableSlot *result; - TupleTableSlot *hashslot; + AggStatePerHash perhash; /* - * get state info from node + * get state info from node. + * + * econtext is the per-output-tuple expression context. */ - /* econtext is the per-output-tuple expression context */ econtext = aggstate->ss.ps.ps_ExprContext; peragg = aggstate->peragg; firstSlot = aggstate->ss.ss_ScanTupleSlot; - hashslot = aggstate->hashslot; + /* + * Note that perhash (and therefore anything accessed through it) can + * change inside the loop, as we change between grouping sets. + */ + perhash = &aggstate->perhash[aggstate->current_set]; /* * We loop retrieving groups until we find one satisfying @@ -2323,17 +2506,37 @@ agg_retrieve_hash_table(AggState *aggstate) */ while (!aggstate->agg_done) { + TupleTableSlot *hashslot = perhash->hashslot; int i; /* * Find the next entry in the hash table */ - entry = ScanTupleHashTable(aggstate->hashtable, &aggstate->hashiter); + entry = ScanTupleHashTable(perhash->hashtable, &perhash->hashiter); if (entry == NULL) { - /* No more entries in hashtable, so done */ - aggstate->agg_done = TRUE; - return NULL; + int nextset = aggstate->current_set + 1; + + if (nextset < aggstate->num_hashes) + { + /* + * Switch to next grouping set, reinitialize, and restart the + * loop. + */ + select_current_set(aggstate, nextset, true); + + perhash = &aggstate->perhash[aggstate->current_set]; + + ResetTupleHashIterator(perhash->hashtable, &perhash->hashiter); + + continue; + } + else + { + /* No more hashtables, so done */ + aggstate->agg_done = TRUE; + return NULL; + } } /* @@ -2356,9 +2559,9 @@ agg_retrieve_hash_table(AggState *aggstate) memset(firstSlot->tts_isnull, true, firstSlot->tts_tupleDescriptor->natts * sizeof(bool)); - for (i = 0; i < aggstate->numhashGrpCols; i++) + for (i = 0; i < perhash->numhashGrpCols; i++) { - int varNumber = aggstate->hashGrpColIdxInput[i] - 1; + int varNumber = perhash->hashGrpColIdxInput[i] - 1; firstSlot->tts_values[varNumber] = hashslot->tts_values[i]; firstSlot->tts_isnull[varNumber] = hashslot->tts_isnull[i]; @@ -2367,14 +2570,16 @@ agg_retrieve_hash_table(AggState *aggstate) pergroup = (AggStatePerGroup) entry->additional; - finalize_aggregates(aggstate, peragg, pergroup, 0); - /* * Use the representative input tuple for any references to * non-aggregated input columns in the qual and tlist. */ econtext->ecxt_outertuple = firstSlot; + prepare_projection_slot(aggstate, econtext->ecxt_outertuple, aggstate->current_set); + + finalize_aggregates(aggstate, peragg, pergroup, -1); + result = project_aggregates(aggstate); if (result) return result; @@ -2388,7 +2593,36 @@ agg_retrieve_hash_table(AggState *aggstate) * ExecInitAgg * * Creates the run-time information for the agg node produced by the - * planner and initializes its outer subtree + * planner and initializes its outer subtree. + * + * What we get from the planner is actually one "real" Agg node which is part + * of the plan tree proper, but which optionally has an additional list of Agg + * nodes hung off the side via the "chain" field. This is because an Agg node + * happens to be a convenient representation of all the data we need for + * grouping sets. + * + * For many purposes, we treat the "real" node as if it were just the first + * node in the chain. The chain must be ordered such that hashed entries come + * before sorted/plain entries; the real node is marked AGG_MIXED if there are + * mixed types (in which case the real node describes one of the hashed + * groupings, other AGG_HASHED nodes may optionally follow in the chain, + * followed in turn by AGG_SORTED or (one) AGG_PLAIN node). If the real node + * is marked AGG_HASHED or AGG_SORTED, then all the chained nodes must be of + * the same type; if it is AGG_PLAIN, there can be no chained nodes. + * + * We collect all hashed nodes into a single "phase", numbered 0, and create a + * sorted phase (numbered 1..n) for each AGG_SORTED or AGG_PLAIN node. Phase + * 0 is allocated even if there are no hashes, but remains unused in that + * case. + * + * AGG_HASHED nodes actually refer to only a single grouping set each, because + * for each hashed grouping we need a separate grpColIdx and numGroups + * estimate. AGG_SORTED nodes represent a "rollup", a list of grouping sets + * that share a sort order. Each AGG_SORTED node other than the first one has + * an associated Sort node which describes the sort order to be used; the + * first sorted node takes its input from the outer subtree, which the planner + * has already arranged to provide ordered data. + * * ----------------- */ AggState * @@ -2403,14 +2637,18 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) transno, aggno; int phase; + int phaseidx; List *combined_inputeval; ListCell *l; Bitmapset *all_grouped_cols = NULL; int numGroupingSets = 1; int numPhases; + int numHashes; int column_offset; int i = 0; int j = 0; + bool use_hashing = (node->aggstrategy == AGG_HASHED || + node->aggstrategy == AGG_MIXED); /* check for unsupported flags */ Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); @@ -2425,9 +2663,9 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) aggstate->aggs = NIL; aggstate->numaggs = 0; aggstate->numtrans = 0; + aggstate->aggstrategy = node->aggstrategy; aggstate->aggsplit = node->aggsplit; aggstate->maxsets = 0; - aggstate->hashfunctions = NULL; aggstate->projected_set = -1; aggstate->current_set = 0; aggstate->peragg = NULL; @@ -2437,18 +2675,22 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) aggstate->agg_done = false; aggstate->pergroup = NULL; aggstate->grp_firstTuple = NULL; - aggstate->hashtable = NULL; aggstate->sort_in = NULL; aggstate->sort_out = NULL; /* + * phases[0] always exists, but is dummy in sorted/plain mode + */ + numPhases = (use_hashing ? 1 : 2); + numHashes = (use_hashing ? 1 : 0); + + /* * Calculate the maximum number of grouping sets in any phase; this - * determines the size of some allocations. + * determines the size of some allocations. Also calculate the number of + * phases, since all hashed/mixed nodes contribute to only a single phase. */ if (node->groupingSets) { - Assert(node->aggstrategy != AGG_HASHED); - numGroupingSets = list_length(node->groupingSets); foreach(l, node->chain) @@ -2457,22 +2699,30 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) numGroupingSets = Max(numGroupingSets, list_length(agg->groupingSets)); + /* + * additional AGG_HASHED aggs become part of phase 0, + * but all others add an extra phase. + */ + if (agg->aggstrategy != AGG_HASHED) + ++numPhases; + else + ++numHashes; } } aggstate->maxsets = numGroupingSets; - aggstate->numphases = numPhases = 1 + list_length(node->chain); + aggstate->numphases = numPhases; aggstate->aggcontexts = (ExprContext **) palloc0(sizeof(ExprContext *) * numGroupingSets); /* * Create expression contexts. We need three or more, one for - * per-input-tuple processing, one for per-output-tuple processing, and - * one for each grouping set. The per-tuple memory context of the - * per-grouping-set ExprContexts (aggcontexts) replaces the standalone - * memory context formerly used to hold transition values. We cheat a - * little by using ExecAssignExprContext() to build all of them. + * per-input-tuple processing, one for per-output-tuple processing, one for + * all the hashtables, and one for each grouping set. The per-tuple memory + * context of the per-grouping-set ExprContexts (aggcontexts) replaces the + * standalone memory context formerly used to hold transition values. We + * cheat a little by using ExecAssignExprContext() to build all of them. * * NOTE: the details of what is stored in aggcontexts and what is stored * in the regular per-query memory context are driven by a simple @@ -2488,14 +2738,21 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) aggstate->aggcontexts[i] = aggstate->ss.ps.ps_ExprContext; } + if (use_hashing) + { + ExecAssignExprContext(estate, &aggstate->ss.ps); + aggstate->hashcontext = aggstate->ss.ps.ps_ExprContext; + } + ExecAssignExprContext(estate, &aggstate->ss.ps); /* - * tuple table initialization + * tuple table initialization. + * + * For hashtables, we create some additional slots below. */ ExecInitScanTupleSlot(estate, &aggstate->ss); ExecInitResultTupleSlot(estate, &aggstate->ss.ps); - aggstate->hashslot = ExecInitExtraTupleSlot(estate); aggstate->sort_slot = ExecInitExtraTupleSlot(estate); /* @@ -2560,19 +2817,25 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) * For each phase, prepare grouping set data and fmgr lookup data for * compare functions. Accumulate all_grouped_cols in passing. */ - aggstate->phases = palloc0(numPhases * sizeof(AggStatePerPhaseData)); - for (phase = 0; phase < numPhases; ++phase) + aggstate->num_hashes = numHashes; + if (numHashes) + { + aggstate->perhash = palloc0(sizeof(AggStatePerHashData) * numHashes); + aggstate->phases[0].numsets = 0; + aggstate->phases[0].gset_lengths = palloc(numHashes * sizeof(int)); + aggstate->phases[0].grouped_cols = palloc(numHashes * sizeof(Bitmapset *)); + } + + for (phaseidx = 0, phase = 0; phaseidx <= list_length(node->chain); ++phaseidx) { - AggStatePerPhase phasedata = &aggstate->phases[phase]; Agg *aggnode; Sort *sortnode; - int num_sets; - if (phase > 0) + if (phaseidx > 0) { - aggnode = castNode(Agg, list_nth(node->chain, phase - 1)); + aggnode = castNode(Agg, list_nth(node->chain, phaseidx - 1)); sortnode = castNode(Sort, aggnode->plan.lefttree); } else @@ -2581,53 +2844,91 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) sortnode = NULL; } - phasedata->numsets = num_sets = list_length(aggnode->groupingSets); + Assert(phase <= 1 || sortnode); - if (num_sets) + if (aggnode->aggstrategy == AGG_HASHED + || aggnode->aggstrategy == AGG_MIXED) { - phasedata->gset_lengths = palloc(num_sets * sizeof(int)); - phasedata->grouped_cols = palloc(num_sets * sizeof(Bitmapset *)); + AggStatePerPhase phasedata = &aggstate->phases[0]; + AggStatePerHash perhash; + Bitmapset *cols = NULL; - i = 0; - foreach(l, aggnode->groupingSets) - { - int current_length = list_length(lfirst(l)); - Bitmapset *cols = NULL; + Assert(phase == 0); + i = phasedata->numsets++; + perhash = &aggstate->perhash[i]; - /* planner forces this to be correct */ - for (j = 0; j < current_length; ++j) - cols = bms_add_member(cols, aggnode->grpColIdx[j]); + /* phase 0 always points to the "real" Agg in the hash case */ + phasedata->aggnode = node; + phasedata->aggstrategy = node->aggstrategy; - phasedata->grouped_cols[i] = cols; - phasedata->gset_lengths[i] = current_length; - ++i; - } + /* but the actual Agg node representing this hash is saved here */ + perhash->aggnode = aggnode; + + phasedata->gset_lengths[i] = perhash->numCols = aggnode->numCols; + + for (j = 0; j < aggnode->numCols; ++j) + cols = bms_add_member(cols, aggnode->grpColIdx[j]); + + phasedata->grouped_cols[i] = cols; - all_grouped_cols = bms_add_members(all_grouped_cols, - phasedata->grouped_cols[0]); + all_grouped_cols = bms_add_members(all_grouped_cols, cols); + continue; } else { - Assert(phase == 0); + AggStatePerPhase phasedata = &aggstate->phases[++phase]; + int num_sets; - phasedata->gset_lengths = NULL; - phasedata->grouped_cols = NULL; - } + phasedata->numsets = num_sets = list_length(aggnode->groupingSets); - /* - * If we are grouping, precompute fmgr lookup data for inner loop. - */ - if (aggnode->aggstrategy == AGG_SORTED) - { - Assert(aggnode->numCols > 0); + if (num_sets) + { + phasedata->gset_lengths = palloc(num_sets * sizeof(int)); + phasedata->grouped_cols = palloc(num_sets * sizeof(Bitmapset *)); - phasedata->eqfunctions = - execTuplesMatchPrepare(aggnode->numCols, - aggnode->grpOperators); - } + i = 0; + foreach(l, aggnode->groupingSets) + { + int current_length = list_length(lfirst(l)); + Bitmapset *cols = NULL; + + /* planner forces this to be correct */ + for (j = 0; j < current_length; ++j) + cols = bms_add_member(cols, aggnode->grpColIdx[j]); + + phasedata->grouped_cols[i] = cols; + phasedata->gset_lengths[i] = current_length; + + ++i; + } + + all_grouped_cols = bms_add_members(all_grouped_cols, + phasedata->grouped_cols[0]); + } + else + { + Assert(phaseidx == 0); + + phasedata->gset_lengths = NULL; + phasedata->grouped_cols = NULL; + } + + /* + * If we are grouping, precompute fmgr lookup data for inner loop. + */ + if (aggnode->aggstrategy == AGG_SORTED) + { + Assert(aggnode->numCols > 0); + + phasedata->eqfunctions = + execTuplesMatchPrepare(aggnode->numCols, + aggnode->grpOperators); + } - phasedata->aggnode = aggnode; - phasedata->sortnode = sortnode; + phasedata->aggnode = aggnode; + phasedata->aggstrategy = aggnode->aggstrategy; + phasedata->sortnode = sortnode; + } } /* @@ -2638,13 +2939,6 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) aggstate->all_grouped_cols = lcons_int(i, aggstate->all_grouped_cols); /* - * Initialize current phase-dependent values to initial phase - */ - - aggstate->current_phase = 0; - initialize_phase(aggstate, 0); - - /* * Set up aggregate-result storage in the output expr context, and also * allocate my private per-agg working storage */ @@ -2658,23 +2952,30 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) aggstate->peragg = peraggs; aggstate->pertrans = pertransstates; - /* * Hashing can only appear in the initial phase. */ - if (node->aggstrategy == AGG_HASHED) + if (use_hashing) { - find_hash_columns(aggstate); + for (i = 0; i < numHashes; ++i) + { + aggstate->perhash[i].hashslot = ExecInitExtraTupleSlot(estate); + + execTuplesHashPrepare(aggstate->perhash[i].numCols, + aggstate->perhash[i].aggnode->grpOperators, + &aggstate->perhash[i].eqfunctions, + &aggstate->perhash[i].hashfunctions); + } - execTuplesHashPrepare(node->numCols, - node->grpOperators, - &aggstate->phases[0].eqfunctions, - &aggstate->hashfunctions); + /* this is an array of pointers, not structures */ + aggstate->hash_pergroup = palloc0(sizeof(AggStatePerGroup) * numHashes); + find_hash_columns(aggstate); build_hash_table(aggstate); aggstate->table_filled = false; } - else + + if (node->aggstrategy != AGG_HASHED) { AggStatePerGroup pergroup; @@ -2685,6 +2986,26 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) aggstate->pergroup = pergroup; } + /* + * Initialize current phase-dependent values to initial phase. The initial + * phase is 1 (first sort pass) for all strategies that use sorting (if + * hashing is being done too, then phase 0 is processed last); but if only + * hashing is being done, then phase 0 is all there is. + */ + + if (node->aggstrategy == AGG_HASHED) + { + aggstate->current_phase = 0; + initialize_phase(aggstate, 0); + select_current_set(aggstate, 0, true); + } + else + { + aggstate->current_phase = 1; + initialize_phase(aggstate, 1); + select_current_set(aggstate, 0, false); + } + /* ----------------- * Perform lookups of aggregate function info, and initialize the * unchanging fields of the per-agg and per-trans data. @@ -3263,7 +3584,7 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans, * We don't implement DISTINCT or ORDER BY aggs in the HASHED case * (yet) */ - Assert(((Agg *) aggstate->ss.ps.plan)->aggstrategy != AGG_HASHED); + Assert(aggstate->aggstrategy != AGG_HASHED && aggstate->aggstrategy != AGG_MIXED); /* If we have only one input, we need its len/byval info. */ if (numInputs == 1) @@ -3512,6 +3833,8 @@ ExecEndAgg(AggState *node) /* And ensure any agg shutdown callbacks have been called */ for (setno = 0; setno < numGroupingSets; setno++) ReScanExprContext(node->aggcontexts[setno]); + if (node->hashcontext) + ReScanExprContext(node->hashcontext); /* * We don't actually free any ExprContexts here (see comment in @@ -3539,7 +3862,7 @@ ExecReScanAgg(AggState *node) node->agg_done = false; - if (aggnode->aggstrategy == AGG_HASHED) + if (node->aggstrategy == AGG_HASHED) { /* * In the hashed case, if we haven't yet built the hash table then we @@ -3559,7 +3882,8 @@ ExecReScanAgg(AggState *node) if (outerPlan->chgParam == NULL && !bms_overlap(node->ss.ps.chgParam, aggnode->aggParams)) { - ResetTupleHashIterator(node->hashtable, &node->hashiter); + ResetTupleHashIterator(node->perhash[0].hashtable, &node->perhash[0].hashiter); + select_current_set(node, 0, true); return; } } @@ -3584,11 +3908,7 @@ ExecReScanAgg(AggState *node) * ExecReScan already did it. But we do need to reset our per-grouping-set * contexts, which may have transvalues stored in them. (We use rescan * rather than just reset because transfns may have registered callbacks - * that need to be run now.) - * - * Note that with AGG_HASHED, the hash table is allocated in a sub-context - * of the aggcontext. This used to be an issue, but now, resetting a - * context automatically deletes sub-contexts too. + * that need to be run now.) For the AGG_HASHED case, see below. */ for (setno = 0; setno < numGroupingSets; setno++) @@ -3608,13 +3928,22 @@ ExecReScanAgg(AggState *node) MemSet(econtext->ecxt_aggvalues, 0, sizeof(Datum) * node->numaggs); MemSet(econtext->ecxt_aggnulls, 0, sizeof(bool) * node->numaggs); - if (aggnode->aggstrategy == AGG_HASHED) + /* + * With AGG_HASHED/MIXED, the hash table is allocated in a sub-context of + * the hashcontext. This used to be an issue, but now, resetting a context + * automatically deletes sub-contexts too. + */ + + if (node->aggstrategy == AGG_HASHED || node->aggstrategy == AGG_MIXED) { + ReScanExprContext(node->hashcontext); /* Rebuild an empty hash table */ build_hash_table(node); node->table_filled = false; + /* iterator will be reset when the table is filled */ } - else + + if (node->aggstrategy != AGG_HASHED) { /* * Reset the per-group state (in particular, mark transvalues null) @@ -3622,8 +3951,8 @@ ExecReScanAgg(AggState *node) MemSet(node->pergroup, 0, sizeof(AggStatePerGroupData) * node->numaggs * numGroupingSets); - /* reset to phase 0 */ - initialize_phase(node, 0); + /* reset to phase 1 */ + initialize_phase(node, 1); node->input_done = false; node->projected_set = -1; @@ -3664,7 +3993,7 @@ AggCheckCallContext(FunctionCallInfo fcinfo, MemoryContext *aggcontext) if (aggcontext) { AggState *aggstate = ((AggState *) fcinfo->context); - ExprContext *cxt = aggstate->aggcontexts[aggstate->current_set]; + ExprContext *cxt = aggstate->curaggcontext; *aggcontext = cxt->ecxt_per_tuple_memory; } @@ -3753,7 +4082,7 @@ AggRegisterCallback(FunctionCallInfo fcinfo, if (fcinfo->context && IsA(fcinfo->context, AggState)) { AggState *aggstate = (AggState *) fcinfo->context; - ExprContext *cxt = aggstate->aggcontexts[aggstate->current_set]; + ExprContext *cxt = aggstate->curaggcontext; RegisterExprContextCallback(cxt, func, arg); diff --git a/src/backend/lib/Makefile b/src/backend/lib/Makefile index 2d2ba84..f222c6c 100644 --- a/src/backend/lib/Makefile +++ b/src/backend/lib/Makefile @@ -12,7 +12,7 @@ subdir = src/backend/lib top_builddir = ../../.. include $(top_builddir)/src/Makefile.global -OBJS = binaryheap.o bipartite_match.o hyperloglog.o ilist.o pairingheap.o \ - rbtree.o stringinfo.o +OBJS = binaryheap.o bipartite_match.o hyperloglog.o ilist.o knapsack.o \ + pairingheap.o rbtree.o stringinfo.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/lib/knapsack.c b/src/backend/lib/knapsack.c new file mode 100644 index 0000000..54301b9 --- /dev/null +++ b/src/backend/lib/knapsack.c @@ -0,0 +1,112 @@ +/*------------------------------------------------------------------------- + * + * knapsack.c + * Knapsack problem solver + * + * Given input vectors of integral item weights (must be >= 0) and values + * (double >= 0), compute the set of items which produces the greatest total + * value without exceeding a specified total weight; each item is included at + * most once (this is the 0/1 knapsack problem). Weight 0 items will always be + * included. + * + * The performance of this algorithm is pseudo-polynomial, O(nW) where W is the + * weight limit. To use with non-integral weights or approximate solutions, + * the caller should pre-scale the input weights to a suitable range. This + * allows approximate solutions in polynomial time (the general case of the + * exact problem is NP-hard). + * + * Copyright (c) 2017, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/lib/knapsack.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <math.h> +#include <limits.h> + +#include "lib/knapsack.h" +#include "miscadmin.h" +#include "nodes/bitmapset.h" +#include "utils/builtins.h" +#include "utils/memutils.h" +#include "utils/palloc.h" + +/* + * DiscreteKnapsack + * + * The item_values input is optional; if omitted, all the items are assumed to + * have value 1. + * + * Returns a Bitmapset of the 0..(n-1) indexes of the items chosen for + * inclusion in the solution. + * + * This uses the usual dynamic-programming algorithm, adapted to reuse the + * memory on each pass (by working from larger weights to smaller). At the + * start of pass number i, the values[w] array contains the largest value + * computed with total weight <= w, using only items with indices < i; and + * sets[w] contains the bitmap of items actually used for that value. (The + * bitmapsets are all pre-initialized with an unused high bit so that memory + * allocation is done only once.) + */ +Bitmapset * +DiscreteKnapsack(int max_weight, int num_items, + int *item_weights, double *item_values) +{ + MemoryContext local_ctx = AllocSetContextCreate(CurrentMemoryContext, + "Knapsack", + ALLOCSET_SMALL_MINSIZE, + ALLOCSET_SMALL_INITSIZE, + ALLOCSET_SMALL_MAXSIZE); + MemoryContext oldctx = MemoryContextSwitchTo(local_ctx); + double *values; + Bitmapset **sets; + Bitmapset *result; + int i,j; + + Assert(max_weight >= 0); + Assert(num_items > 0 && item_weights); + + values = palloc((1 + max_weight) * sizeof(double)); + sets = palloc((1 + max_weight) * sizeof(Bitmapset *)); + + for (i = 0; i <= max_weight; ++i) + { + values[i] = 0; + sets[i] = bms_make_singleton(num_items); + } + + for (i = 0; i < num_items; ++i) + { + int iw = item_weights[i]; + double iv = item_values ? item_values[i] : 1; + + for (j = max_weight; j >= 0; --j) + { + int ow = j - iw; + if (iw <= j && values[j] <= values[ow] + iv) + { + /* copy sets[ow] to sets[j] without realloc */ + if (j != ow) + { + sets[j] = bms_del_members(sets[j], sets[j]); + sets[j] = bms_add_members(sets[j], sets[ow]); + } + + sets[j] = bms_add_member(sets[j], i); + + values[j] = values[ow] + iv; + } + } + } + + MemoryContextSwitchTo(oldctx); + + result = bms_del_member(bms_copy(sets[max_weight]), num_items); + + MemoryContextDelete(local_ctx); + + return result; +} diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c index 252af5c..16a357e 100644 --- a/src/backend/nodes/bitmapset.c +++ b/src/backend/nodes/bitmapset.c @@ -21,7 +21,7 @@ #include "postgres.h" #include "access/hash.h" - +#include "nodes/pg_list.h" #define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD) #define BITNUM(x) ((x) % BITS_PER_BITMAPWORD) @@ -458,6 +458,36 @@ bms_overlap(const Bitmapset *a, const Bitmapset *b) } /* + * bms_overlap_list - does a set overlap an integer list? + */ +bool +bms_overlap_list(const Bitmapset *a, const List *b) +{ + ListCell *lc; + int wordnum, + bitnum; + + if (a == NULL || b == NIL) + return false; + + foreach(lc, b) + { + int x = lfirst_int(lc); + + /* XXX better to just return false for x<0 ? */ + if (x < 0) + elog(ERROR, "negative bitmapset member not allowed"); + wordnum = WORDNUM(x); + bitnum = BITNUM(x); + if (wordnum < a->nwords) + if ((a->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0) + return true; + } + + return false; +} + +/* * bms_nonempty_difference - do sets have a nonempty difference? */ bool diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index b3802b4..59a7044 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -1875,6 +1875,28 @@ _outAggPath(StringInfo str, const AggPath *node) } static void +_outRollupData(StringInfo str, const RollupData *node) +{ + WRITE_NODE_TYPE("ROLLUP"); + + WRITE_NODE_FIELD(groupClause); + WRITE_NODE_FIELD(gsets); + WRITE_NODE_FIELD(gsets_data); + WRITE_FLOAT_FIELD(numGroups, "%.0f"); + WRITE_BOOL_FIELD(hashable); + WRITE_BOOL_FIELD(is_hashed); +} + +static void +_outGroupingSetData(StringInfo str, const GroupingSetData *node) +{ + WRITE_NODE_TYPE("GSDATA"); + + WRITE_NODE_FIELD(set); + WRITE_FLOAT_FIELD(numGroups, "%.0f"); +} + +static void _outGroupingSetsPath(StringInfo str, const GroupingSetsPath *node) { WRITE_NODE_TYPE("GROUPINGSETSPATH"); @@ -1882,8 +1904,8 @@ _outGroupingSetsPath(StringInfo str, const GroupingSetsPath *node) _outPathInfo(str, (const Path *) node); WRITE_NODE_FIELD(subpath); - WRITE_NODE_FIELD(rollup_groupclauses); - WRITE_NODE_FIELD(rollup_lists); + WRITE_ENUM_FIELD(aggstrategy, AggStrategy); + WRITE_NODE_FIELD(rollups); WRITE_NODE_FIELD(qual); } @@ -3800,6 +3822,12 @@ outNode(StringInfo str, const void *obj) case T_PlannerParamItem: _outPlannerParamItem(str, obj); break; + case T_RollupData: + _outRollupData(str, obj); + break; + case T_GroupingSetData: + _outGroupingSetData(str, obj); + break; case T_ExtensibleNode: _outExtensibleNode(str, obj); diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index c138f57..937e30d 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -1739,11 +1739,16 @@ cost_agg(Path *path, PlannerInfo *root, total_cost = startup_cost + cpu_tuple_cost; output_tuples = 1; } - else if (aggstrategy == AGG_SORTED) + else if (aggstrategy == AGG_SORTED || aggstrategy == AGG_MIXED) { /* Here we are able to deliver output on-the-fly */ startup_cost = input_startup_cost; total_cost = input_total_cost; + if (aggstrategy == AGG_MIXED && !enable_hashagg) + { + startup_cost += disable_cost; + total_cost += disable_cost; + } /* calcs phrased this way to match HASHED case, see note above */ total_cost += aggcosts->transCost.startup; total_cost += aggcosts->transCost.per_tuple * input_tuples; diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 1e953b4..7137244 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -1707,18 +1707,15 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path) { Agg *plan; Plan *subplan; - List *rollup_groupclauses = best_path->rollup_groupclauses; - List *rollup_lists = best_path->rollup_lists; + List *rollups = best_path->rollups; AttrNumber *grouping_map; int maxref; List *chain; - ListCell *lc, - *lc2; + ListCell *lc; /* Shouldn't get here without grouping sets */ Assert(root->parse->groupingSets); - Assert(rollup_lists != NIL); - Assert(list_length(rollup_lists) == list_length(rollup_groupclauses)); + Assert(rollups != NIL); /* * Agg can project, so no need to be terribly picky about child tlist, but @@ -1770,72 +1767,86 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path) * costs will be shown by EXPLAIN. */ chain = NIL; - if (list_length(rollup_groupclauses) > 1) + if (list_length(rollups) > 1) { - forboth(lc, rollup_groupclauses, lc2, rollup_lists) + ListCell *lc2 = lnext(list_head(rollups)); + bool is_first_sort = ((RollupData *)linitial(rollups))->is_hashed; + + for_each_cell(lc, lc2) { - List *groupClause = (List *) lfirst(lc); - List *gsets = (List *) lfirst(lc2); + RollupData *rollup = lfirst(lc); AttrNumber *new_grpColIdx; - Plan *sort_plan; + Plan *sort_plan = NULL; Plan *agg_plan; + AggStrategy strat; - /* We want to iterate over all but the last rollup list elements */ - if (lnext(lc) == NULL) - break; + new_grpColIdx = remap_groupColIdx(root, rollup->groupClause); + + if (!rollup->is_hashed && !is_first_sort) + { + sort_plan = (Plan *) + make_sort_from_groupcols(rollup->groupClause, + new_grpColIdx, + subplan); + } - new_grpColIdx = remap_groupColIdx(root, groupClause); + if (!rollup->is_hashed) + is_first_sort = false; - sort_plan = (Plan *) - make_sort_from_groupcols(groupClause, - new_grpColIdx, - subplan); + if (rollup->is_hashed) + strat = AGG_HASHED; + else if (list_length(linitial(rollup->gsets)) == 0) + strat = AGG_PLAIN; + else + strat = AGG_SORTED; agg_plan = (Plan *) make_agg(NIL, NIL, - AGG_SORTED, + strat, AGGSPLIT_SIMPLE, - list_length((List *) linitial(gsets)), + list_length((List *) linitial(rollup->gsets)), new_grpColIdx, - extract_grouping_ops(groupClause), - gsets, + extract_grouping_ops(rollup->groupClause), + rollup->gsets, NIL, - 0, /* numGroups not needed */ + rollup->numGroups, sort_plan); /* * Nuke stuff we don't need to avoid bloating debug output. */ - sort_plan->targetlist = NIL; - sort_plan->lefttree = NULL; + if (sort_plan) + { + sort_plan->targetlist = NIL; + sort_plan->lefttree = NULL; + } chain = lappend(chain, agg_plan); } } /* - * Now make the final Agg node + * Now make the real Agg node */ { - List *groupClause = (List *) llast(rollup_groupclauses); - List *gsets = (List *) llast(rollup_lists); + RollupData *rollup = linitial(rollups); AttrNumber *top_grpColIdx; int numGroupCols; - top_grpColIdx = remap_groupColIdx(root, groupClause); + top_grpColIdx = remap_groupColIdx(root, rollup->groupClause); - numGroupCols = list_length((List *) linitial(gsets)); + numGroupCols = list_length((List *) linitial(rollup->gsets)); plan = make_agg(build_path_tlist(root, &best_path->path), best_path->qual, - (numGroupCols > 0) ? AGG_SORTED : AGG_PLAIN, + best_path->aggstrategy, AGGSPLIT_SIMPLE, numGroupCols, top_grpColIdx, - extract_grouping_ops(groupClause), - gsets, + extract_grouping_ops(rollup->groupClause), + rollup->gsets, chain, - 0, /* numGroups not needed */ + rollup->numGroups, subplan); /* Copy cost data from Path to Plan */ diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index ca0ae78..c49bd82 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -30,6 +30,7 @@ #include "foreign/fdwapi.h" #include "miscadmin.h" #include "lib/bipartite_match.h" +#include "lib/knapsack.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #ifdef OPTIMIZER_DEBUG @@ -89,12 +90,31 @@ typedef struct List *groupClause; /* overrides parse->groupClause */ } standard_qp_extra; +/* + * Data specific to grouping sets + */ + +typedef struct +{ + List *rollups; + List *hash_sets_idx; + double dNumHashGroups; + bool any_hashable; + Bitmapset *unsortable_refs; + Bitmapset *unhashable_refs; + List *unsortable_sets; + int *tleref_to_colnum_map; +} grouping_sets_data; + /* Local functions */ static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind); static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode); static void inheritance_planner(PlannerInfo *root); static void grouping_planner(PlannerInfo *root, bool inheritance_update, double tuple_fraction); +static grouping_sets_data *preprocess_grouping_sets(PlannerInfo *root); +static List *remap_to_groupclause_idx(List *groupClause, List *gsets, + int *tleref_to_colnum_map); static void preprocess_rowmarks(PlannerInfo *root); static double preprocess_limit(PlannerInfo *root, double tuple_fraction, @@ -107,8 +127,7 @@ static List *reorder_grouping_sets(List *groupingSets, List *sortclause); static void standard_qp_callback(PlannerInfo *root, void *extra); static double get_number_of_groups(PlannerInfo *root, double path_rows, - List *rollup_lists, - List *rollup_groupclauses); + grouping_sets_data *gd); static Size estimate_hashagg_tablesize(Path *path, const AggClauseCosts *agg_costs, double dNumGroups); @@ -116,8 +135,16 @@ static RelOptInfo *create_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, PathTarget *target, const AggClauseCosts *agg_costs, - List *rollup_lists, - List *rollup_groupclauses); + grouping_sets_data *gd); +static void consider_groupingsets_paths(PlannerInfo *root, + RelOptInfo *grouped_rel, + Path *path, + bool is_sorted, + bool can_hash, + PathTarget *target, + grouping_sets_data *gd, + const AggClauseCosts *agg_costs, + double dNumGroups); static RelOptInfo *create_window_paths(PlannerInfo *root, RelOptInfo *input_rel, PathTarget *input_target, @@ -1494,8 +1521,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, AggClauseCosts agg_costs; WindowFuncLists *wflists = NULL; List *activeWindows = NIL; - List *rollup_lists = NIL; - List *rollup_groupclauses = NIL; + grouping_sets_data *gset_data = NULL; standard_qp_extra qp_extra; /* A recursive query should always have setOperations */ @@ -1504,84 +1530,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, /* Preprocess grouping sets and GROUP BY clause, if any */ if (parse->groupingSets) { - int *tleref_to_colnum_map; - List *sets; - int maxref; - ListCell *lc; - ListCell *lc2; - ListCell *lc_set; - - parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1); - - /* Identify max SortGroupRef in groupClause, for array sizing */ - maxref = 0; - foreach(lc, parse->groupClause) - { - SortGroupClause *gc = lfirst(lc); - - if (gc->tleSortGroupRef > maxref) - maxref = gc->tleSortGroupRef; - } - - /* Allocate workspace array for remapping */ - tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int)); - - /* Examine the rollup sets */ - sets = extract_rollup_sets(parse->groupingSets); - - foreach(lc_set, sets) - { - List *current_sets = (List *) lfirst(lc_set); - List *groupclause; - int ref; - - /* - * Reorder the current list of grouping sets into correct - * prefix order. If only one aggregation pass is needed, try - * to make the list match the ORDER BY clause; if more than - * one pass is needed, we don't bother with that. - */ - current_sets = reorder_grouping_sets(current_sets, - (list_length(sets) == 1 - ? parse->sortClause - : NIL)); - - /* - * Order the groupClause appropriately. If the first grouping - * set is empty, this can match regular GROUP BY - * preprocessing, otherwise we have to force the groupClause - * to match that grouping set's order. - */ - groupclause = preprocess_groupclause(root, - linitial(current_sets)); - - /* - * Now that we've pinned down an order for the groupClause for - * this list of grouping sets, we need to remap the entries in - * the grouping sets from sortgrouprefs to plain indices - * (0-based) into the groupClause for this collection of - * grouping sets. - */ - ref = 0; - foreach(lc, groupclause) - { - SortGroupClause *gc = lfirst(lc); - - tleref_to_colnum_map[gc->tleSortGroupRef] = ref++; - } - - foreach(lc, current_sets) - { - foreach(lc2, (List *) lfirst(lc)) - { - lfirst_int(lc2) = tleref_to_colnum_map[lfirst_int(lc2)]; - } - } - - /* Save the reordered sets and corresponding groupclauses */ - rollup_lists = lcons(current_sets, rollup_lists); - rollup_groupclauses = lcons(groupclause, rollup_groupclauses); - } + gset_data = preprocess_grouping_sets(root); } else { @@ -1675,8 +1624,9 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, /* Set up data needed by standard_qp_callback */ qp_extra.tlist = tlist; qp_extra.activeWindows = activeWindows; - qp_extra.groupClause = - parse->groupingSets ? llast(rollup_groupclauses) : parse->groupClause; + qp_extra.groupClause = (gset_data + ? (gset_data->rollups ? ((RollupData *) linitial(gset_data->rollups))->groupClause : NIL) + : parse->groupClause); /* * Generate the best unsorted and presorted paths for the scan/join @@ -1876,8 +1826,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, current_rel, grouping_target, &agg_costs, - rollup_lists, - rollup_groupclauses); + gset_data); /* Fix things up if grouping_target contains SRFs */ if (parse->hasTargetSRFs) adjust_paths_for_srfs(root, current_rel, @@ -1914,7 +1863,6 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, current_rel = create_distinct_paths(root, current_rel); } - } /* end of if (setOperations) */ /* @@ -2067,6 +2015,213 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, } +static grouping_sets_data * +preprocess_grouping_sets(PlannerInfo *root) +{ + Query *parse = root->parse; + List *sets; + int maxref = 0; + ListCell *lc; + ListCell *lc_set; + grouping_sets_data *gd = palloc0(sizeof(grouping_sets_data)); + + parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1); + + gd->any_hashable = false; + gd->unhashable_refs = NULL; + gd->unsortable_refs = NULL; + gd->unsortable_sets = NIL; + + if (parse->groupClause) + { + ListCell *lc; + + foreach(lc, parse->groupClause) + { + SortGroupClause *gc = lfirst(lc); + Index ref = gc->tleSortGroupRef; + + if (ref > maxref) + maxref = ref; + + if (!gc->hashable) + gd->unhashable_refs = bms_add_member(gd->unhashable_refs, ref); + + if (!OidIsValid(gc->sortop)) + gd->unsortable_refs = bms_add_member(gd->unsortable_refs, ref); + } + } + + /* Allocate workspace array for remapping */ + gd->tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int)); + + /* + * If we have any unsortable sets, we must extract them before trying to + * prepare rollups. Unsortable sets don't go through reorder_grouping_sets, + * so we must apply the GroupingSetData annotation here. + */ + if (!bms_is_empty(gd->unsortable_refs)) + { + List *sortable_sets = NIL; + + foreach(lc, parse->groupingSets) + { + List *gset = lfirst(lc); + + if (bms_overlap_list(gd->unsortable_refs, gset)) + { + GroupingSetData *gs = makeNode(GroupingSetData); + gs->set = gset; + gd->unsortable_sets = lappend(gd->unsortable_sets, gs); + + /* + * We must enforce here that an unsortable set is hashable; + * later code assumes this. Parse analysis only checks that + * every individual column is either hashable or sortable. + * + * Note that passing this test doesn't guarantee we can + * generate a plan; there might be other showstoppers. + */ + + if (bms_overlap_list(gd->unhashable_refs, gset)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("could not implement GROUP BY"), + errdetail("Some of the datatypes only support hashing, while others only support sorting."))); + } + else + sortable_sets = lappend(sortable_sets, gset); + } + + if (sortable_sets) + sets = extract_rollup_sets(sortable_sets); + else + sets = NIL; + } + else + sets = extract_rollup_sets(parse->groupingSets); + + foreach(lc_set, sets) + { + List *current_sets = (List *) lfirst(lc_set); + RollupData *rollup = makeNode(RollupData); + GroupingSetData *gs; + + /* + * Reorder the current list of grouping sets into correct + * prefix order. If only one aggregation pass is needed, try + * to make the list match the ORDER BY clause; if more than + * one pass is needed, we don't bother with that. + * + * Note that this reorders the sets from smallest-member-first + * to largest-member-first, and applies the GroupingSetData + * annotations, though the data will be filled in later. + */ + current_sets = reorder_grouping_sets(current_sets, + (list_length(sets) == 1 + ? parse->sortClause + : NIL)); + + /* + * Get the initial (and therefore largest) grouping set. + */ + gs = linitial(current_sets); + + /* + * Order the groupClause appropriately. If the first grouping set is + * empty, then the groupClause must also be empty; otherwise we have to + * force the groupClause to match that grouping set's order. + * + * (The first grouping set can be empty even though parse->groupClause + * is not empty only if all non-empty grouping sets are unsortable. The + * groupClauses for hashed grouping sets are built later on.) + */ + if (gs->set) + rollup->groupClause = preprocess_groupclause(root, gs->set); + else + rollup->groupClause = NIL; + + /* + * is it hashable? we pretend empty sets are hashable even though we + * actually force them not to be hashed later. But don't bother if + * there's nothing but empty sets. + */ + if (gs->set && + !bms_overlap_list(gd->unhashable_refs, gs->set)) + { + rollup->hashable = true; + gd->any_hashable = true; + } + + /* + * Now that we've pinned down an order for the groupClause for this + * list of grouping sets, we need to remap the entries in the grouping + * sets from sortgrouprefs to plain indices (0-based) into the + * groupClause for this collection of grouping sets. We keep the + * original form for later use, though. + */ + rollup->gsets = remap_to_groupclause_idx(rollup->groupClause, + current_sets, + gd->tleref_to_colnum_map); + rollup->gsets_data = current_sets; + + gd->rollups = lappend(gd->rollups, rollup); + } + + if (gd->unsortable_sets) + { + /* + * we have not yet pinned down a groupclause for this, but we will need + * index-based lists for estimation purposes. Construct hash_sets_idx + * based on the entire original groupclause for now. + */ + gd->hash_sets_idx = remap_to_groupclause_idx(parse->groupClause, + gd->unsortable_sets, + gd->tleref_to_colnum_map); + gd->any_hashable = true; + } + + return gd; +} + +/* + * Given a groupclause and a list of GroupingSetData, return equivalent sets + * (without annotation) mapped to indexes into the given groupclause. + */ +static List * +remap_to_groupclause_idx(List *groupClause, + List *gsets, + int *tleref_to_colnum_map) +{ + int ref = 0; + List *result = NIL; + ListCell *lc; + + foreach(lc, groupClause) + { + SortGroupClause *gc = lfirst(lc); + tleref_to_colnum_map[gc->tleSortGroupRef] = ref++; + } + + foreach(lc, gsets) + { + List *set = NIL; + ListCell *lc2; + GroupingSetData *gs = lfirst(lc); + + foreach(lc2, gs->set) + { + set = lappend_int(set, tleref_to_colnum_map[lfirst_int(lc2)]); + } + + result = lappend(result, set); + } + + return result; +} + + + /* * Detect whether a plan node is a "dummy" plan created when a relation * is deemed not to need scanning due to constraint exclusion. @@ -2981,7 +3136,7 @@ extract_rollup_sets(List *groupingSets) /* * Reorder the elements of a list of grouping sets such that they have correct - * prefix relationships. + * prefix relationships. Also inserts the GroupingSetData annotations. * * The input must be ordered with smallest sets first; the result is returned * with largest sets first. Note that the result shares no list substructure @@ -3004,6 +3159,7 @@ reorder_grouping_sets(List *groupingsets, List *sortclause) { List *candidate = lfirst(lc); List *new_elems = list_difference_int(candidate, previous); + GroupingSetData *gs = makeNode(GroupingSetData); if (list_length(new_elems) > 0) { @@ -3031,7 +3187,8 @@ reorder_grouping_sets(List *groupingsets, List *sortclause) } } - result = lcons(list_copy(previous), result); + gs->set = list_copy(previous); + result = lcons(gs, result); list_free(new_elems); } @@ -3126,15 +3283,16 @@ standard_qp_callback(PlannerInfo *root, void *extra) * Estimate number of groups produced by grouping clauses (1 if not grouping) * * path_rows: number of output rows from scan/join step - * rollup_lists: list of grouping sets, or NIL if not doing grouping sets - * rollup_groupclauses: list of grouping clauses for grouping sets, - * or NIL if not doing grouping sets + * gsets: grouping set data, or NULL if not doing grouping sets + * + * If doing grouping sets, we also annotate the gsets data with the estimates + * for each set and each individual rollup list, with a view to later + * determining whether some combination of them could be hashed instead. */ static double get_number_of_groups(PlannerInfo *root, double path_rows, - List *rollup_lists, - List *rollup_groupclauses) + grouping_sets_data *gd) { Query *parse = root->parse; double dNumGroups; @@ -3146,28 +3304,58 @@ get_number_of_groups(PlannerInfo *root, if (parse->groupingSets) { /* Add up the estimates for each grouping set */ - ListCell *lc, - *lc2; + ListCell *lc; + ListCell *lc2; dNumGroups = 0; - forboth(lc, rollup_groupclauses, lc2, rollup_lists) + + foreach(lc, gd->rollups) { - List *groupClause = (List *) lfirst(lc); - List *gsets = (List *) lfirst(lc2); - ListCell *lc3; + RollupData *rollup = lfirst(lc); + ListCell *lc; - groupExprs = get_sortgrouplist_exprs(groupClause, + groupExprs = get_sortgrouplist_exprs(rollup->groupClause, parse->targetList); - foreach(lc3, gsets) + rollup->numGroups = 0.0; + + forboth(lc, rollup->gsets, lc2, rollup->gsets_data) { - List *gset = (List *) lfirst(lc3); + List *gset = (List *) lfirst(lc); + GroupingSetData *gs = lfirst(lc2); + double numGroups = estimate_num_groups(root, + groupExprs, + path_rows, + &gset); + gs->numGroups = numGroups; + rollup->numGroups += numGroups; + } + + dNumGroups += rollup->numGroups; + } - dNumGroups += estimate_num_groups(root, - groupExprs, - path_rows, - &gset); + if (gd->hash_sets_idx) + { + ListCell *lc; + + gd->dNumHashGroups = 0; + + groupExprs = get_sortgrouplist_exprs(parse->groupClause, + parse->targetList); + + forboth(lc, gd->hash_sets_idx, lc2, gd->unsortable_sets) + { + List *gset = (List *) lfirst(lc); + GroupingSetData *gs = lfirst(lc2); + double numGroups = estimate_num_groups(root, + groupExprs, + path_rows, + &gset); + gs->numGroups = numGroups; + gd->dNumHashGroups += numGroups; } + + dNumGroups += gd->dNumHashGroups; } } else @@ -3203,6 +3391,11 @@ get_number_of_groups(PlannerInfo *root, * estimate_hashagg_tablesize * estimate the number of bytes that a hash aggregate hashtable will * require based on the agg_costs, path width and dNumGroups. + * + * XXX this may be over-estimating the size now that hashagg knows to omit + * unneeded columns from the hashtable. Also for mixed-mode grouping sets, + * grouping columns not in the hashed set are counted here even though hashagg + * won't store them. Is this a problem? */ static Size estimate_hashagg_tablesize(Path *path, const AggClauseCosts *agg_costs, @@ -3253,8 +3446,7 @@ create_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, PathTarget *target, const AggClauseCosts *agg_costs, - List *rollup_lists, - List *rollup_groupclauses) + grouping_sets_data *gd) { Query *parse = root->parse; Path *cheapest_path = input_rel->cheapest_total_path; @@ -3362,8 +3554,7 @@ create_grouping_paths(PlannerInfo *root, */ dNumGroups = get_number_of_groups(root, cheapest_path->rows, - rollup_lists, - rollup_groupclauses); + gd); /* * Determine whether it's possible to perform sort-based implementations @@ -3371,15 +3562,22 @@ create_grouping_paths(PlannerInfo *root, * grouping_is_sortable() is trivially true, and all the * pathkeys_contained_in() tests will succeed too, so that we'll consider * every surviving input path.) + * + * If we have grouping sets, we might be able to sort some but not all of + * them; in this case, we need can_sort to be true as long as we must + * consider any sorted-input plan. */ - can_sort = grouping_is_sortable(parse->groupClause); + can_sort = (gd && gd->rollups != NIL) + || grouping_is_sortable(parse->groupClause); /* * Determine whether we should consider hash-based implementations of * grouping. * - * Hashed aggregation only applies if we're grouping. We currently can't - * hash if there are grouping sets, though. + * Hashed aggregation only applies if we're grouping. If we have grouping + * sets, some groups might be hashable but others not; in this case we set + * can_hash true as long as there is nothing globally preventing us from + * hashing (and we should therefore consider plans with hashes). * * Executor doesn't support hashed aggregation with DISTINCT or ORDER BY * aggregates. (Doing so would imply storing *all* the input values in @@ -3392,9 +3590,8 @@ create_grouping_paths(PlannerInfo *root, * other gating conditions, so we want to do it last. */ can_hash = (parse->groupClause != NIL && - parse->groupingSets == NIL && agg_costs->numOrderedAggs == 0 && - grouping_is_hashable(parse->groupClause)); + (gd ? gd->any_hashable : grouping_is_hashable(parse->groupClause))); /* * If grouped_rel->consider_parallel is true, then paths that we generate @@ -3460,8 +3657,7 @@ create_grouping_paths(PlannerInfo *root, /* Estimate number of partial groups. */ dNumPartialGroups = get_number_of_groups(root, cheapest_partial_path->rows, - NIL, - NIL); + gd); /* * Collect statistics about aggregates for estimating costs of @@ -3594,20 +3790,9 @@ create_grouping_paths(PlannerInfo *root, /* Now decide what to stick atop it */ if (parse->groupingSets) { - /* - * We have grouping sets, possibly with aggregation. Make - * a GroupingSetsPath. - */ - add_path(grouped_rel, (Path *) - create_groupingsets_path(root, - grouped_rel, - path, - target, - (List *) parse->havingQual, - rollup_lists, - rollup_groupclauses, - agg_costs, - dNumGroups)); + consider_groupingsets_paths(root, grouped_rel, + path, true, can_hash, target, + gd, agg_costs, dNumGroups); } else if (parse->hasAggs) { @@ -3705,33 +3890,45 @@ create_grouping_paths(PlannerInfo *root, if (can_hash) { - hashaggtablesize = estimate_hashagg_tablesize(cheapest_path, - agg_costs, - dNumGroups); - - /* - * Provided that the estimated size of the hashtable does not exceed - * work_mem, we'll generate a HashAgg Path, although if we were unable - * to sort above, then we'd better generate a Path, so that we at - * least have one. - */ - if (hashaggtablesize < work_mem * 1024L || - grouped_rel->pathlist == NIL) + if (parse->groupingSets) { /* - * We just need an Agg over the cheapest-total input path, since - * input order won't matter. + * Try for a hash-only groupingsets path over unsorted input. */ - add_path(grouped_rel, (Path *) - create_agg_path(root, grouped_rel, - cheapest_path, - target, - AGG_HASHED, - AGGSPLIT_SIMPLE, - parse->groupClause, - (List *) parse->havingQual, - agg_costs, - dNumGroups)); + consider_groupingsets_paths(root, grouped_rel, + cheapest_path, false, true, target, + gd, agg_costs, dNumGroups); + } + else + { + hashaggtablesize = estimate_hashagg_tablesize(cheapest_path, + agg_costs, + dNumGroups); + + /* + * Provided that the estimated size of the hashtable does not exceed + * work_mem, we'll generate a HashAgg Path, although if we were unable + * to sort above, then we'd better generate a Path, so that we at + * least have one. + */ + if (hashaggtablesize < work_mem * 1024L || + grouped_rel->pathlist == NIL) + { + /* + * We just need an Agg over the cheapest-total input path, since + * input order won't matter. + */ + add_path(grouped_rel, (Path *) + create_agg_path(root, grouped_rel, + cheapest_path, + target, + AGG_HASHED, + AGGSPLIT_SIMPLE, + parse->groupClause, + (List *) parse->havingQual, + agg_costs, + dNumGroups)); + } } /* @@ -3800,6 +3997,310 @@ create_grouping_paths(PlannerInfo *root, return grouped_rel; } + +/* + * For a given input path, consider the possible ways of doing grouping sets on + * it, by combinations of hashing and sorting. This can be called multiple + * times, so it's important that it not scribble on input. No result is + * returned, but any generated paths are added to grouped_rel. + */ + +static void +consider_groupingsets_paths(PlannerInfo *root, + RelOptInfo *grouped_rel, + Path *path, + bool is_sorted, + bool can_hash, + PathTarget *target, + grouping_sets_data *gd, + const AggClauseCosts *agg_costs, + double dNumGroups) +{ + Query *parse = root->parse; + + /* + * If we're not being offered sorted input, then only consider plans that + * can be done entirely by hashing. + * + * We can hash everything if it looks like it'll fit in work_mem. But if + * the input is actually sorted despite not being advertised as such, we + * prefer to make use of that in order to use less memory. + * + * If none of the grouping sets are sortable, then ignore the work_mem + * limit and generate a path anyway, since otherwise we'll just fail. + */ + if (!is_sorted) + { + List *new_rollups = NIL; + RollupData *unhashable_rollup = NULL; + List *sets_data; + List *empty_sets_data = NIL; + List *empty_sets = NIL; + ListCell *lc; + ListCell *l_start = list_head(gd->rollups); + AggStrategy strat = AGG_HASHED; + Size hashsize; + double exclude_groups = 0.0; + + Assert(can_hash); + + if (pathkeys_contained_in(root->group_pathkeys, path->pathkeys)) + { + unhashable_rollup = lfirst(l_start); + exclude_groups = unhashable_rollup->numGroups; + l_start = lnext(l_start); + } + + hashsize = estimate_hashagg_tablesize(path, + agg_costs, + dNumGroups - exclude_groups); + + if (hashsize > work_mem * 1024L && gd->rollups) + return; /* nope, won't fit */ + + /* + * We need to burst the existing rollups list into individual grouping + * sets and recompute a groupClause for each set. + */ + sets_data = list_copy(gd->unsortable_sets); + + for_each_cell(lc, l_start) + { + RollupData *rollup = lfirst(lc); + + /* + * If we find an unhashable rollup that's not been skipped by the + * "actually sorted" check above, we can't cope; we'd need sorted + * input (with a different sort order) but we can't get that here. + * So bail out; we'll get a valid path from the is_sorted case + * instead. + */ + if (!rollup->hashable) + return; + else + sets_data = list_concat(sets_data, list_copy(rollup->gsets_data)); + } + foreach(lc, sets_data) + { + GroupingSetData *gs = lfirst(lc); + List *gset = gs->set; + RollupData *rollup; + + if (gset == NIL) + { + empty_sets_data = lappend(empty_sets_data, gs); + empty_sets = lappend(empty_sets, NIL); + } + else + { + rollup = makeNode(RollupData); + + rollup->groupClause = preprocess_groupclause(root, gset); + rollup->gsets_data = list_make1(gs); + rollup->gsets = remap_to_groupclause_idx(rollup->groupClause, + rollup->gsets_data, + gd->tleref_to_colnum_map); + rollup->numGroups = gs->numGroups; + rollup->hashable = true; + rollup->is_hashed = true; + new_rollups = lappend(new_rollups, rollup); + } + } + + /* If we didn't find anything nonempty to hash, then bail. */ + if (new_rollups == NIL) + return; + + /* + * If there were empty grouping sets they should have been in the first + * rollup. + */ + Assert(!unhashable_rollup || !empty_sets); + + if (unhashable_rollup) + { + new_rollups = lappend(new_rollups, unhashable_rollup); + strat = AGG_MIXED; + } + else if (empty_sets) + { + RollupData *rollup = makeNode(RollupData); + + rollup->groupClause = NIL; + rollup->gsets_data = empty_sets_data; + rollup->gsets = empty_sets; + rollup->numGroups = list_length(empty_sets); + rollup->hashable = false; + rollup->is_hashed = false; + new_rollups = lappend(new_rollups, rollup); + strat = AGG_MIXED; + } + + add_path(grouped_rel, (Path *) + create_groupingsets_path(root, + grouped_rel, + path, + target, + (List *) parse->havingQual, + strat, + new_rollups, + agg_costs, + dNumGroups)); + return; + } + + /* + * If we have sorted input but nothing we can do with it, bail. + */ + + if (list_length(gd->rollups) == 0) + return; + + /* + * Given sorted input, we try and make two paths: one sorted and one mixed + * sort/hash. (We need to try both because hashagg might be disabled, or + * some columns might not be sortable.) + * + * can_hash is passed in as false if some obstacle elsewhere (such as + * ordered aggs) means that we shouldn't consider hashing at all. + */ + + if (can_hash && gd->any_hashable) + { + List *rollups = NIL; + List *hash_sets = list_copy(gd->unsortable_sets); + double availspace = (work_mem * 1024.0); + ListCell *lc; + + /* + * Account first for space needed for groups we can't sort at all. + */ + availspace -= (double) estimate_hashagg_tablesize(path, agg_costs, gd->dNumHashGroups); + + if (availspace > 0 && list_length(gd->rollups) > 1) + { + double scale; + int k_capacity; + int *k_weights = palloc(list_length(gd->rollups) * sizeof(int)); + Bitmapset *hash_items = NULL; + int i; + + /* + * To use the discrete knapsack, we need to scale the values to a + * reasonably small bounded range. We choose to allow a 5% error + * margin; we have no more than 4096 rollups in the worst possible + * case, which with a 5% error margin will require a bit over 42MB + * of workspace. (Anyone wanting to plan queries that complex had + * better have the memory for it. In more reasonable cases, with + * no more than a couple of dozen rollups, the memory usage will be + * negligible.) + */ + scale = availspace / (20.0 * list_length(gd->rollups)); + k_capacity = (int) floor(availspace / scale); + + /* + * We leave the first rollup out of consideration since it's the + * one that matches the input sort order. We assign indexes "i" to + * only those entries considered for hashing; the second loop, + * below, must use the same condition. + */ + i = 0; + for_each_cell(lc, lnext(list_head(gd->rollups))) + { + RollupData *rollup = lfirst(lc); + if (rollup->hashable) + { + double sz = estimate_hashagg_tablesize(path, + agg_costs, + rollup->numGroups); + k_weights[i] = (int) floor(sz / scale); + ++i; + } + } + + /* + * Apply knapsack algorithm; compute the set of items which + * maximizes the value stored (in this case the number of sorts + * saved) while keeping the total size (approximately) within + * capacity. + */ + if (i > 0) + hash_items = DiscreteKnapsack(k_capacity, i, k_weights, NULL); + + if (!bms_is_empty(hash_items)) + { + rollups = list_make1(linitial(gd->rollups)); + + i = 0; + for_each_cell(lc, lnext(list_head(gd->rollups))) + { + RollupData *rollup = lfirst(lc); + if (rollup->hashable) + { + if (bms_is_member(i, hash_items)) + hash_sets = list_concat(hash_sets, list_copy(rollup->gsets_data)); + else + rollups = lappend(rollups, rollup); + ++i; + } + else + rollups = lappend(rollups, rollup); + } + } + } + + if (!rollups && hash_sets) + rollups = list_copy(gd->rollups); + + foreach(lc, hash_sets) + { + GroupingSetData *gs = lfirst(lc); + RollupData *rollup = makeNode(RollupData); + + Assert(gs->set != NIL); + + rollup->groupClause = preprocess_groupclause(root, gs->set); + rollup->gsets_data = list_make1(gs); + rollup->gsets = remap_to_groupclause_idx(rollup->groupClause, + rollup->gsets_data, + gd->tleref_to_colnum_map); + rollup->numGroups = gs->numGroups; + rollup->hashable = true; + rollup->is_hashed = true; + rollups = lcons(rollup, rollups); + } + + if (rollups) + { + add_path(grouped_rel, (Path *) + create_groupingsets_path(root, + grouped_rel, + path, + target, + (List *) parse->havingQual, + AGG_MIXED, + rollups, + agg_costs, + dNumGroups)); + } + } + + /* + * Now try the simple sorted case. + */ + if (!gd->unsortable_sets) + add_path(grouped_rel, (Path *) + create_groupingsets_path(root, + grouped_rel, + path, + target, + (List *) parse->havingQual, + AGG_SORTED, + gd->rollups, + agg_costs, + dNumGroups)); +} + /* * create_window_paths * diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 3248296..67590b8 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -2607,10 +2607,9 @@ create_agg_path(PlannerInfo *root, * 'subpath' is the path representing the source of data * 'target' is the PathTarget to be computed * 'having_qual' is the HAVING quals if any - * 'rollup_lists' is a list of grouping sets - * 'rollup_groupclauses' is a list of grouping clauses for grouping sets + * 'rollups' is a list of RollupData nodes * 'agg_costs' contains cost info about the aggregate functions to be computed - * 'numGroups' is the estimated number of groups + * 'numGroups' is the estimated total number of groups */ GroupingSetsPath * create_groupingsets_path(PlannerInfo *root, @@ -2618,13 +2617,15 @@ create_groupingsets_path(PlannerInfo *root, Path *subpath, PathTarget *target, List *having_qual, - List *rollup_lists, - List *rollup_groupclauses, + AggStrategy aggstrategy, + List *rollups, const AggClauseCosts *agg_costs, double numGroups) { GroupingSetsPath *pathnode = makeNode(GroupingSetsPath); - int numGroupCols; + ListCell *lc; + bool is_first = true; + bool is_first_sort = true; /* The topmost generated Plan node will be an Agg */ pathnode->path.pathtype = T_Agg; @@ -2638,74 +2639,107 @@ create_groupingsets_path(PlannerInfo *root, pathnode->subpath = subpath; /* + * Simplify callers by downgrading AGG_SORTED to AGG_PLAIN, + * and AGG_MIXED to AGG_HASHED, here if possible. + */ + if (aggstrategy == AGG_SORTED && + list_length(rollups) == 1 && + ((RollupData *) linitial(rollups))->groupClause == NIL) + aggstrategy = AGG_PLAIN; + + if (aggstrategy == AGG_MIXED && + list_length(rollups) == 1) + aggstrategy = AGG_HASHED; + + /* * Output will be in sorted order by group_pathkeys if, and only if, there * is a single rollup operation on a non-empty list of grouping * expressions. */ - if (list_length(rollup_groupclauses) == 1 && - ((List *) linitial(rollup_groupclauses)) != NIL) + if (aggstrategy == AGG_SORTED && list_length(rollups) == 1) pathnode->path.pathkeys = root->group_pathkeys; else pathnode->path.pathkeys = NIL; - pathnode->rollup_groupclauses = rollup_groupclauses; - pathnode->rollup_lists = rollup_lists; + pathnode->aggstrategy = aggstrategy; + pathnode->rollups = rollups; pathnode->qual = having_qual; - Assert(rollup_lists != NIL); - Assert(list_length(rollup_lists) == list_length(rollup_groupclauses)); - - /* Account for cost of the topmost Agg node */ - numGroupCols = list_length((List *) linitial((List *) llast(rollup_lists))); - - cost_agg(&pathnode->path, root, - (numGroupCols > 0) ? AGG_SORTED : AGG_PLAIN, - agg_costs, - numGroupCols, - numGroups, - subpath->startup_cost, - subpath->total_cost, - subpath->rows); + Assert(rollups != NIL); + Assert(aggstrategy != AGG_PLAIN || list_length(rollups) == 1); + Assert(aggstrategy != AGG_MIXED || list_length(rollups) > 1); - /* - * Add in the costs and output rows of the additional sorting/aggregation - * steps, if any. Only total costs count, since the extra sorts aren't - * run on startup. - */ - if (list_length(rollup_lists) > 1) + foreach(lc, rollups) { - ListCell *lc; + RollupData *rollup = lfirst(lc); + List *gsets = rollup->gsets; + int numGroupCols = list_length(linitial(gsets)); + + /* + * In AGG_SORTED or AGG_PLAIN mode, the first rollup takes the + * (already-sorted) input, and following ones do their own sort. + * + * In AGG_HASHED mode, there is one rollup for each grouping set. + * + * In AGG_MIXED mode, the first rollups are hashed, the first + * non-hashed one takes the (already-sorted) input, and + * following ones do their own sort. + */ - foreach(lc, rollup_lists) + if (is_first) + { + cost_agg(&pathnode->path, root, + aggstrategy, + agg_costs, + numGroupCols, + rollup->numGroups, + subpath->startup_cost, + subpath->total_cost, + subpath->rows); + is_first = false; + if (!rollup->is_hashed) + is_first_sort = false; + } + else { - List *gsets = (List *) lfirst(lc); Path sort_path; /* dummy for result of cost_sort */ Path agg_path; /* dummy for result of cost_agg */ - /* We must iterate over all but the last rollup_lists element */ - if (lnext(lc) == NULL) - break; - - /* Account for cost of sort, but don't charge input cost again */ - cost_sort(&sort_path, root, NIL, - 0.0, - subpath->rows, - subpath->pathtarget->width, - 0.0, - work_mem, - -1.0); - - /* Account for cost of aggregation */ - numGroupCols = list_length((List *) linitial(gsets)); - - cost_agg(&agg_path, root, - AGG_SORTED, - agg_costs, - numGroupCols, - numGroups, /* XXX surely not right for all steps? */ - sort_path.startup_cost, - sort_path.total_cost, - sort_path.rows); + if (rollup->is_hashed || is_first_sort) + { + /* Account for cost of aggregation, but don't charge input cost again */ + cost_agg(&agg_path, root, + rollup->is_hashed ? AGG_HASHED : AGG_SORTED, + agg_costs, + numGroupCols, + rollup->numGroups, + 0.0, 0.0, + subpath->rows); + if (!rollup->is_hashed) + is_first_sort = false; + } + else + { + /* Account for cost of sort, but don't charge input cost again */ + cost_sort(&sort_path, root, NIL, + 0.0, + subpath->rows, + subpath->pathtarget->width, + 0.0, + work_mem, + -1.0); + + /* Account for cost of aggregation */ + + cost_agg(&agg_path, root, + AGG_SORTED, + agg_costs, + numGroupCols, + rollup->numGroups, + sort_path.startup_cost, + sort_path.total_cost, + sort_path.rows); + } pathnode->path.total_cost += agg_path.total_cost; pathnode->path.rows += agg_path.rows; diff --git a/src/include/lib/knapsack.h b/src/include/lib/knapsack.h new file mode 100644 index 0000000..1d5b3b8 --- /dev/null +++ b/src/include/lib/knapsack.h @@ -0,0 +1,17 @@ +/* + * knapsack.h + * + * Copyright (c) 2017, PostgreSQL Global Development Group + * + * src/include/lib/knapsack.h + */ +#ifndef KNAPSACK_H +#define KNAPSACK_H + +#include "postgres.h" +#include "nodes/bitmapset.h" + +extern Bitmapset *DiscreteKnapsack(int max_weight, int num_items, + int *item_weights, double *item_values); + +#endif /* KNAPSACK_H */ diff --git a/src/include/nodes/bitmapset.h b/src/include/nodes/bitmapset.h index 4f1910e..109f7b0 100644 --- a/src/include/nodes/bitmapset.h +++ b/src/include/nodes/bitmapset.h @@ -21,6 +21,11 @@ #define BITMAPSET_H /* + * Forward decl to save including pg_list.h + */ +struct List; + +/* * Data representation */ @@ -70,6 +75,7 @@ extern bool bms_is_subset(const Bitmapset *a, const Bitmapset *b); extern BMS_Comparison bms_subset_compare(const Bitmapset *a, const Bitmapset *b); extern bool bms_is_member(int x, const Bitmapset *a); extern bool bms_overlap(const Bitmapset *a, const Bitmapset *b); +extern bool bms_overlap_list(const Bitmapset *a, const struct List *b); extern bool bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b); extern int bms_singleton_member(const Bitmapset *a); extern bool bms_get_singleton_member(const Bitmapset *a, int *member); diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 6332ea0..4314447 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1861,6 +1861,7 @@ typedef struct AggStatePerAggData *AggStatePerAgg; typedef struct AggStatePerTransData *AggStatePerTrans; typedef struct AggStatePerGroupData *AggStatePerGroup; typedef struct AggStatePerPhaseData *AggStatePerPhase; +typedef struct AggStatePerHashData *AggStatePerHash; typedef struct AggState { @@ -1868,15 +1869,17 @@ typedef struct AggState List *aggs; /* all Aggref nodes in targetlist & quals */ int numaggs; /* length of list (could be zero!) */ int numtrans; /* number of pertrans items */ + AggStrategy aggstrategy; /* strategy mode */ AggSplit aggsplit; /* agg-splitting mode, see nodes.h */ AggStatePerPhase phase; /* pointer to current phase data */ - int numphases; /* number of phases */ + int numphases; /* number of phases (including phase 0) */ int current_phase; /* current phase number */ - FmgrInfo *hashfunctions; /* per-grouping-field hash fns */ AggStatePerAgg peragg; /* per-Aggref information */ AggStatePerTrans pertrans; /* per-Trans state information */ + ExprContext *hashcontext; /* econtexts for long-lived data (hashtable) */ ExprContext **aggcontexts; /* econtexts for long-lived data (per GS) */ ExprContext *tmpcontext; /* econtext for input expressions */ + ExprContext *curaggcontext; /* currently active aggcontext */ AggStatePerTrans curpertrans; /* currently active trans state */ bool input_done; /* indicates end of input */ bool agg_done; /* indicates completion of Agg scan */ @@ -1888,21 +1891,17 @@ typedef struct AggState /* These fields are for grouping set phase data */ int maxsets; /* The max number of sets in any phase */ AggStatePerPhase phases; /* array of all phases */ - Tuplesortstate *sort_in; /* sorted input to phases > 0 */ + Tuplesortstate *sort_in; /* sorted input to phases > 1 */ Tuplesortstate *sort_out; /* input is copied here for next phase */ TupleTableSlot *sort_slot; /* slot for sort results */ /* these fields are used in AGG_PLAIN and AGG_SORTED modes: */ AggStatePerGroup pergroup; /* per-Aggref-per-group working state */ HeapTuple grp_firstTuple; /* copy of first tuple of current group */ - /* these fields are used in AGG_HASHED mode: */ - TupleHashTable hashtable; /* hash table with one entry per group */ - TupleTableSlot *hashslot; /* slot for loading hash table */ - int numhashGrpCols; /* number of columns in hash table */ - int largestGrpColIdx; /* largest column required for hashing */ - AttrNumber *hashGrpColIdxInput; /* and their indices in input slot */ - AttrNumber *hashGrpColIdxHash; /* indices for execGrouping in hashtbl */ + /* these fields are used in AGG_HASHED and AGG_MIXED modes: */ bool table_filled; /* hash table filled yet? */ - TupleHashIterator hashiter; /* for iterating through hash table */ + int num_hashes; + AggStatePerHash perhash; + AggStatePerGroup *hash_pergroup; /* array of per-group pointers */ /* support for evaluation of agg inputs */ TupleTableSlot *evalslot; /* slot for agg inputs */ ProjectionInfo *evalproj; /* projection machinery */ diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 95dd8ba..111fad1 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -272,6 +272,8 @@ typedef enum NodeTag T_PlaceHolderInfo, T_MinMaxAggInfo, T_PlannerParamItem, + T_RollupData, + T_GroupingSetData, /* * TAGS FOR MEMORY NODES (memnodes.h) @@ -728,7 +730,8 @@ typedef enum AggStrategy { AGG_PLAIN, /* simple agg across all input rows */ AGG_SORTED, /* grouped agg, input must be sorted */ - AGG_HASHED /* grouped agg, use internal hashtable */ + AGG_HASHED, /* grouped agg, use internal hashtable */ + AGG_MIXED /* grouped agg, hash and sort concurrently */ } AggStrategy; /* diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index f72f7a8..b3a2a03 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -737,7 +737,7 @@ typedef struct Agg Oid *grpOperators; /* equality operators to compare with */ long numGroups; /* estimated number of groups in input */ Bitmapset *aggParams; /* IDs of Params used in Aggref inputs */ - /* Note: planner provides numGroups & aggParams only in AGG_HASHED case */ + /* Note: planner provides numGroups & aggParams only in HASHED/MIXED case */ List *groupingSets; /* grouping sets to use */ List *chain; /* chained Agg/Sort nodes */ } Agg; diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index f7ac6f6..4d0791d 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -1378,17 +1378,37 @@ typedef struct AggPath } AggPath; /* + * Various annotations used for grouping sets in the planner. + */ + +typedef struct GroupingSetData +{ + NodeTag type; + List *set; /* grouping set as list of sortgrouprefs */ + double numGroups; /* est. number of result groups */ +} GroupingSetData; + +typedef struct RollupData +{ + NodeTag type; + List *groupClause; /* applicable subset of parse->groupClause */ + List *gsets; /* lists of integer indexes into groupClause */ + List *gsets_data; /* list of GroupingSetData */ + double numGroups; /* est. number of result groups */ + bool hashable; /* can be hashed */ + bool is_hashed; /* to be implemented as a hashagg */ +} RollupData; + +/* * GroupingSetsPath represents a GROUPING SETS aggregation - * - * Currently we only support this in sorted not hashed form, so the input - * must always be appropriately presorted. */ + typedef struct GroupingSetsPath { Path path; Path *subpath; /* path representing input source */ - List *rollup_groupclauses; /* list of lists of SortGroupClause's */ - List *rollup_lists; /* parallel list of lists of grouping sets */ + AggStrategy aggstrategy; /* basic strategy */ + List *rollups; /* list of RollupData */ List *qual; /* quals (HAVING quals), if any */ } GroupingSetsPath; diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 53cad24..a084ba1 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -181,8 +181,8 @@ extern GroupingSetsPath *create_groupingsets_path(PlannerInfo *root, Path *subpath, PathTarget *target, List *having_qual, - List *rollup_lists, - List *rollup_groupclauses, + AggStrategy aggstrategy, + List *rollups, const AggClauseCosts *agg_costs, double numGroups); extern MinMaxAggPath *create_minmaxagg_path(PlannerInfo *root, diff --git a/src/test/regress/expected/groupingsets.out b/src/test/regress/expected/groupingsets.out index 260ccd5..7c46441 100644 --- a/src/test/regress/expected/groupingsets.out +++ b/src/test/regress/expected/groupingsets.out @@ -13,6 +13,13 @@ copy gstest2 from stdin; create temp table gstest3 (a integer, b integer, c integer, d integer); copy gstest3 from stdin; alter table gstest3 add primary key (a); +create temp table gstest4(id integer, v integer, + unhashable_col bit(4), unsortable_col xid); +insert into gstest4 +values (1,1,b'0000','1'), (2,2,b'0001','1'), + (3,4,b'0010','2'), (4,8,b'0011','2'), + (5,16,b'0000','2'), (6,32,b'0001','2'), + (7,64,b'0010','1'), (8,128,b'0011','1'); create temp table gstest_empty (a integer, b integer, v integer); create function gstest_data(v integer, out a integer, out b integer) returns setof record @@ -22,6 +29,7 @@ create function gstest_data(v integer, out a integer, out b integer) end; $f$ language plpgsql; -- basic functionality +set enable_hashagg = false; -- test hashing explicitly later -- simple rollup with multiple plain aggregates, with and without ordering -- (and with ordering differing from grouping) select a, b, grouping(a,b), sum(v), count(*), max(v) @@ -462,7 +470,7 @@ select a, b from (values (1,2),(2,3)) v(a,b) group by a,b, grouping sets(a); -- Tests for chained aggregates select a, b, grouping(a,b), sum(v), count(*), max(v) - from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)); + from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6; a | b | grouping | sum | count | max ---+---+----------+-----+-------+----- 1 | 1 | 0 | 21 | 2 | 11 @@ -473,19 +481,19 @@ select a, b, grouping(a,b), sum(v), count(*), max(v) 3 | 4 | 0 | 17 | 1 | 17 4 | 1 | 0 | 37 | 2 | 19 | | 3 | 21 | 2 | 11 - | | 3 | 25 | 2 | 13 - | | 3 | 14 | 1 | 14 - | | 3 | 15 | 1 | 15 - | | 3 | 16 | 1 | 16 - | | 3 | 17 | 1 | 17 - | | 3 | 37 | 2 | 19 | | 3 | 21 | 2 | 11 | | 3 | 25 | 2 | 13 + | | 3 | 25 | 2 | 13 + | | 3 | 14 | 1 | 14 | | 3 | 14 | 1 | 14 | | 3 | 15 | 1 | 15 + | | 3 | 15 | 1 | 15 | | 3 | 16 | 1 | 16 + | | 3 | 16 | 1 | 16 + | | 3 | 17 | 1 | 17 | | 3 | 17 | 1 | 17 | | 3 | 37 | 2 | 19 + | | 3 | 37 | 2 | 19 (21 rows) select(select (select grouping(a,b) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY ROLLUP((e+1),(f+1)); @@ -847,4 +855,548 @@ select sum(ten) from onek group by rollup(four::text), two order by 1; 2500 (6 rows) +-- hashing support +set enable_hashagg = true; +-- failure cases +select count(*) from gstest4 group by rollup(unhashable_col,unsortable_col); +ERROR: could not implement GROUP BY +DETAIL: Some of the datatypes only support hashing, while others only support sorting. +select array_agg(v order by v) from gstest4 group by grouping sets ((id,unsortable_col),(id)); +ERROR: could not implement GROUP BY +DETAIL: Some of the datatypes only support hashing, while others only support sorting. +-- simple cases +select a, b, grouping(a,b), sum(v), count(*), max(v) + from gstest1 group by grouping sets ((a),(b)) order by 3,1,2; + a | b | grouping | sum | count | max +---+---+----------+-----+-------+----- + 1 | | 1 | 60 | 5 | 14 + 2 | | 1 | 15 | 1 | 15 + 3 | | 1 | 33 | 2 | 17 + 4 | | 1 | 37 | 2 | 19 + | 1 | 2 | 58 | 4 | 19 + | 2 | 2 | 25 | 2 | 13 + | 3 | 2 | 45 | 3 | 16 + | 4 | 2 | 17 | 1 | 17 +(8 rows) + +explain (costs off) select a, b, grouping(a,b), sum(v), count(*), max(v) + from gstest1 group by grouping sets ((a),(b)) order by 3,1,2; + QUERY PLAN +-------------------------------------------------------------------------------------------------------- + Sort + Sort Key: (GROUPING("*VALUES*".column1, "*VALUES*".column2)), "*VALUES*".column1, "*VALUES*".column2 + -> HashAggregate + Hash Key: "*VALUES*".column1 + Hash Key: "*VALUES*".column2 + -> Values Scan on "*VALUES*" +(6 rows) + +select a, b, grouping(a,b), sum(v), count(*), max(v) + from gstest1 group by cube(a,b) order by 3,1,2; + a | b | grouping | sum | count | max +---+---+----------+-----+-------+----- + 1 | 1 | 0 | 21 | 2 | 11 + 1 | 2 | 0 | 25 | 2 | 13 + 1 | 3 | 0 | 14 | 1 | 14 + 2 | 3 | 0 | 15 | 1 | 15 + 3 | 3 | 0 | 16 | 1 | 16 + 3 | 4 | 0 | 17 | 1 | 17 + 4 | 1 | 0 | 37 | 2 | 19 + 1 | | 1 | 60 | 5 | 14 + 2 | | 1 | 15 | 1 | 15 + 3 | | 1 | 33 | 2 | 17 + 4 | | 1 | 37 | 2 | 19 + | 1 | 2 | 58 | 4 | 19 + | 2 | 2 | 25 | 2 | 13 + | 3 | 2 | 45 | 3 | 16 + | 4 | 2 | 17 | 1 | 17 + | | 3 | 145 | 10 | 19 +(16 rows) + +explain (costs off) select a, b, grouping(a,b), sum(v), count(*), max(v) + from gstest1 group by cube(a,b) order by 3,1,2; + QUERY PLAN +-------------------------------------------------------------------------------------------------------- + Sort + Sort Key: (GROUPING("*VALUES*".column1, "*VALUES*".column2)), "*VALUES*".column1, "*VALUES*".column2 + -> MixedAggregate + Hash Key: "*VALUES*".column1, "*VALUES*".column2 + Hash Key: "*VALUES*".column1 + Hash Key: "*VALUES*".column2 + Group Key: () + -> Values Scan on "*VALUES*" +(8 rows) + +-- shouldn't try and hash +explain (costs off) + select a, b, grouping(a,b), array_agg(v order by v) + from gstest1 group by cube(a,b); + QUERY PLAN +---------------------------------------------------------- + GroupAggregate + Group Key: "*VALUES*".column1, "*VALUES*".column2 + Group Key: "*VALUES*".column1 + Group Key: () + Sort Key: "*VALUES*".column2 + Group Key: "*VALUES*".column2 + -> Sort + Sort Key: "*VALUES*".column1, "*VALUES*".column2 + -> Values Scan on "*VALUES*" +(9 rows) + +-- mixed hashable/sortable cases +select unhashable_col, unsortable_col, + grouping(unhashable_col, unsortable_col), + count(*), sum(v) + from gstest4 group by grouping sets ((unhashable_col),(unsortable_col)) + order by 3, 5; + unhashable_col | unsortable_col | grouping | count | sum +----------------+----------------+----------+-------+----- + 0000 | | 1 | 2 | 17 + 0001 | | 1 | 2 | 34 + 0010 | | 1 | 2 | 68 + 0011 | | 1 | 2 | 136 + | 2 | 2 | 4 | 60 + | 1 | 2 | 4 | 195 +(6 rows) + +explain (costs off) + select unhashable_col, unsortable_col, + grouping(unhashable_col, unsortable_col), + count(*), sum(v) + from gstest4 group by grouping sets ((unhashable_col),(unsortable_col)) + order by 3,5; + QUERY PLAN +------------------------------------------------------------------ + Sort + Sort Key: (GROUPING(unhashable_col, unsortable_col)), (sum(v)) + -> MixedAggregate + Hash Key: unsortable_col + Group Key: unhashable_col + -> Sort + Sort Key: unhashable_col + -> Seq Scan on gstest4 +(8 rows) + +select unhashable_col, unsortable_col, + grouping(unhashable_col, unsortable_col), + count(*), sum(v) + from gstest4 group by grouping sets ((v,unhashable_col),(v,unsortable_col)) + order by 3,5; + unhashable_col | unsortable_col | grouping | count | sum +----------------+----------------+----------+-------+----- + 0000 | | 1 | 1 | 1 + 0001 | | 1 | 1 | 2 + 0010 | | 1 | 1 | 4 + 0011 | | 1 | 1 | 8 + 0000 | | 1 | 1 | 16 + 0001 | | 1 | 1 | 32 + 0010 | | 1 | 1 | 64 + 0011 | | 1 | 1 | 128 + | 1 | 2 | 1 | 1 + | 1 | 2 | 1 | 2 + | 2 | 2 | 1 | 4 + | 2 | 2 | 1 | 8 + | 2 | 2 | 1 | 16 + | 2 | 2 | 1 | 32 + | 1 | 2 | 1 | 64 + | 1 | 2 | 1 | 128 +(16 rows) + +explain (costs off) + select unhashable_col, unsortable_col, + grouping(unhashable_col, unsortable_col), + count(*), sum(v) + from gstest4 group by grouping sets ((v,unhashable_col),(v,unsortable_col)) + order by 3,5; + QUERY PLAN +------------------------------------------------------------------ + Sort + Sort Key: (GROUPING(unhashable_col, unsortable_col)), (sum(v)) + -> MixedAggregate + Hash Key: v, unsortable_col + Group Key: v, unhashable_col + -> Sort + Sort Key: v, unhashable_col + -> Seq Scan on gstest4 +(8 rows) + +-- empty input: first is 0 rows, second 1, third 3 etc. +select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a); + a | b | sum | count +---+---+-----+------- +(0 rows) + +explain (costs off) + select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a); + QUERY PLAN +-------------------------------- + HashAggregate + Hash Key: a, b + Hash Key: a + -> Seq Scan on gstest_empty +(4 rows) + +select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),()); + a | b | sum | count +---+---+-----+------- + | | | 0 +(1 row) + +select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),()); + a | b | sum | count +---+---+-----+------- + | | | 0 + | | | 0 + | | | 0 +(3 rows) + +explain (costs off) + select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),()); + QUERY PLAN +-------------------------------- + MixedAggregate + Hash Key: a, b + Group Key: () + Group Key: () + Group Key: () + -> Seq Scan on gstest_empty +(6 rows) + +select sum(v), count(*) from gstest_empty group by grouping sets ((),(),()); + sum | count +-----+------- + | 0 + | 0 + | 0 +(3 rows) + +explain (costs off) + select sum(v), count(*) from gstest_empty group by grouping sets ((),(),()); + QUERY PLAN +-------------------------------- + Aggregate + Group Key: () + Group Key: () + Group Key: () + -> Seq Scan on gstest_empty +(5 rows) + +-- check that functionally dependent cols are not nulled +select a, d, grouping(a,b,c) + from gstest3 + group by grouping sets ((a,b), (a,c)); + a | d | grouping +---+---+---------- + 1 | 1 | 1 + 2 | 2 | 1 + 1 | 1 | 2 + 2 | 2 | 2 +(4 rows) + +explain (costs off) + select a, d, grouping(a,b,c) + from gstest3 + group by grouping sets ((a,b), (a,c)); + QUERY PLAN +--------------------------- + HashAggregate + Hash Key: a, b + Hash Key: a, c + -> Seq Scan on gstest3 +(4 rows) + +-- simple rescan tests +select a, b, sum(v.x) + from (values (1),(2)) v(x), gstest_data(v.x) + group by grouping sets (a,b); + a | b | sum +---+---+----- + 2 | | 6 + 1 | | 3 + | 2 | 3 + | 3 | 3 + | 1 | 3 +(5 rows) + +explain (costs off) + select a, b, sum(v.x) + from (values (1),(2)) v(x), gstest_data(v.x) + group by grouping sets (a,b); + QUERY PLAN +------------------------------------------ + HashAggregate + Hash Key: gstest_data.a + Hash Key: gstest_data.b + -> Nested Loop + -> Values Scan on "*VALUES*" + -> Function Scan on gstest_data +(6 rows) + +select * + from (values (1),(2)) v(x), + lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s; +ERROR: aggregate functions are not allowed in FROM clause of their own query level +LINE 3: lateral (select a, b, sum(v.x) from gstest_data(v.x) ... + ^ +explain (costs off) + select * + from (values (1),(2)) v(x), + lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s; +ERROR: aggregate functions are not allowed in FROM clause of their own query level +LINE 4: lateral (select a, b, sum(v.x) from gstest_data(v.x... + ^ +-- Tests for chained aggregates +select a, b, grouping(a,b), sum(v), count(*), max(v) + from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6; + a | b | grouping | sum | count | max +---+---+----------+-----+-------+----- + 1 | 1 | 0 | 21 | 2 | 11 + 1 | 2 | 0 | 25 | 2 | 13 + 1 | 3 | 0 | 14 | 1 | 14 + 2 | 3 | 0 | 15 | 1 | 15 + 3 | 3 | 0 | 16 | 1 | 16 + 3 | 4 | 0 | 17 | 1 | 17 + 4 | 1 | 0 | 37 | 2 | 19 + | | 3 | 21 | 2 | 11 + | | 3 | 21 | 2 | 11 + | | 3 | 25 | 2 | 13 + | | 3 | 25 | 2 | 13 + | | 3 | 14 | 1 | 14 + | | 3 | 14 | 1 | 14 + | | 3 | 15 | 1 | 15 + | | 3 | 15 | 1 | 15 + | | 3 | 16 | 1 | 16 + | | 3 | 16 | 1 | 16 + | | 3 | 17 | 1 | 17 + | | 3 | 17 | 1 | 17 + | | 3 | 37 | 2 | 19 + | | 3 | 37 | 2 | 19 +(21 rows) + +select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum + from gstest2 group by cube (a,b) order by rsum, a, b; + a | b | sum | rsum +---+---+-----+------ + 1 | 1 | 8 | 8 + 1 | 2 | 2 | 10 + 1 | | 10 | 20 + 2 | 2 | 2 | 22 + 2 | | 2 | 24 + | 1 | 8 | 32 + | 2 | 4 | 36 + | | 12 | 48 +(8 rows) + +select a, b, sum(v.x) + from (values (1),(2)) v(x), gstest_data(v.x) + group by cube (a,b) order by a,b; + a | b | sum +---+---+----- + 1 | 1 | 1 + 1 | 2 | 1 + 1 | 3 | 1 + 1 | | 3 + 2 | 1 | 2 + 2 | 2 | 2 + 2 | 3 | 2 + 2 | | 6 + | 1 | 3 + | 2 | 3 + | 3 | 3 + | | 9 +(12 rows) + +-- More rescan tests +select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by cube(four,ten)) s on true order by v.a,four,ten; + a | a | four | ten | count +---+---+------+-----+------- + 1 | 1 | 0 | 0 | 50 + 1 | 1 | 0 | 2 | 50 + 1 | 1 | 0 | 4 | 50 + 1 | 1 | 0 | 6 | 50 + 1 | 1 | 0 | 8 | 50 + 1 | 1 | 0 | | 250 + 1 | 1 | 1 | 1 | 50 + 1 | 1 | 1 | 3 | 50 + 1 | 1 | 1 | 5 | 50 + 1 | 1 | 1 | 7 | 50 + 1 | 1 | 1 | 9 | 50 + 1 | 1 | 1 | | 250 + 1 | 1 | 2 | 0 | 50 + 1 | 1 | 2 | 2 | 50 + 1 | 1 | 2 | 4 | 50 + 1 | 1 | 2 | 6 | 50 + 1 | 1 | 2 | 8 | 50 + 1 | 1 | 2 | | 250 + 1 | 1 | 3 | 1 | 50 + 1 | 1 | 3 | 3 | 50 + 1 | 1 | 3 | 5 | 50 + 1 | 1 | 3 | 7 | 50 + 1 | 1 | 3 | 9 | 50 + 1 | 1 | 3 | | 250 + 1 | 1 | | 0 | 100 + 1 | 1 | | 1 | 100 + 1 | 1 | | 2 | 100 + 1 | 1 | | 3 | 100 + 1 | 1 | | 4 | 100 + 1 | 1 | | 5 | 100 + 1 | 1 | | 6 | 100 + 1 | 1 | | 7 | 100 + 1 | 1 | | 8 | 100 + 1 | 1 | | 9 | 100 + 1 | 1 | | | 1000 + 2 | 2 | 0 | 0 | 50 + 2 | 2 | 0 | 2 | 50 + 2 | 2 | 0 | 4 | 50 + 2 | 2 | 0 | 6 | 50 + 2 | 2 | 0 | 8 | 50 + 2 | 2 | 0 | | 250 + 2 | 2 | 1 | 1 | 50 + 2 | 2 | 1 | 3 | 50 + 2 | 2 | 1 | 5 | 50 + 2 | 2 | 1 | 7 | 50 + 2 | 2 | 1 | 9 | 50 + 2 | 2 | 1 | | 250 + 2 | 2 | 2 | 0 | 50 + 2 | 2 | 2 | 2 | 50 + 2 | 2 | 2 | 4 | 50 + 2 | 2 | 2 | 6 | 50 + 2 | 2 | 2 | 8 | 50 + 2 | 2 | 2 | | 250 + 2 | 2 | 3 | 1 | 50 + 2 | 2 | 3 | 3 | 50 + 2 | 2 | 3 | 5 | 50 + 2 | 2 | 3 | 7 | 50 + 2 | 2 | 3 | 9 | 50 + 2 | 2 | 3 | | 250 + 2 | 2 | | 0 | 100 + 2 | 2 | | 1 | 100 + 2 | 2 | | 2 | 100 + 2 | 2 | | 3 | 100 + 2 | 2 | | 4 | 100 + 2 | 2 | | 5 | 100 + 2 | 2 | | 6 | 100 + 2 | 2 | | 7 | 100 + 2 | 2 | | 8 | 100 + 2 | 2 | | 9 | 100 + 2 | 2 | | | 1000 +(70 rows) + +select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by cube(two,four) order by two,four) s1) from (values (1),(2)) v(a); + array +------------------------------------------------------------------------------------------------------------------------------------------------------ + {"(1,0,0,250)","(1,0,2,250)","(1,0,,500)","(1,1,1,250)","(1,1,3,250)","(1,1,,500)","(1,,0,250)","(1,,1,250)","(1,,2,250)","(1,,3,250)","(1,,,1000)"} + {"(2,0,0,250)","(2,0,2,250)","(2,0,,500)","(2,1,1,250)","(2,1,3,250)","(2,1,,500)","(2,,0,250)","(2,,1,250)","(2,,2,250)","(2,,3,250)","(2,,,1000)"} +(2 rows) + +-- Rescan logic changes when there are no empty grouping sets, so test +-- that too: +select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by grouping sets(four,ten)) s on true order by v.a,four,ten; + a | a | four | ten | count +---+---+------+-----+------- + 1 | 1 | 0 | | 250 + 1 | 1 | 1 | | 250 + 1 | 1 | 2 | | 250 + 1 | 1 | 3 | | 250 + 1 | 1 | | 0 | 100 + 1 | 1 | | 1 | 100 + 1 | 1 | | 2 | 100 + 1 | 1 | | 3 | 100 + 1 | 1 | | 4 | 100 + 1 | 1 | | 5 | 100 + 1 | 1 | | 6 | 100 + 1 | 1 | | 7 | 100 + 1 | 1 | | 8 | 100 + 1 | 1 | | 9 | 100 + 2 | 2 | 0 | | 250 + 2 | 2 | 1 | | 250 + 2 | 2 | 2 | | 250 + 2 | 2 | 3 | | 250 + 2 | 2 | | 0 | 100 + 2 | 2 | | 1 | 100 + 2 | 2 | | 2 | 100 + 2 | 2 | | 3 | 100 + 2 | 2 | | 4 | 100 + 2 | 2 | | 5 | 100 + 2 | 2 | | 6 | 100 + 2 | 2 | | 7 | 100 + 2 | 2 | | 8 | 100 + 2 | 2 | | 9 | 100 +(28 rows) + +select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by grouping sets(two,four) order by two,four) s1) from (values (1),(2)) v(a); + array +--------------------------------------------------------------------------------- + {"(1,0,,500)","(1,1,,500)","(1,,0,250)","(1,,1,250)","(1,,2,250)","(1,,3,250)"} + {"(2,0,,500)","(2,1,,500)","(2,,0,250)","(2,,1,250)","(2,,2,250)","(2,,3,250)"} +(2 rows) + +-- test the knapsack +set work_mem = '64kB'; +explain (costs off) + select unique1, + count(two), count(four), count(ten), + count(hundred), count(thousand), count(twothousand), + count(*) + from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two); + QUERY PLAN +------------------------------- + MixedAggregate + Hash Key: two + Hash Key: four + Hash Key: ten + Hash Key: hundred + Group Key: unique1 + Sort Key: twothousand + Group Key: twothousand + Sort Key: thousand + Group Key: thousand + -> Sort + Sort Key: unique1 + -> Seq Scan on tenk1 +(13 rows) + +explain (costs off) + select unique1, + count(two), count(four), count(ten), + count(hundred), count(thousand), count(twothousand), + count(*) + from tenk1 group by grouping sets (unique1,hundred,ten,four,two); + QUERY PLAN +------------------------------- + MixedAggregate + Hash Key: two + Hash Key: four + Hash Key: ten + Hash Key: hundred + Group Key: unique1 + -> Sort + Sort Key: unique1 + -> Seq Scan on tenk1 +(9 rows) + +set work_mem = '384kB'; +explain (costs off) + select unique1, + count(two), count(four), count(ten), + count(hundred), count(thousand), count(twothousand), + count(*) + from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two); + QUERY PLAN +------------------------------- + MixedAggregate + Hash Key: two + Hash Key: four + Hash Key: ten + Hash Key: hundred + Hash Key: thousand + Group Key: unique1 + Sort Key: twothousand + Group Key: twothousand + -> Sort + Sort Key: unique1 + -> Seq Scan on tenk1 +(12 rows) + -- end diff --git a/src/test/regress/expected/tsrf.out b/src/test/regress/expected/tsrf.out index 0eeaf9e..33f370b 100644 --- a/src/test/regress/expected/tsrf.out +++ b/src/test/regress/expected/tsrf.out @@ -233,6 +233,7 @@ SELECT few.dataa, count(*), min(id), max(id), generate_series(1,3) FROM few GROU (6 rows) -- grouping sets are a bit special, they produce NULLs in columns not actually NULL +set enable_hashagg = false; SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab); dataa | b | g | count -------+-----+---+------- @@ -311,46 +312,46 @@ SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(d b | bar | | 2 b | | | 2 | | | 6 - a | | 1 | 2 - b | | 1 | 1 - | | 1 | 3 - a | | 2 | 2 - b | | 2 | 1 - | | 2 | 3 | bar | 1 | 2 | bar | 2 | 2 | bar | | 4 | foo | 1 | 1 | foo | 2 | 1 | foo | | 2 + a | | 1 | 2 + b | | 1 | 1 + | | 1 | 3 + a | | 2 | 2 + b | | 2 | 1 + | | 2 | 3 (24 rows) SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab, g) ORDER BY dataa; dataa | b | g | count -------+-----+---+------- + a | foo | | 2 + a | | | 4 + a | | 2 | 2 a | bar | 1 | 1 a | bar | 2 | 1 a | bar | | 2 a | foo | 1 | 1 a | foo | 2 | 1 - a | foo | | 2 - a | | | 4 a | | 1 | 2 - a | | 2 | 2 - b | bar | 2 | 1 + b | bar | 1 | 1 b | | | 2 b | | 1 | 1 - b | | 2 | 1 - b | bar | 1 | 1 + b | bar | 2 | 1 b | bar | | 2 - | foo | | 2 - | foo | 1 | 1 + b | | 2 | 1 | | 2 | 3 + | | | 6 | bar | 1 | 2 | bar | 2 | 2 - | | | 6 - | foo | 2 | 1 | bar | | 4 + | foo | 1 | 1 + | foo | 2 | 1 + | foo | | 2 | | 1 | 3 (24 rows) @@ -360,29 +361,30 @@ SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(d a | bar | 1 | 1 a | foo | 1 | 1 b | bar | 1 | 1 + | bar | 1 | 2 + | foo | 1 | 1 a | | 1 | 2 b | | 1 | 1 | | 1 | 3 - | bar | 1 | 2 - | foo | 1 | 1 - | foo | 2 | 1 - | bar | 2 | 2 a | | 2 | 2 b | | 2 | 1 - a | bar | 2 | 1 + | bar | 2 | 2 | | 2 | 3 + | foo | 2 | 1 + a | bar | 2 | 1 a | foo | 2 | 1 b | bar | 2 | 1 - a | foo | | 2 + a | | | 4 b | bar | | 2 b | | | 2 | | | 6 - a | | | 4 + a | foo | | 2 + a | bar | | 2 | bar | | 4 | foo | | 2 - a | bar | | 2 (24 rows) +reset enable_hashagg; -- data modification CREATE TABLE fewmore AS SELECT generate_series(1,3) AS data; INSERT INTO fewmore VALUES(generate_series(4,5)); diff --git a/src/test/regress/sql/groupingsets.sql b/src/test/regress/sql/groupingsets.sql index 71cc0ec..3f425eb 100644 --- a/src/test/regress/sql/groupingsets.sql +++ b/src/test/regress/sql/groupingsets.sql @@ -31,6 +31,14 @@ copy gstest3 from stdin; \. alter table gstest3 add primary key (a); +create temp table gstest4(id integer, v integer, + unhashable_col bit(4), unsortable_col xid); +insert into gstest4 +values (1,1,b'0000','1'), (2,2,b'0001','1'), + (3,4,b'0010','2'), (4,8,b'0011','2'), + (5,16,b'0000','2'), (6,32,b'0001','2'), + (7,64,b'0010','1'), (8,128,b'0011','1'); + create temp table gstest_empty (a integer, b integer, v integer); create function gstest_data(v integer, out a integer, out b integer) @@ -43,8 +51,11 @@ create function gstest_data(v integer, out a integer, out b integer) -- basic functionality +set enable_hashagg = false; -- test hashing explicitly later + -- simple rollup with multiple plain aggregates, with and without ordering -- (and with ordering differing from grouping) + select a, b, grouping(a,b), sum(v), count(*), max(v) from gstest1 group by rollup (a,b); select a, b, grouping(a,b), sum(v), count(*), max(v) @@ -161,7 +172,7 @@ select a, b from (values (1,2),(2,3)) v(a,b) group by a,b, grouping sets(a); -- Tests for chained aggregates select a, b, grouping(a,b), sum(v), count(*), max(v) - from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)); + from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6; select(select (select grouping(a,b) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY ROLLUP((e+1),(f+1)); select(select (select grouping(a,b) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY CUBE((e+1),(f+1)) ORDER BY (e+1),(f+1); select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum @@ -224,4 +235,136 @@ select array(select row(v.a,s1.*) from (select two,four, count(*) from onek grou select sum(ten) from onek group by two, rollup(four::text) order by 1; select sum(ten) from onek group by rollup(four::text), two order by 1; +-- hashing support + +set enable_hashagg = true; + +-- failure cases + +select count(*) from gstest4 group by rollup(unhashable_col,unsortable_col); +select array_agg(v order by v) from gstest4 group by grouping sets ((id,unsortable_col),(id)); + +-- simple cases + +select a, b, grouping(a,b), sum(v), count(*), max(v) + from gstest1 group by grouping sets ((a),(b)) order by 3,1,2; +explain (costs off) select a, b, grouping(a,b), sum(v), count(*), max(v) + from gstest1 group by grouping sets ((a),(b)) order by 3,1,2; + +select a, b, grouping(a,b), sum(v), count(*), max(v) + from gstest1 group by cube(a,b) order by 3,1,2; +explain (costs off) select a, b, grouping(a,b), sum(v), count(*), max(v) + from gstest1 group by cube(a,b) order by 3,1,2; + +-- shouldn't try and hash +explain (costs off) + select a, b, grouping(a,b), array_agg(v order by v) + from gstest1 group by cube(a,b); + +-- mixed hashable/sortable cases +select unhashable_col, unsortable_col, + grouping(unhashable_col, unsortable_col), + count(*), sum(v) + from gstest4 group by grouping sets ((unhashable_col),(unsortable_col)) + order by 3, 5; +explain (costs off) + select unhashable_col, unsortable_col, + grouping(unhashable_col, unsortable_col), + count(*), sum(v) + from gstest4 group by grouping sets ((unhashable_col),(unsortable_col)) + order by 3,5; + +select unhashable_col, unsortable_col, + grouping(unhashable_col, unsortable_col), + count(*), sum(v) + from gstest4 group by grouping sets ((v,unhashable_col),(v,unsortable_col)) + order by 3,5; +explain (costs off) + select unhashable_col, unsortable_col, + grouping(unhashable_col, unsortable_col), + count(*), sum(v) + from gstest4 group by grouping sets ((v,unhashable_col),(v,unsortable_col)) + order by 3,5; + +-- empty input: first is 0 rows, second 1, third 3 etc. +select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a); +explain (costs off) + select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a); +select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),()); +select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),()); +explain (costs off) + select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),()); +select sum(v), count(*) from gstest_empty group by grouping sets ((),(),()); +explain (costs off) + select sum(v), count(*) from gstest_empty group by grouping sets ((),(),()); + +-- check that functionally dependent cols are not nulled +select a, d, grouping(a,b,c) + from gstest3 + group by grouping sets ((a,b), (a,c)); +explain (costs off) + select a, d, grouping(a,b,c) + from gstest3 + group by grouping sets ((a,b), (a,c)); + +-- simple rescan tests + +select a, b, sum(v.x) + from (values (1),(2)) v(x), gstest_data(v.x) + group by grouping sets (a,b); +explain (costs off) + select a, b, sum(v.x) + from (values (1),(2)) v(x), gstest_data(v.x) + group by grouping sets (a,b); + +select * + from (values (1),(2)) v(x), + lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s; +explain (costs off) + select * + from (values (1),(2)) v(x), + lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s; + +-- Tests for chained aggregates +select a, b, grouping(a,b), sum(v), count(*), max(v) + from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6; +select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum + from gstest2 group by cube (a,b) order by rsum, a, b; +select a, b, sum(v.x) + from (values (1),(2)) v(x), gstest_data(v.x) + group by cube (a,b) order by a,b; + +-- More rescan tests +select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by cube(four,ten)) s on true order by v.a,four,ten; +select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by cube(two,four) order by two,four) s1) from (values (1),(2)) v(a); + +-- Rescan logic changes when there are no empty grouping sets, so test +-- that too: +select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by grouping sets(four,ten)) s on true order by v.a,four,ten; +select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by grouping sets(two,four) order by two,four) s1) from (values (1),(2)) v(a); + +-- test the knapsack + +set work_mem = '64kB'; +explain (costs off) + select unique1, + count(two), count(four), count(ten), + count(hundred), count(thousand), count(twothousand), + count(*) + from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two); +explain (costs off) + select unique1, + count(two), count(four), count(ten), + count(hundred), count(thousand), count(twothousand), + count(*) + from tenk1 group by grouping sets (unique1,hundred,ten,four,two); + +set work_mem = '384kB'; +explain (costs off) + select unique1, + count(two), count(four), count(ten), + count(hundred), count(thousand), count(twothousand), + count(*) + from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two); + -- end diff --git a/src/test/regress/sql/tsrf.sql b/src/test/regress/sql/tsrf.sql index e627bb9..417e78c 100644 --- a/src/test/regress/sql/tsrf.sql +++ b/src/test/regress/sql/tsrf.sql @@ -66,12 +66,14 @@ SELECT SUM(count(*)) OVER(PARTITION BY generate_series(1,3) ORDER BY generate_se SELECT few.dataa, count(*), min(id), max(id), generate_series(1,3) FROM few GROUP BY few.dataa ORDER BY 5, 1; -- grouping sets are a bit special, they produce NULLs in columns not actually NULL +set enable_hashagg = false; SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab); SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab) ORDER BY dataa; SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab) ORDER BY g; SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab, g); SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab, g) ORDER BY dataa; SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab, g) ORDER BY g; +reset enable_hashagg; -- data modification CREATE TABLE fewmore AS SELECT generate_series(1,3) AS data;
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers