On Fri, 2020-06-05 at 21:11 -0700, Andres Freund wrote:
> Hi,
>
> While preparing my pgcon talk I noticed that our hash-agg performance
> degraded noticeably. Looks to me like it's due to the spilling-hashagg
> changes.
Attached a proposed fix. (Might require some minor cleanup.)

The only awkward part is that LookupTupleHashEntry() needs a new out
parameter to pass the hash value back to the caller. Ordinarily, the
caller can get that from the returned entry, but if isnew==NULL, then
the function might return NULL (and the caller wouldn't have an entry
from which to read the hash value).

Regards,
	Jeff Davis
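To illustrate the new calling convention (a minimal sketch, not part of
the patch; "spill_mode" stands in for aggstate->hash_spill_mode, and the
surrounding executor state -- hashtable, slot, spill -- is elided):

	uint32		hash;
	bool		isnew = false;
	/* in spill mode, pass isnew = NULL so no new entries are created */
	bool	   *p_isnew = spill_mode ? NULL : &isnew;
	TupleHashEntry entry;

	entry = LookupTupleHashEntry(hashtable, slot, p_isnew, &hash);
	if (entry == NULL)
	{
		/* no entry found or created, but we still got the hash back */
		hashagg_spill_tuple(spill, slot, hash);
	}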
diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c
index 8be36ca7634..27dbf264766 100644
--- a/src/backend/executor/execGrouping.c
+++ b/src/backend/executor/execGrouping.c
@@ -24,9 +24,10 @@ static int	TupleHashTableMatch(struct tuplehash_hash *tb,
 									const MinimalTuple tuple1,
 									const MinimalTuple tuple2);
 static uint32 TupleHashTableHash_internal(struct tuplehash_hash *tb,
 										  const MinimalTuple tuple);
-static TupleHashEntry LookupTupleHashEntry_internal(TupleHashTable hashtable,
-													TupleTableSlot *slot,
-													bool *isnew, uint32 hash);
+static inline TupleHashEntry LookupTupleHashEntry_internal(
+	TupleHashTable hashtable,
+	TupleTableSlot *slot,
+	bool *isnew, uint32 hash);
 
 /*
  * Define parameters for tuple hash table code generation. The interface is
@@ -291,6 +292,9 @@ ResetTupleHashTable(TupleHashTable hashtable)
  * If isnew is NULL, we do not create new entries; we return NULL if no
  * match is found.
  *
+ * If hash is not NULL, we set it to the calculated hash value. This allows
+ * callers access to the hash value even if no entry is returned.
+ *
  * If isnew isn't NULL, then a new entry is created if no existing entry
  * matches.  On return, *isnew is true if the entry is newly created,
  * false if it existed already.  ->additional_data in the new entry has
@@ -298,11 +302,11 @@ ResetTupleHashTable(TupleHashTable hashtable)
  */
 TupleHashEntry
 LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
-					 bool *isnew)
+					 bool *isnew, uint32 *hash)
 {
 	TupleHashEntry entry;
 	MemoryContext oldContext;
-	uint32		hash;
+	uint32		local_hash;
 
 	/* Need to run the hash functions in short-lived context */
 	oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
@@ -312,8 +316,12 @@ LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
 	hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
 	hashtable->cur_eq_func = hashtable->tab_eq_func;
 
-	hash = TupleHashTableHash_internal(hashtable->hashtab, NULL);
-	entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, hash);
+	local_hash = TupleHashTableHash_internal(hashtable->hashtab, NULL);
+	if (hash != NULL)
+		*hash = local_hash;
+
+	entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, local_hash);
+	Assert(entry == NULL || entry->hash == local_hash);
 
 	MemoryContextSwitchTo(oldContext);
 
@@ -362,6 +370,7 @@ LookupTupleHashEntryHash(TupleHashTable hashtable, TupleTableSlot *slot,
 	hashtable->cur_eq_func = hashtable->tab_eq_func;
 
 	entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, hash);
+	Assert(entry == NULL || entry->hash == hash);
 
 	MemoryContextSwitchTo(oldContext);
 
@@ -480,7 +489,7 @@ TupleHashTableHash_internal(struct tuplehash_hash *tb,
  * NB: This function may or may not change the memory context. Caller is
  * expected to change it back.
  */
-static TupleHashEntry
+static inline TupleHashEntry
 LookupTupleHashEntry_internal(TupleHashTable hashtable, TupleTableSlot *slot,
 							  bool *isnew, uint32 hash)
 {
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 331acee2814..0447d473c82 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -403,8 +403,9 @@ static int	hash_choose_num_partitions(uint64 input_groups,
 											   double hashentrysize,
 											   int used_bits,
 											   int *log2_npartittions);
-static AggStatePerGroup lookup_hash_entry(AggState *aggstate, uint32 hash,
-										  bool *in_hash_table);
+static void initialize_hash_entry(AggState *aggstate,
+								  TupleHashTable hashtable,
+								  TupleHashEntry entry);
 static void lookup_hash_entries(AggState *aggstate);
 static TupleTableSlot *agg_retrieve_direct(AggState *aggstate);
 static void agg_fill_hash_table(AggState *aggstate);
@@ -1979,75 +1980,39 @@ hash_choose_num_partitions(uint64 input_groups, double hashentrysize,
 }
 
 /*
- * Find or create a hashtable entry for the tuple group containing the current
- * tuple (already set in tmpcontext's outertuple slot), in the current grouping
- * set (which the caller must have selected - note that initialize_aggregate
- * depends on this).
- *
- * When called, CurrentMemoryContext should be the per-query context. The
- * already-calculated hash value for the tuple must be specified.
- *
- * If in "spill mode", then only find existing hashtable entries; don't create
- * new ones. If a tuple's group is not already present in the hash table for
- * the current grouping set, assign *in_hash_table=false and the caller will
- * spill it to disk.
+ * Initialize a freshly-created TupleHashEntry.
  */
-static AggStatePerGroup
-lookup_hash_entry(AggState *aggstate, uint32 hash, bool *in_hash_table)
+static void
+initialize_hash_entry(AggState *aggstate, TupleHashTable hashtable,
+					  TupleHashEntry entry)
 {
-	AggStatePerHash perhash = &aggstate->perhash[aggstate->current_set];
-	TupleTableSlot *hashslot = perhash->hashslot;
-	TupleHashEntryData *entry;
-	bool		isnew = false;
-	bool	   *p_isnew;
-
-	/* if hash table already spilled, don't create new entries */
-	p_isnew = aggstate->hash_spill_mode ? NULL : &isnew;
-
-	/* find or create the hashtable entry using the filtered tuple */
-	entry = LookupTupleHashEntryHash(perhash->hashtable, hashslot, p_isnew,
-									 hash);
-
-	if (entry == NULL)
-	{
-		*in_hash_table = false;
-		return NULL;
-	}
-	else
-		*in_hash_table = true;
-
-	if (isnew)
-	{
-		AggStatePerGroup pergroup;
-		int			transno;
+	AggStatePerGroup pergroup;
+	int			transno;
 
-		aggstate->hash_ngroups_current++;
-		hash_agg_check_limits(aggstate);
+	aggstate->hash_ngroups_current++;
+	hash_agg_check_limits(aggstate);
 
-		/* no need to allocate or initialize per-group state */
-		if (aggstate->numtrans == 0)
-			return NULL;
+	/* no need to allocate or initialize per-group state */
+	if (aggstate->numtrans == 0)
+		return;
 
-		pergroup = (AggStatePerGroup)
-			MemoryContextAlloc(perhash->hashtable->tablecxt,
-							   sizeof(AggStatePerGroupData) * aggstate->numtrans);
+	pergroup = (AggStatePerGroup)
+		MemoryContextAlloc(hashtable->tablecxt,
+						   sizeof(AggStatePerGroupData) * aggstate->numtrans);
 
-		entry->additional = pergroup;
+	entry->additional = pergroup;
 
-		/*
-		 * Initialize aggregates for new tuple group, lookup_hash_entries()
-		 * already has selected the relevant grouping set.
-		 */
-		for (transno = 0; transno < aggstate->numtrans; transno++)
-		{
-			AggStatePerTrans pertrans = &aggstate->pertrans[transno];
-			AggStatePerGroup pergroupstate = &pergroup[transno];
+	/*
+	 * Initialize aggregates for new tuple group, lookup_hash_entries()
+	 * already has selected the relevant grouping set.
+	 */
+	for (transno = 0; transno < aggstate->numtrans; transno++)
+	{
+		AggStatePerTrans pertrans = &aggstate->pertrans[transno];
+		AggStatePerGroup pergroupstate = &pergroup[transno];
 
-			initialize_aggregate(aggstate, pertrans, pergroupstate);
-		}
+		initialize_aggregate(aggstate, pertrans, pergroupstate);
 	}
-
-	return entry->additional;
 }
 
 /*
@@ -2077,16 +2042,27 @@ lookup_hash_entries(AggState *aggstate)
 	for (setno = 0; setno < aggstate->num_hashes; setno++)
 	{
 		AggStatePerHash perhash = &aggstate->perhash[setno];
+		TupleHashEntry entry;
 		uint32		hash;
-		bool		in_hash_table;
+		bool		isnew = false;
+		bool	   *p_isnew;
+
+		/* if hash table already spilled, don't create new entries */
+		p_isnew = aggstate->hash_spill_mode ? NULL : &isnew;
 
 		select_current_set(aggstate, setno, true);
 		prepare_hash_slot(aggstate);
-		hash = TupleHashTableHash(perhash->hashtable, perhash->hashslot);
-		pergroup[setno] = lookup_hash_entry(aggstate, hash, &in_hash_table);
 
-		/* check to see if we need to spill the tuple for this grouping set */
-		if (!in_hash_table)
+		entry = LookupTupleHashEntry(perhash->hashtable, perhash->hashslot,
+									 p_isnew, &hash);
+
+		if (entry != NULL)
+		{
+			if (isnew)
+				initialize_hash_entry(aggstate, perhash->hashtable, entry);
+			pergroup[setno] = entry->additional;
+		}
+		else
 		{
 			HashAggSpill *spill = &aggstate->hash_spills[setno];
 			TupleTableSlot *slot = aggstate->tmpcontext->ecxt_outertuple;
@@ -2097,6 +2073,7 @@ lookup_hash_entries(AggState *aggstate)
 									   aggstate->hashentrysize);
 
 			hashagg_spill_tuple(spill, slot, hash);
+			pergroup[setno] = NULL;
 		}
 	}
 }
@@ -2554,6 +2531,7 @@ static bool
 agg_refill_hash_table(AggState *aggstate)
 {
 	HashAggBatch *batch;
+	AggStatePerHash perhash;
 	HashAggSpill spill;
 	HashTapeInfo *tapeinfo = aggstate->hash_tapeinfo;
 	uint64		ngroups_estimate;
@@ -2605,6 +2583,8 @@ agg_refill_hash_table(AggState *aggstate)
 
 	select_current_set(aggstate, batch->setno, true);
 
+	perhash = &aggstate->perhash[aggstate->current_set];
+
 	/*
 	 * Spilled tuples are always read back as MinimalTuples, which may be
 	 * different from the outer plan, so recompile the aggregate expressions.
@@ -2618,10 +2598,12 @@ agg_refill_hash_table(AggState *aggstate)
 							 HASHAGG_READ_BUFFER_SIZE);
 	for (;;)
 	{
-		TupleTableSlot *slot = aggstate->hash_spill_slot;
-		MinimalTuple tuple;
-		uint32		hash;
-		bool		in_hash_table;
+		TupleTableSlot *slot = aggstate->hash_spill_slot;
+		TupleHashEntry entry;
+		MinimalTuple tuple;
+		uint32		hash;
+		bool		isnew = false;
+		bool	   *p_isnew = aggstate->hash_spill_mode ? NULL : &isnew;
 
 		CHECK_FOR_INTERRUPTS();
 
@@ -2633,12 +2615,14 @@ agg_refill_hash_table(AggState *aggstate)
 		aggstate->tmpcontext->ecxt_outertuple = slot;
 		prepare_hash_slot(aggstate);
 
-		aggstate->hash_pergroup[batch->setno] =
-			lookup_hash_entry(aggstate, hash, &in_hash_table);
+		entry = LookupTupleHashEntryHash(
+			perhash->hashtable, perhash->hashslot, p_isnew, hash);
 
-		if (in_hash_table)
+		if (entry != NULL)
 		{
-			/* Advance the aggregates (or combine functions) */
+			if (isnew)
+				initialize_hash_entry(aggstate, perhash->hashtable, entry);
+			aggstate->hash_pergroup[batch->setno] = entry->additional;
 			advance_aggregates(aggstate);
 		}
 		else
@@ -2655,6 +2639,8 @@ agg_refill_hash_table(AggState *aggstate)
 			}
 			/* no memory for a new group, spill */
 			hashagg_spill_tuple(&spill, slot, hash);
+
+			aggstate->hash_pergroup[batch->setno] = NULL;
 		}
 
 		/*
diff --git a/src/backend/executor/nodeRecursiveunion.c b/src/backend/executor/nodeRecursiveunion.c
index 620414a1edc..046242682f0 100644
--- a/src/backend/executor/nodeRecursiveunion.c
+++ b/src/backend/executor/nodeRecursiveunion.c
@@ -94,7 +94,7 @@ ExecRecursiveUnion(PlanState *pstate)
 			if (plan->numCols > 0)
 			{
 				/* Find or build hashtable entry for this tuple's group */
-				LookupTupleHashEntry(node->hashtable, slot, &isnew);
+				LookupTupleHashEntry(node->hashtable, slot, &isnew, NULL);
 				/* Must reset temp context after each hashtable lookup */
 				MemoryContextReset(node->tempContext);
 				/* Ignore tuple if already seen */
@@ -141,7 +141,7 @@ ExecRecursiveUnion(PlanState *pstate)
 		if (plan->numCols > 0)
 		{
 			/* Find or build hashtable entry for this tuple's group */
-			LookupTupleHashEntry(node->hashtable, slot, &isnew);
+			LookupTupleHashEntry(node->hashtable, slot, &isnew, NULL);
 			/* Must reset temp context after each hashtable lookup */
 			MemoryContextReset(node->tempContext);
 			/* Ignore tuple if already seen */
diff --git a/src/backend/executor/nodeSetOp.c b/src/backend/executor/nodeSetOp.c
index bfd148a41a2..8d4ccff19cc 100644
--- a/src/backend/executor/nodeSetOp.c
+++ b/src/backend/executor/nodeSetOp.c
@@ -381,7 +381,7 @@ setop_fill_hash_table(SetOpState *setopstate)
 
 			/* Find or build hashtable entry for this tuple's group */
 			entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
-										 &isnew);
+										 &isnew, NULL);
 
 			/* If new tuple group, initialize counts */
 			if (isnew)
@@ -402,7 +402,7 @@ setop_fill_hash_table(SetOpState *setopstate)
 
 			/* For tuples not seen previously, do not make hashtable entry */
 			entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
-										 NULL);
+										 NULL, NULL);
 
 			/* Advance the counts if entry is already present */
 			if (entry)
diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c
index 298b7757f57..38c2fc0b50b 100644
--- a/src/backend/executor/nodeSubplan.c
+++ b/src/backend/executor/nodeSubplan.c
@@ -595,12 +595,12 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
 		 */
 		if (slotNoNulls(slot))
 		{
-			(void) LookupTupleHashEntry(node->hashtable, slot, &isnew);
+			(void) LookupTupleHashEntry(node->hashtable, slot, &isnew, NULL);
 			node->havehashrows = true;
 		}
 		else if (node->hashnulls)
 		{
-			(void) LookupTupleHashEntry(node->hashnulls, slot, &isnew);
+			(void) LookupTupleHashEntry(node->hashnulls, slot, &isnew, NULL);
 			node->havenullrows = true;
 		}
 
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index c7deeac662f..415e117407c 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -139,7 +139,7 @@ extern TupleHashTable BuildTupleHashTableExt(PlanState *parent,
 											 MemoryContext tempcxt, bool use_variable_hash_iv);
 extern TupleHashEntry LookupTupleHashEntry(TupleHashTable hashtable,
 										   TupleTableSlot *slot,
-										   bool *isnew);
+										   bool *isnew, uint32 *hash);
 extern uint32 TupleHashTableHash(TupleHashTable hashtable,
 								 TupleTableSlot *slot);
 extern TupleHashEntry LookupTupleHashEntryHash(TupleHashTable hashtable,
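For reference, the two lookup paths after this patch, side by side (a
sketch only, with the variable setup as in lookup_hash_entries() and
agg_refill_hash_table() above):

	/*
	 * Normal input path (lookup_hash_entries): LookupTupleHashEntry()
	 * computes the hash itself and hands it back through the new out
	 * parameter, so a tuple that doesn't fit can be spilled without
	 * rehashing it.
	 */
	entry = LookupTupleHashEntry(perhash->hashtable, perhash->hashslot,
								 p_isnew, &hash);

	/*
	 * Refill path (agg_refill_hash_table): the hash was read back along
	 * with the spilled tuple, so it is passed in and not recomputed.
	 */
	entry = LookupTupleHashEntryHash(perhash->hashtable, perhash->hashslot,
									 p_isnew, hash);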