On Fri, 2020-01-24 at 17:01 -0800, Jeff Davis wrote:
> New patch attached.
Three minor, independent refactoring patches:

1. Add two new entry points for the tuple hash table:

     TupleHashTableHash()
     LookupTupleHashEntryHash()

   These are useful for saving and reusing hash values to avoid
   recomputing them (see the sketch below).
2. Refactor hash_agg_entry_size() so that the callers don't need to do
as much work.
3. Save the calculated aggcosts->transitionSpace in the Agg node for
   later use, rather than discarding it.
These are helpful for the upcoming Hash Aggregation work.
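To illustrate the first patch, here is a rough sketch of the intended
calling pattern (hypothetical caller code, not part of the patches).
When passed a NULL tuple, TupleHashTableHash() hashes the tuple in
hashtable->inputslot, and the caller must select the short-lived memory
context and set up in_hash_funcs first, mirroring what
LookupTupleHashEntry() does internally:

    MemoryContext  oldcxt;
    uint32         hash;
    TupleHashEntry entry;
    bool           isnew;

    /* set up state the hash functions expect, then hash in the temp context */
    hashtable->inputslot = slot;
    hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
    oldcxt = MemoryContextSwitchTo(hashtable->tempcxt);
    hash = TupleHashTableHash(hashtable->hashtab, NULL); /* NULL => use inputslot */
    MemoryContextSwitchTo(oldcxt);

    /* ... save the hash value, e.g. alongside a spilled tuple ... */

    /* later: look up (or create) the entry without recomputing the hash */
    entry = LookupTupleHashEntryHash(hashtable, slot, &isnew, hash);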
Regards,
Jeff Davis
From f47cdd10f04baa3b41eaf0fb8c17f41dda4d0bd4 Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Mon, 3 Feb 2020 14:45:25 -0800
Subject: [PATCH 1/3] HashAgg: TupleHashTableHash() and
LookupTupleHashEntryHash().
Expose two new entry points: one that only calculates the hash value
of a tuple, and another that looks up a hash entry when the hash value
is already known.
---
src/backend/executor/execGrouping.c | 105 ++++++++++++++++++++--------
src/include/executor/executor.h | 5 ++
2 files changed, 80 insertions(+), 30 deletions(-)
diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c
index 3603c58b63e..94439e2ab9e 100644
--- a/src/backend/executor/execGrouping.c
+++ b/src/backend/executor/execGrouping.c
@@ -25,8 +25,9 @@
#include "utils/lsyscache.h"
#include "utils/memutils.h"
-static uint32 TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple tuple);
static int TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2);
+static TupleHashEntry LookupTupleHashEntry_internal(
+ TupleHashTable hashtable, TupleTableSlot *slot, bool *isnew, uint32 hash);
/*
* Define parameters for tuple hash table code generation. The interface is
@@ -300,10 +301,9 @@ TupleHashEntry
LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
bool *isnew)
{
- TupleHashEntryData *entry;
- MemoryContext oldContext;
- bool found;
- MinimalTuple key;
+ TupleHashEntry entry;
+ MemoryContext oldContext;
+ uint32 hash;
/* Need to run the hash functions in short-lived context */
oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
@@ -313,32 +313,29 @@ LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
hashtable->cur_eq_func = hashtable->tab_eq_func;
- key = NULL; /* flag to reference inputslot */
+ hash = TupleHashTableHash(hashtable->hashtab, NULL);
+ entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, hash);
- if (isnew)
- {
- entry = tuplehash_insert(hashtable->hashtab, key, &found);
+ MemoryContextSwitchTo(oldContext);
- if (found)
- {
- /* found pre-existing entry */
- *isnew = false;
- }
- else
- {
- /* created new entry */
- *isnew = true;
- /* zero caller data */
- entry->additional = NULL;
- MemoryContextSwitchTo(hashtable->tablecxt);
- /* Copy the first tuple into the table context */
- entry->firstTuple = ExecCopySlotMinimalTuple(slot);
- }
- }
- else
- {
- entry = tuplehash_lookup(hashtable->hashtab, key);
- }
+ return entry;
+}
+
+/*
+ * A variant of LookupTupleHashEntry for callers that have already computed
+ * the hash value.
+ */
+TupleHashEntry
+LookupTupleHashEntryHash(TupleHashTable hashtable, TupleTableSlot *slot,
+ bool *isnew, uint32 hash)
+{
+ TupleHashEntry entry;
+ MemoryContext oldContext;
+
+ /* Need to run the hash functions in short-lived context */
+ oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
+
+ entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, hash);
MemoryContextSwitchTo(oldContext);
@@ -389,7 +386,7 @@ FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
* Also, the caller must select an appropriate memory context for running
* the hash functions. (dynahash.c doesn't change CurrentMemoryContext.)
*/
-static uint32
+uint32
TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple tuple)
{
TupleHashTable hashtable = (TupleHashTable) tb->private_data;
@@ -450,6 +447,54 @@ TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple tuple)
return murmurhash32(hashkey);
}
+/*
+ * Does the work of LookupTupleHashEntry and LookupTupleHashEntryHash. Useful
+ * so that we can avoid switching the memory context multiple times for
+ * LookupTupleHashEntry.
+ */
+static TupleHashEntry
+LookupTupleHashEntry_internal(TupleHashTable hashtable, TupleTableSlot *slot,
+ bool *isnew, uint32 hash)
+{
+ TupleHashEntryData *entry;
+ bool found;
+ MinimalTuple key;
+
+ /* set up data needed by hash and match functions */
+ hashtable->inputslot = slot;
+ hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
+ hashtable->cur_eq_func = hashtable->tab_eq_func;
+
+ key = NULL; /* flag to reference inputslot */
+
+ if (isnew)
+ {
+ entry = tuplehash_insert_hash(hashtable->hashtab, key, hash, &found);
+
+ if (found)
+ {
+ /* found pre-existing entry */
+ *isnew = false;
+ }
+ else
+ {
+ /* created new entry */
+ *isnew = true;
+ /* zero caller data */
+ entry->additional = NULL;
+ MemoryContextSwitchTo(hashtable->tablecxt);
+ /* Copy the first tuple into the table context */
+ entry->firstTuple = ExecCopySlotMinimalTuple(slot);
+ }
+ }
+ else
+ {
+ entry = tuplehash_lookup_hash(hashtable->hashtab, key, hash);
+ }
+
+ return entry;
+}
+
/*
* See whether two tuples (presumably of the same hash value) match
*/
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index 6ef3e1fe069..76215992647 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -140,10 +140,15 @@ extern TupleHashTable BuildTupleHashTableExt(PlanState *parent,
extern TupleHashEntry LookupTupleHashEntry(TupleHashTable hashtable,
TupleTableSlot *slot,
bool *isnew);
+extern TupleHashEntry LookupTupleHashEntryHash(TupleHashTable hashtable,
+ TupleTableSlot *slot,
+ bool *isnew, uint32 hash);
extern TupleHashEntry FindTupleHashEntry(TupleHashTable hashtable,
TupleTableSlot *slot,
ExprState *eqcomp,
FmgrInfo *hashfunctions);
+extern uint32 TupleHashTableHash(struct tuplehash_hash *tb,
+ const MinimalTuple tuple);
extern void ResetTupleHashTable(TupleHashTable hashtable);
/*
--
2.17.1
From 2db9ae43db11fb28d6b8397a1858c996a8a00b19 Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Mon, 3 Feb 2020 15:12:41 -0800
Subject: [PATCH 2/3] HashAgg: make hash_agg_entry_size() account for all
space.
Previously, it neglected to account for pass-by-reference transition
data values and the representative tuple, requiring callers to account
for those themselves.
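For example, estimate_hashagg_tablesize() no longer needs to assemble
the estimate from MAXALIGN'd pieces itself; it reduces to a single call
(as in the selfuncs.c hunk below):

    hashentrysize = hash_agg_entry_size(agg_costs->numAggs,
                                        path->pathtarget->width,
                                        agg_costs->transitionSpace);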
---
src/backend/executor/nodeAgg.c | 23 +++++++++--------------
src/backend/optimizer/plan/planner.c | 9 ++-------
src/backend/utils/adt/selfuncs.c | 12 ++----------
src/include/executor/nodeAgg.h | 3 ++-
4 files changed, 15 insertions(+), 32 deletions(-)
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 9073395eacf..ac3908ba142 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -1423,23 +1423,18 @@ find_hash_columns(AggState *aggstate)
/*
* Estimate per-hash-table-entry overhead for the planner.
- *
- * Note that the estimate does not include space for pass-by-reference
- * transition data values, nor for the representative tuple of each group.
- * Nor does this account of the target fill-factor and growth policy of the
- * hash table.
*/
Size
-hash_agg_entry_size(int numAggs)
+hash_agg_entry_size(int numAggs, Size tupleWidth, Size transitionSpace)
{
- Size entrysize;
-
- /* This must match build_hash_table */
- entrysize = sizeof(TupleHashEntryData) +
- numAggs * sizeof(AggStatePerGroupData);
- entrysize = MAXALIGN(entrysize);
-
- return entrysize;
+ return
+ /* key */
+ MAXALIGN(SizeofMinimalTupleHeader) +
+ MAXALIGN(tupleWidth) +
+ /* data */
+ MAXALIGN(sizeof(TupleHashEntryData) +
+ numAggs * sizeof(AggStatePerGroupData)) +
+ transitionSpace;
}
/*
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index d6f21535937..b44efd6314c 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -4867,13 +4867,8 @@ create_distinct_paths(PlannerInfo *root,
allow_hash = false; /* policy-based decision not to hash */
else
{
- Size hashentrysize;
-
- /* Estimate per-hash-entry space at tuple width... */
- hashentrysize = MAXALIGN(cheapest_input_path->pathtarget->width) +
- MAXALIGN(SizeofMinimalTupleHeader);
- /* plus the per-hash-entry overhead */
- hashentrysize += hash_agg_entry_size(0);
+ Size hashentrysize = hash_agg_entry_size(
+ 0, cheapest_input_path->pathtarget->width, 0);
/* Allow hashing only if hashtable is predicted to fit in work_mem */
allow_hash = (hashentrysize * numDistinctRows <= work_mem * 1024L);
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 7c6f0574b37..0be26fe0378 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -3526,16 +3526,8 @@ double
estimate_hashagg_tablesize(Path *path, const AggClauseCosts *agg_costs,
double dNumGroups)
{
- Size hashentrysize;
-
- /* Estimate per-hash-entry space at tuple width... */
- hashentrysize = MAXALIGN(path->pathtarget->width) +
- MAXALIGN(SizeofMinimalTupleHeader);
-
- /* plus space for pass-by-ref transition values... */
- hashentrysize += agg_costs->transitionSpace;
- /* plus the per-hash-entry overhead */
- hashentrysize += hash_agg_entry_size(agg_costs->numAggs);
+ Size hashentrysize = hash_agg_entry_size(
+ agg_costs->numAggs, path->pathtarget->width, agg_costs->transitionSpace);
/*
* Note that this disregards the effect of fill-factor and growth policy
diff --git a/src/include/executor/nodeAgg.h b/src/include/executor/nodeAgg.h
index 2fe82da6ff7..264916f9a92 100644
--- a/src/include/executor/nodeAgg.h
+++ b/src/include/executor/nodeAgg.h
@@ -309,6 +309,7 @@ extern AggState *ExecInitAgg(Agg *node, EState *estate, int eflags);
extern void ExecEndAgg(AggState *node);
extern void ExecReScanAgg(AggState *node);
-extern Size hash_agg_entry_size(int numAggs);
+extern Size hash_agg_entry_size(int numAggs, Size tupleWidth,
+ Size transitionSpace);
#endif /* NODEAGG_H */
--
2.17.1
From b69e6fbcb8a92d0af4d9ba2f24cfb7d07dfdff9d Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Mon, 3 Feb 2020 15:18:52 -0800
Subject: [PATCH 3/3] HashAgg: save calculated transitionSpace in Agg node.
This is useful to improve estimates of how many groups can fit in the
hash table without exceeding work_mem.
---
src/backend/optimizer/plan/createplan.c | 9 +++++++--
src/backend/optimizer/util/pathnode.c | 2 ++
src/include/nodes/pathnodes.h | 2 ++
src/include/nodes/plannodes.h | 1 +
src/include/optimizer/planmain.h | 4 ++--
5 files changed, 14 insertions(+), 4 deletions(-)
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index e048d200bb4..090919e39a0 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -1644,6 +1644,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags)
NIL,
NIL,
best_path->path.rows,
+ 0,
subplan);
}
else
@@ -2096,6 +2097,7 @@ create_agg_plan(PlannerInfo *root, AggPath *best_path)
NIL,
NIL,
best_path->numGroups,
+ best_path->transitionSpace,
subplan);
copy_generic_path_info(&plan->plan, (Path *) best_path);
@@ -2257,6 +2259,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path)
rollup->gsets,
NIL,
rollup->numGroups,
+ best_path->transitionSpace,
sort_plan);
/*
@@ -2295,6 +2298,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path)
rollup->gsets,
chain,
rollup->numGroups,
+ best_path->transitionSpace,
subplan);
/* Copy cost data from Path to Plan */
@@ -6192,8 +6196,8 @@ Agg *
make_agg(List *tlist, List *qual,
AggStrategy aggstrategy, AggSplit aggsplit,
int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations,
- List *groupingSets, List *chain,
- double dNumGroups, Plan *lefttree)
+ List *groupingSets, List *chain, double dNumGroups,
+ int32 transitionSpace, Plan *lefttree)
{
Agg *node = makeNode(Agg);
Plan *plan = &node->plan;
@@ -6209,6 +6213,7 @@ make_agg(List *tlist, List *qual,
node->grpOperators = grpOperators;
node->grpCollations = grpCollations;
node->numGroups = numGroups;
+ node->transitionSpace = transitionSpace;
node->aggParams = NULL; /* SS_finalize_plan() will fill this */
node->groupingSets = groupingSets;
node->chain = chain;
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index e6d08aede56..d9ce5162116 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -2949,6 +2949,7 @@ create_agg_path(PlannerInfo *root,
pathnode->aggstrategy = aggstrategy;
pathnode->aggsplit = aggsplit;
pathnode->numGroups = numGroups;
+ pathnode->transitionSpace = aggcosts ? aggcosts->transitionSpace : 0;
pathnode->groupClause = groupClause;
pathnode->qual = qual;
@@ -3036,6 +3037,7 @@ create_groupingsets_path(PlannerInfo *root,
pathnode->aggstrategy = aggstrategy;
pathnode->rollups = rollups;
pathnode->qual = having_qual;
+ pathnode->transitionSpace = agg_costs ? agg_costs->transitionSpace : 0;
Assert(rollups != NIL);
Assert(aggstrategy != AGG_PLAIN || list_length(rollups) == 1);
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 3d3be197e0e..be592d0fee4 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1663,6 +1663,7 @@ typedef struct AggPath
AggStrategy aggstrategy; /* basic strategy, see nodes.h */
AggSplit aggsplit; /* agg-splitting mode, see nodes.h */
double numGroups; /* estimated number of groups in input */
+ int32 transitionSpace; /* estimated transition state size */
List *groupClause; /* a list of SortGroupClause's */
List *qual; /* quals (HAVING quals), if any */
} AggPath;
@@ -1700,6 +1701,7 @@ typedef struct GroupingSetsPath
AggStrategy aggstrategy; /* basic strategy */
List *rollups; /* list of RollupData */
List *qual; /* quals (HAVING quals), if any */
+ int32 transitionSpace; /* estimated transition state size */
} GroupingSetsPath;
/*
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 32c0d87f80e..f4183e1efa5 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -813,6 +813,7 @@ typedef struct Agg
Oid *grpOperators; /* equality operators to compare with */
Oid *grpCollations;
long numGroups; /* estimated number of groups in input */
+ int32 transitionSpace; /* estimated transition state size */
Bitmapset *aggParams; /* IDs of Params used in Aggref inputs */
/* Note: planner provides numGroups & aggParams only in HASHED/MIXED case */
List *groupingSets; /* grouping sets to use */
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index eab486a6214..c7bda2b0917 100644
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -54,8 +54,8 @@ extern Sort *make_sort_from_sortclauses(List *sortcls, Plan *lefttree);
extern Agg *make_agg(List *tlist, List *qual,
AggStrategy aggstrategy, AggSplit aggsplit,
int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations,
- List *groupingSets, List *chain,
- double dNumGroups, Plan *lefttree);
+ List *groupingSets, List *chain, double dNumGroups,
+ int32 transitionSpace, Plan *lefttree);
extern Limit *make_limit(Plan *lefttree, Node *limitOffset, Node *limitCount);
/*
--
2.17.1