On Fri, 2020-01-24 at 17:01 -0800, Jeff Davis wrote:
> New patch attached.
Three minor, independent refactoring patches:

1. Add two new entry points for the tuple hash table:

     TupleHashTableHash()
     LookupTupleHashEntryHash()

   These are useful for saving and reusing hash values to avoid
   recomputing them (see the sketch below).
2. Refactor hash_agg_entry_size() so that the callers don't need to do
as much work.
3. Save the calculated aggcosts->transitionSpace in the Agg node for
   later use, rather than discarding it.
These are helpful for the upcoming Hash Aggregation work.
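To illustrate the first patch, here is a rough sketch of the intended
calling pattern (hypothetical caller code, not part of the patches).
When passed a NULL tuple, TupleHashTableHash() hashes the tuple in
hashtable->inputslot, and the caller must select the short-lived memory
context and set up in_hash_funcs first, mirroring what
LookupTupleHashEntry() does internally:

    MemoryContext  oldcxt;
    uint32         hash;
    TupleHashEntry entry;
    bool           isnew;

    /* set up state the hash functions expect, then hash in the temp context */
    hashtable->inputslot = slot;
    hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
    oldcxt = MemoryContextSwitchTo(hashtable->tempcxt);
    hash = TupleHashTableHash(hashtable->hashtab, NULL); /* NULL => use inputslot */
    MemoryContextSwitchTo(oldcxt);

    /* ... save the hash value, e.g. alongside a spilled tuple ... */

    /* later: look up (or create) the entry without recomputing the hash */
    entry = LookupTupleHashEntryHash(hashtable, slot, &isnew, hash);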
Regards,
Jeff Davis
From f47cdd10f04baa3b41eaf0fb8c17f41dda4d0bd4 Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Mon, 3 Feb 2020 14:45:25 -0800
Subject: [PATCH 1/3] HashAgg: TupleHashTableHash() and
LookupTupleHashEntryHash().
Expose two new entry points: one that only calculates the hash value
of a tuple, and another that looks up a hash entry when the hash value
is already known.
---
src/backend/executor/execGrouping.c | 105 ++++++++++++++++++++--------
src/include/executor/executor.h | 5 ++
2 files changed, 80 insertions(+), 30 deletions(-)
diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c
index 3603c58b63e..94439e2ab9e 100644
--- a/src/backend/executor/execGrouping.c
+++ b/src/backend/executor/execGrouping.c
@@ -25,8 +25,9 @@
#include "utils/lsyscache.h"
#include "utils/memutils.h"
-static uint32 TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple tuple);
static int TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2);
+static TupleHashEntry LookupTupleHashEntry_internal(
+ TupleHashTable hashtable, TupleTableSlot *slot, bool *isnew, uint32 hash);
/*
* Define parameters for tuple hash table code generation. The interface is
@@ -300,10 +301,9 @@ TupleHashEntry
LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
bool *isnew)
{
- TupleHashEntryData *entry;
- MemoryContext oldContext;
- bool found;
- MinimalTuple key;
+ TupleHashEntry entry;
+ MemoryContext oldContext;
+ uint32 hash;
/* Need to run the hash functions in short-lived context */
oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
@@ -313,32 +313,29 @@ LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
hashtable->cur_eq_func = hashtable->tab_eq_func;
- key = NULL; /* flag to reference inputslot */
+ hash = TupleHashTableHash(hashtable->hashtab, NULL);
+ entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, hash);
- if (isnew)
- {
- entry = tuplehash_insert(hashtable->hashtab, key, &found);
+ MemoryContextSwitchTo(oldContext);
- if (found)
- {
- /* found pre-existing entry */
- *isnew = false;
- }
- else
- {
- /* created new entry */
- *isnew = true;
- /* zero caller data */
- entry->additional = NULL;
- MemoryContextSwitchTo(hashtable->tablecxt);
- /* Copy the first tuple into the table context */
- entry->firstTuple = ExecCopySlotMinimalTuple(slot);
- }
- }
- else
- {
- entry = tuplehash_lookup(hashtable->hashtab, key);
- }
+ return entry;
+}
+
+/*
+ * A variant of LookupTupleHashEntry for callers that have already computed
+ * the hash value.
+ */
+TupleHashEntry
+LookupTupleHashEntryHash(TupleHashTable hashtable, TupleTableSlot *slot,
+ bool *isnew, uint32 hash)
+{
+ TupleHashEntry entry;
+ MemoryContext oldContext;
+
+ /* Need to run the hash functions in short-lived context */
+ oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
+
+ entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, hash);
MemoryContextSwitchTo(oldContext);
@@ -389,7 +386,7 @@ FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
* Also, the caller must select an appropriate memory context for running
* the hash functions. (dynahash.c doesn't change CurrentMemoryContext.)
*/
-static uint32
+uint32
TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple tuple)
{
TupleHashTable hashtable = (TupleHashTable) tb->private_data;
@@ -450,6 +447,54 @@ TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple tuple)
return murmurhash32(hashkey);
}
+/*
+ * Does the work of LookupTupleHashEntry and LookupTupleHashEntryHash. Useful
+ * so that we can avoid switching the memory context multiple times for
+ * LookupTupleHashEntry.
+ */
+static TupleHashEntry
+LookupTupleHashEntry_internal(TupleHashTable hashtable, TupleTableSlot *slot,
+ bool *isnew, uint32 hash)
+{
+ TupleHashEntryData *entry;
+ bool found;
+ MinimalTuple key;
+
+ /* set up data needed by hash and match functions */
+ hashtable->inputslot = slot;
+ hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
+ hashtable->cur_eq_func = hashtable->tab_eq_func;
+
+ key = NULL; /* flag to reference inputslot */
+
+ if (isnew)
+ {
+ entry = tuplehash_insert_hash(hashtable->hashtab, key, hash, &found);
+
+ if (found)
+ {
+ /* found pre-existing entry */
+ *isnew = false;
+ }
+ else
+ {
+ /* created new entry */
+ *isnew = true;
+ /* zero caller data */
+ entry->additional = NULL;
+ MemoryContextSwitchTo(hashtable->tablecxt);
+ /* Copy the first tuple into the table context */
+ entry->firstTuple = ExecCopySlotMinimalTuple(slot);
+ }
+ }
+ else
+ {
+ entry = tuplehash_lookup_hash(hashtable->hashtab, key, hash);
+ }
+
+ return entry;
+}
+
/*
* See whether two tuples (presumably of the same hash value) match
*/
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index 6ef3e1fe069..76215992647 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -140,10 +140,15 @@ extern TupleHashTable BuildTupleHashTableExt(PlanState *parent,
extern TupleHashEntry LookupTupleHashEntry(TupleHashTable hashtable,
TupleTableSlot *slot,
bool *isnew);
+extern TupleHashEntry LookupTupleHashEntryHash(TupleHashTable hashtable,
+ TupleTableSlot *slot,
+ bool *isnew, uint32 hash);
extern TupleHashEntry FindTupleHashEntry(TupleHashTable hashtable,
TupleTableSlot *slot,
ExprState *eqcomp,
FmgrInfo *hashfunctions);
+extern uint32 TupleHashTableHash(struct tuplehash_hash *tb,
+ const MinimalTuple tuple);
extern void ResetTupleHashTable(TupleHashTable hashtable);
/*
--
2.17.1
From 2db9ae43db11fb28d6b8397a1858c996a8a00b19 Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Mon, 3 Feb 2020 15:12:41 -0800
Subject: [PATCH 2/3] HashAgg: make hash_agg_entry_size() account for all
space.
Previously, it neglected to account for pass-by-reference transition
data values and the representative tuple, requiring callers to account
for those themselves.
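For example, estimate_hashagg_tablesize() no longer needs to assemble
the estimate from MAXALIGN'd pieces itself; it reduces to a single call
(as in the selfuncs.c hunk below):

    hashentrysize = hash_agg_entry_size(agg_costs->numAggs,
                                        path->pathtarget->width,
                                        agg_costs->transitionSpace);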
---
src/backend/executor/nodeAgg.c | 23 +++++++++--------------
src/backend/optimizer/plan/planner.c | 9 ++-------
src/backend/utils/adt/selfuncs.c | 12 ++----------
src/include/executor/nodeAgg.h | 3 ++-
4 files changed, 15 insertions(+), 32 deletions(-)
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 9073395eacf..ac3908ba142 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -1423,23 +1423,18 @@ find_hash_columns(AggState *aggstate)
/*
* Estimate per-hash-table-entry overhead for the planner.
- *
- * Note that the estimate does not include space for pass-by-reference
- * transition data values, nor for the representative tuple of each group.
- * Nor does this account of the target fill-factor and growth policy of the
- * hash table.
*/
Size
-hash_agg_entry_size(int numAggs)
+hash_agg_entry_size(int numAggs, Size tupleWidth, Size transitionSpace)
{
- Size entrysize;
-
- /* This must match build_hash_table */
- entrysize = sizeof(TupleHashEntryData) +
- numAggs * sizeof(AggStatePerGroupData);
- entrysize = MAXALIGN(entrysize);
-
- return entrysize;
+ return
+ /* key */
+ MAXALIGN(SizeofMinimalTupleHeader) +
+ MAXALIGN(tupleWidth) +
+ /* data */
+ MAXALIGN(sizeof(TupleHashEntryData) +
+ numAggs * sizeof(AggStatePerGroupData)) +
+ transitionSpace;
}
/*
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index d6f21535937..b44efd6314c 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -4867,13 +4867,8 @@ create_distinct_paths(PlannerInfo *root,
allow_hash = false; /* policy-based decision not to hash */
else
{
- Size hashentrysize;
-
- /* Estimate per-hash-entry space at tuple width... */
- hashentrysize = MAXALIGN(cheapest_input_path->pathtarget->width) +
- MAXALIGN(SizeofMinimalTupleHeader);
- /* plus the per-hash-entry overhead */
- hashentrysize += hash_agg_entry_size(0);
+ Size hashentrysize = hash_agg_entry_size(
+ 0, cheapest_input_path->pathtarget->width, 0);
/* Allow hashing only if hashtable is predicted to fit in work_mem */
allow_hash = (hashentrysize * numDistinctRows <= work_mem * 1024L);
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 7c6f0574b37..0be26fe0378 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -3526,16 +3526,8 @@ double
estimate_hashagg_tablesize(Path *path, const AggClauseCosts *agg_costs,
double dNumGroups)
{
- Size hashentrysize;
-
- /* Estimate per-hash-entry space at tuple width... */
- hashentrysize = MAXALIGN(path->pathtarget->width) +
- MAXALIGN(SizeofMinimalTupleHeader);
-
- /* plus space for pass-by-ref transition values... */
- hashentrysize += agg_costs->transitionSpace;
- /* plus the per-hash-entry overhead */
- hashentrysize += hash_agg_entry_size(agg_costs->numAggs);
+ Size hashentrysize = hash_agg_entry_size(
+ agg_costs->numAggs, path->pathtarget->width, agg_costs->transitionSpace);
/*
* Note that this disregards the effect of fill-factor and growth policy
diff --git a/src/include/executor/nodeAgg.h b/src/include/executor/nodeAgg.h
index 2fe82da6ff7..264916f9a92 100644
--- a/src/include/executor/nodeAgg.h
+++ b/src/include/executor/nodeAgg.h
@@ -309,6 +309,7 @@ extern AggState *ExecInitAgg(Agg *node, EState *estate, int eflags);
extern void ExecEndAgg(AggState *node);
extern void ExecReScanAgg(AggState *node);
-extern Size hash_agg_entry_size(int numAggs);
+extern Size hash_agg_entry_size(int numAggs, Size tupleWidth,
+ Size transitionSpace);
#endif /* NODEAGG_H */
--
2.17.1
From b69e6fbcb8a92d0af4d9ba2f24cfb7d07dfdff9d Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Mon, 3 Feb 2020 15:18:52 -0800
Subject: [PATCH 3/3] HashAgg: save calculated transitionSpace in Agg node.
This is useful to improve estimates of how many groups can fit in the
hash table without exceeding work_mem.
---
src/backend/optimizer/plan/createplan.c | 9 +++++++--
src/backend/optimizer/util/pathnode.c | 2 ++
src/include/nodes/pathnodes.h | 2 ++
src/include/nodes/plannodes.h | 1 +
src/include/optimizer/planmain.h | 4 ++--
5 files changed, 14 insertions(+), 4 deletions(-)
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index e048d200bb4..090919e39a0 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -1644,6 +1644,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags)
NIL,
NIL,
best_path->path.rows,
+ 0,
subplan);
}
else
@@ -2096,6 +2097,7 @@ create_agg_plan(PlannerInfo *root, AggPath *best_path)
NIL,
NIL,
best_path->numGroups,
+ best_path->transitionSpace,
subplan);
copy_generic_path_info(&plan->plan, (Path *) best_path);
@@ -2257,6 +2259,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path)
rollup->gsets,
NIL,
rollup->numGroups,
+ best_path->transitionSpace,
sort_plan);
/*
@@ -2295,6 +2298,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path)
rollup->gsets,
chain,
rollup->numGroups,
+ best_path->transitionSpace,
subplan);
/* Copy cost data from Path to Plan */
@@ -6192,8 +6196,8 @@ Agg *
make_agg(List *tlist, List *qual,
AggStrategy aggstrategy, AggSplit aggsplit,
int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations,
- List *groupingSets, List *chain,
- double dNumGroups, Plan *lefttree)
+ List *groupingSets, List *chain, double dNumGroups,
+ int32 transitionSpace, Plan *lefttree)
{
Agg *node = makeNode(Agg);
Plan *plan = &node->plan;
@@ -6209,6 +6213,7 @@ make_agg(List *tlist, List *qual,
node->grpOperators = grpOperators;
node->grpCollations = grpCollations;
node->numGroups = numGroups;
+ node->transitionSpace = transitionSpace;
node->aggParams = NULL; /* SS_finalize_plan() will fill this */
node->groupingSets = groupingSets;
node->chain = chain;
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index e6d08aede56..d9ce5162116 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -2949,6 +2949,7 @@ create_agg_path(PlannerInfo *root,
pathnode->aggstrategy = aggstrategy;
pathnode->aggsplit = aggsplit;
pathnode->numGroups = numGroups;
+ pathnode->transitionSpace = aggcosts ? aggcosts->transitionSpace : 0;
pathnode->groupClause = groupClause;
pathnode->qual = qual;
@@ -3036,6 +3037,7 @@ create_groupingsets_path(PlannerInfo *root,
pathnode->aggstrategy = aggstrategy;
pathnode->rollups = rollups;
pathnode->qual = having_qual;
+ pathnode->transitionSpace = agg_costs ? agg_costs->transitionSpace : 0;
Assert(rollups != NIL);
Assert(aggstrategy != AGG_PLAIN || list_length(rollups) == 1);
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 3d3be197e0e..be592d0fee4 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1663,6 +1663,7 @@ typedef struct AggPath
AggStrategy aggstrategy; /* basic strategy, see nodes.h */
AggSplit aggsplit; /* agg-splitting mode, see nodes.h */
double numGroups; /* estimated number of groups in input */
+ int32 transitionSpace; /* estimated transition state size */
List *groupClause; /* a list of SortGroupClause's */
List *qual; /* quals (HAVING quals), if any */
} AggPath;
@@ -1700,6 +1701,7 @@ typedef struct GroupingSetsPath
AggStrategy aggstrategy; /* basic strategy */
List *rollups; /* list of RollupData */
List *qual; /* quals (HAVING quals), if any */
+ int32 transitionSpace; /* estimated transition state size */
} GroupingSetsPath;
/*
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 32c0d87f80e..f4183e1efa5 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -813,6 +813,7 @@ typedef struct Agg
Oid *grpOperators; /* equality operators to compare with */
Oid *grpCollations;
long numGroups; /* estimated number of groups in input */
+ int32 transitionSpace; /* estimated transition state size */
Bitmapset *aggParams; /* IDs of Params used in Aggref inputs */
/* Note: planner provides numGroups & aggParams only in HASHED/MIXED case */
List *groupingSets; /* grouping sets to use */
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index eab486a6214..c7bda2b0917 100644
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -54,8 +54,8 @@ extern Sort *make_sort_from_sortclauses(List *sortcls, Plan *lefttree);
extern Agg *make_agg(List *tlist, List *qual,
AggStrategy aggstrategy, AggSplit aggsplit,
int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations,
- List *groupingSets, List *chain,
- double dNumGroups, Plan *lefttree);
+ List *groupingSets, List *chain, double dNumGroups,
+ int32 transitionSpace, Plan *lefttree);
extern Limit *make_limit(Plan *lefttree, Node *limitOffset, Node *limitCount);
/*
--
2.17.1