From 07b26551b82b603812de06a160263d755cd66b92 Mon Sep 17 00:00:00 2001
From: David Rowley <dgrowley@gmail.com>
Date: Wed, 7 Aug 2024 16:56:48 +1200
Subject: [PATCH v1] Use ExprStates for hashing in GROUP BY and SubPlans

This speeds up obtaining hash values for GROUP BY and for hashed
SubPlans by using the ExprState support for hashing.  This allows the
hash value computation to be JIT compiled.
---
 src/backend/executor/execExpr.c     | 115 ++++++++++++++++++++++++++++
 src/backend/executor/execGrouping.c |  73 ++++++++----------
 src/backend/executor/nodeSubplan.c  |  11 ++-
 src/include/executor/executor.h     |  10 ++-
 src/include/nodes/execnodes.h       |   8 +-
 5 files changed, 170 insertions(+), 47 deletions(-)

diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c
index 63289ee35e..881ab6516e 100644
--- a/src/backend/executor/execExpr.c
+++ b/src/backend/executor/execExpr.c
@@ -3969,6 +3969,121 @@ ExecBuildAggTransCall(ExprState *state, AggState *aggstate,
 	}
 }
 
+/*
+ * Build an ExprState that calls the given hash function(s) on the attnums
+ * given by 'keyColIdx'.  When numCols > 1, the hash values returned by each
+ * hash function are combined to produce a single hash value.
+ *
+ * desc: tuple descriptor of the tuples whose attributes are to be hashed
+ * ops: TupleTableSlotOps for the TupleDesc (not referenced in this function)
+ * hashfunctions: FmgrInfos of the hash functions to call, one per key column
+ * collations: collation to use with each hash function, one per key column
+ * numCols: array length of hashfunctions, collations and keyColIdx.
+ * parent: PlanState node that the returned ExprState will be evaluated at
+ * init_value: Normally 0, but can be set to other values to seed the hash
+ * with some other value.  Using non-zero is slightly less efficient, as an
+ * extra step is needed to store the initial value, but can be useful.
+ */
+ExprState *
+ExecBuildHash32FromAttrs(TupleDesc desc, const TupleTableSlotOps *ops,
+						 FmgrInfo *hashfunctions, Oid *collations,
+						 int numCols, AttrNumber *keyColIdx,
+						 PlanState *parent, uint32 init_value)
+{
+	ExprState  *state = makeNode(ExprState);
+	ExprEvalStep scratch = {0};
+	intptr_t	opcode;
+	AttrNumber	last_attnum = 0;
+
+	state->parent = parent;
+
+	/* find the highest attnum so we deform the tuple to that point */
+	for (int i = 0; i < numCols; i++)
+		last_attnum = Max(last_attnum, keyColIdx[i]);
+
+	scratch.opcode = EEOP_INNER_FETCHSOME;
+	scratch.d.fetch.last_var = last_attnum;
+	scratch.d.fetch.fixed = false;
+	scratch.d.fetch.kind = NULL;
+	scratch.d.fetch.known_desc = NULL;
+	ExprEvalPushStep(state, &scratch);
+
+	if (init_value == 0)
+	{
+		/*
+		 * No initial value, so we can assign the result of the hash function
+		 * for the first key column without having to concern ourselves with
+		 * combining the result with any initial value.
+		 */
+		opcode = EEOP_HASHDATUM_FIRST;
+	}
+	else
+	{
+		/* Set up operation to set the initial value. */
+		scratch.opcode = EEOP_HASHDATUM_SET_INITVAL;
+		scratch.d.hashdatum_initvalue.init_value = UInt32GetDatum(init_value);
+		scratch.resvalue = &state->resvalue;
+		scratch.resnull = &state->resnull;
+
+		ExprEvalPushStep(state, &scratch);
+
+		/*
+		 * When using an initial value use the NEXT32 ops as the FIRST ops
+		 * would overwrite the stored initial value.
+		 */
+		opcode = EEOP_HASHDATUM_NEXT32;
+	}
+
+	for (int i = 0; i < numCols; i++)
+	{
+		FmgrInfo   *finfo;
+		FunctionCallInfo fcinfo;
+		Oid			inputcollid = collations[i];
+		AttrNumber	attnum = keyColIdx[i] - 1;	/* 0-based for TupleDescAttr */
+
+		finfo = &hashfunctions[i];
+		fcinfo = palloc0(SizeForFunctionCallInfo(1));
+
+		/* Initialize the single-argument call info for this hash function */
+		InitFunctionCallInfoData(*fcinfo, finfo, 1, inputcollid, NULL, NULL);
+
+		/*
+		 * Fetch inner Var for this attnum and store it in the 1st arg of the
+		 * hash func.
+		 */
+		scratch.opcode = EEOP_INNER_VAR;
+		scratch.resvalue = &fcinfo->args[0].value;
+		scratch.resnull = &fcinfo->args[0].isnull;
+		scratch.d.var.attnum = attnum;
+		scratch.d.var.vartype = TupleDescAttr(desc, attnum)->atttypid;
+
+		ExprEvalPushStep(state, &scratch);
+
+		/* Call the hash function, leaving the result in the ExprState */
+		scratch.opcode = opcode;
+		scratch.resvalue = &state->resvalue;
+		scratch.resnull = &state->resnull;
+		scratch.d.hashdatum.finfo = finfo;
+		scratch.d.hashdatum.fcinfo_data = fcinfo;
+		scratch.d.hashdatum.fn_addr = finfo->fn_addr;
+		scratch.d.hashdatum.jumpdone = -1;
+
+		ExprEvalPushStep(state, &scratch);
+
+		/* subsequent attnums must be combined with the previous */
+		opcode = EEOP_HASHDATUM_NEXT32;
+	}
+
+	scratch.resvalue = NULL;
+	scratch.resnull = NULL;
+	scratch.opcode = EEOP_DONE;
+	ExprEvalPushStep(state, &scratch);
+
+	ExecReadyExpr(state);
+
+	return state;
+}
+
 /*
  * Build an ExprState that calls the given hash function(s) on the given
  * 'hash_exprs'.  When multiple expressions are present, the hash values
diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c
index 7233f1e3c0..b5c01e48f5 100644
--- a/src/backend/executor/execGrouping.c
+++ b/src/backend/executor/execGrouping.c
@@ -167,6 +167,7 @@ BuildTupleHashTableExt(PlanState *parent,
 	Size		hash_mem_limit;
 	MemoryContext oldcontext;
 	bool		allow_jit;
+	uint32		hash_iv = 0;
 
 	Assert(nbuckets > 0);
 
@@ -181,14 +182,12 @@ BuildTupleHashTableExt(PlanState *parent,
 
 	hashtable->numCols = numCols;
 	hashtable->keyColIdx = keyColIdx;
-	hashtable->tab_hash_funcs = hashfunctions;
 	hashtable->tab_collations = collations;
 	hashtable->tablecxt = tablecxt;
 	hashtable->tempcxt = tempcxt;
 	hashtable->entrysize = entrysize;
 	hashtable->tableslot = NULL;	/* will be made on first lookup */
 	hashtable->inputslot = NULL;
-	hashtable->in_hash_funcs = NULL;
 	hashtable->cur_eq_func = NULL;
 
 	/*
@@ -200,9 +199,7 @@ BuildTupleHashTableExt(PlanState *parent,
 	 * underestimated.
 	 */
 	if (use_variable_hash_iv)
-		hashtable->hash_iv = murmurhash32(ParallelWorkerNumber);
-	else
-		hashtable->hash_iv = 0;
+		hash_iv = murmurhash32(ParallelWorkerNumber);
 
 	hashtable->hashtab = tuplehash_create(metacxt, nbuckets, hashtable);
 
@@ -223,6 +220,16 @@ BuildTupleHashTableExt(PlanState *parent,
 	 */
 	allow_jit = metacxt != tablecxt;
 
+	hashtable->tab_hash_expr = ExecBuildHash32FromAttrs(inputDesc,
+														&TTSOpsMinimalTuple,
+														hashfunctions,
+														collations,
+														numCols,
+														keyColIdx,
+														parent,
+														hash_iv);
+	hashtable->in_hash_expr = NULL;
+
 	/* build comparator for all columns */
 	/* XXX: should we support non-minimal tuples for the inputslot? */
 	hashtable->tab_eq_func = ExecBuildGroupingEqual(inputDesc, inputDesc,
@@ -314,7 +321,7 @@ LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
 
 	/* set up data needed by hash and match functions */
 	hashtable->inputslot = slot;
-	hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
+	hashtable->in_hash_expr = hashtable->tab_hash_expr;
 	hashtable->cur_eq_func = hashtable->tab_eq_func;
 
 	local_hash = TupleHashTableHash_internal(hashtable->hashtab, NULL);
@@ -340,7 +347,7 @@ TupleHashTableHash(TupleHashTable hashtable, TupleTableSlot *slot)
 	uint32		hash;
 
 	hashtable->inputslot = slot;
-	hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
+	hashtable->in_hash_expr = hashtable->tab_hash_expr;
 
 	/* Need to run the hash functions in short-lived context */
 	oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
@@ -368,7 +375,7 @@ LookupTupleHashEntryHash(TupleHashTable hashtable, TupleTableSlot *slot,
 
 	/* set up data needed by hash and match functions */
 	hashtable->inputslot = slot;
-	hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
+	hashtable->in_hash_expr = hashtable->tab_hash_expr;
 	hashtable->cur_eq_func = hashtable->tab_eq_func;
 
 	entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, hash);
@@ -391,7 +398,7 @@ LookupTupleHashEntryHash(TupleHashTable hashtable, TupleTableSlot *slot,
 TupleHashEntry
 FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
 				   ExprState *eqcomp,
-				   FmgrInfo *hashfunctions)
+				   ExprState *hashexpr)
 {
 	TupleHashEntry entry;
 	MemoryContext oldContext;
@@ -402,7 +409,7 @@ FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
 
 	/* Set up data needed by hash and match functions */
 	hashtable->inputslot = slot;
-	hashtable->in_hash_funcs = hashfunctions;
+	hashtable->in_hash_expr = hashexpr;
 	hashtable->cur_eq_func = eqcomp;
 
 	/* Search the hash table */
@@ -419,25 +426,24 @@ FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
  * copied into the table.
  *
  * Also, the caller must select an appropriate memory context for running
- * the hash functions. (dynahash.c doesn't change CurrentMemoryContext.)
+ * the hash functions.
  */
 static uint32
 TupleHashTableHash_internal(struct tuplehash_hash *tb,
 							const MinimalTuple tuple)
 {
 	TupleHashTable hashtable = (TupleHashTable) tb->private_data;
-	int			numCols = hashtable->numCols;
-	AttrNumber *keyColIdx = hashtable->keyColIdx;
-	uint32		hashkey = hashtable->hash_iv;
+	uint32		hashkey;
 	TupleTableSlot *slot;
-	FmgrInfo   *hashfunctions;
-	int			i;
+	bool		isnull;
 
 	if (tuple == NULL)
 	{
 		/* Process the current input tuple for the table */
-		slot = hashtable->inputslot;
-		hashfunctions = hashtable->in_hash_funcs;
+		hashtable->exprcontext->ecxt_innertuple = hashtable->inputslot;
+		hashkey = DatumGetUInt32(ExecEvalExpr(hashtable->in_hash_expr,
+											  hashtable->exprcontext,
+											  &isnull));
 	}
 	else
 	{
@@ -447,32 +453,17 @@ TupleHashTableHash_internal(struct tuplehash_hash *tb,
 		 * (this case never actually occurs due to the way simplehash.h is
 		 * used, as the hash-value is stored in the entries)
 		 */
-		slot = hashtable->tableslot;
+		slot = hashtable->exprcontext->ecxt_innertuple = hashtable->tableslot;
 		ExecStoreMinimalTuple(tuple, slot, false);
-		hashfunctions = hashtable->tab_hash_funcs;
+		hashkey = DatumGetUInt32(ExecEvalExpr(hashtable->tab_hash_expr,
+											  hashtable->exprcontext,
+											  &isnull));
 	}
 
-	for (i = 0; i < numCols; i++)
-	{
-		AttrNumber	att = keyColIdx[i];
-		Datum		attr;
-		bool		isNull;
-
-		/* combine successive hashkeys by rotating */
-		hashkey = pg_rotate_left32(hashkey, 1);
-
-		attr = slot_getattr(slot, att, &isNull);
-
-		if (!isNull)			/* treat nulls as having hash key 0 */
-		{
-			uint32		hkey;
-
-			hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i],
-													hashtable->tab_collations[i],
-													attr));
-			hashkey ^= hkey;
-		}
-	}
+#ifdef USE_ASSERT_CHECKING
+	/* XXX not for commit. */
+	elog(DEBUG1, "hashkey = %u", hashkey);
+#endif
 
 	/*
 	 * The way hashes are combined above, among each other and with the IV,
diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c
index a96cdd01e1..3d13944aef 100644
--- a/src/backend/executor/nodeSubplan.c
+++ b/src/backend/executor/nodeSubplan.c
@@ -160,7 +160,7 @@ ExecHashSubPlan(SubPlanState *node,
 			FindTupleHashEntry(node->hashtable,
 							   slot,
 							   node->cur_eq_comp,
-							   node->lhs_hash_funcs) != NULL)
+							   node->cur_hash_expr) != NULL)
 		{
 			ExecClearTuple(slot);
 			return BoolGetDatum(true);
@@ -1043,6 +1043,15 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent)
 													sstate->planstate,
 													NULL);
 
+		sstate->cur_hash_expr = ExecBuildHash32FromAttrs(tupDescLeft,
+														 &TTSOpsMinimalTuple,
+														 sstate->lhs_hash_funcs,
+														 sstate->tab_collations,
+														 sstate->numCols,
+														 sstate->keyColIdx,
+														 parent,
+														 0);
+
 		/*
 		 * Create comparator for lookups of rows in the table (potentially
 		 * cross-type comparisons).
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index 69c3ebff00..e77377ff9b 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -160,7 +160,7 @@ extern TupleHashEntry LookupTupleHashEntryHash(TupleHashTable hashtable,
 extern TupleHashEntry FindTupleHashEntry(TupleHashTable hashtable,
 										 TupleTableSlot *slot,
 										 ExprState *eqcomp,
-										 FmgrInfo *hashfunctions);
+										 ExprState *hashexpr);
 extern void ResetTupleHashTable(TupleHashTable hashtable);
 
 /*
@@ -289,6 +289,14 @@ extern ExprState *ExecInitCheck(List *qual, PlanState *parent);
 extern List *ExecInitExprList(List *nodes, PlanState *parent);
 extern ExprState *ExecBuildAggTrans(AggState *aggstate, struct AggStatePerPhaseData *phase,
 									bool doSort, bool doHash, bool nullcheck);
+extern ExprState *ExecBuildHash32FromAttrs(TupleDesc desc,
+										   const TupleTableSlotOps *ops,
+										   FmgrInfo *hashfunctions,
+										   Oid *collations,
+										   int numCols,
+										   AttrNumber *keyColIdx,
+										   PlanState *parent,
+										   uint32 init_value);
 extern ExprState *ExecBuildHash32Expr(TupleDesc desc,
 									  const TupleTableSlotOps *ops,
 									  const Oid *hashfunc_oids,
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index af7d8fd1e7..e02508d031 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -789,7 +789,7 @@ typedef struct ExecAuxRowMark
  * Normally these are the only functions used, but FindTupleHashEntry()
  * supports searching a hashtable using cross-data-type hashing.  For that,
  * the caller must supply hash functions for the LHS datatype as well as
- * the cross-type equality operators to use.  in_hash_funcs and cur_eq_func
+ * the cross-type equality operators to use.  in_hash_expr and cur_eq_func
  * are set to point to the caller's function arrays while doing such a search.
  * During LookupTupleHashEntry(), they point to tab_hash_funcs and
  * tab_eq_func respectively.
@@ -819,7 +819,7 @@ typedef struct TupleHashTableData
 	tuplehash_hash *hashtab;	/* underlying hash table */
 	int			numCols;		/* number of columns in lookup key */
 	AttrNumber *keyColIdx;		/* attr numbers of key columns */
-	FmgrInfo   *tab_hash_funcs; /* hash functions for table datatype(s) */
+	ExprState  *tab_hash_expr;	/* ExprState for hashing table datatype(s) */
 	ExprState  *tab_eq_func;	/* comparator for table datatype(s) */
 	Oid		   *tab_collations; /* collations for hash and comparison */
 	MemoryContext tablecxt;		/* memory context containing table */
@@ -828,9 +828,8 @@ typedef struct TupleHashTableData
 	TupleTableSlot *tableslot;	/* slot for referencing table entries */
 	/* The following fields are set transiently for each table search: */
 	TupleTableSlot *inputslot;	/* current input tuple's slot */
-	FmgrInfo   *in_hash_funcs;	/* hash functions for input datatype(s) */
+	ExprState  *in_hash_expr;	/* ExprState for hashing input datatype(s) */
 	ExprState  *cur_eq_func;	/* comparator for input vs. table */
-	uint32		hash_iv;		/* hash-function IV */
 	ExprContext *exprcontext;	/* expression context */
 }			TupleHashTableData;
 
@@ -986,6 +985,7 @@ typedef struct SubPlanState
 	FmgrInfo   *tab_eq_funcs;	/* equality functions for table datatype(s) */
 	FmgrInfo   *lhs_hash_funcs; /* hash functions for lefthand datatype(s) */
 	FmgrInfo   *cur_eq_funcs;	/* equality functions for LHS vs. table */
+	ExprState  *cur_hash_expr;	/* get hash value for LHS vs table */
 	ExprState  *cur_eq_comp;	/* equality comparator for LHS vs. table */
 } SubPlanState;
 
-- 
2.34.1

