From 136a1071f20764907d00d4f049d16585cc2d6620 Mon Sep 17 00:00:00 2001
From: "dgrowley@gmail.com" <dgrowley@gmail.com>
Date: Tue, 6 Apr 2021 00:17:57 +1200
Subject: [PATCH v5 2/2] Rough-cut patch for HashedScalarArrayOpExpr

---
 src/backend/executor/execExpr.c           | 155 +++++++++++++---------
 src/backend/executor/execExprInterp.c     |  39 +++---
 src/backend/executor/execMain.c           |   6 +
 src/backend/nodes/copyfuncs.c             |  13 ++
 src/backend/nodes/nodeFuncs.c             |  33 ++++-
 src/backend/nodes/outfuncs.c              |  11 ++
 src/backend/nodes/readfuncs.c             |  12 ++
 src/backend/optimizer/plan/planner.c      |   9 ++
 src/backend/optimizer/util/clauses.c      |  70 ++++++++++
 src/backend/utils/adt/ruleutils.c         |  32 +++++
 src/include/executor/execExpr.h           |   9 +-
 src/include/nodes/nodes.h                 |   1 +
 src/include/nodes/plannodes.h             |  14 ++
 src/include/optimizer/optimizer.h         |   2 +
 src/test/regress/expected/expressions.out |   2 +-
 src/test/regress/sql/expressions.sql      |   2 +-
 16 files changed, 314 insertions(+), 96 deletions(-)

diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c
index ec079e7837..748baced34 100644
--- a/src/backend/executor/execExpr.c
+++ b/src/backend/executor/execExpr.c
@@ -51,7 +51,6 @@
 #include "utils/lsyscache.h"
 #include "utils/typcache.h"
 
-#define MIN_ARRAY_SIZE_FOR_SAOP_HASH 9
 
 typedef struct LastAttnumInfo
 {
@@ -1146,13 +1145,11 @@ ExecInitExprRec(Expr *node, ExprState *state,
 		case T_ScalarArrayOpExpr:
 			{
 				ScalarArrayOpExpr *opexpr = (ScalarArrayOpExpr *) node;
-				Oid			func;
 				Expr	   *scalararg;
 				Expr	   *arrayarg;
 				FmgrInfo   *finfo;
 				FunctionCallInfo fcinfo;
 				AclResult	aclresult;
-				bool		useHash = false;
 
 				Assert(list_length(opexpr->args) == 2);
 				scalararg = (Expr *) linitial(opexpr->args);
@@ -1165,65 +1162,103 @@ ExecInitExprRec(Expr *node, ExprState *state,
 				if (aclresult != ACLCHECK_OK)
 					aclcheck_error(aclresult, OBJECT_FUNCTION,
 								   get_func_name(opexpr->opfuncid));
+				InvokeFunctionExecuteHook(opexpr->opfuncid);
 
 				/* Set up the primary fmgr lookup information */
 				finfo = palloc0(sizeof(FmgrInfo));
 				fcinfo = palloc0(SizeForFunctionCallInfo(2));
-				func = opexpr->opfuncid;
+				fmgr_info(opexpr->opfuncid, finfo);
+				fmgr_info_set_expr((Node *) node, finfo);
+				InitFunctionCallInfoData(*fcinfo, finfo, 2,
+										 opexpr->inputcollid, NULL, NULL);
+
+				/* Evaluate scalar directly into left function argument */
+				ExecInitExprRec(scalararg, state,
+								&fcinfo->args[0].value, &fcinfo->args[0].isnull);
 
 				/*
-				 * If we have a constant array and want OR semantics, then we
-				 * implement the op with a hash lookup instead of looping
-				 * through the entire array for each execution.
+				 * Evaluate array argument into our return value.  There's no
+				 * danger in that, because the return value is guaranteed to
+				 * be overwritten by EEOP_SCALARARRAYOP, and will not be
+				 * passed to any other expression.
 				 */
-				if (opexpr->useOr && arrayarg && IsA(arrayarg, Const) &&
-					!((Const *) arrayarg)->constisnull)
-				{
-					Datum		arrdatum = ((Const *) arrayarg)->constvalue;
-					ArrayType  *arr = (ArrayType *) DatumGetPointer(arrdatum);
-					int			nitems;
+				ExecInitExprRec(arrayarg, state, resv, resnull);
 
-					/*
-					 * Only do the optimization if we have a large enough
-					 * array to make it worth it.
-					 */
-					nitems = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr));
-					if (nitems >= MIN_ARRAY_SIZE_FOR_SAOP_HASH)
-					{
-						Oid			hash_func;
+				/* And perform the operation */
+				scratch.opcode = EEOP_SCALARARRAYOP;
+				scratch.d.scalararrayop.element_type = InvalidOid;
+				scratch.d.scalararrayop.useOr = opexpr->useOr;
+				scratch.d.scalararrayop.finfo = finfo;
+				scratch.d.scalararrayop.fcinfo_data = fcinfo;
+				scratch.d.scalararrayop.fn_addr = finfo->fn_addr;
+				ExprEvalPushStep(state, &scratch);
+				break;
+			}
 
-						/*
-						 * Find the hash op that matches the originally planned
-						 * equality op. If we don't have one, we'll just fall
-						 * back to the default linear scan implementation.
-						 */
-						useHash = get_op_hash_functions(opexpr->opno, NULL, &hash_func);
+		case T_HashedScalarArrayOpExpr:
+			{
+				HashedScalarArrayOpExpr *hashsaop = (HashedScalarArrayOpExpr *) node;
+				ScalarArrayOpExpr *saop = hashsaop->saop;
+				Oid			func;
+				Expr	   *scalararg;
+				Const	   *array;
+				FmgrInfo   *finfo;
+				FunctionCallInfo fcinfo;
+				AclResult	aclresult;
+				Oid			hash_func;
+				FmgrInfo   *hash_finfo;
+				FunctionCallInfo hash_fcinfo;
 
-						if (useHash)
-						{
-							FmgrInfo   *hash_finfo;
-							FunctionCallInfo hash_fcinfo;
-
-							hash_finfo = palloc0(sizeof(FmgrInfo));
-							hash_fcinfo = palloc0(SizeForFunctionCallInfo(2));
-							fmgr_info(hash_func, hash_finfo);
-							fmgr_info_set_expr((Node *) node, hash_finfo);
-							InitFunctionCallInfoData(*hash_fcinfo, hash_finfo, 2,
-													 opexpr->inputcollid, NULL, NULL);
-							InvokeFunctionExecuteHook(hash_func);
-
-							scratch.d.scalararrayhashedop.hash_finfo = hash_finfo;
-							scratch.d.scalararrayhashedop.hash_fcinfo_data = hash_fcinfo;
-							scratch.d.scalararrayhashedop.hash_fn_addr = hash_finfo->fn_addr;
-						}
-					}
-				}
+				Assert(list_length(saop->args) == 2);
+				scalararg = (Expr *) linitial(saop->args);
+				array = (Const *) lsecond(saop->args);
+
+				/* Check permission to call function */
+				aclresult = pg_proc_aclcheck(saop->opfuncid,
+											 GetUserId(),
+											 ACL_EXECUTE);
+				if (aclresult != ACLCHECK_OK)
+					aclcheck_error(aclresult, OBJECT_FUNCTION,
+								   get_func_name(saop->opfuncid));
+
+				/* Set up the primary fmgr lookup information */
+				finfo = palloc0(sizeof(FmgrInfo));
+				fcinfo = palloc0(SizeForFunctionCallInfo(2));
+				func = saop->opfuncid;
+
+				/*
+				 * Make sure the planner didn't make a HashedScalarArrayOpExpr
+				 * when it shouldn't have.
+				 */
+				Assert(saop->useOr);
+				Assert(IsA(array, Const));
+				Assert(!array->constisnull);
+
+				/*
+				 * Find the hash op that matches the originally planned
+				 * equality op.
+				 */
+				if (!get_op_hash_functions(saop->opno, NULL, &hash_func))
+					elog(ERROR, "could not find hash function for hash operator %u",
+						saop->opno);
+
+				hash_finfo = palloc0(sizeof(FmgrInfo));
+				hash_fcinfo = palloc0(SizeForFunctionCallInfo(2));
+				fmgr_info(hash_func, hash_finfo);
+				fmgr_info_set_expr((Node *) node, hash_finfo);
+				InitFunctionCallInfoData(*hash_fcinfo, hash_finfo, 2,
+										 saop->inputcollid, NULL, NULL);
+				InvokeFunctionExecuteHook(hash_func);
+
+				scratch.d.hashedscalararrayop.hash_finfo = hash_finfo;
+				scratch.d.hashedscalararrayop.hash_fcinfo_data = hash_fcinfo;
+				scratch.d.hashedscalararrayop.hash_fn_addr = hash_finfo->fn_addr;
 
 				InvokeFunctionExecuteHook(func);
 				fmgr_info(func, finfo);
 				fmgr_info_set_expr((Node *) node, finfo);
 				InitFunctionCallInfoData(*fcinfo, finfo, 2,
-										 opexpr->inputcollid, NULL, NULL);
+										 saop->inputcollid, NULL, NULL);
 
 				/* Evaluate scalar directly into left function argument */
 				ExecInitExprRec(scalararg, state,
@@ -1232,28 +1267,16 @@ ExecInitExprRec(Expr *node, ExprState *state,
 				/*
 				 * Evaluate array argument into our return value.  There's no
 				 * danger in that, because the return value is guaranteed to
-				 * be overwritten by EEOP_SCALARARRAYOP[_HASHED], and will
-				 * not be passed to any other expression.
+				 * be overwritten by EEOP_HASHED_SCALARARRAYOP, and will not
+				 * be passed to any other expression.
 				 */
-				ExecInitExprRec(arrayarg, state, resv, resnull);
+				ExecInitExprRec((Expr *) array, state, resv, resnull);
 
 				/* And perform the operation */
-				if (useHash)
-				{
-					scratch.opcode = EEOP_SCALARARRAYOP_HASHED;
-					scratch.d.scalararrayhashedop.finfo = finfo;
-					scratch.d.scalararrayhashedop.fcinfo_data = fcinfo;
-					scratch.d.scalararrayhashedop.fn_addr = finfo->fn_addr;
-				}
-				else
-				{
-					scratch.opcode = EEOP_SCALARARRAYOP;
-					scratch.d.scalararrayop.element_type = InvalidOid;
-					scratch.d.scalararrayop.useOr = opexpr->useOr;
-					scratch.d.scalararrayop.finfo = finfo;
-					scratch.d.scalararrayop.fcinfo_data = fcinfo;
-					scratch.d.scalararrayop.fn_addr = finfo->fn_addr;
-				}
+				scratch.opcode = EEOP_HASHED_SCALARARRAYOP;
+				scratch.d.hashedscalararrayop.finfo = finfo;
+				scratch.d.hashedscalararrayop.fcinfo_data = fcinfo;
+				scratch.d.hashedscalararrayop.fn_addr = finfo->fn_addr;
 				ExprEvalPushStep(state, &scratch);
 				break;
 			}
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index d7968b299d..fd059377e3 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -76,7 +76,6 @@
 #include "utils/timestamp.h"
 #include "utils/typcache.h"
 #include "utils/xml.h"
-#include "lib/qunique.h"
 
 /*
  * Use computed-goto-based opcode dispatch when computed gotos are available.
@@ -184,9 +183,9 @@ saop_hash_element_match(struct saophash_hash *tb, Datum key1, Datum key2);
 static uint32 saop_element_hash(struct saophash_hash *tb, Datum key);
 
 /*
- * Define parameters for ScalarArrayOpExpr hash table code generation. The interface is
- * *also* declared in execnodes.h (to generate the types, which are externally
- * visible).
+ * Define parameters for ScalarArrayOpExpr hash table code generation. The
+ * interface is *also* declared in execnodes.h (to generate the types, which
+ * are externally visible).
  */
 #define SH_PREFIX saophash
 #define SH_ELEMENT_TYPE ScalarArrayOpExprHashEntryData
@@ -448,7 +447,7 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
 		&&CASE_EEOP_DOMAIN_CHECK,
 		&&CASE_EEOP_CONVERT_ROWTYPE,
 		&&CASE_EEOP_SCALARARRAYOP,
-		&&CASE_EEOP_SCALARARRAYOP_HASHED,
+		&&CASE_EEOP_HASHED_SCALARARRAYOP,
 		&&CASE_EEOP_XMLEXPR,
 		&&CASE_EEOP_AGGREF,
 		&&CASE_EEOP_GROUPING_FUNC,
@@ -1459,10 +1458,10 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
 			EEO_NEXT();
 		}
 
-		EEO_CASE(EEOP_SCALARARRAYOP_HASHED)
+		EEO_CASE(EEOP_HASHED_SCALARARRAYOP)
 		{
 			/* too complex for an inline implementation */
-			ExecEvalScalarArrayOpHashed(state, op, econtext);
+			ExecEvalHashedScalarArrayOp(state, op, econtext);
 
 			EEO_NEXT();
 		}
@@ -3386,13 +3385,13 @@ static uint32
 saop_element_hash(struct saophash_hash *tb, Datum key)
 {
 	ScalarArrayOpExprHashTable elements_tab = (ScalarArrayOpExprHashTable) tb->private_data;
-	FunctionCallInfo fcinfo = elements_tab->op->d.scalararrayhashedop.hash_fcinfo_data;
+	FunctionCallInfo fcinfo = elements_tab->op->d.hashedscalararrayop.hash_fcinfo_data;
 	Datum hash;
 
 	fcinfo->args[0].value = key;
 	fcinfo->args[0].isnull = false;
 
-	hash = elements_tab->op->d.scalararrayhashedop.hash_fn_addr(fcinfo);
+	hash = elements_tab->op->d.hashedscalararrayop.hash_fn_addr(fcinfo);
 
 	return DatumGetUInt32(hash);
 }
@@ -3407,14 +3406,14 @@ saop_hash_element_match(struct saophash_hash *tb, Datum key1, Datum key2)
 	Datum result;
 
 	ScalarArrayOpExprHashTable elements_tab = (ScalarArrayOpExprHashTable) tb->private_data;
-	FunctionCallInfo fcinfo = elements_tab->op->d.scalararrayhashedop.fcinfo_data;
+	FunctionCallInfo fcinfo = elements_tab->op->d.hashedscalararrayop.fcinfo_data;
 
 	fcinfo->args[0].value = key1;
 	fcinfo->args[0].isnull = false;
 	fcinfo->args[1].value = key2;
 	fcinfo->args[1].isnull = false;
 
-	result = elements_tab->op->d.scalararrayhashedop.fn_addr(fcinfo);
+	result = elements_tab->op->d.hashedscalararrayop.fn_addr(fcinfo);
 
 	return DatumGetBool(result);
 }
@@ -3432,11 +3431,11 @@ saop_hash_element_match(struct saophash_hash *tb, Datum key1, Datum key2)
  * The operator always yields boolean.
  */
 void
-ExecEvalScalarArrayOpHashed(ExprState *state, ExprEvalStep *op, ExprContext *econtext)
+ExecEvalHashedScalarArrayOp(ExprState *state, ExprEvalStep *op, ExprContext *econtext)
 {
-	ScalarArrayOpExprHashTable elements_tab = op->d.scalararrayhashedop.elements_tab;
-	FunctionCallInfo fcinfo = op->d.scalararrayhashedop.fcinfo_data;
-	bool		strictfunc = op->d.scalararrayhashedop.finfo->fn_strict;
+	ScalarArrayOpExprHashTable elements_tab = op->d.hashedscalararrayop.elements_tab;
+	FunctionCallInfo fcinfo = op->d.hashedscalararrayop.fcinfo_data;
+	bool		strictfunc = op->d.hashedscalararrayop.finfo->fn_strict;
 	ArrayType  *arr;
 	Datum		scalar = fcinfo->args[0].value;
 	bool		scalar_isnull = fcinfo->args[0].isnull;
@@ -3457,7 +3456,7 @@ ExecEvalScalarArrayOpHashed(ExprState *state, ExprEvalStep *op, ExprContext *eco
 		return;
 	}
 
-	/* Preprocess the array the first time we execute the op. */
+	/* Build the hash table on first evaluation */
 	if (elements_tab == NULL)
 	{
 		int16		typlen;
@@ -3481,7 +3480,7 @@ ExecEvalScalarArrayOpHashed(ExprState *state, ExprEvalStep *op, ExprContext *eco
 		oldcontext = MemoryContextSwitchTo(econtext->ecxt_per_query_memory);
 
 		elements_tab = (ScalarArrayOpExprHashTable) palloc(sizeof(ScalarArrayOpExprHashTableData));
-		op->d.scalararrayhashedop.elements_tab = elements_tab;
+		op->d.hashedscalararrayop.elements_tab = elements_tab;
 		elements_tab->op = op;
 		elements_tab->hashtab = saophash_create(CurrentMemoryContext, nitems, elements_tab);
 
@@ -3524,7 +3523,7 @@ ExecEvalScalarArrayOpHashed(ExprState *state, ExprEvalStep *op, ExprContext *eco
 		 * Remember if we had any nulls so that we know if we need to execute
 		 * non-strict functions with a null lhs value if no match is found.
 		 */
-		op->d.scalararrayhashedop.has_nulls = num_nulls > 0;
+		op->d.hashedscalararrayop.has_nulls = num_nulls > 0;
 
 		/*
 		 * We only setup a binary search op if we have > 8 elements, so we don't
@@ -3544,7 +3543,7 @@ ExecEvalScalarArrayOpHashed(ExprState *state, ExprEvalStep *op, ExprContext *eco
 	 * the possibility of null values (we've previously removed them from the
 	 * array).
 	 */
-	if (!DatumGetBool(result) && op->d.scalararrayhashedop.has_nulls)
+	if (!DatumGetBool(result) && op->d.hashedscalararrayop.has_nulls)
 	{
 		if (strictfunc)
 		{
@@ -3570,7 +3569,7 @@ ExecEvalScalarArrayOpHashed(ExprState *state, ExprEvalStep *op, ExprContext *eco
 			fcinfo->args[1].value = (Datum) 0;
 			fcinfo->args[1].isnull = true;
 
-			result = op->d.scalararrayhashedop.fn_addr(fcinfo);
+			result = op->d.hashedscalararrayop.fn_addr(fcinfo);
 			resultnull = fcinfo->isnull;
 		}
 	}
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index 163242f54e..e5c230109b 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -1624,6 +1624,12 @@ ExecRelCheck(ResultRelInfo *resultRelInfo,
 			Expr	   *checkconstr;
 
 			checkconstr = stringToNode(check[i].ccbin);
+			/*
+			 * XXX consider passing checkconstr through
+			 * convert_saop_to_hashed_saop?  We'd need to only do this when
+			 * performing bulk inserts, and there does not currently seem to
+			 * be any way to determine that.
+			 */
 			resultRelInfo->ri_ConstraintExprs[i] =
 				ExecPrepareExpr(checkconstr, estate);
 		}
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index ad729d10a8..f6197f485a 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -1276,6 +1276,16 @@ _copyPlanRowMark(const PlanRowMark *from)
 	return newnode;
 }
 
+static HashedScalarArrayOpExpr *
+_copyHashedScalarArrayOpExpr(const HashedScalarArrayOpExpr *from)
+{
+	HashedScalarArrayOpExpr *newnode = makeNode(HashedScalarArrayOpExpr);
+
+	COPY_NODE_FIELD(saop);
+
+	return newnode;
+}
+
 static PartitionPruneInfo *
 _copyPartitionPruneInfo(const PartitionPruneInfo *from)
 {
@@ -5091,6 +5101,9 @@ copyObjectImpl(const void *from)
 		case T_PlanRowMark:
 			retval = _copyPlanRowMark(from);
 			break;
+		case T_HashedScalarArrayOpExpr:
+			retval = _copyHashedScalarArrayOpExpr(from);
+			break;
 		case T_PartitionPruneInfo:
 			retval = _copyPartitionPruneInfo(from);
 			break;
diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c
index 860e9a2a06..82cd29bb2c 100644
--- a/src/backend/nodes/nodeFuncs.c
+++ b/src/backend/nodes/nodeFuncs.c
@@ -86,6 +86,9 @@ exprType(const Node *expr)
 		case T_ScalarArrayOpExpr:
 			type = BOOLOID;
 			break;
+		case T_HashedScalarArrayOpExpr:
+			type = BOOLOID;
+			break;
 		case T_BoolExpr:
 			type = BOOLOID;
 			break;
@@ -802,10 +805,13 @@ exprCollation(const Node *expr)
 			coll = ((const NullIfExpr *) expr)->opcollid;
 			break;
 		case T_ScalarArrayOpExpr:
-			coll = InvalidOid;	/* result is always boolean */
+			coll = InvalidOid;	/* result is always InvalidOid */
+			break;
+		case T_HashedScalarArrayOpExpr:
+			coll = InvalidOid;	/* result is always InvalidOid */
 			break;
 		case T_BoolExpr:
-			coll = InvalidOid;	/* result is always boolean */
+			coll = InvalidOid;	/* result is always InvalidOid */
 			break;
 		case T_SubLink:
 			{
@@ -1050,10 +1056,10 @@ exprSetCollation(Node *expr, Oid collation)
 			((NullIfExpr *) expr)->opcollid = collation;
 			break;
 		case T_ScalarArrayOpExpr:
-			Assert(!OidIsValid(collation)); /* result is always boolean */
+			Assert(!OidIsValid(collation)); /* always InvalidOid */
 			break;
 		case T_BoolExpr:
-			Assert(!OidIsValid(collation)); /* result is always boolean */
+			Assert(!OidIsValid(collation)); /* always InvalidOid */
 			break;
 		case T_SubLink:
 #ifdef USE_ASSERT_CHECKING
@@ -2012,6 +2018,15 @@ expression_tree_walker(Node *node,
 					return true;
 			}
 			break;
+		case T_HashedScalarArrayOpExpr:
+			{
+				HashedScalarArrayOpExpr *expr = (HashedScalarArrayOpExpr *) node;
+
+				if (expression_tree_walker((Node *) expr->saop,
+					walker, context))
+					return true;
+			}
+			break;
 		case T_BoolExpr:
 			{
 				BoolExpr   *expr = (BoolExpr *) node;
@@ -2777,6 +2792,16 @@ expression_tree_mutator(Node *node,
 				return (Node *) newnode;
 			}
 			break;
+		case T_HashedScalarArrayOpExpr:
+			{
+				HashedScalarArrayOpExpr *expr = (HashedScalarArrayOpExpr *) node;
+				HashedScalarArrayOpExpr *newnode;
+
+				FLATCOPY(newnode, expr, HashedScalarArrayOpExpr);
+				MUTATE(newnode->saop, expr->saop, ScalarArrayOpExpr *);
+				return (Node *) newnode;
+			}
+			break;
 		case T_BoolExpr:
 			{
 				BoolExpr   *expr = (BoolExpr *) node;
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index fa8f65fbc5..74789485ee 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -987,6 +987,14 @@ _outPlanRowMark(StringInfo str, const PlanRowMark *node)
 	WRITE_BOOL_FIELD(isParent);
 }
 
+static void
+_outHashedScalarArrayOpExpr(StringInfo str, const HashedScalarArrayOpExpr *node)
+{
+	WRITE_NODE_TYPE("HASHEDSCALARARRAYOPEXPR");
+
+	WRITE_NODE_FIELD(saop);
+}
+
 static void
 _outPartitionPruneInfo(StringInfo str, const PartitionPruneInfo *node)
 {
@@ -3968,6 +3976,9 @@ outNode(StringInfo str, const void *obj)
 			case T_PlanRowMark:
 				_outPlanRowMark(str, obj);
 				break;
+			case T_HashedScalarArrayOpExpr:
+				_outHashedScalarArrayOpExpr(str, obj);
+				break;
 			case T_PartitionPruneInfo:
 				_outPartitionPruneInfo(str, obj);
 				break;
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
index ecce23b747..bc4dfe0ce7 100644
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -2523,6 +2523,16 @@ _readPlanRowMark(void)
 	READ_DONE();
 }
 
+static HashedScalarArrayOpExpr *
+_readHashedScalarArrayOpExpr(void)
+{
+	READ_LOCALS(HashedScalarArrayOpExpr);
+
+	READ_NODE_FIELD(saop);
+
+	READ_DONE();
+}
+
 static PartitionPruneInfo *
 _readPartitionPruneInfo(void)
 {
@@ -2949,6 +2959,8 @@ parseNodeString(void)
 		return_value = _readNestLoopParam();
 	else if (MATCH("PLANROWMARK", 11))
 		return_value = _readPlanRowMark();
+	else if (MATCH("HASHEDSCALARARRAYOPEXPR", 23))
+		return_value = _readHashedScalarArrayOpExpr();
 	else if (MATCH("PARTITIONPRUNEINFO", 18))
 		return_value = _readPartitionPruneInfo();
 	else if (MATCH("PARTITIONEDRELPRUNEINFO", 23))
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 898d7fcb0b..0ccb7638c2 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -1110,6 +1110,15 @@ preprocess_expression(PlannerInfo *root, Node *expr, int kind)
 #endif
 	}
 
+	/*
+	 * Check for ScalarArrayOpExpr and check if any can be converted into
+	 * HashedScalarArrayOpExpr for increased repeat evaluation performance.
+	 */
+	if (kind == EXPRKIND_QUAL || kind == EXPRKIND_TARGET)
+	{
+		expr = convert_saop_to_hashed_saop(expr);
+	}
+
 	/* Expand SubLinks to SubPlans */
 	if (root->parse->hasSubLinks)
 		expr = SS_process_sublinks(root, expr, (kind == EXPRKIND_QUAL));
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c
index bea1cc4d67..bbd6954448 100644
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -106,6 +106,7 @@ static bool contain_leaked_vars_walker(Node *node, void *context);
 static Relids find_nonnullable_rels_walker(Node *node, bool top_level);
 static List *find_nonnullable_vars_walker(Node *node, bool top_level);
 static bool is_strict_saop(ScalarArrayOpExpr *expr, bool falseOK);
+static Node *convert_saop_to_hashed_saop_mutator(Node *node, void *context);
 static Node *eval_const_expressions_mutator(Node *node,
 											eval_const_expressions_context *context);
 static bool contain_non_const_walker(Node *node, void *context);
@@ -2101,6 +2102,75 @@ eval_const_expressions(PlannerInfo *root, Node *node)
 	return eval_const_expressions_mutator(node, &context);
 }
 
+#define MIN_ARRAY_SIZE_FOR_HASHED_SAOP 9
+/*--------------------
+ * convert_saop_to_hashed_saop
+ *
+ * Recursively search 'node' for ScalarArrayOpExprs and convert any eligible
+ * ScalarArrayOpExprs into HashedScalarArrayOpExprs.
+ *
+ * A ScalarArrayOpExpr is eligible for conversion if:
+ * 1. It has OR semantics (useOr) and its 2nd argument, the array, is a
+ *	  non-null Const.
+ * 2. The operator passes op_hashjoinable() for the array's type.
+ * 3. The array contains at least MIN_ARRAY_SIZE_FOR_HASHED_SAOP elements, so
+ *	  that building a hash table is considered worthwhile compared to the
+ *	  traditional linear search.
+ */
+Node *
+convert_saop_to_hashed_saop(Node *node)
+{
+	return convert_saop_to_hashed_saop_mutator(node, NULL);
+}
+
+/* Workhorse for convert_saop_to_hashed_saop; 'context' is unused */
+static Node *
+convert_saop_to_hashed_saop_mutator(Node *node, void *context)
+{
+	if (node == NULL)
+		return NULL;
+
+	if (IsA(node, ScalarArrayOpExpr))
+	{
+		ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) node;
+		Expr	   *arrayarg = (Expr *) lsecond(saop->args);
+
+		if (saop->useOr && arrayarg && IsA(arrayarg, Const) &&
+			!((Const *) arrayarg)->constisnull &&
+			op_hashjoinable(saop->opno, exprType((Node *) arrayarg)))
+		{
+			Datum		arrdatum = ((Const *) arrayarg)->constvalue;
+			ArrayType  *arr = (ArrayType *) DatumGetPointer(arrdatum);
+			int			nitems;
+
+			/*
+			 * Only do the conversion if we have a large enough array to make
+			 * hashing worthwhile.
+			 */
+			nitems = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr));
+			if (nitems >= MIN_ARRAY_SIZE_FOR_HASHED_SAOP)
+			{
+				HashedScalarArrayOpExpr *hashedsaop;
+
+				/*
+				 * Wrap saop in a HashedScalarArrayOpExpr, which tells the
+				 * executor to build a hash table for it.
+				 */
+				hashedsaop = makeNode(HashedScalarArrayOpExpr);
+				hashedsaop->saop = saop;
+
+				return (Node *) hashedsaop;
+			}
+		}
+		/* Not eligible; returned as-is without descending into its args */
+		return (Node *) saop;
+	}
+
+	/* Not a ScalarArrayOpExpr; recurse into any child nodes */
+	return expression_tree_mutator(node, convert_saop_to_hashed_saop_mutator, context);
+}
+
+
 /*--------------------
  * estimate_expression_value
  *
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index 254e8f3050..8fc19fb381 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -8330,6 +8330,38 @@ get_rule_expr(Node *node, deparse_context *context,
 			}
 			break;
 
+		case T_HashedScalarArrayOpExpr:
+		{
+			HashedScalarArrayOpExpr *hsaop = (HashedScalarArrayOpExpr *) node;
+			ScalarArrayOpExpr *expr = hsaop->saop;
+			List	   *args = expr->args;
+			Node	   *arg1 = (Node *) linitial(args);
+			Node	   *arg2 = (Node *) lsecond(args);
+
+			Assert(expr->useOr);
+
+			if (!PRETTY_PAREN(context))
+				appendStringInfoChar(buf, '(');
+			get_rule_expr_paren(arg1, context, true, node);
+
+			/*
+			 * XXX Syntax: Should this look like a ScalarArrayOpExpr or
+			 * should we make it look slightly different? Or should this
+			 * output always be reparseable?
+			 */
+			appendStringInfo(buf, " %s %s (",
+				generate_operator_name(expr->opno,
+					exprType(arg1),
+					get_base_element_type(exprType(arg2))), "HASH ANY");
+			get_rule_expr_paren(arg2, context, true, node);
+
+			Assert(!IsA(arg2, SubLink));
+			appendStringInfoChar(buf, ')');
+			if (!PRETTY_PAREN(context))
+				appendStringInfoChar(buf, ')');
+		}
+		break;
+
 		case T_BoolExpr:
 			{
 				BoolExpr   *expr = (BoolExpr *) node;
diff --git a/src/include/executor/execExpr.h b/src/include/executor/execExpr.h
index dc298e3597..88e85db52c 100644
--- a/src/include/executor/execExpr.h
+++ b/src/include/executor/execExpr.h
@@ -242,7 +242,7 @@ typedef enum ExprEvalOp
 	/* evaluate assorted special-purpose expression types */
 	EEOP_CONVERT_ROWTYPE,
 	EEOP_SCALARARRAYOP,
-	EEOP_SCALARARRAYOP_HASHED,
+	EEOP_HASHED_SCALARARRAYOP,
 	EEOP_XMLEXPR,
 	EEOP_AGGREF,
 	EEOP_GROUPING_FUNC,
@@ -579,7 +579,7 @@ typedef struct ExprEvalStep
 			PGFunction	fn_addr;	/* actual call address */
 		}			scalararrayop;
 
-		/* for EEOP_SCALARARRAYOP_HASHED */
+		/* for EEOP_HASHED_SCALARARRAYOP */
 		struct
 		{
 			bool		has_nulls;
@@ -592,7 +592,7 @@ typedef struct ExprEvalStep
 			FunctionCallInfo hash_fcinfo_data;	/* arguments etc */
 			/* faster to access without additional indirection: */
 			PGFunction	hash_fn_addr;	/* actual call address */
-		}			scalararrayhashedop;
+		}			hashedscalararrayop;
 
 		/* for EEOP_XMLEXPR */
 		struct
@@ -765,7 +765,8 @@ extern void ExecEvalFieldStoreForm(ExprState *state, ExprEvalStep *op,
 extern void ExecEvalConvertRowtype(ExprState *state, ExprEvalStep *op,
 								   ExprContext *econtext);
 extern void ExecEvalScalarArrayOp(ExprState *state, ExprEvalStep *op);
-extern void ExecEvalScalarArrayOpHashed(ExprState *state, ExprEvalStep *op, ExprContext *econtext);
+extern void ExecEvalHashedScalarArrayOp(ExprState *state, ExprEvalStep *op,
+										ExprContext *econtext);
 extern void ExecEvalConstraintNotNull(ExprState *state, ExprEvalStep *op);
 extern void ExecEvalConstraintCheck(ExprState *state, ExprEvalStep *op);
 extern void ExecEvalXmlExpr(ExprState *state, ExprEvalStep *op);
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index 2051abbbf9..547f9c94c0 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -90,6 +90,7 @@ typedef enum NodeTag
 	/* these aren't subclasses of Plan: */
 	T_NestLoopParam,
 	T_PlanRowMark,
+	T_HashedScalarArrayOpExpr,
 	T_PartitionPruneInfo,
 	T_PartitionedRelPruneInfo,
 	T_PartitionPruneStepOp,
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 1678bd66fe..fb2cbf471f 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -1124,6 +1124,20 @@ typedef struct PlanRowMark
 	bool		isParent;		/* true if this is a "dummy" parent entry */
 } PlanRowMark;
 
+/*
+ * HashedScalarArrayOpExpr
+ *	   Hashed version of a ScalarArrayOpExpr
+ *
+ * These are generated by convert_saop_to_hashed_saop() in order to allow
+ * faster repeat evaluation of ScalarArrayOpExpr by using a hash table when
+ * it's applicable to do so.  These are never generated during parse, hence
+ * this struct is here rather than in primnodes.h.
+ */
+typedef struct HashedScalarArrayOpExpr
+{
+	NodeTag		type;
+	ScalarArrayOpExpr	*saop;
+} HashedScalarArrayOpExpr;
 
 /*
  * Node types to represent partition pruning information.
diff --git a/src/include/optimizer/optimizer.h b/src/include/optimizer/optimizer.h
index d587952b7d..813801dfb1 100644
--- a/src/include/optimizer/optimizer.h
+++ b/src/include/optimizer/optimizer.h
@@ -146,6 +146,8 @@ extern bool contain_volatile_functions_not_nextval(Node *clause);
 
 extern Node *eval_const_expressions(PlannerInfo *root, Node *node);
 
+extern Node *convert_saop_to_hashed_saop(Node *node);
+
 extern Node *estimate_expression_value(PlannerInfo *root, Node *node);
 
 extern Expr *evaluate_expr(Expr *expr, Oid result_type, int32 result_typmod,
diff --git a/src/test/regress/expected/expressions.out b/src/test/regress/expected/expressions.out
index 42e1c0d1f2..8d66904d69 100644
--- a/src/test/regress/expected/expressions.out
+++ b/src/test/regress/expected/expressions.out
@@ -159,7 +159,7 @@ select count(*) from date_tbl
 (1 row)
 
 --
--- Tests for ScalarArrayOpExpr hash optimization
+-- Tests for HashedScalarArrayOpExpr
 --
 select 1 in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1);
  ?column? 
diff --git a/src/test/regress/sql/expressions.sql b/src/test/regress/sql/expressions.sql
index ef2738067b..5a4689e264 100644
--- a/src/test/regress/sql/expressions.sql
+++ b/src/test/regress/sql/expressions.sql
@@ -67,7 +67,7 @@ select count(*) from date_tbl
   where f1 not between symmetric '1997-01-01' and '1998-01-01';
 
 --
--- Tests for ScalarArrayOpExpr hash optimization
+-- Tests for HashedScalarArrayOpExpr
 --
 
 select 1 in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1);
-- 
2.21.0.windows.1

