From 3822a9ee6f882cf5ed51dcbbef87742debd88160 Mon Sep 17 00:00:00 2001
From: Sami Imseih <simseih@amazon.com>
Date: Mon, 26 May 2025 22:17:24 -0500
Subject: [PATCH v7 4/4] Support Squashing of External Parameters

62d712ec introduced the concept of element squashing for
quwry normalization purposes. However, it did not account for
external parameters passed to a list of elements. This adds
support to these types of values and simplifies the squashing
logic further.

Discussion: https://www.postgresql.org/message-id/flat/202505021256.4yaa24s3sytm%40alvherre.pgsql#1195a340edca50cc3b7389a2ba8b0467
---
 .../pg_stat_statements/expected/squashing.out |  24 ++--
 .../pg_stat_statements/pg_stat_statements.c   |   7 +
 contrib/pg_stat_statements/sql/squashing.sql  |   4 +-
 src/backend/nodes/queryjumblefuncs.c          | 126 +++++++++++-------
 src/include/nodes/primnodes.h                 |   6 +-
 src/include/nodes/queryjumble.h               |   3 +
 6 files changed, 109 insertions(+), 61 deletions(-)

diff --git a/contrib/pg_stat_statements/expected/squashing.out b/contrib/pg_stat_statements/expected/squashing.out
index 5376700fef8..ad282ac2b83 100644
--- a/contrib/pg_stat_statements/expected/squashing.out
+++ b/contrib/pg_stat_statements/expected/squashing.out
@@ -103,7 +103,7 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
  SELECT pg_stat_statements_reset() IS NOT NULL AS t |     1
 (2 rows)
 
--- external parameters will not be squashed
+-- external parameters will be squashed
 SELECT pg_stat_statements_reset() IS NOT NULL AS t;
  t 
 ---
@@ -123,14 +123,14 @@ SELECT * FROM test_squash WHERE id::text = ANY(ARRAY[$1, $2, $3, $4, $5]) \bind
 (0 rows)
 
 SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
-                                   query                                   | calls 
----------------------------------------------------------------------------+-------
- SELECT * FROM test_squash WHERE id IN ($1, $2, $3, $4, $5)                |     1
- SELECT * FROM test_squash WHERE id::text = ANY(ARRAY[$1, $2, $3, $4, $5]) |     1
- SELECT pg_stat_statements_reset() IS NOT NULL AS t                        |     1
+                                query                                 | calls 
+----------------------------------------------------------------------+-------
+ SELECT * FROM test_squash WHERE id IN ($1 /*, ... */)                |     1
+ SELECT * FROM test_squash WHERE id::text = ANY(ARRAY[$1 /*, ... */]) |     1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t                   |     1
 (3 rows)
 
--- neither are prepared statements
+-- prepared statements will also be squashed
 -- the IN and ARRAY forms of this statement will have the same queryId
 SELECT pg_stat_statements_reset() IS NOT NULL AS t;
  t 
@@ -155,11 +155,11 @@ EXECUTE p1(1, 2, 3, 4, 5);
 
 DEALLOCATE p1;
 SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
-                           query                            | calls 
-------------------------------------------------------------+-------
- DEALLOCATE $1                                              |     2
- SELECT * FROM test_squash WHERE id IN ($1, $2, $3, $4, $5) |     2
- SELECT pg_stat_statements_reset() IS NOT NULL AS t         |     1
+                         query                         | calls 
+-------------------------------------------------------+-------
+ DEALLOCATE $1                                         |     2
+ SELECT * FROM test_squash WHERE id IN ($1 /*, ... */) |     2
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t    |     1
 (3 rows)
 
 -- More conditions in the query
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 8cadfa2ff21..69d69db2289 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -2834,6 +2834,9 @@ generate_normalized_query(JumbleState *jstate, const char *query,
 	 */
 	norm_query_buflen = query_len + jstate->clocations_count * 10;
 
+	if (jstate->has_squashed_lists)
+		jstate->highest_extern_param_id = 0;
+
 	/* Allocate result buffer */
 	norm_query = palloc(norm_query_buflen + 1);
 
@@ -2842,6 +2845,10 @@ generate_normalized_query(JumbleState *jstate, const char *query,
 		int			off,		/* Offset from start for cur tok */
 					tok_len;	/* Length (in bytes) of that tok */
 
+		if (jstate->clocations[i].extern_param &&
+			!jstate->has_squashed_lists)
+			continue;
+
 		off = jstate->clocations[i].location;
 
 		/* Adjust recorded location if we're dealing with partial string */
diff --git a/contrib/pg_stat_statements/sql/squashing.sql b/contrib/pg_stat_statements/sql/squashing.sql
index 85aae152da8..4efd412be9b 100644
--- a/contrib/pg_stat_statements/sql/squashing.sql
+++ b/contrib/pg_stat_statements/sql/squashing.sql
@@ -32,7 +32,7 @@ SELECT WHERE 1 IN (1, int4(1), int4(2), 2);
 SELECT WHERE 1 = ANY (ARRAY[1, int4(1), int4(2), 2]);
 SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
 
--- external parameters will not be squashed
+-- external parameters will be squashed
 SELECT pg_stat_statements_reset() IS NOT NULL AS t;
 SELECT * FROM test_squash WHERE id IN ($1, $2, $3, $4, $5)  \bind 1 2 3 4 5
 ;
@@ -40,7 +40,7 @@ SELECT * FROM test_squash WHERE id::text = ANY(ARRAY[$1, $2, $3, $4, $5]) \bind
 ;
 SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
 
--- neither are prepared statements
+-- prepared statements will also be squashed
 -- the IN and ARRAY forms of this statement will have the same queryId
 SELECT pg_stat_statements_reset() IS NOT NULL AS t;
 PREPARE p1(int, int, int, int, int) AS
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index 219023b1173..fdd6ef38f08 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -61,7 +61,7 @@ static void AppendJumble(JumbleState *jstate,
 						 const unsigned char *value, Size size);
 static void FlushPendingNulls(JumbleState *jstate);
 static void RecordExpressionLocation(JumbleState *jstate,
-									 int location, int len);
+									 int location, int len, bool extern_param);
 static void _jumbleNode(JumbleState *jstate, Node *node);
 static void _jumbleElements(JumbleState *jstate, List *elements, Node *node);
 static void _jumbleA_Const(JumbleState *jstate, Node *node);
@@ -70,6 +70,7 @@ static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node);
 static void _jumbleRangeTblEntry_eref(JumbleState *jstate,
 									  RangeTblEntry *rte,
 									  Alias *expr);
+static void _jumbleParam(JumbleState *jstate, Node *node);
 
 /*
  * Given a possibly multi-statement source string, confine our attention to the
@@ -185,6 +186,7 @@ InitJumble(void)
 	jstate->clocations_count = 0;
 	jstate->highest_extern_param_id = 0;
 	jstate->pending_nulls = 0;
+	jstate->has_squashed_lists = false;
 #ifdef USE_ASSERT_CHECKING
 	jstate->total_jumble_len = 0;
 #endif
@@ -381,7 +383,7 @@ FlushPendingNulls(JumbleState *jstate)
  * element contributes nothing to the jumble hash.
  */
 static void
-RecordExpressionLocation(JumbleState *jstate, int location, int len)
+RecordExpressionLocation(JumbleState *jstate, int location, int len, bool extern_param)
 {
 	/* -1 indicates unknown or undefined location */
 	if (location >= 0)
@@ -405,10 +407,29 @@ RecordExpressionLocation(JumbleState *jstate, int location, int len)
 		 */
 		jstate->clocations[jstate->clocations_count].length = (len > -1) ? len : -1;
 		jstate->clocations[jstate->clocations_count].squashed = (len > -1) ? true : false;
+		jstate->clocations[jstate->clocations_count].extern_param = extern_param;
 		jstate->clocations_count++;
 	}
 }
 
+/*
+ * Subroutine for IsSquashableExpression to check if a node is a
+ * constant or External Parameter.
+ */
+static bool
+IsConstOrExternalParam(Node *node)
+{
+	switch (nodeTag(node))
+	{
+		case T_Const:
+			return true;
+		case T_Param:
+			return ((Param *) node)->paramkind == PARAM_EXTERN;
+		default:
+			return false;
+	}
+}
+
 /*
  * Subroutine for _jumbleElements: Verify a few simple cases where we can
  * deduce that the expression is a constant:
@@ -421,42 +442,48 @@ RecordExpressionLocation(JumbleState *jstate, int location, int len)
 static bool
 IsSquashableExpression(Node *element)
 {
+	ListCell   *temp;
+
+	/* Unwrap RelabelType and CoerceViaIO layers */
 	if (IsA(element, RelabelType))
 		element = (Node *) ((RelabelType *) element)->arg;
 
 	if (IsA(element, CoerceViaIO))
 		element = (Node *) ((CoerceViaIO *) element)->arg;
 
-	if (IsA(element, FuncExpr))
+	switch (nodeTag(element))
 	{
-		FuncExpr   *func = (FuncExpr *) element;
-		ListCell   *temp;
+		case T_FuncExpr:
+			{
+				FuncExpr   *func = (FuncExpr *) element;
 
-		if (func->funcformat != COERCE_IMPLICIT_CAST &&
-			func->funcformat != COERCE_EXPLICIT_CAST)
-			return false;
+				/*
+				 * Only implicit/explicit casts on built-in functions are
+				 * squashable
+				 */
+				if (func->funcformat != COERCE_IMPLICIT_CAST &&
+					func->funcformat != COERCE_EXPLICIT_CAST)
+					return false;
 
-		if (func->funcid > FirstGenbkiObjectId)
-			return false;
+				if (func->funcid > FirstGenbkiObjectId)
+					return false;
 
-		foreach(temp, func->args)
-		{
-			Node	   *arg = lfirst(temp);
+				/* All arguments must be constants or external parameters */
+				foreach(temp, func->args)
+				{
+					Node	   *arg = lfirst(temp);
 
-			if (!IsA(arg, Const))	/* XXX we could recurse here instead */
-				return false;
-		}
+					if (IsConstOrExternalParam(arg))
+						return true;
+				}
 
-		return true;
+				return false;
+			}
+		default:
+			return IsConstOrExternalParam(element);
 	}
-
-	if (!IsA(element, Const))
-		return false;
-
-	return true;
 }
 
-
 /*
  * Subroutine for _jumbleElements: Verify whether the provided list
  * can be squashed, meaning it contains only constant expressions.
@@ -493,7 +520,7 @@ IsSquashableExpressionList(List *elements)
 #define JUMBLE_ELEMENTS(list, node) \
 	_jumbleElements(jstate, (List *) expr->list, node)
 #define JUMBLE_LOCATION(location) \
-	RecordExpressionLocation(jstate, expr->location, -1)
+	RecordExpressionLocation(jstate, expr->location, -1, false)
 #define JUMBLE_FIELD(item) \
 do { \
 	if (sizeof(expr->item) == 8) \
@@ -544,8 +571,9 @@ _jumbleElements(JumbleState *jstate, List *elements, Node *node)
 			{
 				RecordExpressionLocation(jstate,
 										 aexpr->list_start + 1,
-										 (aexpr->list_end - aexpr->list_start) - 1);
+										 (aexpr->list_end - aexpr->list_start) - 1, false);
 				normalize_list = true;
+				jstate->has_squashed_lists = true;
 			}
 		}
 	}
@@ -597,26 +625,6 @@ _jumbleNode(JumbleState *jstate, Node *node)
 			break;
 	}
 
-	/* Special cases to handle outside the automated code */
-	switch (nodeTag(expr))
-	{
-		case T_Param:
-			{
-				Param	   *p = (Param *) node;
-
-				/*
-				 * Update the highest Param id seen, in order to start
-				 * normalization correctly.
-				 */
-				if (p->paramkind == PARAM_EXTERN &&
-					p->paramid > jstate->highest_extern_param_id)
-					jstate->highest_extern_param_id = p->paramid;
-			}
-			break;
-		default:
-			break;
-	}
-
 	/* Ensure we added something to the jumble buffer */
 	Assert(jstate->total_jumble_len > prev_jumble_len);
 }
@@ -719,3 +727,31 @@ _jumbleRangeTblEntry_eref(JumbleState *jstate,
 	 */
 	JUMBLE_STRING(aliasname);
 }
+
+/*
+ * Custom query jumble function for _jumbleParam.
+ *
+ * Only external parameter locations outside of squashable lists are
+ * handled.
+ */
+static void
+_jumbleParam(JumbleState *jstate, Node *node)
+{
+	Param	   *expr = (Param *) node;
+
+	JUMBLE_FIELD(paramkind);
+	JUMBLE_FIELD(paramid);
+	JUMBLE_FIELD(paramtype);
+
+	if (expr->paramkind == PARAM_EXTERN)
+	{
+		RecordExpressionLocation(jstate, expr->location, -1, true);
+
+		/*
+		 * Update the highest Param id seen, in order to start normalization
+		 * correctly.
+		 */
+		if (expr->paramid > jstate->highest_extern_param_id)
+			jstate->highest_extern_param_id = expr->paramid;
+	}
+}
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 773cdd880aa..99d2c019c4b 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -389,14 +389,16 @@ typedef enum ParamKind
 
 typedef struct Param
 {
+	pg_node_attr(custom_query_jumble)
+
 	Expr		xpr;
 	ParamKind	paramkind;		/* kind of parameter. See above */
 	int			paramid;		/* numeric ID for parameter */
 	Oid			paramtype;		/* pg_type OID of parameter's datatype */
 	/* typmod value, if known */
-	int32		paramtypmod pg_node_attr(query_jumble_ignore);
+	int32		paramtypmod;
 	/* OID of collation, or InvalidOid if none */
-	Oid			paramcollid pg_node_attr(query_jumble_ignore);
+	Oid			paramcollid;
 	/* token location, or -1 if unknown */
 	ParseLoc	location;
 } Param;
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index da7c7abed2e..a1a99023e42 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -29,6 +29,7 @@ typedef struct LocationLen
 	 * of squashed constants.
 	 */
 	bool		squashed;
+	bool		extern_param;
 } LocationLen;
 
 /*
@@ -62,6 +63,8 @@ typedef struct JumbleState
 	 */
 	unsigned int pending_nulls;
 
+	bool		has_squashed_lists;
+
 #ifdef USE_ASSERT_CHECKING
 	/* The total number of bytes added to the jumble buffer */
 	Size		total_jumble_len;
-- 
2.39.5 (Apple Git-154)

