From 1f28a748f6e47b625a9e902215b4ccc9fe474dda Mon Sep 17 00:00:00 2001
From: Sami Imseih <simseih@amazon.com>
Date: Mon, 26 May 2025 22:17:24 -0500
Subject: [PATCH v6 4/4] Support Squashing of External Parameters

62d712ec introduced the concept of element squashing for
quwry normalization purposes. However, it did not account for
external parameters passed to a list of elements. This adds
support to these types of values and simplifies the squashing
logic further.

Discussion: https://www.postgresql.org/message-id/flat/202505021256.4yaa24s3sytm%40alvherre.pgsql#1195a340edca50cc3b7389a2ba8b0467
---
 .../pg_stat_statements/expected/squashing.out | 36 +++++++-
 .../pg_stat_statements/pg_stat_statements.c   |  7 ++
 contrib/pg_stat_statements/sql/squashing.sql  | 12 +++
 src/backend/nodes/queryjumblefuncs.c          | 84 ++++++++++++-------
 src/include/nodes/primnodes.h                 |  6 +-
 src/include/nodes/queryjumble.h               |  3 +
 6 files changed, 112 insertions(+), 36 deletions(-)

diff --git a/contrib/pg_stat_statements/expected/squashing.out b/contrib/pg_stat_statements/expected/squashing.out
index f3f212183a2..739c8888b3c 100644
--- a/contrib/pg_stat_statements/expected/squashing.out
+++ b/contrib/pg_stat_statements/expected/squashing.out
@@ -21,10 +21,16 @@ SELECT * FROM test_squash WHERE id IN (1, 2, 3);
 ----+------
 (0 rows)
 
+SELECT * FROM test_squash WHERE id IN ($1, $2, $3) \bind 1 2 3
+;
+ id | data 
+----+------
+(0 rows)
+
 SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
                          query                         | calls 
 -------------------------------------------------------+-------
- SELECT * FROM test_squash WHERE id IN ($1 /*, ... */) |     1
+ SELECT * FROM test_squash WHERE id IN ($1 /*, ... */) |     2
  SELECT * FROM test_squash WHERE id IN ($1)            |     1
  SELECT pg_stat_statements_reset() IS NOT NULL AS t    |     1
 (3 rows)
@@ -44,10 +50,17 @@ SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
 ----+------
 (0 rows)
 
+SELECT * FROM test_squash WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
+	\bind 1 2 3 4 5 6 7 8 9 10 11
+;
+ id | data 
+----+------
+(0 rows)
+
 SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
                                  query                                  | calls 
 ------------------------------------------------------------------------+-------
- SELECT * FROM test_squash WHERE id IN ($1 /*, ... */)                  |     4
+ SELECT * FROM test_squash WHERE id IN ($1 /*, ... */)                  |     6
  SELECT * FROM test_squash WHERE id IN ($1)                             |     1
  SELECT pg_stat_statements_reset() IS NOT NULL AS t                     |     1
  SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" |     1
@@ -70,17 +83,27 @@ SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) AND data =
 ----+------
 (0 rows)
 
+-- external parameters and constants outside of a squashed list will have
+-- different node types and result in a different queryId
 SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) AND data = 2;
  id | data 
 ----+------
 (0 rows)
 
+SELECT * FROM test_squash WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) AND data = $12
+	\bind 1 2 3 4 5 6 7 8 9 10 11 2
+;
+ id | data 
+----+------
+(0 rows)
+
 SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
                                 query                                | calls 
 ---------------------------------------------------------------------+-------
  SELECT * FROM test_squash WHERE id IN ($1 /*, ... */) AND data = $2 |     3
+ SELECT * FROM test_squash WHERE id IN ($1 /*, ... */) AND data = $2 |     1
  SELECT pg_stat_statements_reset() IS NOT NULL AS t                  |     1
-(2 rows)
+(3 rows)
 
 -- built-in functions will be squashed
 SELECT pg_stat_statements_reset() IS NOT NULL AS t;
@@ -93,10 +116,15 @@ SELECT WHERE 1 IN (1, 2, int4(1), int4(2));
 --
 (1 row)
 
+SELECT WHERE 1 IN ($1, $2, int4($3::int), int4($4::int)) \bind 1 2 1 2
+;
+--
+(1 row)
+
 SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
                        query                        | calls 
 ----------------------------------------------------+-------
- SELECT WHERE $1 IN ($2 /*, ... */)                 |     1
+ SELECT WHERE $1 IN ($2 /*, ... */)                 |     2
  SELECT pg_stat_statements_reset() IS NOT NULL AS t |     1
 (2 rows)
 
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 8cadfa2ff21..69d69db2289 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -2834,6 +2834,9 @@ generate_normalized_query(JumbleState *jstate, const char *query,
 	 */
 	norm_query_buflen = query_len + jstate->clocations_count * 10;
 
+	if (jstate->has_squashed_lists)
+		jstate->highest_extern_param_id = 0;
+
 	/* Allocate result buffer */
 	norm_query = palloc(norm_query_buflen + 1);
 
@@ -2842,6 +2845,10 @@ generate_normalized_query(JumbleState *jstate, const char *query,
 		int			off,		/* Offset from start for cur tok */
 					tok_len;	/* Length (in bytes) of that tok */
 
+		if (jstate->clocations[i].extern_param &&
+			!jstate->has_squashed_lists)
+			continue;
+
 		off = jstate->clocations[i].location;
 
 		/* Adjust recorded location if we're dealing with partial string */
diff --git a/contrib/pg_stat_statements/sql/squashing.sql b/contrib/pg_stat_statements/sql/squashing.sql
index aed4e42286c..1df11e5a220 100644
--- a/contrib/pg_stat_statements/sql/squashing.sql
+++ b/contrib/pg_stat_statements/sql/squashing.sql
@@ -11,11 +11,16 @@ CREATE TABLE test_squash (id int, data int);
 SELECT pg_stat_statements_reset() IS NOT NULL AS t;
 SELECT * FROM test_squash WHERE id IN (1);
 SELECT * FROM test_squash WHERE id IN (1, 2, 3);
+SELECT * FROM test_squash WHERE id IN ($1, $2, $3) \bind 1 2 3
+;
 SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
 
 SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
 SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
 SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT * FROM test_squash WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
+	\bind 1 2 3 4 5 6 7 8 9 10 11
+;
 SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
 
 -- More conditions in the query
@@ -23,12 +28,19 @@ SELECT pg_stat_statements_reset() IS NOT NULL AS t;
 
 SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) AND data = 2;
 SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) AND data = 2;
+-- external parameters and constants outside of a squashed list will have
+-- different node types and result in a different queryId
 SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) AND data = 2;
+SELECT * FROM test_squash WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) AND data = $12
+	\bind 1 2 3 4 5 6 7 8 9 10 11 2
+;
 SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
 
 -- built-in functions will be squashed
 SELECT pg_stat_statements_reset() IS NOT NULL AS t;
 SELECT WHERE 1 IN (1, 2, int4(1), int4(2));
+SELECT WHERE 1 IN ($1, $2, int4($3::int), int4($4::int)) \bind 1 2 1 2
+;
 SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
 
 -- Multiple squashed intervals
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index 219023b1173..c13598e6757 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -61,7 +61,7 @@ static void AppendJumble(JumbleState *jstate,
 						 const unsigned char *value, Size size);
 static void FlushPendingNulls(JumbleState *jstate);
 static void RecordExpressionLocation(JumbleState *jstate,
-									 int location, int len);
+									 int location, int len, bool extern_param);
 static void _jumbleNode(JumbleState *jstate, Node *node);
 static void _jumbleElements(JumbleState *jstate, List *elements, Node *node);
 static void _jumbleA_Const(JumbleState *jstate, Node *node);
@@ -70,6 +70,7 @@ static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node);
 static void _jumbleRangeTblEntry_eref(JumbleState *jstate,
 									  RangeTblEntry *rte,
 									  Alias *expr);
+static void _jumbleParam(JumbleState *jstate, Node *node);
 
 /*
  * Given a possibly multi-statement source string, confine our attention to the
@@ -185,6 +186,7 @@ InitJumble(void)
 	jstate->clocations_count = 0;
 	jstate->highest_extern_param_id = 0;
 	jstate->pending_nulls = 0;
+	jstate->has_squashed_lists = false;
 #ifdef USE_ASSERT_CHECKING
 	jstate->total_jumble_len = 0;
 #endif
@@ -381,7 +383,7 @@ FlushPendingNulls(JumbleState *jstate)
  * element contributes nothing to the jumble hash.
  */
 static void
-RecordExpressionLocation(JumbleState *jstate, int location, int len)
+RecordExpressionLocation(JumbleState *jstate, int location, int len, bool extern_param)
 {
 	/* -1 indicates unknown or undefined location */
 	if (location >= 0)
@@ -405,6 +407,7 @@ RecordExpressionLocation(JumbleState *jstate, int location, int len)
 		 */
 		jstate->clocations[jstate->clocations_count].length = (len > -1) ? len : -1;
 		jstate->clocations[jstate->clocations_count].squashed = (len > -1) ? true : false;
+		jstate->clocations[jstate->clocations_count].extern_param = extern_param;
 		jstate->clocations_count++;
 	}
 }
@@ -443,17 +446,39 @@ IsSquashableExpression(Node *element)
 		{
 			Node	   *arg = lfirst(temp);
 
-			if (!IsA(arg, Const))	/* XXX we could recurse here instead */
-				return false;
+			switch (nodeTag(arg))
+			{
+				case T_Const:
+					return true;
+				case T_Param:
+				{
+					Param	   *param = (Param *) element;
+
+					return param->paramkind == PARAM_EXTERN;
+				}
+				default:
+					break;
+			}
 		}
 
-		return true;
+		return false;
 	}
 
-	if (!IsA(element, Const))
-		return false;
+	switch (nodeTag(element))
+	{
+		case T_Const:
+			return true;
+		case T_Param:
+		{
+			Param	   *param = (Param *) element;
 
-	return true;
+			return param->paramkind == PARAM_EXTERN;
+		}
+		default:
+			break;
+	}
+
+	return false;
 }
 
 
@@ -493,7 +518,7 @@ IsSquashableExpressionList(List *elements)
 #define JUMBLE_ELEMENTS(list, node) \
 	_jumbleElements(jstate, (List *) expr->list, node)
 #define JUMBLE_LOCATION(location) \
-	RecordExpressionLocation(jstate, expr->location, -1)
+	RecordExpressionLocation(jstate, expr->location, -1, false)
 #define JUMBLE_FIELD(item) \
 do { \
 	if (sizeof(expr->item) == 8) \
@@ -544,8 +569,9 @@ _jumbleElements(JumbleState *jstate, List *elements, Node *node)
 			{
 				RecordExpressionLocation(jstate,
 										 aexpr->list_start + 1,
-										 (aexpr->list_end - aexpr->list_start) - 1);
+										 (aexpr->list_end - aexpr->list_start) - 1, false);
 				normalize_list = true;
+				jstate->has_squashed_lists = true;
 			}
 		}
 	}
@@ -597,26 +623,6 @@ _jumbleNode(JumbleState *jstate, Node *node)
 			break;
 	}
 
-	/* Special cases to handle outside the automated code */
-	switch (nodeTag(expr))
-	{
-		case T_Param:
-			{
-				Param	   *p = (Param *) node;
-
-				/*
-				 * Update the highest Param id seen, in order to start
-				 * normalization correctly.
-				 */
-				if (p->paramkind == PARAM_EXTERN &&
-					p->paramid > jstate->highest_extern_param_id)
-					jstate->highest_extern_param_id = p->paramid;
-			}
-			break;
-		default:
-			break;
-	}
-
 	/* Ensure we added something to the jumble buffer */
 	Assert(jstate->total_jumble_len > prev_jumble_len);
 }
@@ -719,3 +725,21 @@ _jumbleRangeTblEntry_eref(JumbleState *jstate,
 	 */
 	JUMBLE_STRING(aliasname);
 }
+
+static void
+_jumbleParam(JumbleState *jstate, Node *node)
+{
+	Param *expr = (Param *) node;
+
+	JUMBLE_FIELD(paramkind);
+	JUMBLE_FIELD(paramid);
+	JUMBLE_FIELD(paramtype);
+
+	if (expr->paramkind == PARAM_EXTERN)
+	{
+		RecordExpressionLocation(jstate, expr->location, -1, true);
+
+		if (expr->paramid > jstate->highest_extern_param_id)
+			jstate->highest_extern_param_id = expr->paramid;
+	}
+}
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 773cdd880aa..99d2c019c4b 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -389,14 +389,16 @@ typedef enum ParamKind
 
 typedef struct Param
 {
+	pg_node_attr(custom_query_jumble)
+
 	Expr		xpr;
 	ParamKind	paramkind;		/* kind of parameter. See above */
 	int			paramid;		/* numeric ID for parameter */
 	Oid			paramtype;		/* pg_type OID of parameter's datatype */
 	/* typmod value, if known */
-	int32		paramtypmod pg_node_attr(query_jumble_ignore);
+	int32		paramtypmod;
 	/* OID of collation, or InvalidOid if none */
-	Oid			paramcollid pg_node_attr(query_jumble_ignore);
+	Oid			paramcollid;
 	/* token location, or -1 if unknown */
 	ParseLoc	location;
 } Param;
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index da7c7abed2e..a1a99023e42 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -29,6 +29,7 @@ typedef struct LocationLen
 	 * of squashed constants.
 	 */
 	bool		squashed;
+	bool		extern_param;
 } LocationLen;
 
 /*
@@ -62,6 +63,8 @@ typedef struct JumbleState
 	 */
 	unsigned int pending_nulls;
 
+	bool		has_squashed_lists;
+
 #ifdef USE_ASSERT_CHECKING
 	/* The total number of bytes added to the jumble buffer */
 	Size		total_jumble_len;
-- 
2.39.5 (Apple Git-154)

