From 8eddb0684f57c2b3e9cd818766e4af384e55d668 Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-46-230.ec2.internal>
Date: Fri, 24 Oct 2025 19:12:01 +0000
Subject: [PATCH v3 1/2] pg_stat_statements: Fix duplicate constant locations
 caused by composite row expansion

Fix duplicate constant locations caused by composite row expansion

Commit 0f65f3eec introduced squashing of constant lists, but failed to
handle row expansion of composite values correctly. This could result
in the same location being recorded multiple times, leading to
assertion failures in pg_stat_statements during
generate_normalized_query(), when attempting to normalize a duplicated
constant that does not actually exist in the query text.

This patch updates fill_in_constant_lengths() to check for squashed
constants only after skipping duplicates, and correctly updates
last_loc. Each location is now processed once while squashable
constants are still ignored.

An oversight in 0f65f3eec also meant duplicate locations were not
correctly set to -1. Prior to 0f65f3eec, RecordConstLocation always
set the length to -1 and delegated fill_in_constant_lengths() to
populate the lengths. Commit 0f65f3eec changed thi

Discussion: https://www.postgresql.org/message-id/2b91e358-0d99-43f7-be44-d2d4dbce37b3%40garret.ru
---
 .../pg_stat_statements/expected/squashing.out | 80 +++++++++++++++++++
 .../pg_stat_statements/pg_stat_statements.c   | 18 +++--
 contrib/pg_stat_statements/sql/squashing.sql  | 26 ++++++
 3 files changed, 117 insertions(+), 7 deletions(-)

diff --git a/contrib/pg_stat_statements/expected/squashing.out b/contrib/pg_stat_statements/expected/squashing.out
index f952f47ef7b..d5bb67c7222 100644
--- a/contrib/pg_stat_statements/expected/squashing.out
+++ b/contrib/pg_stat_statements/expected/squashing.out
@@ -809,6 +809,84 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
  select where $1 IN ($2 /*, ... */)                 |     2
 (2 rows)
 
+-- composite function with row expansion
+create table test_composite(x integer);
+CREATE FUNCTION composite_f(a integer[], out x integer, out y integer) returns
+record as $$            begin
+        x = a[1];
+        y = a[2];
+    end;
+$$ language plpgsql;
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+SELECT ((composite_f(array[1, 2]))).* FROM test_composite;
+ x | y 
+---+---
+(0 rows)
+
+SELECT ((composite_f(array[1, 2, 3]))).* FROM test_composite;
+ x | y 
+---+---
+(0 rows)
+
+SELECT ((composite_f(array[1, 2, 3]))).*, 1, 2, 3, ((composite_f(array[1, 2, 3]))).*, 1, 2
+FROM test_composite
+WHERE x IN (1, 2, 3);
+ x | y | ?column? | ?column? | ?column? | x | y | ?column? | ?column? 
+---+---+----------+----------+----------+---+---+----------+----------
+(0 rows)
+
+SELECT ((composite_f(array[1, $1, 3]))).*, 1 FROM test_composite \bind 1
+;
+ x | y | ?column? 
+---+---+----------
+(0 rows)
+
+-- ROW() expression with row expansion
+SELECT (ROW(ARRAY[1,2])).*;
+  f1   
+-------
+ {1,2}
+(1 row)
+
+SELECT (ROW(ARRAY[1, 2], ARRAY[1, 2, 3])).*;
+  f1   |   f2    
+-------+---------
+ {1,2} | {1,2,3}
+(1 row)
+
+SELECT 1, 2, (ROW(ARRAY[1, 2], ARRAY[1, 2, 3])).*, 3, 4;
+ ?column? | ?column? |  f1   |   f2    | ?column? | ?column? 
+----------+----------+-------+---------+----------+----------
+        1 |        2 | {1,2} | {1,2,3} |        3 |        4
+(1 row)
+
+SELECT (ROW(ARRAY[1, 2], ARRAY[1, $1, 3])).*, 1 \bind 1
+;
+  f1   |   f2    | ?column? 
+-------+---------+----------
+ {1,2} | {1,1,3} |        1
+(1 row)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                                    query                                                    | calls 
+-------------------------------------------------------------------------------------------------------------+-------
+ SELECT $1, $2, (ROW(ARRAY[$3 /*, ... */], ARRAY[$4 /*, ... */])).*, $5, $6                                  |     1
+ SELECT ((composite_f(array[$1 /*, ... */]))).* FROM test_composite                                          |     2
+ SELECT ((composite_f(array[$1 /*, ... */]))).*, $2 FROM test_composite                                      |     1
+ SELECT ((composite_f(array[$1 /*, ... */]))).*, $2, $3, $4, ((composite_f(array[$5 /*, ... */]))).*, $6, $7+|     1
+ FROM test_composite                                                                                        +| 
+ WHERE x IN ($8 /*, ... */)                                                                                  | 
+ SELECT (ROW(ARRAY[$1 /*, ... */])).*                                                                        |     1
+ SELECT (ROW(ARRAY[$1 /*, ... */], ARRAY[$2 /*, ... */])).*                                                  |     1
+ SELECT (ROW(ARRAY[$1 /*, ... */], ARRAY[$2 /*, ... */])).*, $3                                              |     1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t                                                          |     1
+(8 rows)
+
 --
 -- cleanup
 --
@@ -818,3 +896,5 @@ DROP TABLE test_squash_numeric;
 DROP TABLE test_squash_bigint;
 DROP TABLE test_squash_cast CASCADE;
 DROP TABLE test_squash_jsonb;
+DROP TABLE test_composite;
+DROP FUNCTION composite_f;
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index f2187167c5c..6c9e49b471b 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -2955,8 +2955,7 @@ generate_normalized_query(JumbleState *jstate, const char *query,
  * a problem.
  *
  * Duplicate constant pointers are possible, and will have their lengths
- * marked as '-1', so that they are later ignored.  (Actually, we assume the
- * lengths were initialized as -1 to start with, and don't change them here.)
+ * marked as '-1', so that they are later ignored.
  *
  * If query_loc > 0, then "query" has been advanced by that much compared to
  * the original string start, so we need to translate the provided locations
@@ -3002,17 +3001,24 @@ fill_in_constant_lengths(JumbleState *jstate, const char *query,
 	{
 		int			loc = locs[i].location;
 		int			tok;
+		bool		squashed = locs[i].squashed;
 
 		/* Adjust recorded location if we're dealing with partial string */
 		loc -= query_loc;
 
 		Assert(loc >= 0);
 
-		if (locs[i].squashed)
-			continue;			/* squashable list, ignore */
-
 		if (loc <= last_loc)
+		{
+			locs[i].length = -1;
 			continue;			/* Duplicate constant, ignore */
+		}
+
+		/* We have the next valid location, save the current location */
+		last_loc = loc;
+
+		if (squashed)
+			continue;			/* squashable list, ignore */
 
 		/* Lex tokens until we find the desired constant */
 		for (;;)
@@ -3060,8 +3066,6 @@ fill_in_constant_lengths(JumbleState *jstate, const char *query,
 		/* If we hit end-of-string, give up, leaving remaining lengths -1 */
 		if (tok == 0)
 			break;
-
-		last_loc = loc;
 	}
 
 	scanner_finish(yyscanner);
diff --git a/contrib/pg_stat_statements/sql/squashing.sql b/contrib/pg_stat_statements/sql/squashing.sql
index 53138d125a9..03b0515f872 100644
--- a/contrib/pg_stat_statements/sql/squashing.sql
+++ b/contrib/pg_stat_statements/sql/squashing.sql
@@ -291,6 +291,30 @@ select where '1' IN ('1'::int::text, '2'::int::text);
 select where '1' = ANY (array['1'::int::text, '2'::int::text]);
 SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
 
+-- composite function with row expansion
+create table test_composite(x integer);
+CREATE FUNCTION composite_f(a integer[], out x integer, out y integer) returns
+record as $$            begin
+        x = a[1];
+        y = a[2];
+    end;
+$$ language plpgsql;
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT ((composite_f(array[1, 2]))).* FROM test_composite;
+SELECT ((composite_f(array[1, 2, 3]))).* FROM test_composite;
+SELECT ((composite_f(array[1, 2, 3]))).*, 1, 2, 3, ((composite_f(array[1, 2, 3]))).*, 1, 2
+FROM test_composite
+WHERE x IN (1, 2, 3);
+SELECT ((composite_f(array[1, $1, 3]))).*, 1 FROM test_composite \bind 1
+;
+-- ROW() expression with row expansion
+SELECT (ROW(ARRAY[1,2])).*;
+SELECT (ROW(ARRAY[1, 2], ARRAY[1, 2, 3])).*;
+SELECT 1, 2, (ROW(ARRAY[1, 2], ARRAY[1, 2, 3])).*, 3, 4;
+SELECT (ROW(ARRAY[1, 2], ARRAY[1, $1, 3])).*, 1 \bind 1
+;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
 --
 -- cleanup
 --
@@ -300,3 +324,5 @@ DROP TABLE test_squash_numeric;
 DROP TABLE test_squash_bigint;
 DROP TABLE test_squash_cast CASCADE;
 DROP TABLE test_squash_jsonb;
+DROP TABLE test_composite;
+DROP FUNCTION composite_f;
-- 
2.43.0

