From 098d136c72b948ab7517192c46b2e9d569c0f15c Mon Sep 17 00:00:00 2001
From: Bykov Ivan <i.bykov@modernsys.ru>
Date: Tue, 11 Mar 2025 14:24:24 +0500
Subject: [PATCH] Query ID Calculation Fix for DISTINCT / ORDER BY and LIMIT / 
 OFFSET

In some cases, we may encounter different query trees with the same IDs.
For two structurally similar query subnodes, the query trees may
look like this:

QueryA->subNodeOne = Value X;
QueryA->subNodeTwo = NULL;
QueryB->subNodeOne = NULL;
QueryB->subNodeTwo = Value X;

When the query jumble walker calculates the query ID, it traverses
the Query members from top to bottom and generates the same IDs
for these two queries because it does not change the hash value
when visiting an empty node (= NULL).

There are two pairs of subnodes that can trigger this error:
- distinctClause and sortClause (both contain a list of SortGroupClauses);
- limitOffset and limitCount (both contain an int8 expression).

To fix this problem, for every empty node in the Query tree, a '0' character
is added to the jumble buffer, which is used for ID calculation.
---
 .../pg_stat_statements/expected/select.out    | 49 +++++++++++++++++++
 contrib/pg_stat_statements/sql/select.sql     | 18 +++++++
 src/backend/nodes/queryjumblefuncs.c          |  5 ++
 3 files changed, 72 insertions(+)

diff --git a/contrib/pg_stat_statements/expected/select.out b/contrib/pg_stat_statements/expected/select.out
index dd6c756f67d..f1b71f56b7b 100644
--- a/contrib/pg_stat_statements/expected/select.out
+++ b/contrib/pg_stat_statements/expected/select.out
@@ -406,9 +406,58 @@ SELECT COUNT(*) FROM pg_stat_statements WHERE query LIKE '%SELECT GROUPING%';
      2
 (1 row)
 
+CREATE TABLE pgss_c AS
+  SELECT id FROM generate_series(1,2) AS id;
 SELECT pg_stat_statements_reset() IS NOT NULL AS t;
  t 
 ---
  t
 (1 row)
 
+-- These two queries trigger a Query ID collision in PostgreSQL versions prior to 18
+SELECT DISTINCT id FROM pgss_c;
+ id 
+----
+  2
+  1
+(2 rows)
+
+SELECT id FROM pgss_c ORDER BY id;
+ id 
+----
+  1
+  2
+(2 rows)
+
+-- These two queries trigger a Query ID collision in PostgreSQL versions prior to 18
+SELECT id FROM pgss_c LIMIT 1;
+ id 
+----
+  1
+(1 row)
+
+SELECT id FROM pgss_c OFFSET 1;
+ id 
+----
+  2
+(1 row)
+
+-- Expected four rows (Query ID collision has been fixed).
+SELECT query FROM pg_stat_statements
+  WHERE query LIKE '%pgss_c%'
+  ORDER BY query COLLATE "C";
+               query               
+-----------------------------------
+ SELECT DISTINCT id FROM pgss_c
+ SELECT id FROM pgss_c LIMIT $1
+ SELECT id FROM pgss_c OFFSET $1
+ SELECT id FROM pgss_c ORDER BY id
+(4 rows)
+
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+DROP TABLE pgss_c;
diff --git a/contrib/pg_stat_statements/sql/select.sql b/contrib/pg_stat_statements/sql/select.sql
index eb45cb81ad2..bbc37fa0005 100644
--- a/contrib/pg_stat_statements/sql/select.sql
+++ b/contrib/pg_stat_statements/sql/select.sql
@@ -146,4 +146,22 @@ SELECT (
 ) FROM (VALUES(6,7)) v3(e,f) GROUP BY ROLLUP(e,f);
 
 SELECT COUNT(*) FROM pg_stat_statements WHERE query LIKE '%SELECT GROUPING%';
+
+CREATE TABLE pgss_c AS
+  SELECT id FROM generate_series(1,2) AS id;
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+
+-- These two queries trigger a Query ID collision in PostgreSQL versions prior to 18
+SELECT DISTINCT id FROM pgss_c;
+SELECT id FROM pgss_c ORDER BY id;
+-- These two queries trigger a Query ID collision in PostgreSQL versions prior to 18
+SELECT id FROM pgss_c LIMIT 1;
+SELECT id FROM pgss_c OFFSET 1;
+
+-- Expected four rows (Query ID collision has been fixed).
+SELECT query FROM pg_stat_statements
+  WHERE query LIKE '%pgss_c%'
+  ORDER BY query COLLATE "C";
 SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+
+DROP TABLE pgss_c;
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index 129fb447099..225a49ebc3c 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -238,7 +238,12 @@ _jumbleNode(JumbleState *jstate, Node *node)
 	Node	   *expr = node;
 
 	if (expr == NULL)
+	{
+		const unsigned char null_symbol = '\0';
+
+		AppendJumble(jstate, &null_symbol, sizeof(unsigned char));
 		return;
+	}
 
 	/* Guard against stack overflow due to overly complex expressions */
 	check_stack_depth();
-- 
2.39.5

