From 449ad2f747b90fdbe14b15274089ccae50dd8e80 Mon Sep 17 00:00:00 2001
From: David Rowley <dgrowley@gmail.com>
Date: Tue, 25 Mar 2025 18:49:15 +1300
Subject: [PATCH v8 1/2] Fix query jumbling to account for NULL nodes

Previously NULL nodes were ignored.  This could cause issues where the
computed query ID could match for queries where fields that are next to
each other in their Node struct where one field was NULL and the other
non-NULL.  For example, the Query struct has distinctClause and sortClause
next to each other.  If someone wrote;

SELECT DISTINCT c1 FROM t;

and then;

SELECT c1 FROM t ORDER BY c1;

these would produce the same query ID since in the first query we ignore
the NULL sortClause and append the jumble bytes for the distictClause.
In the latter query, since we do nothing for the NULL distinctClause
then jumble the non-NULL sortClause, and since the node representation
stored is the same in both cases, the query IDs are identical.

Here we fix this by always jumbling NULL nodes.  This produces different
jumble bytes in the above queries since the order that we jumble the
NULL changes between each of the above two queries.

Author: Bykov Ivan <i.bykov@modernsys.ru>
Author: Michael Paquier <michael@paquier.xyz>
Author: David Rowley <dgrowleyml@gmail.com>
Discussion: https://postgr.es/m/aafce7966e234372b2ba876c0193f1e9%40localhost.localdomain
---
 src/backend/nodes/queryjumblefuncs.c | 51 ++++++++++++++++++++++++++++
 src/include/nodes/queryjumble.h      | 10 ++++++
 2 files changed, 61 insertions(+)

diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index f8b0f91704b..91f8fc49ee8 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -58,6 +58,7 @@ bool		query_id_squash_values = false;
  */
 bool		query_id_enabled = false;
 
+static void FlushPendingNulls(JumbleState *jstate);
 static void AppendJumble(JumbleState *jstate,
 						 const unsigned char *item, Size size);
 static void RecordConstLocation(JumbleState *jstate,
@@ -134,9 +135,16 @@ JumbleQuery(Query *query)
 		palloc(jstate->clocations_buf_size * sizeof(LocationLen));
 	jstate->clocations_count = 0;
 	jstate->highest_extern_param_id = 0;
+	jstate->pending_nulls = 0;
+	jstate->total_jumble_len = 0;
 
 	/* Compute query ID and mark the Query node with it */
 	_jumbleNode(jstate, (Node *) query);
+
+	/* Flush any pending NULLs before doing the final hash */
+	if (jstate->pending_nulls > 0)
+		FlushPendingNulls(jstate);
+
 	query->queryId = DatumGetUInt64(hash_any_extended(jstate->jumble,
 													  jstate->jumble_len,
 													  0));
@@ -202,10 +210,42 @@ AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
 		jumble_len += part_size;
 		item += part_size;
 		size -= part_size;
+
+#ifdef USE_ASSERT_CHECKING
+		jstate->total_jumble_len += part_size;
+#endif
 	}
+
 	jstate->jumble_len = jumble_len;
 }
 
+/*
+ * AppendJumbleNull
+ *		For jumbling NULL pointers
+ */
+static pg_attribute_always_inline void
+AppendJumbleNull(JumbleState *jstate)
+{
+	jstate->pending_nulls++;
+}
+
+/*
+ * FlushPendingNulls
+ *		Incorporate the pending_null value into the jumble buffer.
+ *
+ * Note: Callers must ensure that there's at least 1 pending NULL.
+ */
+static pg_attribute_always_inline void
+FlushPendingNulls(JumbleState *jstate)
+{
+	Assert(jstate->pending_nulls > 0);
+
+	AppendJumble(jstate,
+				 (const unsigned char *) &jstate->pending_nulls, 4);
+	jstate->pending_nulls = 0;
+}
+
+
 /*
  * Record location of constant within query string of query tree that is
  * currently being walked.
@@ -332,6 +372,8 @@ IsSquashableConstList(List *elements, Node **firstExpr, Node **lastExpr)
 do { \
 	if (expr->str) \
 		AppendJumble(jstate, (const unsigned char *) (expr->str), strlen(expr->str) + 1); \
+	else \
+		AppendJumbleNull(jstate); \
 } while(0)
 /* Function name used for the node field attribute custom_query_jumble. */
 #define JUMBLE_CUSTOM(nodetype, item) \
@@ -382,9 +424,15 @@ static void
 _jumbleNode(JumbleState *jstate, Node *node)
 {
 	Node	   *expr = node;
+#ifdef USE_ASSERT_CHECKING
+	Size		prev_jumble_len = jstate->total_jumble_len;
+#endif
 
 	if (expr == NULL)
+	{
+		AppendJumbleNull(jstate);
 		return;
+	}
 
 	/* Guard against stack overflow due to overly complex expressions */
 	check_stack_depth();
@@ -432,6 +480,9 @@ _jumbleNode(JumbleState *jstate, Node *node)
 		default:
 			break;
 	}
+
+	/* Ensure we added something to the jumble buffer */
+	Assert(jstate->total_jumble_len > prev_jumble_len);
 }
 
 static void
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index 905f66bc0bd..9414fb151ee 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -54,6 +54,16 @@ typedef struct JumbleState
 
 	/* highest Param id we've seen, in order to start normalization correctly */
 	int			highest_extern_param_id;
+
+	/*
+	 * Count of the number of NULL nodes seen since last appending a value.
+	 * These are flushed out to the jumble buffer before subsequent appends
+	 * and before performing the final jumble hash.
+	 */
+	unsigned int pending_nulls;
+
+	/* The total number of bytes added to the jumble buffer */
+	Size		total_jumble_len;
 } JumbleState;
 
 /* Values for the compute_query_id GUC */
-- 
2.43.0

