From 6f432e1520e08373ed21c86e8e7dce477acf23f0 Mon Sep 17 00:00:00 2001
From: Anthonin Bonnefoy <anthonin.bonnefoy@datadoghq.com>
Date: Tue, 23 Jul 2024 08:26:49 +0200
Subject: Set query_id for queries contained in utility statement

Some utility statements like Explain, CreateTableAs and DeclareCursor
contain a query which will be planned and executed. Previously, during
post parse, only the top utility statement was jumbled, leaving the
contained query without a query_id set. ExplainQuery did the jumble
for the contained query but CreateTableAs and DeclareCursor were left
with unjumbled queries.

This led to extensions relying on query_id like pg_stat_statements to
not be able to track those nested queries as the query_id was 0.

This patch fixes this by recursively jumbling queries contained by those
utility statements during post_parse, removing the need for ExplainQuery
to do it.
---
 .../expected/level_tracking.out               | 212 ++++++++++++++++++
 .../pg_stat_statements/sql/level_tracking.sql |  71 ++++++
 src/backend/commands/explain.c                |   9 -
 src/backend/parser/analyze.c                  |  57 +++--
 4 files changed, 322 insertions(+), 27 deletions(-)

diff --git a/contrib/pg_stat_statements/expected/level_tracking.out b/contrib/pg_stat_statements/expected/level_tracking.out
index d8dd8a2deea..897749ecd01 100644
--- a/contrib/pg_stat_statements/expected/level_tracking.out
+++ b/contrib/pg_stat_statements/expected/level_tracking.out
@@ -112,6 +112,218 @@ SELECT toplevel, calls, query FROM pg_stat_statements
  t        |     1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
 (2 rows)
 
+-- Create Table As, all-level tracking.
+SET pg_stat_statements.track = 'all';
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+CREATE TEMPORARY TABLE pgss_test AS SELECT 1;
+SELECT toplevel, calls, query FROM pg_stat_statements
+  ORDER BY query COLLATE "C", toplevel;
+ toplevel | calls |                       query                        
+----------+-------+----------------------------------------------------
+ t        |     1 | CREATE TEMPORARY TABLE pgss_test AS SELECT $1
+ f        |     1 | CREATE TEMPORARY TABLE pgss_test AS SELECT 1;
+ t        |     1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
+(3 rows)
+
+-- Create Table As using prepared stmt, all-level tracking.
+PREPARE test_prepare_pgss AS select generate_series(1, 10);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+CREATE TEMPORARY TABLE pgss_test2 AS EXECUTE test_prepare_pgss;
+SELECT toplevel, calls, query FROM pg_stat_statements
+  ORDER BY query COLLATE "C", toplevel;
+ toplevel | calls |                             query                              
+----------+-------+----------------------------------------------------------------
+ t        |     1 | CREATE TEMPORARY TABLE pgss_test2 AS EXECUTE test_prepare_pgss
+ f        |     1 | PREPARE test_prepare_pgss AS select generate_series(1, 10)
+ t        |     1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
+(3 rows)
+
+-- Declare cursor, all-level tracking.
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+BEGIN;
+DECLARE FOOCUR CURSOR FOR SELECT * from stats_track_tab;
+FETCH FORWARD 1 FROM foocur;
+ x 
+---
+(0 rows)
+
+CLOSE foocur;
+COMMIT;
+SELECT toplevel, calls, query FROM pg_stat_statements
+  ORDER BY query COLLATE "C", toplevel;
+ toplevel | calls |                          query                           
+----------+-------+----------------------------------------------------------
+ t        |     1 | BEGIN
+ t        |     1 | CLOSE foocur
+ t        |     1 | COMMIT
+ t        |     1 | DECLARE FOOCUR CURSOR FOR SELECT * from stats_track_tab
+ f        |     1 | DECLARE FOOCUR CURSOR FOR SELECT * from stats_track_tab;
+ t        |     1 | FETCH FORWARD 1 FROM foocur
+ t        |     1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
+(7 rows)
+
+-- Explain analyze, all-level tracking.
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT 100;
+           QUERY PLAN           
+--------------------------------
+ Result (actual rows=1 loops=1)
+(1 row)
+
+SELECT toplevel, calls, query FROM pg_stat_statements
+  ORDER BY query COLLATE "C", toplevel;
+ toplevel | calls |                               query                               
+----------+-------+-------------------------------------------------------------------
+ t        |     1 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT $1
+ f        |     1 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT 100;
+ t        |     1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
+(3 rows)
+
+-- Explain analyze with declare cursor, all-level tracking.
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) DECLARE foocur CURSOR FOR SELECT * FROM stats_track_tab;
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Seq Scan on stats_track_tab (actual rows=0 loops=1)
+(1 row)
+
+SELECT toplevel, calls, query FROM pg_stat_statements
+  ORDER BY query COLLATE "C", toplevel;
+ toplevel | calls |                                                     query                                                      
+----------+-------+----------------------------------------------------------------------------------------------------------------
+ t        |     1 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) DECLARE foocur CURSOR FOR SELECT * FROM stats_track_tab
+ f        |     1 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) DECLARE foocur CURSOR FOR SELECT * FROM stats_track_tab;
+ t        |     1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
+(3 rows)
+
+-- Create Table As, top-level tracking.
+SET pg_stat_statements.track = 'top';
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+CREATE TABLE pgss_test_2 AS SELECT 1;
+SELECT toplevel, calls, query FROM pg_stat_statements
+  ORDER BY query COLLATE "C", toplevel;
+ toplevel | calls |                       query                        
+----------+-------+----------------------------------------------------
+ t        |     1 | CREATE TABLE pgss_test_2 AS SELECT $1
+ t        |     1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
+(2 rows)
+
+-- Create Table As using prepared stmt, top tracking.
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+CREATE TEMPORARY TABLE pgss_test3 AS EXECUTE test_prepare_pgss;
+SELECT toplevel, calls, query FROM pg_stat_statements
+  ORDER BY query COLLATE "C", toplevel;
+ toplevel | calls |                             query                              
+----------+-------+----------------------------------------------------------------
+ t        |     1 | CREATE TEMPORARY TABLE pgss_test3 AS EXECUTE test_prepare_pgss
+ t        |     1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
+(2 rows)
+
+-- Declare cursor, top tracking.
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+BEGIN;
+DECLARE FOOCUR CURSOR FOR SELECT * from stats_track_tab;
+FETCH FORWARD 1 FROM foocur;
+ x 
+---
+(0 rows)
+
+CLOSE foocur;
+COMMIT;
+SELECT toplevel, calls, query FROM pg_stat_statements
+  ORDER BY query COLLATE "C", toplevel;
+ toplevel | calls |                          query                          
+----------+-------+---------------------------------------------------------
+ t        |     1 | BEGIN
+ t        |     1 | CLOSE foocur
+ t        |     1 | COMMIT
+ t        |     1 | DECLARE FOOCUR CURSOR FOR SELECT * from stats_track_tab
+ t        |     1 | FETCH FORWARD 1 FROM foocur
+ t        |     1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
+(6 rows)
+
+-- Explain analyze, top tracking.
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT 100;
+           QUERY PLAN           
+--------------------------------
+ Result (actual rows=1 loops=1)
+(1 row)
+
+SELECT toplevel, calls, query FROM pg_stat_statements
+  ORDER BY query COLLATE "C", toplevel;
+ toplevel | calls |                              query                              
+----------+-------+-----------------------------------------------------------------
+ t        |     1 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT $1
+ t        |     1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
+(2 rows)
+
+-- Explain analyze with declare cursor, top tracking.
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) DECLARE foocur CURSOR FOR SELECT * FROM stats_track_tab;
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Seq Scan on stats_track_tab (actual rows=0 loops=1)
+(1 row)
+
+SELECT toplevel, calls, query FROM pg_stat_statements
+  ORDER BY query COLLATE "C", toplevel;
+ toplevel | calls |                                                     query                                                     
+----------+-------+---------------------------------------------------------------------------------------------------------------
+ t        |     1 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) DECLARE foocur CURSOR FOR SELECT * FROM stats_track_tab
+ t        |     1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
+(2 rows)
+
 -- DO block - top-level tracking without utility.
 SET pg_stat_statements.track = 'top';
 SET pg_stat_statements.track_utility = FALSE;
diff --git a/contrib/pg_stat_statements/sql/level_tracking.sql b/contrib/pg_stat_statements/sql/level_tracking.sql
index 65a17147a5a..385785aabbb 100644
--- a/contrib/pg_stat_statements/sql/level_tracking.sql
+++ b/contrib/pg_stat_statements/sql/level_tracking.sql
@@ -55,6 +55,77 @@ CALL proc_with_utility_stmt();
 SELECT toplevel, calls, query FROM pg_stat_statements
   ORDER BY query COLLATE "C", toplevel;
 
+-- Create Table As, all-level tracking.
+SET pg_stat_statements.track = 'all';
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+CREATE TEMPORARY TABLE pgss_test AS SELECT 1;
+SELECT toplevel, calls, query FROM pg_stat_statements
+  ORDER BY query COLLATE "C", toplevel;
+
+-- Create Table As using prepared stmt, all-level tracking.
+PREPARE test_prepare_pgss AS select generate_series(1, 10);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+CREATE TEMPORARY TABLE pgss_test2 AS EXECUTE test_prepare_pgss;
+SELECT toplevel, calls, query FROM pg_stat_statements
+  ORDER BY query COLLATE "C", toplevel;
+
+-- Declare cursor, all-level tracking.
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+BEGIN;
+DECLARE FOOCUR CURSOR FOR SELECT * from stats_track_tab;
+FETCH FORWARD 1 FROM foocur;
+CLOSE foocur;
+COMMIT;
+SELECT toplevel, calls, query FROM pg_stat_statements
+  ORDER BY query COLLATE "C", toplevel;
+
+-- Explain analyze, all-level tracking.
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT 100;
+SELECT toplevel, calls, query FROM pg_stat_statements
+  ORDER BY query COLLATE "C", toplevel;
+
+-- Explain analyze with declare cursor, all-level tracking.
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) DECLARE foocur CURSOR FOR SELECT * FROM stats_track_tab;
+SELECT toplevel, calls, query FROM pg_stat_statements
+  ORDER BY query COLLATE "C", toplevel;
+
+-- Create Table As, top-level tracking.
+SET pg_stat_statements.track = 'top';
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+CREATE TABLE pgss_test_2 AS SELECT 1;
+SELECT toplevel, calls, query FROM pg_stat_statements
+  ORDER BY query COLLATE "C", toplevel;
+
+-- Create Table As using prepared stmt, top tracking.
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+CREATE TEMPORARY TABLE pgss_test3 AS EXECUTE test_prepare_pgss;
+SELECT toplevel, calls, query FROM pg_stat_statements
+  ORDER BY query COLLATE "C", toplevel;
+
+-- Declare cursor, top tracking.
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+BEGIN;
+DECLARE FOOCUR CURSOR FOR SELECT * from stats_track_tab;
+FETCH FORWARD 1 FROM foocur;
+CLOSE foocur;
+COMMIT;
+SELECT toplevel, calls, query FROM pg_stat_statements
+  ORDER BY query COLLATE "C", toplevel;
+
+-- Explain analyze, top tracking.
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT 100;
+SELECT toplevel, calls, query FROM pg_stat_statements
+  ORDER BY query COLLATE "C", toplevel;
+
+-- Explain analyze with declare cursor, top tracking.
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) DECLARE foocur CURSOR FOR SELECT * FROM stats_track_tab;
+SELECT toplevel, calls, query FROM pg_stat_statements
+  ORDER BY query COLLATE "C", toplevel;
+
 -- DO block - top-level tracking without utility.
 SET pg_stat_statements.track = 'top';
 SET pg_stat_statements.track_utility = FALSE;
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 5771aabf40a..5457763e71c 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -186,8 +186,6 @@ ExplainQuery(ParseState *pstate, ExplainStmt *stmt,
 {
 	ExplainState *es = NewExplainState();
 	TupOutputState *tstate;
-	JumbleState *jstate = NULL;
-	Query	   *query;
 	List	   *rewritten;
 	ListCell   *lc;
 	bool		timing_set = false;
@@ -306,13 +304,6 @@ ExplainQuery(ParseState *pstate, ExplainStmt *stmt,
 	/* if the summary was not set explicitly, set default value */
 	es->summary = (summary_set) ? es->summary : es->analyze;
 
-	query = castNode(Query, stmt->query);
-	if (IsQueryIdEnabled())
-		jstate = JumbleQuery(query);
-
-	if (post_parse_analyze_hook)
-		(*post_parse_analyze_hook) (pstate, query, jstate);
-
 	/*
 	 * Parse analysis was done already, but we still have to run the rule
 	 * rewriter.  We do not do AcquireRewriteLocks: we assume the query either
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index e901203424d..b6e2e7012ad 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -90,6 +90,42 @@ static bool test_raw_expression_coverage(Node *node, void *context);
 #endif
 
 
+/*
+ * post_parse_jumble
+ * 		recursively jumble Query and call post_parse hook
+ *
+ * Some utility statements like Explain or CreateTableAs contain a Query which
+ * will be planned and executed later on. When query fingerprinting is
+ * enabled, all contained queries need to be jumbled in order to set their
+ * query_ids.
+ */
+static void
+post_parse_jumble(ParseState *pstate, Query *query)
+{
+	JumbleState *jstate = NULL;
+
+	if (IsQueryIdEnabled())
+		jstate = JumbleQuery(query);
+	if (post_parse_analyze_hook)
+		(*post_parse_analyze_hook) (pstate, query, jstate);
+
+	if (query->utilityStmt != NULL)
+	{
+		Query	   *nested_query = NULL;
+		Node	   *utilityStmt = query->utilityStmt;
+
+		if (IsA(utilityStmt, CreateTableAsStmt))
+			nested_query = castNode(Query, ((CreateTableAsStmt *) utilityStmt)->query);
+		else if (IsA(utilityStmt, DeclareCursorStmt))
+			nested_query = castNode(Query, ((DeclareCursorStmt *) utilityStmt)->query);
+		else if (IsA(utilityStmt, ExplainStmt))
+			nested_query = castNode(Query, ((ExplainStmt *) utilityStmt)->query);
+
+		if (nested_query)
+			post_parse_jumble(pstate, nested_query);
+	}
+}
+
 /*
  * parse_analyze_fixedparams
  *		Analyze a raw parse tree and transform it to Query form.
@@ -108,7 +144,6 @@ parse_analyze_fixedparams(RawStmt *parseTree, const char *sourceText,
 {
 	ParseState *pstate = make_parsestate(NULL);
 	Query	   *query;
-	JumbleState *jstate = NULL;
 
 	Assert(sourceText != NULL); /* required as of 8.4 */
 
@@ -121,11 +156,7 @@ parse_analyze_fixedparams(RawStmt *parseTree, const char *sourceText,
 
 	query = transformTopLevelStmt(pstate, parseTree);
 
-	if (IsQueryIdEnabled())
-		jstate = JumbleQuery(query);
-
-	if (post_parse_analyze_hook)
-		(*post_parse_analyze_hook) (pstate, query, jstate);
+	post_parse_jumble(pstate, query);
 
 	free_parsestate(pstate);
 
@@ -148,7 +179,6 @@ parse_analyze_varparams(RawStmt *parseTree, const char *sourceText,
 {
 	ParseState *pstate = make_parsestate(NULL);
 	Query	   *query;
-	JumbleState *jstate = NULL;
 
 	Assert(sourceText != NULL); /* required as of 8.4 */
 
@@ -163,11 +193,7 @@ parse_analyze_varparams(RawStmt *parseTree, const char *sourceText,
 	/* make sure all is well with parameter types */
 	check_variable_parameters(pstate, query);
 
-	if (IsQueryIdEnabled())
-		jstate = JumbleQuery(query);
-
-	if (post_parse_analyze_hook)
-		(*post_parse_analyze_hook) (pstate, query, jstate);
+	post_parse_jumble(pstate, query);
 
 	free_parsestate(pstate);
 
@@ -190,7 +216,6 @@ parse_analyze_withcb(RawStmt *parseTree, const char *sourceText,
 {
 	ParseState *pstate = make_parsestate(NULL);
 	Query	   *query;
-	JumbleState *jstate = NULL;
 
 	Assert(sourceText != NULL); /* required as of 8.4 */
 
@@ -200,11 +225,7 @@ parse_analyze_withcb(RawStmt *parseTree, const char *sourceText,
 
 	query = transformTopLevelStmt(pstate, parseTree);
 
-	if (IsQueryIdEnabled())
-		jstate = JumbleQuery(query);
-
-	if (post_parse_analyze_hook)
-		(*post_parse_analyze_hook) (pstate, query, jstate);
+	post_parse_jumble(pstate, query);
 
 	free_parsestate(pstate);
 
-- 
2.39.3 (Apple Git-146)

