From fb57a065e4ad9feb3792680e7173ba6cff619d0e Mon Sep 17 00:00:00 2001
From: Anthonin Bonnefoy <anthonin.bonnefoy@datadoghq.com>
Date: Tue, 23 Jul 2024 08:26:49 +0200
Subject: Set query_id for queries contained in utility statement

Some utility statements like Explain, CreateTableAs and DeclareCursor
contain a query which will be planned and executed. During post
parse, only the top utility statement is jumbled, leaving the
contained query without a set query_id. ExplainQuery does jumble the
contained query but CreateTableAs and DeclareCursor do not.

This led to extensions relying on query_id like pg_stat_statements to
not be able to track those nested queries as the query_id was 0.

This patch fixes this by jumbling the nested query of CreateTableAs and
DeclareCursor before it is executed. Additionally, explain's nested
query can itself be a CreateTableAs or DeclareCursor which also needs to
be jumbled which is now done in ExplainQuery.
---
 .../expected/level_tracking.out               | 212 ++++++++++++++++++
 .../pg_stat_statements/sql/level_tracking.sql |  71 ++++++
 src/backend/commands/createas.c               |  10 +
 src/backend/commands/explain.c                |  21 +-
 src/backend/commands/portalcmds.c             |  10 +
 src/test/regress/expected/explain.out         |  17 ++
 src/test/regress/sql/explain.sql              |   4 +
 7 files changed, 344 insertions(+), 1 deletion(-)

diff --git a/contrib/pg_stat_statements/expected/level_tracking.out b/contrib/pg_stat_statements/expected/level_tracking.out
index d8dd8a2deea..50f81a805c9 100644
--- a/contrib/pg_stat_statements/expected/level_tracking.out
+++ b/contrib/pg_stat_statements/expected/level_tracking.out
@@ -112,6 +112,218 @@ SELECT toplevel, calls, query FROM pg_stat_statements
  t        |     1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
 (2 rows)
 
+-- Create Table As, all-level tracking.
+SET pg_stat_statements.track = 'all';
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+CREATE TEMPORARY TABLE pgss_test AS SELECT 1;
+SELECT toplevel, calls, query FROM pg_stat_statements
+  ORDER BY query COLLATE "C", toplevel;
+ toplevel | calls |                       query                        
+----------+-------+----------------------------------------------------
+ t        |     1 | CREATE TEMPORARY TABLE pgss_test AS SELECT $1
+ f        |     1 | CREATE TEMPORARY TABLE pgss_test AS SELECT $1;
+ t        |     1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
+(3 rows)
+
+-- Create Table As using prepared stmt, all-level tracking.
+PREPARE test_prepare_pgss AS select generate_series(1, 10);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+CREATE TEMPORARY TABLE pgss_test2 AS EXECUTE test_prepare_pgss;
+SELECT toplevel, calls, query FROM pg_stat_statements
+  ORDER BY query COLLATE "C", toplevel;
+ toplevel | calls |                             query                              
+----------+-------+----------------------------------------------------------------
+ t        |     1 | CREATE TEMPORARY TABLE pgss_test2 AS EXECUTE test_prepare_pgss
+ f        |     1 | PREPARE test_prepare_pgss AS select generate_series(1, 10)
+ t        |     1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
+(3 rows)
+
+-- Declare cursor, all-level tracking.
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+BEGIN;
+DECLARE FOOCUR CURSOR FOR SELECT * from stats_track_tab;
+FETCH FORWARD 1 FROM foocur;
+ x 
+---
+(0 rows)
+
+CLOSE foocur;
+COMMIT;
+SELECT toplevel, calls, query FROM pg_stat_statements
+  ORDER BY query COLLATE "C", toplevel;
+ toplevel | calls |                          query                           
+----------+-------+----------------------------------------------------------
+ t        |     1 | BEGIN
+ t        |     1 | CLOSE foocur
+ t        |     1 | COMMIT
+ t        |     1 | DECLARE FOOCUR CURSOR FOR SELECT * from stats_track_tab
+ f        |     1 | DECLARE FOOCUR CURSOR FOR SELECT * from stats_track_tab;
+ t        |     1 | FETCH FORWARD 1 FROM foocur
+ t        |     1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
+(7 rows)
+
+-- Explain analyze, all-level tracking.
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT 100;
+           QUERY PLAN           
+--------------------------------
+ Result (actual rows=1 loops=1)
+(1 row)
+
+SELECT toplevel, calls, query FROM pg_stat_statements
+  ORDER BY query COLLATE "C", toplevel;
+ toplevel | calls |                              query                               
+----------+-------+------------------------------------------------------------------
+ t        |     1 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT $1
+ f        |     1 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT $1;
+ t        |     1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
+(3 rows)
+
+-- Explain analyze with declare cursor, all-level tracking.
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) DECLARE foocur CURSOR FOR SELECT * FROM stats_track_tab;
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Seq Scan on stats_track_tab (actual rows=0 loops=1)
+(1 row)
+
+SELECT toplevel, calls, query FROM pg_stat_statements
+  ORDER BY query COLLATE "C", toplevel;
+ toplevel | calls |                                                     query                                                      
+----------+-------+----------------------------------------------------------------------------------------------------------------
+ t        |     1 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) DECLARE foocur CURSOR FOR SELECT * FROM stats_track_tab
+ f        |     1 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) DECLARE foocur CURSOR FOR SELECT * FROM stats_track_tab;
+ t        |     1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
+(3 rows)
+
+-- Create Table As, top-level tracking.
+SET pg_stat_statements.track = 'top';
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+CREATE TABLE pgss_test_2 AS SELECT 1;
+SELECT toplevel, calls, query FROM pg_stat_statements
+  ORDER BY query COLLATE "C", toplevel;
+ toplevel | calls |                       query                        
+----------+-------+----------------------------------------------------
+ t        |     1 | CREATE TABLE pgss_test_2 AS SELECT $1
+ t        |     1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
+(2 rows)
+
+-- Create Table As using prepared stmt, top tracking.
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+CREATE TEMPORARY TABLE pgss_test3 AS EXECUTE test_prepare_pgss;
+SELECT toplevel, calls, query FROM pg_stat_statements
+  ORDER BY query COLLATE "C", toplevel;
+ toplevel | calls |                             query                              
+----------+-------+----------------------------------------------------------------
+ t        |     1 | CREATE TEMPORARY TABLE pgss_test3 AS EXECUTE test_prepare_pgss
+ t        |     1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
+(2 rows)
+
+-- Declare cursor, top tracking.
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+BEGIN;
+DECLARE FOOCUR CURSOR FOR SELECT * from stats_track_tab;
+FETCH FORWARD 1 FROM foocur;
+ x 
+---
+(0 rows)
+
+CLOSE foocur;
+COMMIT;
+SELECT toplevel, calls, query FROM pg_stat_statements
+  ORDER BY query COLLATE "C", toplevel;
+ toplevel | calls |                          query                          
+----------+-------+---------------------------------------------------------
+ t        |     1 | BEGIN
+ t        |     1 | CLOSE foocur
+ t        |     1 | COMMIT
+ t        |     1 | DECLARE FOOCUR CURSOR FOR SELECT * from stats_track_tab
+ t        |     1 | FETCH FORWARD 1 FROM foocur
+ t        |     1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
+(6 rows)
+
+-- Explain analyze, top tracking.
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT 100;
+           QUERY PLAN           
+--------------------------------
+ Result (actual rows=1 loops=1)
+(1 row)
+
+SELECT toplevel, calls, query FROM pg_stat_statements
+  ORDER BY query COLLATE "C", toplevel;
+ toplevel | calls |                              query                              
+----------+-------+-----------------------------------------------------------------
+ t        |     1 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT $1
+ t        |     1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
+(2 rows)
+
+-- Explain analyze with declare cursor, top tracking.
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) DECLARE foocur CURSOR FOR SELECT * FROM stats_track_tab;
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Seq Scan on stats_track_tab (actual rows=0 loops=1)
+(1 row)
+
+SELECT toplevel, calls, query FROM pg_stat_statements
+  ORDER BY query COLLATE "C", toplevel;
+ toplevel | calls |                                                     query                                                     
+----------+-------+---------------------------------------------------------------------------------------------------------------
+ t        |     1 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) DECLARE foocur CURSOR FOR SELECT * FROM stats_track_tab
+ t        |     1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
+(2 rows)
+
 -- DO block - top-level tracking without utility.
 SET pg_stat_statements.track = 'top';
 SET pg_stat_statements.track_utility = FALSE;
diff --git a/contrib/pg_stat_statements/sql/level_tracking.sql b/contrib/pg_stat_statements/sql/level_tracking.sql
index 65a17147a5a..385785aabbb 100644
--- a/contrib/pg_stat_statements/sql/level_tracking.sql
+++ b/contrib/pg_stat_statements/sql/level_tracking.sql
@@ -55,6 +55,77 @@ CALL proc_with_utility_stmt();
 SELECT toplevel, calls, query FROM pg_stat_statements
   ORDER BY query COLLATE "C", toplevel;
 
+-- Create Table As, all-level tracking.
+SET pg_stat_statements.track = 'all';
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+CREATE TEMPORARY TABLE pgss_test AS SELECT 1;
+SELECT toplevel, calls, query FROM pg_stat_statements
+  ORDER BY query COLLATE "C", toplevel;
+
+-- Create Table As using prepared stmt, all-level tracking.
+PREPARE test_prepare_pgss AS select generate_series(1, 10);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+CREATE TEMPORARY TABLE pgss_test2 AS EXECUTE test_prepare_pgss;
+SELECT toplevel, calls, query FROM pg_stat_statements
+  ORDER BY query COLLATE "C", toplevel;
+
+-- Declare cursor, all-level tracking.
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+BEGIN;
+DECLARE FOOCUR CURSOR FOR SELECT * from stats_track_tab;
+FETCH FORWARD 1 FROM foocur;
+CLOSE foocur;
+COMMIT;
+SELECT toplevel, calls, query FROM pg_stat_statements
+  ORDER BY query COLLATE "C", toplevel;
+
+-- Explain analyze, all-level tracking.
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT 100;
+SELECT toplevel, calls, query FROM pg_stat_statements
+  ORDER BY query COLLATE "C", toplevel;
+
+-- Explain analyze with declare cursor, all-level tracking.
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) DECLARE foocur CURSOR FOR SELECT * FROM stats_track_tab;
+SELECT toplevel, calls, query FROM pg_stat_statements
+  ORDER BY query COLLATE "C", toplevel;
+
+-- Create Table As, top-level tracking.
+SET pg_stat_statements.track = 'top';
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+CREATE TABLE pgss_test_2 AS SELECT 1;
+SELECT toplevel, calls, query FROM pg_stat_statements
+  ORDER BY query COLLATE "C", toplevel;
+
+-- Create Table As using prepared stmt, top tracking.
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+CREATE TEMPORARY TABLE pgss_test3 AS EXECUTE test_prepare_pgss;
+SELECT toplevel, calls, query FROM pg_stat_statements
+  ORDER BY query COLLATE "C", toplevel;
+
+-- Declare cursor, top tracking.
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+BEGIN;
+DECLARE FOOCUR CURSOR FOR SELECT * from stats_track_tab;
+FETCH FORWARD 1 FROM foocur;
+CLOSE foocur;
+COMMIT;
+SELECT toplevel, calls, query FROM pg_stat_statements
+  ORDER BY query COLLATE "C", toplevel;
+
+-- Explain analyze, top tracking.
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT 100;
+SELECT toplevel, calls, query FROM pg_stat_statements
+  ORDER BY query COLLATE "C", toplevel;
+
+-- Explain analyze with declare cursor, top tracking.
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) DECLARE foocur CURSOR FOR SELECT * FROM stats_track_tab;
+SELECT toplevel, calls, query FROM pg_stat_statements
+  ORDER BY query COLLATE "C", toplevel;
+
 -- DO block - top-level tracking without utility.
 SET pg_stat_statements.track = 'top';
 SET pg_stat_statements.track_utility = FALSE;
diff --git a/src/backend/commands/createas.c b/src/backend/commands/createas.c
index c71ff801888..dc28cb9188c 100644
--- a/src/backend/commands/createas.c
+++ b/src/backend/commands/createas.c
@@ -38,6 +38,8 @@
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
+#include "nodes/queryjumble.h"
+#include "parser/analyze.h"
 #include "rewrite/rewriteHandler.h"
 #include "tcop/tcopprot.h"
 #include "utils/builtins.h"
@@ -224,6 +226,7 @@ ExecCreateTableAs(ParseState *pstate, CreateTableAsStmt *stmt,
 {
 	Query	   *query = castNode(Query, stmt->query);
 	IntoClause *into = stmt->into;
+	JumbleState *jstate = NULL;
 	bool		is_matview = (into->viewQuery != NULL);
 	bool		do_refresh = false;
 	DestReceiver *dest;
@@ -238,6 +241,13 @@ ExecCreateTableAs(ParseState *pstate, CreateTableAsStmt *stmt,
 	 */
 	dest = CreateIntoRelDestReceiver(into);
 
+	/* Query contained by CTAS needs to be jumbled if requested */
+	if (IsQueryIdEnabled())
+		jstate = JumbleQuery(query);
+
+	if (post_parse_analyze_hook)
+		(*post_parse_analyze_hook) (pstate, query, jstate);
+
 	/*
 	 * The contained Query could be a SELECT, or an EXECUTE utility command.
 	 * If the latter, we just pass it off to ExecuteQuery.
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 5771aabf40a..4a922481583 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -308,8 +308,27 @@ ExplainQuery(ParseState *pstate, ExplainStmt *stmt,
 
 	query = castNode(Query, stmt->query);
 	if (IsQueryIdEnabled())
+	{
 		jstate = JumbleQuery(query);
 
+		/*
+		 * explain can contain an utility statement with a plannable query, we
+		 * need to jumble it to set the nested query's id
+		 */
+		if (query->utilityStmt != NULL)
+		{
+			Query	   *nested_query = NULL;
+			Node	   *utilityStmt = query->utilityStmt;
+
+			if (IsA(utilityStmt, CreateTableAsStmt))
+				nested_query = castNode(Query, ((CreateTableAsStmt *) utilityStmt)->query);
+			else if (IsA(utilityStmt, DeclareCursorStmt))
+				nested_query = castNode(Query, ((DeclareCursorStmt *) utilityStmt)->query);
+			if (nested_query)
+				JumbleQuery(nested_query);
+		}
+	}
+
 	if (post_parse_analyze_hook)
 		(*post_parse_analyze_hook) (pstate, query, jstate);
 
@@ -319,7 +338,7 @@ ExplainQuery(ParseState *pstate, ExplainStmt *stmt,
 	 * came straight from the parser, or suitable locks were acquired by
 	 * plancache.c.
 	 */
-	rewritten = QueryRewrite(castNode(Query, stmt->query));
+	rewritten = QueryRewrite(query);
 
 	/* emit opening boilerplate */
 	ExplainBeginOutput(es);
diff --git a/src/backend/commands/portalcmds.c b/src/backend/commands/portalcmds.c
index 4f6acf67198..ac52ca25e99 100644
--- a/src/backend/commands/portalcmds.c
+++ b/src/backend/commands/portalcmds.c
@@ -28,6 +28,8 @@
 #include "executor/executor.h"
 #include "executor/tstoreReceiver.h"
 #include "miscadmin.h"
+#include "nodes/queryjumble.h"
+#include "parser/analyze.h"
 #include "rewrite/rewriteHandler.h"
 #include "tcop/pquery.h"
 #include "tcop/tcopprot.h"
@@ -44,6 +46,7 @@ PerformCursorOpen(ParseState *pstate, DeclareCursorStmt *cstmt, ParamListInfo pa
 				  bool isTopLevel)
 {
 	Query	   *query = castNode(Query, cstmt->query);
+	JumbleState *jstate = NULL;
 	List	   *rewritten;
 	PlannedStmt *plan;
 	Portal		portal;
@@ -71,6 +74,13 @@ PerformCursorOpen(ParseState *pstate, DeclareCursorStmt *cstmt, ParamListInfo pa
 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
 				 errmsg("cannot create a cursor WITH HOLD within security-restricted operation")));
 
+	/* Query contained by DeclareCursor needs to be jumbled if requested */
+	if (IsQueryIdEnabled())
+		jstate = JumbleQuery(query);
+
+	if (post_parse_analyze_hook)
+		(*post_parse_analyze_hook) (pstate, query, jstate);
+
 	/*
 	 * Parse analysis was done already, but we still have to run the rule
 	 * rewriter.  We do not do AcquireRewriteLocks: we assume the query either
diff --git a/src/test/regress/expected/explain.out b/src/test/regress/expected/explain.out
index 6585c6a69ef..e01ed569989 100644
--- a/src/test/regress/expected/explain.out
+++ b/src/test/regress/expected/explain.out
@@ -653,6 +653,23 @@ select explain_filter('explain (verbose) select * from int8_tbl i8');
  Query Identifier: N
 (3 rows)
 
+-- Test compute_query_id with utility statements containing plannable query
+select explain_filter('explain (verbose) declare test_cur cursor for select * from int8_tbl');
+                       explain_filter                        
+-------------------------------------------------------------
+ Seq Scan on public.int8_tbl  (cost=N.N..N.N rows=N width=N)
+   Output: q1, q2
+ Query Identifier: N
+(3 rows)
+
+select explain_filter('explain (verbose) create table test_ctas as select 1');
+             explain_filter             
+----------------------------------------
+ Result  (cost=N.N..N.N rows=N width=N)
+   Output: N
+ Query Identifier: N
+(3 rows)
+
 -- Test SERIALIZE option
 select explain_filter('explain (analyze,serialize) select * from int8_tbl i8');
                                         explain_filter                                         
diff --git a/src/test/regress/sql/explain.sql b/src/test/regress/sql/explain.sql
index c7055f850c5..5796137c7ff 100644
--- a/src/test/regress/sql/explain.sql
+++ b/src/test/regress/sql/explain.sql
@@ -163,6 +163,10 @@ select explain_filter('explain (verbose) select * from t1 where pg_temp.mysin(f1
 set compute_query_id = on;
 select explain_filter('explain (verbose) select * from int8_tbl i8');
 
+-- Test compute_query_id with utility statements containing plannable query
+select explain_filter('explain (verbose) declare test_cur cursor for select * from int8_tbl');
+select explain_filter('explain (verbose) create table test_ctas as select 1');
+
 -- Test SERIALIZE option
 select explain_filter('explain (analyze,serialize) select * from int8_tbl i8');
 select explain_filter('explain (analyze,serialize text,buffers,timing off) select * from int8_tbl i8');
-- 
2.39.3 (Apple Git-146)

