From 58467357f53dbe72527d37a22d0ccb69633bb8b9 Mon Sep 17 00:00:00 2001
From: Anthonin Bonnefoy <anthonin.bonnefoy@datadoghq.com>
Date: Thu, 3 Oct 2024 08:52:02 +0200
Subject: Track location to extract relevant part in nested statement

Previously, Queries generated through transform had an unset
stmt_location. Extensions relying on the statement location to extract
the relevant part of the statement would fall back to using the whole
statement instead, thus showing the same string at the top and
nested levels, which was a source of confusion.

This patch fixes the issue by keeping track of the statement locations
and propagating them to the Query during transform, allowing pgss to show
only the relevant part of the query for nested queries.
---
 .../expected/level_tracking.out               | 165 +++++++++---------
 .../pg_stat_statements/expected/planning.out  |  10 +-
 .../pg_stat_statements/expected/select.out    |   2 +-
 .../pg_stat_statements/expected/utility.out   |   2 +-
 contrib/pg_stat_statements/sql/planning.sql   |   4 +-
 src/backend/parser/analyze.c                  |  93 +++++++++-
 src/backend/parser/gram.y                     |  76 +++++++-
 src/include/nodes/parsenodes.h                |  10 ++
 src/include/parser/parse_node.h               |  15 ++
 9 files changed, 273 insertions(+), 104 deletions(-)

diff --git a/contrib/pg_stat_statements/expected/level_tracking.out b/contrib/pg_stat_statements/expected/level_tracking.out
index 8f008f8bfd1..489dc7143f7 100644
--- a/contrib/pg_stat_statements/expected/level_tracking.out
+++ b/contrib/pg_stat_statements/expected/level_tracking.out
@@ -206,37 +206,37 @@ EXPLAIN (COSTS OFF) SELECT 1 UNION SELECT 2;
 
 SELECT toplevel, calls, query FROM pg_stat_statements
   ORDER BY query COLLATE "C";
- toplevel | calls |                                query                                
-----------+-------+---------------------------------------------------------------------
+ toplevel | calls |                               query                                
+----------+-------+--------------------------------------------------------------------
+ f        |     1 | DELETE FROM stats_track_tab
  t        |     1 | EXPLAIN (COSTS OFF) (SELECT $1, $2)
- f        |     1 | EXPLAIN (COSTS OFF) (SELECT $1, $2);
  t        |     1 | EXPLAIN (COSTS OFF) (TABLE test_table)
- f        |     1 | EXPLAIN (COSTS OFF) (TABLE test_table);
  t        |     1 | EXPLAIN (COSTS OFF) (VALUES ($1, $2))
- f        |     1 | EXPLAIN (COSTS OFF) (VALUES ($1, $2));
  t        |     1 | EXPLAIN (COSTS OFF) DELETE FROM stats_track_tab
- f        |     1 | EXPLAIN (COSTS OFF) DELETE FROM stats_track_tab;
  t        |     1 | EXPLAIN (COSTS OFF) INSERT INTO stats_track_tab VALUES (($1))
- f        |     1 | EXPLAIN (COSTS OFF) INSERT INTO stats_track_tab VALUES (($1));
- t        |     1 | EXPLAIN (COSTS OFF) MERGE INTO stats_track_tab                     +
-          |       |   USING (SELECT id FROM generate_series($1, $2) id) ON x = id      +
-          |       |   WHEN MATCHED THEN UPDATE SET x = id                              +
+ t        |     1 | EXPLAIN (COSTS OFF) MERGE INTO stats_track_tab                    +
+          |       |   USING (SELECT id FROM generate_series($1, $2) id) ON x = id     +
+          |       |   WHEN MATCHED THEN UPDATE SET x = id                             +
           |       |   WHEN NOT MATCHED THEN INSERT (x) VALUES (id)
- f        |     1 | EXPLAIN (COSTS OFF) MERGE INTO stats_track_tab                     +
-          |       |   USING (SELECT id FROM generate_series($1, $2) id) ON x = id      +
-          |       |   WHEN MATCHED THEN UPDATE SET x = id                              +
-          |       |   WHEN NOT MATCHED THEN INSERT (x) VALUES (id);
  t        |     1 | EXPLAIN (COSTS OFF) SELECT $1
  t        |     1 | EXPLAIN (COSTS OFF) SELECT $1 UNION SELECT $2
- f        |     1 | EXPLAIN (COSTS OFF) SELECT $1 UNION SELECT $2;
- f        |     1 | EXPLAIN (COSTS OFF) SELECT $1;
  t        |     1 | EXPLAIN (COSTS OFF) TABLE stats_track_tab
- f        |     1 | EXPLAIN (COSTS OFF) TABLE stats_track_tab;
  t        |     1 | EXPLAIN (COSTS OFF) UPDATE stats_track_tab SET x = $1 WHERE x = $2
- f        |     1 | EXPLAIN (COSTS OFF) UPDATE stats_track_tab SET x = $1 WHERE x = $2;
  t        |     1 | EXPLAIN (COSTS OFF) VALUES ($1)
- f        |     1 | EXPLAIN (COSTS OFF) VALUES ($1);
+ f        |     1 | INSERT INTO stats_track_tab VALUES (($1))
+ f        |     1 | MERGE INTO stats_track_tab                                        +
+          |       |   USING (SELECT id FROM generate_series($1, $2) id) ON x = id     +
+          |       |   WHEN MATCHED THEN UPDATE SET x = id                             +
+          |       |   WHEN NOT MATCHED THEN INSERT (x) VALUES (id)
+ f        |     1 | SELECT $1
+ f        |     1 | SELECT $1 UNION SELECT $2
+ f        |     1 | SELECT $1, $2
  t        |     1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
+ f        |     1 | TABLE stats_track_tab
+ f        |     1 | TABLE test_table
+ f        |     1 | UPDATE stats_track_tab SET x = $1 WHERE x = $2
+ f        |     1 | VALUES ($1)
+ f        |     1 | VALUES ($1, $2)
 (23 rows)
 
 -- EXPLAIN - top-level tracking.
@@ -405,20 +405,20 @@ EXPLAIN (COSTS OFF) SELECT 1, 2 UNION SELECT 3, 4\; EXPLAIN (COSTS OFF) (SELECT
 
 SELECT toplevel, calls, query FROM pg_stat_statements
   ORDER BY query COLLATE "C";
- toplevel | calls |                                                        query                                                        
-----------+-------+---------------------------------------------------------------------------------------------------------------------
+ toplevel | calls |                              query                              
+----------+-------+-----------------------------------------------------------------
+ f        |     1 | (SELECT $1, $2, $3) UNION SELECT $4, $5, $6
  t        |     1 | EXPLAIN (COSTS OFF) (SELECT $1, $2, $3)
  t        |     1 | EXPLAIN (COSTS OFF) (SELECT $1, $2, $3) UNION SELECT $4, $5, $6
- f        |     1 | EXPLAIN (COSTS OFF) (SELECT $1, $2, $3); EXPLAIN (COSTS OFF) (SELECT 1, 2, 3, 4);
  t        |     1 | EXPLAIN (COSTS OFF) (SELECT $1, $2, $3, $4)
- f        |     1 | EXPLAIN (COSTS OFF) (SELECT 1, 2, 3); EXPLAIN (COSTS OFF) (SELECT $1, $2, $3, $4);
  t        |     1 | EXPLAIN (COSTS OFF) SELECT $1
  t        |     1 | EXPLAIN (COSTS OFF) SELECT $1, $2
  t        |     1 | EXPLAIN (COSTS OFF) SELECT $1, $2 UNION SELECT $3, $4
- f        |     1 | EXPLAIN (COSTS OFF) SELECT $1, $2 UNION SELECT $3, $4; EXPLAIN (COSTS OFF) (SELECT 1, 2, 3) UNION SELECT 3, 4, 5;
- f        |     1 | EXPLAIN (COSTS OFF) SELECT $1; EXPLAIN (COSTS OFF) SELECT 1, 2;
- f        |     1 | EXPLAIN (COSTS OFF) SELECT 1, 2 UNION SELECT 3, 4; EXPLAIN (COSTS OFF) (SELECT $1, $2, $3) UNION SELECT $4, $5, $6;
- f        |     1 | EXPLAIN (COSTS OFF) SELECT 1; EXPLAIN (COSTS OFF) SELECT $1, $2;
+ f        |     1 | SELECT $1
+ f        |     1 | SELECT $1, $2
+ f        |     1 | SELECT $1, $2 UNION SELECT $3, $4
+ f        |     1 | SELECT $1, $2, $3
+ f        |     1 | SELECT $1, $2, $3, $4
  t        |     1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
 (13 rows)
 
@@ -494,29 +494,29 @@ EXPLAIN (COSTS OFF) INSERT INTO stats_track_tab VALUES ((1))\; EXPLAIN (COSTS OF
 
 SELECT toplevel, calls, query FROM pg_stat_statements
   ORDER BY query COLLATE "C";
- toplevel | calls |                                                              query                                                               
-----------+-------+----------------------------------------------------------------------------------------------------------------------------------
+ toplevel | calls |                               query                                
+----------+-------+--------------------------------------------------------------------
+ f        |     1 | DELETE FROM stats_track_tab
+ f        |     1 | DELETE FROM stats_track_tab WHERE x = $1
  t        |     1 | EXPLAIN (COSTS OFF) (TABLE test_table)
  t        |     1 | EXPLAIN (COSTS OFF) (VALUES ($1, $2))
  t        |     1 | EXPLAIN (COSTS OFF) DELETE FROM stats_track_tab
  t        |     1 | EXPLAIN (COSTS OFF) DELETE FROM stats_track_tab WHERE x = $1
- f        |     1 | EXPLAIN (COSTS OFF) DELETE FROM stats_track_tab; EXPLAIN (COSTS OFF) DELETE FROM stats_track_tab WHERE x = $1;
- f        |     1 | EXPLAIN (COSTS OFF) DELETE FROM stats_track_tab; EXPLAIN (COSTS OFF) DELETE FROM stats_track_tab WHERE x = 1;
  t        |     1 | EXPLAIN (COSTS OFF) INSERT INTO stats_track_tab VALUES ($1), ($2)
  t        |     1 | EXPLAIN (COSTS OFF) INSERT INTO stats_track_tab VALUES (($1))
- f        |     1 | EXPLAIN (COSTS OFF) INSERT INTO stats_track_tab VALUES (($1)); EXPLAIN (COSTS OFF) INSERT INTO stats_track_tab VALUES (1), (2);
- f        |     1 | EXPLAIN (COSTS OFF) INSERT INTO stats_track_tab VALUES ((1)); EXPLAIN (COSTS OFF) INSERT INTO stats_track_tab VALUES ($1), ($2);
  t        |     1 | EXPLAIN (COSTS OFF) TABLE stats_track_tab
- f        |     1 | EXPLAIN (COSTS OFF) TABLE stats_track_tab; EXPLAIN (COSTS OFF) (TABLE test_table);
- f        |     1 | EXPLAIN (COSTS OFF) TABLE stats_track_tab; EXPLAIN (COSTS OFF) (TABLE test_table);
  t        |     1 | EXPLAIN (COSTS OFF) UPDATE stats_track_tab SET x = $1
  t        |     1 | EXPLAIN (COSTS OFF) UPDATE stats_track_tab SET x = $1 WHERE x = $2
- f        |     1 | EXPLAIN (COSTS OFF) UPDATE stats_track_tab SET x = $1 WHERE x = $2; EXPLAIN (COSTS OFF) UPDATE stats_track_tab SET x = 1;
- f        |     1 | EXPLAIN (COSTS OFF) UPDATE stats_track_tab SET x = 1 WHERE x = 1; EXPLAIN (COSTS OFF) UPDATE stats_track_tab SET x = $1;
  t        |     1 | EXPLAIN (COSTS OFF) VALUES ($1)
- f        |     1 | EXPLAIN (COSTS OFF) VALUES ($1); EXPLAIN (COSTS OFF) (VALUES (1, 2));
- f        |     1 | EXPLAIN (COSTS OFF) VALUES (1); EXPLAIN (COSTS OFF) (VALUES ($1, $2));
+ f        |     1 | INSERT INTO stats_track_tab VALUES ($1), ($2)
+ f        |     1 | INSERT INTO stats_track_tab VALUES (($1))
  t        |     1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
+ f        |     1 | TABLE stats_track_tab
+ f        |     1 | TABLE test_table
+ f        |     1 | UPDATE stats_track_tab SET x = $1
+ f        |     1 | UPDATE stats_track_tab SET x = $1 WHERE x = $2
+ f        |     1 | VALUES ($1)
+ f        |     1 | VALUES ($1, $2)
 (21 rows)
 
 SELECT pg_stat_statements_reset() IS NOT NULL AS t;
@@ -547,21 +547,18 @@ EXPLAIN (COSTS OFF) MERGE INTO stats_track_tab
 
 SELECT toplevel, calls, query FROM pg_stat_statements
   ORDER BY query COLLATE "C";
- toplevel | calls |                                             query                                              
-----------+-------+------------------------------------------------------------------------------------------------
- t        |     1 | EXPLAIN (COSTS OFF) MERGE INTO stats_track_tab                                                +
-          |       |   USING (SELECT id FROM generate_series($1, $2) id) ON x = id                                 +
-          |       |   WHEN MATCHED THEN UPDATE SET x = id                                                         +
+ toplevel | calls |                             query                             
+----------+-------+---------------------------------------------------------------
+ t        |     1 | EXPLAIN (COSTS OFF) MERGE INTO stats_track_tab               +
+          |       |   USING (SELECT id FROM generate_series($1, $2) id) ON x = id+
+          |       |   WHEN MATCHED THEN UPDATE SET x = id                        +
           |       |   WHEN NOT MATCHED THEN INSERT (x) VALUES (id)
- f        |     1 | EXPLAIN (COSTS OFF) MERGE INTO stats_track_tab                                                +
-          |       |   USING (SELECT id FROM generate_series($1, $2) id) ON x = id                                 +
-          |       |   WHEN MATCHED THEN UPDATE SET x = id                                                         +
-          |       |   WHEN NOT MATCHED THEN INSERT (x) VALUES (id); EXPLAIN (COSTS OFF) SELECT 1, 2, 3, 4, 5;
- f        |     1 | EXPLAIN (COSTS OFF) MERGE INTO stats_track_tab                                                +
-          |       |   USING (SELECT id FROM generate_series(1, 10) id) ON x = id                                  +
-          |       |   WHEN MATCHED THEN UPDATE SET x = id                                                         +
-          |       |   WHEN NOT MATCHED THEN INSERT (x) VALUES (id); EXPLAIN (COSTS OFF) SELECT $1, $2, $3, $4, $5;
  t        |     1 | EXPLAIN (COSTS OFF) SELECT $1, $2, $3, $4, $5
+ f        |     1 | MERGE INTO stats_track_tab                                   +
+          |       |   USING (SELECT id FROM generate_series($1, $2) id) ON x = id+
+          |       |   WHEN MATCHED THEN UPDATE SET x = id                        +
+          |       |   WHEN NOT MATCHED THEN INSERT (x) VALUES (id)
+ f        |     1 | SELECT $1, $2, $3, $4, $5
  t        |     1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
 (5 rows)
 
@@ -789,29 +786,29 @@ EXPLAIN (COSTS OFF) WITH a AS (select 4) SELECT 1 UNION SELECT 2;
 
 SELECT toplevel, calls, query FROM pg_stat_statements
   ORDER BY query COLLATE "C";
- toplevel | calls |                                           query                                           
-----------+-------+-------------------------------------------------------------------------------------------
+ toplevel | calls |                                          query                                           
+----------+-------+------------------------------------------------------------------------------------------
  t        |     1 | EXPLAIN (COSTS OFF) (WITH a AS (SELECT $1) (SELECT $2, $3))
- f        |     1 | EXPLAIN (COSTS OFF) (WITH a AS (SELECT $1) (SELECT $2, $3));
  t        |     1 | EXPLAIN (COSTS OFF) WITH a AS (SELECT $1) DELETE FROM stats_track_tab
- f        |     1 | EXPLAIN (COSTS OFF) WITH a AS (SELECT $1) DELETE FROM stats_track_tab;
  t        |     1 | EXPLAIN (COSTS OFF) WITH a AS (SELECT $1) INSERT INTO stats_track_tab VALUES (($2))
- f        |     1 | EXPLAIN (COSTS OFF) WITH a AS (SELECT $1) INSERT INTO stats_track_tab VALUES (($2));
- t        |     1 | EXPLAIN (COSTS OFF) WITH a AS (SELECT $1) MERGE INTO stats_track_tab                     +
-          |       |   USING (SELECT id FROM generate_series($2, $3) id) ON x = id                            +
-          |       |   WHEN MATCHED THEN UPDATE SET x = id                                                    +
+ t        |     1 | EXPLAIN (COSTS OFF) WITH a AS (SELECT $1) MERGE INTO stats_track_tab                    +
+          |       |   USING (SELECT id FROM generate_series($2, $3) id) ON x = id                           +
+          |       |   WHEN MATCHED THEN UPDATE SET x = id                                                   +
           |       |   WHEN NOT MATCHED THEN INSERT (x) VALUES (id)
- f        |     1 | EXPLAIN (COSTS OFF) WITH a AS (SELECT $1) MERGE INTO stats_track_tab                     +
-          |       |   USING (SELECT id FROM generate_series($2, $3) id) ON x = id                            +
-          |       |   WHEN MATCHED THEN UPDATE SET x = id                                                    +
-          |       |   WHEN NOT MATCHED THEN INSERT (x) VALUES (id);
  t        |     1 | EXPLAIN (COSTS OFF) WITH a AS (SELECT $1) SELECT $2
- f        |     1 | EXPLAIN (COSTS OFF) WITH a AS (SELECT $1) SELECT $2;
  t        |     1 | EXPLAIN (COSTS OFF) WITH a AS (SELECT $1) UPDATE stats_track_tab SET x = $2 WHERE x = $3
- f        |     1 | EXPLAIN (COSTS OFF) WITH a AS (SELECT $1) UPDATE stats_track_tab SET x = $2 WHERE x = $3;
  t        |     1 | EXPLAIN (COSTS OFF) WITH a AS (select $1) SELECT $2 UNION SELECT $3
- f        |     1 | EXPLAIN (COSTS OFF) WITH a AS (select $1) SELECT $2 UNION SELECT $3;
  t        |     1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
+ f        |     1 | WITH a AS (SELECT $1) (SELECT $2, $3)
+ f        |     1 | WITH a AS (SELECT $1) DELETE FROM stats_track_tab
+ f        |     1 | WITH a AS (SELECT $1) INSERT INTO stats_track_tab VALUES (($2))
+ f        |     1 | WITH a AS (SELECT $1) MERGE INTO stats_track_tab                                        +
+          |       |   USING (SELECT id FROM generate_series($2, $3) id) ON x = id                           +
+          |       |   WHEN MATCHED THEN UPDATE SET x = id                                                   +
+          |       |   WHEN NOT MATCHED THEN INSERT (x) VALUES (id)
+ f        |     1 | WITH a AS (SELECT $1) SELECT $2
+ f        |     1 | WITH a AS (SELECT $1) UPDATE stats_track_tab SET x = $2 WHERE x = $3
+ f        |     1 | WITH a AS (select $1) SELECT $2 UNION SELECT $3
 (15 rows)
 
 -- EXPLAIN with CTEs - top-level tracking
@@ -921,12 +918,12 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)
 
 SELECT toplevel, calls, query FROM pg_stat_statements
   ORDER BY query COLLATE "C";
- toplevel | calls |                              query                               
-----------+-------+------------------------------------------------------------------
- t        |     1 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)           +
+ toplevel | calls |                              query                              
+----------+-------+-----------------------------------------------------------------
+ t        |     1 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)          +
           |       |   DECLARE foocur CURSOR FOR SELECT * FROM stats_track_tab
  t        |     1 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT $1
- f        |     1 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT $1;
+ f        |     1 | SELECT $1
  t        |     1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
 (4 rows)
 
@@ -1050,8 +1047,8 @@ SELECT toplevel, calls, query FROM pg_stat_statements
 ----------+-------+-----------------------------------------------------------------
  t        |     1 | CREATE TEMPORARY TABLE pgss_ctas_1 AS SELECT $1
  t        |     1 | CREATE TEMPORARY TABLE pgss_ctas_2 AS EXECUTE test_prepare_pgss
- f        |     1 | PREPARE test_prepare_pgss AS select generate_series($1, $2)
  t        |     1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
+ f        |     1 | select generate_series($1, $2)
 (4 rows)
 
 -- CREATE TABLE AS, top-level tracking.
@@ -1202,25 +1199,25 @@ COPY (DELETE FROM stats_track_tab WHERE x = 2 RETURNING x) TO stdout;
 2
 SELECT toplevel, calls, query FROM pg_stat_statements
   ORDER BY query COLLATE "C";
- toplevel | calls |                                    query                                    
-----------+-------+-----------------------------------------------------------------------------
- f        |     1 | COPY (DELETE FROM stats_track_tab WHERE x = $1 RETURNING x) TO stdout
+ toplevel | calls |                                   query                                   
+----------+-------+---------------------------------------------------------------------------
  t        |     1 | COPY (DELETE FROM stats_track_tab WHERE x = 2 RETURNING x) TO stdout
- f        |     1 | COPY (INSERT INTO stats_track_tab (x) VALUES ($1) RETURNING x) TO stdout
  t        |     1 | COPY (INSERT INTO stats_track_tab (x) VALUES (1) RETURNING x) TO stdout
- f        |     1 | COPY (MERGE INTO stats_track_tab USING (SELECT $1 id) ON x = id            +
-          |       |   WHEN MATCHED THEN UPDATE SET x = id                                      +
+ t        |     1 | COPY (MERGE INTO stats_track_tab USING (SELECT 1 id) ON x = id           +
+          |       |   WHEN MATCHED THEN UPDATE SET x = id                                    +
           |       |   WHEN NOT MATCHED THEN INSERT (x) VALUES (id) RETURNING x) TO stdout
- t        |     1 | COPY (MERGE INTO stats_track_tab USING (SELECT 1 id) ON x = id             +
-          |       |   WHEN MATCHED THEN UPDATE SET x = id                                      +
-          |       |   WHEN NOT MATCHED THEN INSERT (x) VALUES (id) RETURNING x) TO stdout
- f        |     1 | COPY (SELECT $1 UNION SELECT $2) TO stdout
- f        |     1 | COPY (SELECT $1) TO stdout
  t        |     1 | COPY (SELECT 1 UNION SELECT 2) TO stdout
  t        |     1 | COPY (SELECT 1) TO stdout
- f        |     1 | COPY (UPDATE stats_track_tab SET x = $1 WHERE x = $2 RETURNING x) TO stdout
  t        |     1 | COPY (UPDATE stats_track_tab SET x = 2 WHERE x = 1 RETURNING x) TO stdout
+ f        |     1 | DELETE FROM stats_track_tab WHERE x = $1 RETURNING x
+ f        |     1 | INSERT INTO stats_track_tab (x) VALUES ($1) RETURNING x
+ f        |     1 | MERGE INTO stats_track_tab USING (SELECT $1 id) ON x = id                +
+          |       |   WHEN MATCHED THEN UPDATE SET x = id                                    +
+          |       |   WHEN NOT MATCHED THEN INSERT (x) VALUES (id) RETURNING x
+ f        |     1 | SELECT $1
+ f        |     1 | SELECT $1 UNION SELECT $2
  t        |     1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
+ f        |     1 | UPDATE stats_track_tab SET x = $1 WHERE x = $2 RETURNING x
 (13 rows)
 
 -- COPY - top-level tracking.
diff --git a/contrib/pg_stat_statements/expected/planning.out b/contrib/pg_stat_statements/expected/planning.out
index 9effd11fdc8..3ee1928cbe9 100644
--- a/contrib/pg_stat_statements/expected/planning.out
+++ b/contrib/pg_stat_statements/expected/planning.out
@@ -58,7 +58,7 @@ SELECT 42;
 (1 row)
 
 SELECT plans, calls, rows, query FROM pg_stat_statements
-  WHERE query NOT LIKE 'PREPARE%' ORDER BY query COLLATE "C";
+  WHERE query NOT LIKE 'SELECT COUNT%' ORDER BY query COLLATE "C";
  plans | calls | rows |                          query                           
 -------+-------+------+----------------------------------------------------------
      0 |     1 |    0 | ALTER TABLE stats_plan_test ADD COLUMN x int
@@ -72,10 +72,10 @@ SELECT plans, calls, rows, query FROM pg_stat_statements
 -- for the prepared statement we expect at least one replan, but cache
 -- invalidations could force more
 SELECT plans >= 2 AND plans <= calls AS plans_ok, calls, rows, query FROM pg_stat_statements
-  WHERE query LIKE 'PREPARE%' ORDER BY query COLLATE "C";
- plans_ok | calls | rows |                         query                         
-----------+-------+------+-------------------------------------------------------
- t        |     4 |    4 | PREPARE prep1 AS SELECT COUNT(*) FROM stats_plan_test
+  WHERE query LIKE 'SELECT COUNT%' ORDER BY query COLLATE "C";
+ plans_ok | calls | rows |                query                 
+----------+-------+------+--------------------------------------
+ t        |     4 |    4 | SELECT COUNT(*) FROM stats_plan_test
 (1 row)
 
 -- Cleanup
diff --git a/contrib/pg_stat_statements/expected/select.out b/contrib/pg_stat_statements/expected/select.out
index e0e2fa265c9..37a30af034a 100644
--- a/contrib/pg_stat_statements/expected/select.out
+++ b/contrib/pg_stat_statements/expected/select.out
@@ -128,7 +128,6 @@ DEALLOCATE pgss_test;
 SELECT calls, rows, query FROM pg_stat_statements ORDER BY query COLLATE "C";
  calls | rows |                                    query                                     
 -------+------+------------------------------------------------------------------------------
-     1 |    1 | PREPARE pgss_test (int) AS SELECT $1, $2 LIMIT $3
      4 |    4 | SELECT $1                                                                   +
        |      |   -- but this one will appear                                               +
        |      |   AS "text"
@@ -138,6 +137,7 @@ SELECT calls, rows, query FROM pg_stat_statements ORDER BY query COLLATE "C";
      2 |    2 | SELECT $1 AS "int"
      1 |    2 | SELECT $1 AS i UNION SELECT $2 ORDER BY i
      1 |    1 | SELECT $1 || $2
+     1 |    1 | SELECT $1, $2 LIMIT $3
      0 |    0 | SELECT calls, rows, query FROM pg_stat_statements ORDER BY query COLLATE "C"
      1 |    1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
      1 |    2 | WITH t(f) AS (                                                              +
diff --git a/contrib/pg_stat_statements/expected/utility.out b/contrib/pg_stat_statements/expected/utility.out
index 060d4416dd7..aa4f0f7e628 100644
--- a/contrib/pg_stat_statements/expected/utility.out
+++ b/contrib/pg_stat_statements/expected/utility.out
@@ -540,7 +540,7 @@ SELECT calls, rows, query FROM pg_stat_statements ORDER BY query COLLATE "C";
 -------+------+----------------------------------------------------
      2 |    0 | DEALLOCATE $1
      2 |    0 | DEALLOCATE ALL
-     2 |    2 | PREPARE stat_select AS SELECT $1 AS a
+     2 |    2 | SELECT $1 AS a
      1 |    1 | SELECT $1 as a
      1 |    1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
 (5 rows)
diff --git a/contrib/pg_stat_statements/sql/planning.sql b/contrib/pg_stat_statements/sql/planning.sql
index 46f5d9b951c..9cfe206b3b0 100644
--- a/contrib/pg_stat_statements/sql/planning.sql
+++ b/contrib/pg_stat_statements/sql/planning.sql
@@ -20,11 +20,11 @@ SELECT 42;
 SELECT 42;
 SELECT 42;
 SELECT plans, calls, rows, query FROM pg_stat_statements
-  WHERE query NOT LIKE 'PREPARE%' ORDER BY query COLLATE "C";
+  WHERE query NOT LIKE 'SELECT COUNT%' ORDER BY query COLLATE "C";
 -- for the prepared statement we expect at least one replan, but cache
 -- invalidations could force more
 SELECT plans >= 2 AND plans <= calls AS plans_ok, calls, rows, query FROM pg_stat_statements
-  WHERE query LIKE 'PREPARE%' ORDER BY query COLLATE "C";
+  WHERE query LIKE 'SELECT COUNT%' ORDER BY query COLLATE "C";
 
 -- Cleanup
 DROP TABLE stats_plan_test;
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index 8a6ba1692e8..7afb065e975 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -238,24 +238,106 @@ parse_sub_analyze(Node *parseTree, ParseState *parentParseState,
 	return query;
 }
 
+/*
+ * setQueryLocationAndLength
+ * 		Set query's location and length from statement and ParseState
+ *
+ * Some statements, like PreparableStmt, can be located within parentheses.
+ * For example "(SELECT 1)" or "COPY (UPDATE ...) to x;".  For those, we
+ * cannot take everything from the statement's location to the end of the
+ * string, as the result would include the closing parenthesis.  The parser
+ * sets stmt_len to the size of the statement within the parentheses.  Thus,
+ * when stmt_len is available, we use it for the Query's stmt_len.
+ *
+ * For other cases, the parser can't provide the length of individual
+ * statements.  However, we have the statement's location plus the length
+ * (p_stmt_len) and location (p_stmt_location) of the top level RawStmt,
+ * stored in pstate.  Thus, the statement's length is the RawStmt's length
+ * minus how much we've advanced in the RawStmt's string.
+ */
+static void
+setQueryLocationAndLength(ParseState *pstate, Query *qry, Node *parseTree)
+{
+	ParseLoc	stmt_len = 0;
+
+	/*
+	 * If the top RawStmt's length is unknown (0), return without touching
+	 * the Query's location and length, so that the whole string is used.
+	 */
+	if (pstate->p_stmt_len == 0)
+		return;
+
+	switch (nodeTag(parseTree))
+	{
+		case T_InsertStmt:
+			qry->stmt_location = ((InsertStmt *) parseTree)->stmt_location;
+			stmt_len = ((InsertStmt *) parseTree)->stmt_len;
+			break;
+
+		case T_DeleteStmt:
+			qry->stmt_location = ((DeleteStmt *) parseTree)->stmt_location;
+			stmt_len = ((DeleteStmt *) parseTree)->stmt_len;
+			break;
+
+		case T_UpdateStmt:
+			qry->stmt_location = ((UpdateStmt *) parseTree)->stmt_location;
+			stmt_len = ((UpdateStmt *) parseTree)->stmt_len;
+			break;
+
+		case T_MergeStmt:
+			qry->stmt_location = ((MergeStmt *) parseTree)->stmt_location;
+			stmt_len = ((MergeStmt *) parseTree)->stmt_len;
+			break;
+
+		case T_SelectStmt:
+			qry->stmt_location = ((SelectStmt *) parseTree)->stmt_location;
+			stmt_len = ((SelectStmt *) parseTree)->stmt_len;
+			break;
+
+		case T_PLAssignStmt:
+			qry->stmt_location = ((PLAssignStmt *) parseTree)->location;
+			break;
+
+		default:
+			qry->stmt_location = pstate->p_stmt_location;
+			break;
+	}
+
+	if (stmt_len > 0)
+	{
+		/* Statement's length is known, use it */
+		qry->stmt_len = stmt_len;
+	}
+	else
+	{
+		/*
+		 * Otherwise, the statement runs to the end of the RawStmt: subtract
+		 * the statement's offset within the RawStmt from the RawStmt's length.
+		 */
+		qry->stmt_len = pstate->p_stmt_len - (qry->stmt_location - pstate->p_stmt_location);
+	}
+	Assert(qry->stmt_len >= 0);
+}
+
 /*
  * transformTopLevelStmt -
  *	  transform a Parse tree into a Query tree.
  *
- * This function is just responsible for transferring statement location data
- * from the RawStmt into the finished Query.
+ * This function is just responsible for storing location data
+ * from the RawStmt into the ParseState.
  */
 Query *
 transformTopLevelStmt(ParseState *pstate, RawStmt *parseTree)
 {
 	Query	   *result;
 
+	/* Store RawStmt's length and location in pstate */
+	pstate->p_stmt_len = parseTree->stmt_len;
+	pstate->p_stmt_location = parseTree->stmt_location;
+
 	/* We're at top level, so allow SELECT INTO */
 	result = transformOptionalSelectInto(pstate, parseTree->stmt);
 
-	result->stmt_location = parseTree->stmt_location;
-	result->stmt_len = parseTree->stmt_len;
-
 	return result;
 }
 
@@ -424,6 +506,7 @@ transformStmt(ParseState *pstate, Node *parseTree)
 	/* Mark as original query until we learn differently */
 	result->querySource = QSRC_ORIGINAL;
 	result->canSetTag = true;
+	setQueryLocationAndLength(pstate, result, parseTree);
 
 	return result;
 }
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 4bab2117d96..45d02e942aa 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -153,6 +153,7 @@ static void base_yyerror(YYLTYPE *yylloc, core_yyscan_t yyscanner,
 						 const char *msg);
 static RawStmt *makeRawStmt(Node *stmt, int stmt_location);
 static void updateRawStmtEnd(RawStmt *rs, int end_location);
+static void updatePreparableStmtEnd(Node *n, int end_location);
 static Node *makeColumnRef(char *colname, List *indirection,
 						   int location, core_yyscan_t yyscanner);
 static Node *makeTypeCast(Node *arg, TypeName *typename, int location);
@@ -176,7 +177,7 @@ static void insertSelectOptions(SelectStmt *stmt,
 								SelectLimit *limitClause,
 								WithClause *withClause,
 								core_yyscan_t yyscanner);
-static Node *makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg);
+static Node *makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg, int location);
 static Node *doNegate(Node *n, int location);
 static void doNegateFloat(Float *v);
 static Node *makeAndExpr(Node *lexpr, Node *rexpr, int location);
@@ -3383,6 +3384,7 @@ CopyStmt:	COPY opt_binary qualified_name opt_column_list
 				{
 					CopyStmt *n = makeNode(CopyStmt);
 
+					updatePreparableStmtEnd($3, @4);
 					n->relation = NULL;
 					n->query = $3;
 					n->attlist = NIL;
@@ -12150,6 +12152,7 @@ InsertStmt:
 					$5->onConflictClause = $6;
 					$5->returningList = $7;
 					$5->withClause = $1;
+					$5->stmt_location = @$;
 					$$ = (Node *) $5;
 				}
 		;
@@ -12303,6 +12306,7 @@ DeleteStmt: opt_with_clause DELETE_P FROM relation_expr_opt_alias
 					n->whereClause = $6;
 					n->returningList = $7;
 					n->withClause = $1;
+					n->stmt_location = @$;
 					$$ = (Node *) n;
 				}
 		;
@@ -12377,6 +12381,7 @@ UpdateStmt: opt_with_clause UPDATE relation_expr_opt_alias
 					n->whereClause = $7;
 					n->returningList = $8;
 					n->withClause = $1;
+					n->stmt_location = @$;
 					$$ = (Node *) n;
 				}
 		;
@@ -12454,6 +12459,7 @@ MergeStmt:
 					m->joinCondition = $8;
 					m->mergeWhenClauses = $9;
 					m->returningList = $10;
+					m->stmt_location = @$;
 
 					$$ = (Node *) m;
 				}
@@ -12694,7 +12700,20 @@ SelectStmt: select_no_parens			%prec UMINUS
 		;
 
 select_with_parens:
-			'(' select_no_parens ')'				{ $$ = $2; }
+			'(' select_no_parens ')'
+				{
+					SelectStmt *n = (SelectStmt *) $2;
+
+					/*
+					 * As SelectStmt's location starts at the SELECT keyword,
+					 * we need to track the length of the SelectStmt within
+					 * parentheses to be able to extract the relevant part
+					 * of the query.  Without this, the RawStmt's length would
+					 * be used and would include the closing parenthesis.
+					 */
+					n->stmt_len = @3 - @2;
+					$$ = $2;
+				}
 			| '(' select_with_parens ')'			{ $$ = $2; }
 		;
 
@@ -12816,6 +12835,7 @@ simple_select:
 					n->groupDistinct = ($7)->distinct;
 					n->havingClause = $8;
 					n->windowClause = $9;
+					n->stmt_location = @1;
 					$$ = (Node *) n;
 				}
 			| SELECT distinct_clause target_list
@@ -12833,6 +12853,7 @@ simple_select:
 					n->groupDistinct = ($7)->distinct;
 					n->havingClause = $8;
 					n->windowClause = $9;
+					n->stmt_location = @1;
 					$$ = (Node *) n;
 				}
 			| values_clause							{ $$ = $1; }
@@ -12853,19 +12874,20 @@ simple_select:
 
 					n->targetList = list_make1(rt);
 					n->fromClause = list_make1($2);
+					n->stmt_location = @1;
 					$$ = (Node *) n;
 				}
 			| select_clause UNION set_quantifier select_clause
 				{
-					$$ = makeSetOp(SETOP_UNION, $3 == SET_QUANTIFIER_ALL, $1, $4);
+					$$ = makeSetOp(SETOP_UNION, $3 == SET_QUANTIFIER_ALL, $1, $4, @1);
 				}
 			| select_clause INTERSECT set_quantifier select_clause
 				{
-					$$ = makeSetOp(SETOP_INTERSECT, $3 == SET_QUANTIFIER_ALL, $1, $4);
+					$$ = makeSetOp(SETOP_INTERSECT, $3 == SET_QUANTIFIER_ALL, $1, $4, @1);
 				}
 			| select_clause EXCEPT set_quantifier select_clause
 				{
-					$$ = makeSetOp(SETOP_EXCEPT, $3 == SET_QUANTIFIER_ALL, $1, $4);
+					$$ = makeSetOp(SETOP_EXCEPT, $3 == SET_QUANTIFIER_ALL, $1, $4, @1);
 				}
 		;
 
@@ -13423,6 +13445,7 @@ values_clause:
 				{
 					SelectStmt *n = makeNode(SelectStmt);
 
+					n->stmt_location = @1;
 					n->valuesLists = list_make1($3);
 					$$ = (Node *) n;
 				}
@@ -18565,6 +18588,44 @@ updateRawStmtEnd(RawStmt *rs, int end_location)
 	rs->stmt_len = end_location - rs->stmt_location;
 }
 
+/*
+ * updatePreparableStmtEnd
+ *		Adjust a PreparableStmt to reflect that it doesn't run to the end of
+ *		the string.
+ *
+ * A stmt_len of zero means "rest of string", which would be wrong for a
+ * statement followed by more text (such as the query embedded in a COPY
+ * command), so store an explicit length instead: the distance from the
+ * node's own stmt_location to end_location.
+ *
+ * n: SelectStmt, InsertStmt, UpdateStmt, DeleteStmt or MergeStmt node,
+ *	  updated in place.
+ * end_location: location at which the statement's text ends.
+ *
+ * Any other node type is reported with elog(ERROR).
+ */
+static void
+updatePreparableStmtEnd(Node *n, int end_location)
+{
+	if (IsA(n, SelectStmt))
+		((SelectStmt *) n)->stmt_len =
+			end_location - ((SelectStmt *) n)->stmt_location;
+	else if (IsA(n, InsertStmt))
+		((InsertStmt *) n)->stmt_len =
+			end_location - ((InsertStmt *) n)->stmt_location;
+	else if (IsA(n, UpdateStmt))
+		((UpdateStmt *) n)->stmt_len =
+			end_location - ((UpdateStmt *) n)->stmt_location;
+	else if (IsA(n, DeleteStmt))
+		((DeleteStmt *) n)->stmt_len =
+			end_location - ((DeleteStmt *) n)->stmt_location;
+	else if (IsA(n, MergeStmt))
+		((MergeStmt *) n)->stmt_len =
+			end_location - ((MergeStmt *) n)->stmt_location;
+	else
+		elog(ERROR, "unexpected node type %d", (int) n->type);
+}
+
 static Node *
 makeColumnRef(char *colname, List *indirection,
 			  int location, core_yyscan_t yyscanner)
@@ -18943,11 +19004,13 @@ insertSelectOptions(SelectStmt *stmt,
 					 errmsg("multiple WITH clauses not allowed"),
 					 parser_errposition(exprLocation((Node *) withClause))));
 		stmt->withClause = withClause;
+		/* Update SelectStmt's location to the start of the with clause */
+		stmt->stmt_location = withClause->location;
 	}
 }
 
 static Node *
-makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg)
+makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg, int location)
 {
 	SelectStmt *n = makeNode(SelectStmt);
 
@@ -18955,6 +19018,7 @@ makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg)
 	n->all = all;
 	n->larg = (SelectStmt *) larg;
 	n->rarg = (SelectStmt *) rarg;
+	n->stmt_location = location;
 	return (Node *) n;
 }
 
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index c92cef3d16d..b40b661ec8a 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -2046,6 +2046,8 @@ typedef struct InsertStmt
 	List	   *returningList;	/* list of expressions to return */
 	WithClause *withClause;		/* WITH clause */
 	OverridingKind override;	/* OVERRIDING clause */
+	ParseLoc	stmt_location;	/* start location, or -1 if unknown */
+	ParseLoc	stmt_len;		/* length in bytes; 0 means "rest of string" */
 } InsertStmt;
 
 /* ----------------------
@@ -2060,6 +2062,8 @@ typedef struct DeleteStmt
 	Node	   *whereClause;	/* qualifications */
 	List	   *returningList;	/* list of expressions to return */
 	WithClause *withClause;		/* WITH clause */
+	ParseLoc	stmt_location;	/* start location, or -1 if unknown */
+	ParseLoc	stmt_len;		/* length in bytes; 0 means "rest of string" */
 } DeleteStmt;
 
 /* ----------------------
@@ -2075,6 +2079,8 @@ typedef struct UpdateStmt
 	List	   *fromClause;		/* optional from clause for more tables */
 	List	   *returningList;	/* list of expressions to return */
 	WithClause *withClause;		/* WITH clause */
+	ParseLoc	stmt_location;	/* start location, or -1 if unknown */
+	ParseLoc	stmt_len;		/* length in bytes; 0 means "rest of string" */
 } UpdateStmt;
 
 /* ----------------------
@@ -2090,6 +2096,8 @@ typedef struct MergeStmt
 	List	   *mergeWhenClauses;	/* list of MergeWhenClause(es) */
 	List	   *returningList;	/* list of expressions to return */
 	WithClause *withClause;		/* WITH clause */
+	ParseLoc	stmt_location;	/* start location, or -1 if unknown */
+	ParseLoc	stmt_len;		/* length in bytes; 0 means "rest of string" */
 } MergeStmt;
 
 /* ----------------------
@@ -2159,6 +2167,8 @@ typedef struct SelectStmt
 	bool		all;			/* ALL specified? */
 	struct SelectStmt *larg;	/* left child */
 	struct SelectStmt *rarg;	/* right child */
+	ParseLoc	stmt_location;	/* start location, or -1 if unknown */
+	ParseLoc	stmt_len;		/* length in bytes; 0 means "rest of string" */
 	/* Eventually add fields for CORRESPONDING spec here */
 } SelectStmt;
 
diff --git a/src/include/parser/parse_node.h b/src/include/parser/parse_node.h
index 543df568147..fe1fdc4e184 100644
--- a/src/include/parser/parse_node.h
+++ b/src/include/parser/parse_node.h
@@ -108,6 +108,19 @@ typedef Node *(*CoerceParamHook) (ParseState *pstate, Param *param,
  * byte-wise locations in parse structures to character-wise cursor
  * positions.)
  *
+ * p_stmt_location: location of the top-level RawStmt's start. During
+ * transformation, Query's location will be set to the statement's
+ * location if available. Otherwise, RawStmt's start will be used.
+ * Propagating the location through ParseState is needed for the Query
+ * length calculation (see p_stmt_len below).
+ *
+ * p_stmt_len: length of the top-level RawStmt. Most of the time, the
+ * statement's length won't be provided by the parser, with the exception of
+ * SelectStmt within parentheses and PreparableStmt in COPY. If the statement's
+ * location is provided by the parser, the top-level location and length are
+ * needed to accurately compute the Query's length. If the statement's location
+ * is not provided, RawStmt's length can be used directly.
+ *
  * p_rtable: list of RTEs that will become the rangetable of the query.
  * Note that neither relname nor refname of these entries are necessarily
  * unique; searching the rtable by name is a bad idea.
@@ -193,6 +206,8 @@ struct ParseState
 {
 	ParseState *parentParseState;	/* stack link */
 	const char *p_sourcetext;	/* source text, or NULL if not available */
+	ParseLoc	p_stmt_location;	/* start location, or -1 if unknown */
+	ParseLoc	p_stmt_len;		/* length in bytes; 0 means "rest of string" */
 	List	   *p_rtable;		/* range table so far */
 	List	   *p_rteperminfos; /* list of RTEPermissionInfo nodes for each
 								 * RTE_RELATION entry in rtable */
-- 
2.39.5 (Apple Git-154)

