commit e005a19cc2b86d74cd3502df42c2ca55160cc5f5
Author: Lukas Fittl <lukas@fittl.com>
Date:   Tue Feb 28 02:34:22 2017 -0800

    Use bind parameters $1,$2,.. instead of ? for pg_stat_statements masking.
    
    Whilst adding a few characters, this makes the statements that are output
    parseable by the standard Postgres parser. In particular, this allows one
    to pass the statements to PREPARE and then use EXECUTE with the constant
    values.
    
    Compared to ? it also solves the edge case where it can't be determined
    if a statement contains the ? replacement character, or the ? operator.

diff --git a/contrib/pg_stat_statements/expected/pg_stat_statements.out b/contrib/pg_stat_statements/expected/pg_stat_statements.out
index fd53f15d8b..05c799a116 100644
--- a/contrib/pg_stat_statements/expected/pg_stat_statements.out
+++ b/contrib/pg_stat_statements/expected/pg_stat_statements.out
@@ -68,6 +68,13 @@ SELECT 1 AS i UNION SELECT 2 ORDER BY i;
  2
 (2 rows)
 
+-- ? operator
+select '{"a":1, "b":2}'::jsonb ? 'b';
+ ?column? 
+----------
+ t
+(1 row)
+
 -- cte
 WITH t(f) AS (
   VALUES (1.0), (2.0)
@@ -79,24 +86,35 @@ WITH t(f) AS (
  2.0
 (2 rows)
 
+-- prepared statement
+PREPARE pgss_test (int) AS SELECT $1, 'test' LIMIT 1;
+EXECUTE pgss_test(1);
+ ?column? | ?column? 
+----------+----------
+        1 | test
+(1 row)
+
+DEALLOCATE pgss_test;
 SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C";
-                  query                  | calls | rows 
------------------------------------------+-------+------
- SELECT ?                               +|     4 |    4
-                                        +|       | 
-   AS "text"                             |       | 
- SELECT ? + ?                            |     2 |    2
- SELECT ? + ? + ? AS "add"               |     3 |    3
- SELECT ? AS "float"                     |     1 |    1
- SELECT ? AS "int"                       |     2 |    2
- SELECT ? AS i UNION SELECT ? ORDER BY i |     1 |    2
- SELECT ? || ?                           |     1 |    1
- SELECT pg_stat_statements_reset()       |     1 |    1
- WITH t(f) AS (                         +|     1 |    2
-   VALUES (?), (?)                      +|       | 
- )                                      +|       | 
-   SELECT f FROM t ORDER BY f            |       | 
-(9 rows)
+                       query                       | calls | rows 
+---------------------------------------------------+-------+------
+ PREPARE pgss_test (int) AS SELECT $1, $2 LIMIT $3 |     1 |    1
+ SELECT $1                                        +|     4 |    4
+                                                  +|       | 
+   AS "text"                                       |       | 
+ SELECT $1 + $2                                    |     2 |    2
+ SELECT $1 + $2 + $3 AS "add"                      |     3 |    3
+ SELECT $1 AS "float"                              |     1 |    1
+ SELECT $1 AS "int"                                |     2 |    2
+ SELECT $1 AS i UNION SELECT $2 ORDER BY i         |     1 |    2
+ SELECT $1 || $2                                   |     1 |    1
+ SELECT pg_stat_statements_reset()                 |     1 |    1
+ WITH t(f) AS (                                   +|     1 |    2
+   VALUES ($1), ($2)                              +|       | 
+ )                                                +|       | 
+   SELECT f FROM t ORDER BY f                      |       | 
+ select $1::jsonb ? $2                             |     1 |    1
+(11 rows)
 
 --
 -- CRUD: INSERT SELECT UPDATE DELETE on test table
@@ -125,6 +143,8 @@ BEGIN \;
 UPDATE test SET b = '555' WHERE a = 5 \;
 UPDATE test SET b = '666' WHERE a = 6 \;
 COMMIT ;
+-- many INSERT values
+INSERT INTO test (a, b) VALUES (1, 'a'), (2, 'b'), (3, 'c');
 -- SELECT with constants
 SELECT * FROM test WHERE a > 5 ORDER BY a ;
  a |          b           
@@ -147,8 +167,11 @@ SELECT *
 SELECT * FROM test ORDER BY a;
  a |          b           
 ---+----------------------
+ 1 | a                   
  1 | 111                 
+ 2 | b                   
  2 | 222                 
+ 3 | c                   
  3 | 333                 
  4 | 444                 
  5 | 555                 
@@ -156,19 +179,35 @@ SELECT * FROM test ORDER BY a;
  7 | aaa                 
  8 | bbb                 
  9 | bbb                 
-(9 rows)
+(12 rows)
+
+-- SELECT with IN clause
+SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5);
+ a |          b           
+---+----------------------
+ 1 | 111                 
+ 2 | 222                 
+ 3 | 333                 
+ 4 | 444                 
+ 5 | 555                 
+ 1 | a                   
+ 2 | b                   
+ 3 | c                   
+(8 rows)
 
 SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C";
-                       query                       | calls | rows 
----------------------------------------------------+-------+------
- DELETE FROM test WHERE a > ?                      |     1 |    1
- INSERT INTO test VALUES(generate_series(?, ?), ?) |     1 |   10
- SELECT * FROM test ORDER BY a                     |     1 |    9
- SELECT * FROM test WHERE a > ? ORDER BY a         |     2 |    4
- SELECT pg_stat_statements_reset()                 |     1 |    1
- UPDATE test SET b = ? WHERE a = ?                 |     6 |    6
- UPDATE test SET b = ? WHERE a > ?                 |     1 |    3
-(7 rows)
+                            query                            | calls | rows 
+-------------------------------------------------------------+-------+------
+ DELETE FROM test WHERE a > $1                               |     1 |    1
+ INSERT INTO test (a, b) VALUES ($1, $2), ($3, $4), ($5, $6) |     1 |    3
+ INSERT INTO test VALUES(generate_series($1, $2), $3)        |     1 |   10
+ SELECT * FROM test ORDER BY a                               |     1 |   12
+ SELECT * FROM test WHERE a > $1 ORDER BY a                  |     2 |    4
+ SELECT * FROM test WHERE a IN ($1, $2, $3, $4, $5)          |     1 |    8
+ SELECT pg_stat_statements_reset()                           |     1 |    1
+ UPDATE test SET b = $1 WHERE a = $2                         |     6 |    6
+ UPDATE test SET b = $1 WHERE a > $2                         |     1 |    3
+(9 rows)
 
 --
 -- pg_stat_statements.track = none
@@ -251,9 +290,9 @@ SELECT PLUS_ONE(10);
 SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C";
                query               | calls | rows 
 -----------------------------------+-------+------
- SELECT ?::TEXT                    |     1 |    1
- SELECT PLUS_ONE(?)                |     2 |    2
- SELECT PLUS_TWO(?)                |     2 |    2
+ SELECT $1::TEXT                   |     1 |    1
+ SELECT PLUS_ONE($1)               |     2 |    2
+ SELECT PLUS_TWO($1)               |     2 |    2
  SELECT pg_stat_statements_reset() |     1 |    1
 (4 rows)
 
@@ -308,10 +347,10 @@ SELECT PLUS_ONE(1);
 SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C";
                query               | calls | rows 
 -----------------------------------+-------+------
- SELECT (i + ? + ?)::INTEGER       |     2 |    2
- SELECT (i + ?)::INTEGER LIMIT ?   |     2 |    2
- SELECT PLUS_ONE(?)                |     2 |    2
- SELECT PLUS_TWO(?)                |     2 |    2
+ SELECT (i + $2 + $3)::INTEGER     |     2 |    2
+ SELECT (i + $2)::INTEGER LIMIT $3 |     2 |    2
+ SELECT PLUS_ONE($1)               |     2 |    2
+ SELECT PLUS_TWO($1)               |     2 |    2
  SELECT pg_stat_statements_reset() |     1 |    1
 (5 rows)
 
@@ -352,7 +391,7 @@ SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C";
  DROP FUNCTION PLUS_TWO(INTEGER)           |     1 |    0
  DROP TABLE IF EXISTS test                 |     3 |    0
  DROP TABLE test                           |     1 |    0
- SELECT ?                                  |     1 |    1
+ SELECT $1                                 |     1 |    1
  SELECT pg_stat_statements_reset()         |     1 |    1
 (8 rows)
 
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 62dec8768a..ac295b61c9 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -27,8 +27,8 @@
  * to blame query costs on the proper queryId.
  *
  * To facilitate presenting entries to users, we create "representative" query
- * strings in which constants are replaced with '?' characters, to make it
- * clearer what a normalized entry can represent.  To save on shared memory,
+ * strings in which constants are replaced with parameters (such as $1), to make
+ * it clearer what a normalized entry can represent.  To save on shared memory,
  * and to avoid having to truncate oversized query strings, we store these
  * strings in a temporary external query-texts file.  Offsets into this
  * file are kept in shared memory.
@@ -219,6 +219,9 @@ typedef struct pgssJumbleState
 
 	/* Current number of valid entries in clocations array */
 	int			clocations_count;
+
+	/* highest param id we've seen, in order to start normalization correctly */
+	int			highest_extern_param_id;
 } pgssJumbleState;
 
 /*---- Local variables ----*/
@@ -803,6 +806,7 @@ pgss_post_parse_analyze(ParseState *pstate, Query *query)
 	jstate.clocations = (pgssLocationLen *)
 		palloc(jstate.clocations_buf_size * sizeof(pgssLocationLen));
 	jstate.clocations_count = 0;
+	jstate.highest_extern_param_id = 0;
 
 	/* Compute query ID and mark the Query node with it */
 	JumbleQuery(&jstate, query);
@@ -2478,6 +2482,11 @@ JumbleExpr(pgssJumbleState *jstate, Node *node)
 				APP_JUMB(p->paramkind);
 				APP_JUMB(p->paramid);
 				APP_JUMB(p->paramtype);
+
+				if (p->paramkind == PARAM_EXTERN &&
+					p->paramid > jstate->highest_extern_param_id) {
+					jstate->highest_extern_param_id = p->paramid;
+				}
 			}
 			break;
 		case T_Aggref:
@@ -2925,7 +2934,8 @@ RecordConstLocation(pgssJumbleState *jstate, int location)
  * of interest, so it's worth doing.)
  *
  * *query_len_p contains the input string length, and is updated with
- * the result string length (which cannot be longer) on exit.
+ * the result string length on exit. The resulting string might be longer
+ * depending on the number of $n replacement characters.
  *
  * Returns a palloc'd string.
  */
@@ -2940,7 +2950,9 @@ generate_normalized_query(pgssJumbleState *jstate, const char *query,
 				quer_loc = 0,	/* Source query byte location */
 				n_quer_loc = 0, /* Normalized query byte location */
 				last_off = 0,	/* Offset from start for previous tok */
-				last_tok_len = 0;		/* Length (in bytes) of that tok */
+				last_tok_len = 0,		/* Length (in bytes) of that tok */
+				norm_query_buflen = 0, /* Size of the normalized query buffer */
+				highest_param_len = 0; /* Length of highest replacement char */
 
 	/*
 	 * Get constants' lengths (core system only gives us locations).  Note
@@ -2948,8 +2960,14 @@ generate_normalized_query(pgssJumbleState *jstate, const char *query,
 	 */
 	fill_in_constant_lengths(jstate, query, query_loc);
 
+	/* Accomodate the additional query replacement characters */
+	highest_param_len = 1 + floor(log10(jstate->clocations_count +
+										jstate->highest_extern_param_id));
+	norm_query_buflen = query_len;
+	norm_query_buflen += jstate->clocations_count * highest_param_len;
+
 	/* Allocate result buffer */
-	norm_query = palloc(query_len + 1);
+	norm_query = palloc(norm_query_buflen + 1);
 
 	for (i = 0; i < jstate->clocations_count; i++)
 	{
@@ -2973,8 +2991,9 @@ generate_normalized_query(pgssJumbleState *jstate, const char *query,
 		memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
 		n_quer_loc += len_to_wrt;
 
-		/* And insert a '?' in place of the constant token */
-		norm_query[n_quer_loc++] = '?';
+		/* And insert a param in place of the constant token */
+		n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d", i + 1 +
+							  jstate->highest_extern_param_id);
 
 		quer_loc = off + tok_len;
 		last_off = off;
@@ -2991,7 +3010,7 @@ generate_normalized_query(pgssJumbleState *jstate, const char *query,
 	memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
 	n_quer_loc += len_to_wrt;
 
-	Assert(n_quer_loc <= query_len);
+	Assert(n_quer_loc <= norm_query_buflen);
 	norm_query[n_quer_loc] = '\0';
 
 	*query_len_p = n_quer_loc;
diff --git a/contrib/pg_stat_statements/sql/pg_stat_statements.sql b/contrib/pg_stat_statements/sql/pg_stat_statements.sql
index 385c8a8d99..4cfe745ad6 100644
--- a/contrib/pg_stat_statements/sql/pg_stat_statements.sql
+++ b/contrib/pg_stat_statements/sql/pg_stat_statements.sql
@@ -37,12 +37,20 @@ SELECT :add + 1 + 1 AS "add" \gset
 -- set operator
 SELECT 1 AS i UNION SELECT 2 ORDER BY i;
 
+-- ? operator
+select '{"a":1, "b":2}'::jsonb ? 'b';
+
 -- cte
 WITH t(f) AS (
   VALUES (1.0), (2.0)
 )
   SELECT f FROM t ORDER BY f;
 
+-- prepared statement
+PREPARE pgss_test (int) AS SELECT $1, 'test' LIMIT 1;
+EXECUTE pgss_test(1);
+DEALLOCATE pgss_test;
+
 SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C";
 
 --
@@ -74,6 +82,9 @@ UPDATE test SET b = '555' WHERE a = 5 \;
 UPDATE test SET b = '666' WHERE a = 6 \;
 COMMIT ;
 
+-- many INSERT values
+INSERT INTO test (a, b) VALUES (1, 'a'), (2, 'b'), (3, 'c');
+
 -- SELECT with constants
 SELECT * FROM test WHERE a > 5 ORDER BY a ;
 
@@ -85,6 +96,9 @@ SELECT *
 -- SELECT without constants
 SELECT * FROM test ORDER BY a;
 
+-- SELECT with IN clause
+SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5);
+
 SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C";
 
 --
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index d4f09fc2a3..d674339cbf 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -245,9 +245,11 @@
 
   <para>
    When a constant's value has been ignored for purposes of matching the
-   query to other queries, the constant is replaced by <literal>?</literal>
-   in the <structname>pg_stat_statements</> display.  The rest of the query
-   text is that of the first query that had the particular
+   query to other queries, the constant is replaced by parameter symbols such as
+   <literal>$1</literal> in the <structname>pg_stat_statements</> display. The
+   parameter symbols start incrementing from the next one after the highest $n
+   parameter in the original query text, or $1 if none is present. The rest of
+   the query text is that of the first query that had the particular
    <structfield>queryid</> hash value associated with the
    <structname>pg_stat_statements</> entry.
   </para>
@@ -481,13 +483,13 @@ bench=# SELECT query, calls, total_time, rows, 100.0 * shared_blks_hit /
                nullif(shared_blks_hit + shared_blks_read, 0) AS hit_percent
           FROM pg_stat_statements ORDER BY total_time DESC LIMIT 5;
 -[ RECORD 1 ]---------------------------------------------------------------------
-query       | UPDATE pgbench_branches SET bbalance = bbalance + ? WHERE bid = ?;
+query       | UPDATE pgbench_branches SET bbalance = bbalance + $1 WHERE bid = $2;
 calls       | 3000
 total_time  | 9609.00100000002
 rows        | 2836
 hit_percent | 99.9778970000200936
 -[ RECORD 2 ]---------------------------------------------------------------------
-query       | UPDATE pgbench_tellers SET tbalance = tbalance + ? WHERE tid = ?;
+query       | UPDATE pgbench_tellers SET tbalance = tbalance + $1 WHERE tid = $2;
 calls       | 3000
 total_time  | 8015.156
 rows        | 2990
@@ -499,7 +501,7 @@ total_time  | 310.624
 rows        | 100000
 hit_percent | 0.30395136778115501520
 -[ RECORD 4 ]---------------------------------------------------------------------
-query       | UPDATE pgbench_accounts SET abalance = abalance + ? WHERE aid = ?;
+query       | UPDATE pgbench_accounts SET abalance = abalance + $1 WHERE aid = $2;
 calls       | 3000
 total_time  | 271.741999999997
 rows        | 3000
