> On Sat, Sep 24, 2022 at 04:07:14PM +0200, Dmitry Dolgov wrote:
> > On Fri, Sep 16, 2022 at 09:25:13PM +0300, Sergei Kornilov wrote:
> > Hello!
> >
> > Unfortunately the patch needs another rebase due to the recent split of 
> > guc.c (0a20ff54f5e66158930d5328f89f087d4e9ab400)
> >
> > I'm reviewing a patch on top of a previous commit and noticed a failed test:
> >
> > #   Failed test 'no parameters missing from postgresql.conf.sample'
> > #   at t/003_check_guc.pl line 82.
> > #          got: '1'
> > #     expected: '0'
> > # Looks like you failed 1 test of 3.
> > t/003_check_guc.pl ..............
> >
> > The new option has not been added to the postgresql.conf.sample
> >
> > PS: I would also like to have such a feature. It's hard to increase 
> > pg_stat_statements.max or lose some entries just because some ORM sends 
> > requests with a different number of parameters.
>
> Thanks! I'll post the rebased version soon.

And here it is.
>From 327673290bfad8cebc00f9706a25d11034d2245d Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthali...@gmail.com>
Date: Sun, 24 Jul 2022 11:43:25 +0200
Subject: [PATCH v9] Prevent jumbling of every element in ArrayExpr

pg_stat_statements produces multiple entries for queries like

    SELECT something FROM table WHERE col IN (1, 2, 3, ...)

depending on number of parameters, because every element of ArrayExpr is
jumbled. In certain situations it's undesirable, especially if the list
becomes too large.

Make Const expressions contribute nothing to the jumble hash if they're
a part of an ArrayExpr, which length is larger than specified threshold.
Allow to configure the threshold via the new GUC const_merge_threshold
with the default value zero, which disables this feature.

Reviewed-by: Zhihong Yu, Sergey Dudoladov, Robert Haas, Tom Lane
Tested-by: Chengxi Sun
---
 .../expected/pg_stat_statements.out           | 412 ++++++++++++++++++
 .../pg_stat_statements/pg_stat_statements.c   |  33 +-
 .../sql/pg_stat_statements.sql                | 107 +++++
 doc/src/sgml/config.sgml                      |  26 ++
 doc/src/sgml/pgstatstatements.sgml            |  28 +-
 src/backend/utils/misc/guc_tables.c           |  13 +
 src/backend/utils/misc/postgresql.conf.sample |   2 +-
 src/backend/utils/misc/queryjumble.c          | 105 ++++-
 src/include/utils/queryjumble.h               |   5 +-
 9 files changed, 712 insertions(+), 19 deletions(-)

diff --git a/contrib/pg_stat_statements/expected/pg_stat_statements.out 
b/contrib/pg_stat_statements/expected/pg_stat_statements.out
index ff0166fb9d..858cf49e66 100644
--- a/contrib/pg_stat_statements/expected/pg_stat_statements.out
+++ b/contrib/pg_stat_statements/expected/pg_stat_statements.out
@@ -1102,4 +1102,416 @@ SELECT COUNT(*) FROM pg_stat_statements WHERE query 
LIKE '%SELECT GROUPING%';
      2
 (1 row)
 
+--
+-- Consts merging
+--
+CREATE TABLE test_merge (id int, data int);
+-- IN queries
+-- No merging
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset 
+--------------------------
+ 
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                     query                                     
 | calls 
+--------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6)                 
 |     1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7)             
 |     1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8)         
 |     1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9)     
 |     1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, 
$10) |     1
+ SELECT pg_stat_statements_reset()                                             
 |     1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"        
 |     0
+(7 rows)
+
+-- Normal
+SET const_merge_threshold = 5;
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset 
+--------------------------
+ 
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                 query                                  | 
calls 
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3)                      |     1
+ SELECT pg_stat_statements_reset()                                      |     1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" |     0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                 query                                  | 
calls 
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3)                      |     1
+ SELECT * FROM test_merge WHERE id IN (...)                             |     5
+ SELECT pg_stat_statements_reset()                                      |     1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" |     1
+(4 rows)
+
+-- On the merge threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset 
+--------------------------
+ 
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                 query                                  | 
calls 
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4)                  |     1
+ SELECT pg_stat_statements_reset()                                      |     1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" |     0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                 query                                  | 
calls 
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4)                  |     1
+ SELECT * FROM test_merge WHERE id IN (...)                             |     6
+ SELECT pg_stat_statements_reset()                                      |     1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" |     1
+(4 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset 
+--------------------------
+ 
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                 query                                  | 
calls 
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...)                             |     1
+ SELECT pg_stat_statements_reset()                                      |     1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" |     0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                 query                                  | 
calls 
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...)                             |     6
+ SELECT pg_stat_statements_reset()                                      |     1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" |     1
+(3 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset 
+--------------------------
+ 
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                 query                                  | 
calls 
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...)                             |     1
+ SELECT pg_stat_statements_reset()                                      |     1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" |     0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                 query                                  | 
calls 
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...)                             |     5
+ SELECT pg_stat_statements_reset()                                      |     1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" |     1
+(3 rows)
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset 
+--------------------------
+ 
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                 query                                  | 
calls 
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4)                  |     1
+ SELECT pg_stat_statements_reset()                                      |     1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" |     0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                 query                                  | 
calls 
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4)                  |     1
+ SELECT * FROM test_merge WHERE id IN (...)                             |     1
+ SELECT pg_stat_statements_reset()                                      |     1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" |     1
+(4 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset 
+--------------------------
+ 
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                 query                                  | 
calls 
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...)                             |     1
+ SELECT pg_stat_statements_reset()                                      |     1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" |     0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                 query                                  | 
calls 
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...)                             |     2
+ SELECT pg_stat_statements_reset()                                      |     1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" |     1
+(3 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset 
+--------------------------
+ 
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                 query                                  | 
calls 
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...)                             |     1
+ SELECT pg_stat_statements_reset()                                      |     1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" |     0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                 query                                  | 
calls 
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...)                             |     2
+ SELECT pg_stat_statements_reset()                                      |     1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" |     1
+(3 rows)
+
+-- test constants after merge
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset 
+--------------------------
+ 
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data 
= 2;
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                 query                                  | 
calls 
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) and data = $3               |     1
+ SELECT pg_stat_statements_reset()                                      |     1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" |     0
+(3 rows)
+
+-- On table, numeric type causes every constant being wrapped into functions.
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset 
+--------------------------
+ 
+(1 row)
+
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                 query                                  | 
calls 
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge_numeric WHERE id IN (...)                     |     1
+ SELECT pg_stat_statements_reset()                                      |     1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" |     0
+(3 rows)
+
+-- Test find_const_walker
+WITH cte AS (
+    SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+ result 
+--------
+(0 rows)
+
+RESET const_merge_threshold;
 DROP EXTENSION pg_stat_statements;
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c 
b/contrib/pg_stat_statements/pg_stat_statements.c
index ba868f0de9..5554581475 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -2653,6 +2653,9 @@ generate_normalized_query(JumbleState *jstate, const char 
*query,
                                n_quer_loc = 0, /* Normalized query byte 
location */
                                last_off = 0,   /* Offset from start for 
previous tok */
                                last_tok_len = 0;       /* Length (in bytes) of 
that tok */
+       bool            skip = false;   /* Signals that certain constants are
+                                                                  merged 
together and have to be skipped */
+
 
        /*
         * Get constants' lengths (core system only gives us locations).  Note
@@ -2676,7 +2679,6 @@ generate_normalized_query(JumbleState *jstate, const char 
*query,
        {
                int                     off,            /* Offset from start 
for cur tok */
                                        tok_len;        /* Length (in bytes) of 
that tok */
-
                off = jstate->clocations[i].location;
                /* Adjust recorded location if we're dealing with partial 
string */
                off -= query_loc;
@@ -2691,12 +2693,31 @@ generate_normalized_query(JumbleState *jstate, const 
char *query,
                len_to_wrt -= last_tok_len;
 
                Assert(len_to_wrt >= 0);
-               memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
-               n_quer_loc += len_to_wrt;
 
-               /* And insert a param symbol in place of the constant token */
-               n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
-                                                         i + 1 + 
jstate->highest_extern_param_id);
+               /* Normal path, non merged constant */
+               if (!jstate->clocations[i].merged)
+               {
+                       memcpy(norm_query + n_quer_loc, query + quer_loc, 
len_to_wrt);
+                       n_quer_loc += len_to_wrt;
+
+                       /* And insert a param symbol in place of the constant 
token */
+                       n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
+                                                                 i + 1 + 
jstate->highest_extern_param_id);
+
+                       /* In case previous constants were merged away, stop 
doing that */
+                       if (skip)
+                               skip = false;
+               }
+               /* The firsts merged constant */
+               else if (!skip)
+               {
+                       memcpy(norm_query + n_quer_loc, query + quer_loc, 
len_to_wrt);
+                       n_quer_loc += len_to_wrt;
+
+                       /* Skip the following until a non merged constant 
appear */
+                       skip = true;
+                       n_quer_loc += sprintf(norm_query + n_quer_loc, "...");
+               }
 
                quer_loc = off + tok_len;
                last_off = off;
diff --git a/contrib/pg_stat_statements/sql/pg_stat_statements.sql 
b/contrib/pg_stat_statements/sql/pg_stat_statements.sql
index a01f183727..9ff50b52d3 100644
--- a/contrib/pg_stat_statements/sql/pg_stat_statements.sql
+++ b/contrib/pg_stat_statements/sql/pg_stat_statements.sql
@@ -442,4 +442,111 @@ SELECT (
 
 SELECT COUNT(*) FROM pg_stat_statements WHERE query LIKE '%SELECT GROUPING%';
 
+--
+-- Consts merging
+--
+CREATE TABLE test_merge (id int, data int);
+
+-- IN queries
+
+-- No merging
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Normal
+SET const_merge_threshold = 5;
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- On the merge threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- test constants after merge
+SELECT pg_stat_statements_reset();
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data 
= 2;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- On table, numeric type causes every constant being wrapped into functions.
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Test find_const_walker
+WITH cte AS (
+    SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+
+RESET const_merge_threshold;
+
 DROP EXTENSION pg_stat_statements;
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index a5cd4e44c7..ee388d8374 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -8175,6 +8175,32 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH 
csv;
       </listitem>
      </varlistentry>
 
+     <varlistentry id="guc-const-merge-threshold" 
xreflabel="const_merge_treshold">
+      <term><varname>const_merge_threshold</varname> (<type>integer</type>)
+      <indexterm>
+       <primary><varname>const_merge_threshold</varname> configuration 
parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Specifies the minimal length of an array to be eligible for constants
+        collapsing.  Normally every element of an array contributes to a query
+        identifier, which means the same query containing an array of constants
+        could get multiple different identifiers, depending of size of the
+        array.  If this parameter is nonzero, the array contains only constants
+        and it's length is larger than <varname> const_merge_threshold 
</varname>,
+        then array elements will contribure nothing to the query identifier.
+        Thus the query will get the same identifier no matter how many 
constants
+        it contains.
+
+        Zero turns off collapsing, and it is the default value.
+
+        The <xref linkend="pgstatstatements"/> extension will represent such
+        collapsed constants via <literal>'(...)'</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      </variablelist>
 
     </sect2>
diff --git a/doc/src/sgml/pgstatstatements.sgml 
b/doc/src/sgml/pgstatstatements.sgml
index ecf6cd6bf3..a7b1d2863b 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -519,10 +519,30 @@
   <para>
    In some cases, queries with visibly different texts might get merged into a
    single <structname>pg_stat_statements</structname> entry.  Normally this 
will happen
-   only for semantically equivalent queries, but there is a small chance of
-   hash collisions causing unrelated queries to be merged into one entry.
-   (This cannot happen for queries belonging to different users or databases,
-   however.)
+   only for semantically equivalent queries, for example when queries are
+   different only in values of constants they use. Another valid possibility 
for
+   merging queries into a single <structname>pg_stat_statements</structname>
+   entry is when <xref linkend="guc-const-merge-threshold"/> is nonzero and the
+   queries contain an array with more than 
<varname>const_merge_threshold</varname>
+   constants in it:
+
+<screen>
+=# SET const_merge_threshold = 5;
+=# SELECT pg_stat_statements_reset();
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7);
+=# SELECT query, calls FROM pg_stat_statements;
+-[ RECORD 1 ]------------------------------
+query | SELECT * FROM test WHERE a IN (...)
+calls | 2
+-[ RECORD 2 ]------------------------------
+query | SELECT pg_stat_statements_reset()
+calls | 1
+</screen>
+
+   But there is a small chance of hash collisions causing unrelated queries to
+   be merged into one entry. (This cannot happen for queries belonging to
+   different users or databases, however.)
   </para>
 
   <para>
diff --git a/src/backend/utils/misc/guc_tables.c 
b/src/backend/utils/misc/guc_tables.c
index 87e625aa7a..bff8fcaaf9 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -3415,6 +3415,19 @@ struct config_int ConfigureNamesInt[] =
                NULL, NULL, NULL
        },
 
+       {
+               {"const_merge_threshold", PGC_SUSET, STATS_MONITORING,
+                       gettext_noop("Sets the minimal numer of constants in an 
array"
+                                                " after which they will be 
merged"),
+                       gettext_noop("Computing query id for an array of 
constants"
+                                                " will produce the same id for 
all arrays with length"
+                                                " larger than this value. Zero 
turns off merging."),
+               },
+               &const_merge_threshold,
+               0, 0, INT_MAX,
+               NULL, NULL, NULL
+       },
+
        /* End-of-list marker */
        {
                {NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample 
b/src/backend/utils/misc/postgresql.conf.sample
index 90bec0502c..806e74d61b 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -626,7 +626,7 @@
 #log_parser_stats = off
 #log_planner_stats = off
 #log_executor_stats = off
-
+#const_merge_threshold = 0
 
 #------------------------------------------------------------------------------
 # AUTOVACUUM
diff --git a/src/backend/utils/misc/queryjumble.c 
b/src/backend/utils/misc/queryjumble.c
index a67487e5fe..063b4be725 100644
--- a/src/backend/utils/misc/queryjumble.c
+++ b/src/backend/utils/misc/queryjumble.c
@@ -42,6 +42,9 @@
 /* GUC parameters */
 int                    compute_query_id = COMPUTE_QUERY_ID_AUTO;
 
+/* Minimal numer of constants in an array after which they will be merged */
+int                    const_merge_threshold = 0;
+
 /* True when compute_query_id is ON, or AUTO and a module requests them */
 bool           query_id_enabled = false;
 
@@ -52,7 +55,8 @@ static void JumbleQueryInternal(JumbleState *jstate, Query 
*query);
 static void JumbleRangeTable(JumbleState *jstate, List *rtable);
 static void JumbleRowMarks(JumbleState *jstate, List *rowMarks);
 static void JumbleExpr(JumbleState *jstate, Node *node);
-static void RecordConstLocation(JumbleState *jstate, int location);
+static void JumbleExprList(JumbleState *jstate, List *node);
+static void RecordConstLocation(JumbleState *jstate, int location, bool 
merged);
 
 /*
  * Given a possibly multi-statement source string, confine our attention to the
@@ -119,7 +123,7 @@ JumbleQuery(Query *query, const char *querytext)
                jstate->jumble_len = 0;
                jstate->clocations_buf_size = 32;
                jstate->clocations = (LocationLen *)
-                       palloc(jstate->clocations_buf_size * 
sizeof(LocationLen));
+                       palloc0(jstate->clocations_buf_size * 
sizeof(LocationLen));
                jstate->clocations_count = 0;
                jstate->highest_extern_param_id = 0;
 
@@ -341,6 +345,90 @@ JumbleRowMarks(JumbleState *jstate, List *rowMarks)
        }
 }
 
+/*
+ * Jubmle a list of expressions
+ *
+ * This function enforces const_merge_threshold limitation, i.e. if the
+ * provided list contains only constant expressions and its length is greater
+ * than or equal to const_merge_threshold, such list will not contribute to
+ * jumble. Otherwise it falls back to JumbleExpr.
+ */
+static void
+JumbleExprList(JumbleState *jstate, List *elements)
+{
+       ListCell   *temp;
+       Node       *firstExpr = NULL;
+       bool            allConst = true;
+
+       if (elements == NULL)
+               return;
+
+       if (const_merge_threshold == 0)
+       {
+               /* Merging is disabled, process everything one by one. */
+               JumbleExpr(jstate, (Node *) elements);
+               return;
+       }
+
+       if (elements->length < const_merge_threshold)
+       {
+               /* The list is not large enough to collapse it. */
+               JumbleExpr(jstate, (Node *) elements);
+               return;
+       }
+
+       /* Guard against stack overflow due to overly complex expressions */
+       check_stack_depth();
+
+       firstExpr = linitial(elements);
+
+       /*
+        * We always emit the node's NodeTag, then any additional fields that 
are
+        * considered significant, and then we recurse to any child nodes.
+        */
+       APP_JUMB(elements->type);
+
+       /*
+        * If the first expression is a constant, verify if the following 
elements
+        * are constants as well. If yes, the list is eligible for collapsing --
+        * mark it as merged and return from the function.
+        */
+       if (IsA(firstExpr, Const))
+       {
+               foreach(temp, elements)
+               {
+                       Node    *expr = (Node *) lfirst(temp);
+
+                       if (!IsA(expr, Const))
+                       {
+                               allConst = false;
+                               break;
+                       }
+               }
+
+               if (allConst)
+               {
+                       Const *firstConst = (Const *) firstExpr;
+                       Const *lastConst = llast_node(Const, elements);
+
+                       /*
+                        * First and last constants are needed to identify 
which part of
+                        * the query to skip in generate_normalized_query.
+                        */
+                       RecordConstLocation(jstate, firstConst->location, true);
+                       RecordConstLocation(jstate, lastConst->location, true);
+                       return;
+               }
+       }
+
+       /*
+        * If we end up here, it means no constants merging is possible, process
+        * the list as usual.
+        */
+       JumbleExpr(jstate, (Node *) elements);
+       return;
+}
+
 /*
  * Jumble an expression tree
  *
@@ -390,7 +478,7 @@ JumbleExpr(JumbleState *jstate, Node *node)
                                /* We jumble only the constant's type, not its 
value */
                                APP_JUMB(c->consttype);
                                /* Also, record its parse location for query 
normalization */
-                               RecordConstLocation(jstate, c->location);
+                               RecordConstLocation(jstate, c->location, false);
                        }
                        break;
                case T_Param:
@@ -579,7 +667,7 @@ JumbleExpr(JumbleState *jstate, Node *node)
                        }
                        break;
                case T_ArrayExpr:
-                       JumbleExpr(jstate, (Node *) ((ArrayExpr *) 
node)->elements);
+                       JumbleExprList(jstate, (List *) ((ArrayExpr *) 
node)->elements);
                        break;
                case T_RowExpr:
                        JumbleExpr(jstate, (Node *) ((RowExpr *) node)->args);
@@ -832,11 +920,13 @@ JumbleExpr(JumbleState *jstate, Node *node)
 }
 
 /*
- * Record location of constant within query string of query tree
- * that is currently being walked.
+ * Record location of constant within query string of query tree that is
+ * currently being walked. Merged argument signals that the constant do not
+ * contribute to the jumble hash, and any reader of constants array may want to
+ * use this information to represent such constants differently.
  */
 static void
-RecordConstLocation(JumbleState *jstate, int location)
+RecordConstLocation(JumbleState *jstate, int location, bool merged)
 {
        /* -1 indicates unknown or undefined location */
        if (location >= 0)
@@ -851,6 +941,7 @@ RecordConstLocation(JumbleState *jstate, int location)
                                                 sizeof(LocationLen));
                }
                jstate->clocations[jstate->clocations_count].location = 
location;
+               jstate->clocations[jstate->clocations_count].merged = merged;
                /* initialize lengths to -1 to simplify third-party module 
usage */
                jstate->clocations[jstate->clocations_count].length = -1;
                jstate->clocations_count++;
diff --git a/src/include/utils/queryjumble.h b/src/include/utils/queryjumble.h
index 3c2d9beab2..b50cc42d4e 100644
--- a/src/include/utils/queryjumble.h
+++ b/src/include/utils/queryjumble.h
@@ -15,6 +15,7 @@
 #define QUERYJUBLE_H
 
 #include "nodes/parsenodes.h"
+#include "nodes/nodeFuncs.h"
 
 #define JUMBLE_SIZE                            1024    /* query serialization 
buffer size */
 
@@ -25,6 +26,8 @@ typedef struct LocationLen
 {
        int                     location;               /* start offset in 
query text */
        int                     length;                 /* length in bytes, or 
-1 to ignore */
+       bool            merged;                 /* whether or not the location 
was marked as
+                                                                  not 
contributing to jumble */
 } LocationLen;
 
 /*
@@ -63,7 +66,7 @@ enum ComputeQueryIdType
 
 /* GUC parameters */
 extern PGDLLIMPORT int compute_query_id;
-
+extern PGDLLIMPORT int const_merge_threshold;
 
 extern const char *CleanQuerytext(const char *query, int *location, int *len);
 extern JumbleState *JumbleQuery(Query *query, const char *querytext);
-- 
2.27.0

Reply via email to