Re: [patch] bit XOR aggregate functions

2021-03-05 Thread Alexey Bashtanov

Hi all,

Thanks for your reviews.
I've updated my patch to the current master and added a documentation 
line suggesting using the new function as a checksum.


Best regards, Alex

On 04/03/2021 17:14, Ibrar Ahmed wrote:



On Wed, Mar 3, 2021 at 7:30 PM Peter Eisentraut 
> wrote:


On 10.02.21 06:42, Kyotaro Horiguchi wrote:
> We already had CREATE AGGREATE at the time, so BIT_XOR can be
thought
> as it falls into the same category with BIT_AND and BIT_OR, that is,
> we may have BIT_XOR as an intrinsic aggregation?

I think the use of BIT_XOR is quite separate from BIT_AND and BIT_OR.
The latter give you an "all" or "any" result of the bits set. 
BIT_XOR
will return 1 or true if an odd number of inputs are 1 or true, which
isn't useful by itself.  But it can be used as a checksum, so it
seems
pretty reasonable to me to add it.  Perhaps the use case could be
pointed out in the documentation.




Hi Alex,

The patch is failing just because of a comment, which is already 
changed by another patch


-/* Define to build with OpenSSL support. (--with-ssl=openssl) */

+/* Define to 1 if you have OpenSSL support. */


Do you mind sending an updated patch?

http://cfbot.cputube.org/patch_32_2980.log.

I am changing the status to "Waiting for Author"


In my opinion that change no more requires so I removed that and 
attached the patch.


--
Ibrar Ahmed


diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index fee0561961..dff158e99a 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -19198,6 +19198,33 @@ SELECT NULLIF(value, '(none)') ...
Yes
   
 
+  
+   
+
+ bit_xor
+
+bit_xor ( smallint )
+smallint
+   
+   
+bit_xor ( integer )
+integer
+   
+   
+bit_xor ( bigint )
+bigint
+   
+   
+bit_xor ( bit )
+bit
+   
+   
+Computes the bitwise exclusive OR of all non-null input values.
+May be useful as a checksum for an unordered set of values.
+   
+   Yes
+  
+
   

 
diff --git a/src/include/catalog/pg_aggregate.dat b/src/include/catalog/pg_aggregate.dat
index 5c1f962251..0d8c5a922a 100644
--- a/src/include/catalog/pg_aggregate.dat
+++ b/src/include/catalog/pg_aggregate.dat
@@ -505,18 +505,26 @@
   aggcombinefn => 'int2and', aggtranstype => 'int2' },
 { aggfnoid => 'bit_or(int2)', aggtransfn => 'int2or', aggcombinefn => 'int2or',
   aggtranstype => 'int2' },
+{ aggfnoid => 'bit_xor(int2)', aggtransfn => 'int2xor', aggcombinefn => 'int2xor',
+  aggtranstype => 'int2' },
 { aggfnoid => 'bit_and(int4)', aggtransfn => 'int4and',
   aggcombinefn => 'int4and', aggtranstype => 'int4' },
 { aggfnoid => 'bit_or(int4)', aggtransfn => 'int4or', aggcombinefn => 'int4or',
   aggtranstype => 'int4' },
+{ aggfnoid => 'bit_xor(int4)', aggtransfn => 'int4xor', aggcombinefn => 'int4xor',
+  aggtranstype => 'int4' },
 { aggfnoid => 'bit_and(int8)', aggtransfn => 'int8and',
   aggcombinefn => 'int8and', aggtranstype => 'int8' },
 { aggfnoid => 'bit_or(int8)', aggtransfn => 'int8or', aggcombinefn => 'int8or',
   aggtranstype => 'int8' },
+{ aggfnoid => 'bit_xor(int8)', aggtransfn => 'int8xor', aggcombinefn => 'int8xor',
+  aggtranstype => 'int8' },
 { aggfnoid => 'bit_and(bit)', aggtransfn => 'bitand', aggcombinefn => 'bitand',
   aggtranstype => 'bit' },
 { aggfnoid => 'bit_or(bit)', aggtransfn => 'bitor', aggcombinefn => 'bitor',
   aggtranstype => 'bit' },
+{ aggfnoid => 'bit_xor(bit)', aggtransfn => 'bitxor', aggcombinefn => 'bitxor',
+  aggtranstype => 'bit' },
 
 # xml
 { aggfnoid => 'xmlagg', aggtransfn => 'xmlconcat2', aggtranstype => 'xml' },
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 59d2b71ca9..506689d8ac 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -7995,24 +7995,36 @@
 { oid => '2237', descr => 'bitwise-or smallint aggregate',
   proname => 'bit_or', prokind => 'a', proisstrict => 'f', prorettype => 'int2',
   proargtypes => 'int2', prosrc => 'aggregate_dummy' },
+{ oid => '8452', descr => 'bitwise-xor smallint aggregate',
+  proname => 'bit_xor', prokind => 'a', proisstrict => 'f', prorettype => 'int2',
+  proargtypes => 'int2', prosrc => 'aggregate_dummy' },
 { oid => '2238', descr => 'bitwise-and integer aggregate',
   proname => 'bit_and', prokind => 'a', proisstrict => 'f',
   prorettype => 'int4', proargtypes => 'int4', prosrc => 'aggregate_dummy' },
 { oid => '2239', descr => 'bitwise-or integer aggregate',
   proname => 'bit_or', prokind => 'a', proisstrict => 'f', prorettype => 'int4',
   proargtypes => 'int4', prosrc => 'aggregate_dummy' },
+{ oid => '8453', descr => 'bitwise-xor integer aggregate',
+  proname => 'bit_xor', prokind => 'a', proisstrict => 'f', prorettype => 'int4',
+  proargtypes => 'int4', prosrc => 'aggrega

[patch] bit XOR aggregate functions

2021-02-09 Thread Alexey Bashtanov

Hi,

I personally use it as a checksum for a large unordered set, where 
performance and simplicity is prioritized over collision resilience.

Maybe there are other ways to use them.

Best, Alex
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 1ab31a9056..f33358f8db 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -19192,6 +19192,32 @@ SELECT NULLIF(value, '(none)') ...
Yes
   
 
+  
+   
+
+ bit_xor
+
+bit_xor ( smallint )
+smallint
+   
+   
+bit_xor ( integer )
+integer
+   
+   
+bit_xor ( bigint )
+bigint
+   
+   
+bit_xor ( bit )
+bit
+   
+   
+Computes the bitwise exclusive OR of all non-null input values.
+   
+   Yes
+  
+
   

 
diff --git a/src/include/catalog/pg_aggregate.dat b/src/include/catalog/pg_aggregate.dat
index 5c1f962251..0d8c5a922a 100644
--- a/src/include/catalog/pg_aggregate.dat
+++ b/src/include/catalog/pg_aggregate.dat
@@ -505,18 +505,26 @@
   aggcombinefn => 'int2and', aggtranstype => 'int2' },
 { aggfnoid => 'bit_or(int2)', aggtransfn => 'int2or', aggcombinefn => 'int2or',
   aggtranstype => 'int2' },
+{ aggfnoid => 'bit_xor(int2)', aggtransfn => 'int2xor', aggcombinefn => 'int2xor',
+  aggtranstype => 'int2' },
 { aggfnoid => 'bit_and(int4)', aggtransfn => 'int4and',
   aggcombinefn => 'int4and', aggtranstype => 'int4' },
 { aggfnoid => 'bit_or(int4)', aggtransfn => 'int4or', aggcombinefn => 'int4or',
   aggtranstype => 'int4' },
+{ aggfnoid => 'bit_xor(int4)', aggtransfn => 'int4xor', aggcombinefn => 'int4xor',
+  aggtranstype => 'int4' },
 { aggfnoid => 'bit_and(int8)', aggtransfn => 'int8and',
   aggcombinefn => 'int8and', aggtranstype => 'int8' },
 { aggfnoid => 'bit_or(int8)', aggtransfn => 'int8or', aggcombinefn => 'int8or',
   aggtranstype => 'int8' },
+{ aggfnoid => 'bit_xor(int8)', aggtransfn => 'int8xor', aggcombinefn => 'int8xor',
+  aggtranstype => 'int8' },
 { aggfnoid => 'bit_and(bit)', aggtransfn => 'bitand', aggcombinefn => 'bitand',
   aggtranstype => 'bit' },
 { aggfnoid => 'bit_or(bit)', aggtransfn => 'bitor', aggcombinefn => 'bitor',
   aggtranstype => 'bit' },
+{ aggfnoid => 'bit_xor(bit)', aggtransfn => 'bitxor', aggcombinefn => 'bitxor',
+  aggtranstype => 'bit' },
 
 # xml
 { aggfnoid => 'xmlagg', aggtransfn => 'xmlconcat2', aggtranstype => 'xml' },
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 4e0c9be58c..95577094bb 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -7992,24 +7992,36 @@
 { oid => '2237', descr => 'bitwise-or smallint aggregate',
   proname => 'bit_or', prokind => 'a', proisstrict => 'f', prorettype => 'int2',
   proargtypes => 'int2', prosrc => 'aggregate_dummy' },
+{ oid => '8452', descr => 'bitwise-xor smallint aggregate',
+  proname => 'bit_xor', prokind => 'a', proisstrict => 'f', prorettype => 'int2',
+  proargtypes => 'int2', prosrc => 'aggregate_dummy' },
 { oid => '2238', descr => 'bitwise-and integer aggregate',
   proname => 'bit_and', prokind => 'a', proisstrict => 'f',
   prorettype => 'int4', proargtypes => 'int4', prosrc => 'aggregate_dummy' },
 { oid => '2239', descr => 'bitwise-or integer aggregate',
   proname => 'bit_or', prokind => 'a', proisstrict => 'f', prorettype => 'int4',
   proargtypes => 'int4', prosrc => 'aggregate_dummy' },
+{ oid => '8453', descr => 'bitwise-xor integer aggregate',
+  proname => 'bit_xor', prokind => 'a', proisstrict => 'f', prorettype => 'int4',
+  proargtypes => 'int4', prosrc => 'aggregate_dummy' },
 { oid => '2240', descr => 'bitwise-and bigint aggregate',
   proname => 'bit_and', prokind => 'a', proisstrict => 'f',
   prorettype => 'int8', proargtypes => 'int8', prosrc => 'aggregate_dummy' },
 { oid => '2241', descr => 'bitwise-or bigint aggregate',
   proname => 'bit_or', prokind => 'a', proisstrict => 'f', prorettype => 'int8',
   proargtypes => 'int8', prosrc => 'aggregate_dummy' },
+{ oid => '8454', descr => 'bitwise-xor bigint aggregate',
+  proname => 'bit_xor', prokind => 'a', proisstrict => 'f', prorettype => 'int8',
+  proargtypes => 'int8', prosrc => 'aggregate_dummy' },
 { oid => '2242', descr => 'bitwise-and bit aggregate',
   proname => 'bit_and', prokind => 'a', proisstrict => 'f', prorettype => 'bit',
   proargtypes => 'bit', prosrc => 'aggregate_dummy' },
 { oid => '2243', descr => 'bitwise-or bit aggregate',
   proname => 'bit_or', prokind => 'a', proisstrict => 'f', prorettype => 'bit',
   proargtypes => 'bit', prosrc => 'aggregate_dummy' },
+{ oid => '8455', descr => 'bitwise-xor bit aggregate',
+  proname => 'bit_xor', prokind => 'a', proisstrict => 'f', prorettype => 'bit',
+  proargtypes => 'bit', prosrc => 'aggregate_dummy' },
 
 # formerly-missing interval + datetime operators
 { oid => '2546',
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index 55ca

don't allocate HashAgg hash tables when running explain only

2020-11-13 Thread Alexey Bashtanov

Hi,

I got somewhat scared when my explain took a few seconds to complete and 
used a few gigs of RAM.

To reproduce try the following:

discard temp;
create temp table a as select to_timestamp(generate_series(1, 7000)) i;
analyze a;
set work_mem to '3GB';
explain select distinct a1.i - a2.i from a a1, a a2;

I would appreciate if someone could have a look at the patch attached, 
which makes executor skip initializing hash tables when doing explain only.


Best, Alex
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index d87677d659..196fe09c52 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -3665,7 +3665,11 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 			&aggstate->hash_ngroups_limit,
 			&aggstate->hash_planned_partitions);
 		find_hash_columns(aggstate);
-		build_hash_tables(aggstate);
+
+		/* Skip massive memory allocation when running only explain */
+		if (!(eflags & EXEC_FLAG_EXPLAIN_ONLY))
+			build_hash_tables(aggstate);
+
 		aggstate->table_filled = false;
 
 		/* Initialize this to 1, meaning nothing spilled, yet */


Re: Improve planner cost estimations for alternative subplans

2020-07-24 Thread Alexey Bashtanov

Hi Tom,

sorry for the delay,

I experimented with adding a number-of-evaluations parameter to
cost_qual_eval, and found that the majority of call sites do have
something realistic they can pass.  The attached very-much-WIP
patch shows my results so far.  There's a lot of loose ends:

I like the idea, so if we alternative subplans remain there
I think we should implement it.

Best, Alex




Re: Improve planner cost estimations for alternative subplans

2020-07-24 Thread Alexey Bashtanov

Hi Melanie,

Sorry for the delay.


I've just started looking at this patch today, but I was wondering if
you might include a test case which minimally reproduces the original
problem you had.

I could reproduce it with an easier generated data set, please see attached.

However, to be honest with you, while searching I encountered a few 
examples of the opposite behavior,

when the patched version was slower than the master branch.
So I'm not so sure whether we should use the patch, maybe we should 
rather consider Tom's approach.


Best, Alex


altplan-example.sql
Description: application/sql


Improve planner cost estimations for alternative subplans

2020-06-05 Thread Alexey Bashtanov

Hello,

Currently, in case of alternative subplans that do hashed vs per-row 
lookups,

the per-row estimate is used when planning the rest of the query.
It's also coded in not quite an explicit way.

In [1] we found a situation where it leads to a suboptimal plan,
as it bloats the overall cost into large figures,
a decision related to an outer part of the plan look negligible to the 
planner,

and as a result it doesn't elaborate on choosing the optimal one.

The patch is to fix it. Our linear model for costs cannot quite accommodate
the piecewise linear matter of alternative subplans,
so it is based on ugly heuristics and still cannot be very precise,
but I think it's better than the current one.

Thoughts?

Best, Alex

[1] 
https://www.postgresql.org/message-id/flat/ff42b25b-ff03-27f8-ed11-b8255d658cd5%40imap.cc
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index b976afb69d..5e2c5ec822 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -92,6 +92,7 @@
 #include "optimizer/planmain.h"
 #include "optimizer/restrictinfo.h"
 #include "parser/parsetree.h"
+#include "utils/float.h"
 #include "utils/lsyscache.h"
 #include "utils/selfuncs.h"
 #include "utils/spccache.h"
@@ -4283,15 +4284,57 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
 	else if (IsA(node, AlternativeSubPlan))
 	{
 		/*
-		 * Arbitrarily use the first alternative plan for costing.  (We should
-		 * certainly only include one alternative, and we don't yet have
-		 * enough information to know which one the executor is most likely to
-		 * use.)
+		 * Estimate the cost as some mean of the alternatives.
+		 * It's not uncommon for the alternative costs to be of different
+		 * orders of magnitude, so arithmetic mean would be too biased towards
+		 * the slower one. That's why for per-tuple coefficient we use geometric
+		 * mean.
+		 *
+		 * It's tempting to use geometric mean for startup costs too, but
+		 * one of them can be zero, which will result in substantial
+		 * underestimation. So instead we estimate the cost of returning *first*
+		 * tuple as geometric mean of single-tuple costs for the alternatives.
 		 */
-		AlternativeSubPlan *asplan = (AlternativeSubPlan *) node;
+		List   *subplans = ((AlternativeSubPlan *) node)->subplans;
+		Cost 	per_tuple_mean = 1;
+		Cost 	first_tuple_mean = 1;
+		Cost 	startup_min = get_float8_infinity();
+		Cost 	startup_max = 0;
+		Cost 	startup_mean;
+		ListCell   *lc;
+
+		foreach(lc, subplans)
+		{
+			cost_qual_eval_context subplanContext;
+
+			subplanContext.root = context->root;
+			subplanContext.total.startup = 0;
+			subplanContext.total.per_tuple = 0;
 
-		return cost_qual_eval_walker((Node *) linitial(asplan->subplans),
-	 context);
+			cost_qual_eval_walker((Node *) lfirst(lc), &subplanContext);
+
+			Assert(subplanContext.total.startup >= 0);
+			Assert(subplanContext.total.per_tuple >= 0);
+
+			per_tuple_mean *= subplanContext.total.per_tuple;
+			first_tuple_mean *= subplanContext.total.startup + subplanContext.total.per_tuple;
+			startup_min = float8_min(startup_min, subplanContext.total.startup);
+			startup_max = float8_max(startup_min, subplanContext.total.startup);
+		}
+		per_tuple_mean =
+			pow(per_tuple_mean, 1.0 / list_length(subplans));
+		first_tuple_mean =
+			pow(first_tuple_mean, 1.0 / list_length(subplans));
+		startup_mean = first_tuple_mean - per_tuple_mean;
+		/* make sure this calculation makes a sane value */
+		startup_mean = float8_max(startup_mean, startup_min);
+		startup_mean = float8_min(startup_mean, startup_max);
+
+		context->total.startup += startup_mean;
+		context->total.per_tuple += per_tuple_mean;
+
+		/* We've already recursed into children, skip the generic recursion */
+		return false;
 	}
 	else if (IsA(node, PlaceHolderVar))
 	{
diff --git a/src/test/regress/expected/updatable_views.out b/src/test/regress/expected/updatable_views.out
index 5de53f2782..98787e75e0 100644
--- a/src/test/regress/expected/updatable_views.out
+++ b/src/test/regress/expected/updatable_views.out
@@ -2301,17 +2301,16 @@ SELECT * FROM v1 WHERE a=8;
 
 EXPLAIN (VERBOSE, COSTS OFF)
 UPDATE v1 SET a=100 WHERE snoop(a) AND leakproof(a) AND a < 7 AND a != 6;
-QUERY PLAN 

+ QUERY PLAN  
+-
  Update on public.t1
Update on public.t1
Update on public.t11 t1_1
Update on public.t12 t1_2
Update on public.t111 t1_3
-   ->  

Re: control max length of parameter values logged

2020-04-02 Thread Alexey Bashtanov




Pushed with a bit of editorialization.

Great, and thanks for the fixes!

Best, Alex




Re: control max length of parameter values logged

2020-04-02 Thread Alexey Bashtanov

Hi,

Please see the new version attached.

+If greater than zero, bind parameter values reported in non-error
+statement-logging messages are trimmed to no more than this many bytes.
+If this value is specified without units, it is taken as bytes.
+Zero disables logging parameters with statements.
+-1 (the default) makes parameters logged in full.

say: "..causes parameters to be logged in full".

Also..I just realized that this truncates *each* parameter, rather than
truncating the parameter list.

So say: "
|If greater than zero, each bind parameter value reported in a non-error
|statement-logging messages is trimmed to no more than this number of bytes.

okay

I also changed "trimmed to no more than this many bytes" to "trimmed to 
this many bytes".

It's not that pedantic any more but hopefully less awkward.


+Non-zero values add some overhead, as
+PostgreSQL will compute and store textual
+representations of parameter values in memory for all statements,
+even if they eventually do not get logged.

say: "even if they are ultimately not logged"

okay


+++ b/src/backend/nodes/params.c
@@ -280,6 +280,7 @@ BuildParamLogString(ParamListInfo params, char 
**knownTextValues, int maxlen)
oldCxt;
StringInfoData buf;
  
+	Assert(maxlen == -1 || maxlen > 0);

You can write: >= -1

no, I'm specifically checking they don't pass 0



-   if (log_parameters_on_error)
+   if (log_parameter_max_length_on_error 
!= 0)

I would write this as >= 0

no, I'm specifically checking for both positives and -1


+   if 
(log_parameter_max_length_on_error > 0)
+   {
+   /*
+* We can trim the 
saved string, knowing that we
+* won't print all of 
it.  But we must copy at
+* least two more full 
characters than
+* BuildParamLogString 
wants to use; otherwise it
+* might fail to 
include the trailing ellipsis.
+*/
+   
knownTextValues[paramno] =
+   
pnstrdup(pstring,
+   
 log_parameter_max_length_on_error
+   
 + 2 * MAX_MULTIBYTE_CHAR_LEN);

The comment says we need at least 2 chars, but
log_parameter_max_length_on_error might be 1, so I think you can either add 64
byte fudge factor, like before, or do Max(log_parameter_max_length_on_error, 2).

That's the code I reused without deep analysis TBH.
I think it's mostly for to allocate the space for the ellipsis in case 
it needs to be added,

not to copy any actual characters, that's why we add.


+   }
+   else
+   
knownTextValues[paramno] = pstrdup(pstring);

I suggest to handle the "== -1" case first and the > 0 case as "else".

Good, as long as I thought of this too, I'm changing that.

Best, Alex
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 2de21903a1..21b65f2791 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -6690,29 +6690,47 @@ log_line_prefix = '%m [%p] %q%u@%d/%a '
   
  
 
- 
-  log_parameters_on_error (boolean)
+ 
+  log_parameter_max_length (integer)
   
-   log_parameters_on_error configuration parameter
+   log_parameter_max_length configuration parameter
   
   
   

-Controls whether bind parameters are logged when a statement is logged
-as a result of .
-It adds some overhead, as PostgreSQL will
-compute and store textual representations of parameter values in
-memory for all statements, even if they eventually do not get logged.
-This setting has no effect on statements logged due to
- or
- settings, as they are always logged
-with parameters.
-The default is off.
+If greater than zero, each bind parameter value reported in non-error
+statement-logging messages is trimmed to this many bytes.
+If this value is specified without units, it is taken as bytes.
+Zero disables logging parameters with statements.
+-1 (the default) makes parameters logged in full.
 Only superusers can change this

Re: control max length of parameter values logged

2020-04-01 Thread Alexey Bashtanov

Hi,

The privilege argument seems irrelevant to me.  We already decided
that the plan is (a) SUSET for non-error statement logging purposes and
(b) USERSET for logging caused by errors, and that would have to apply
to length limits as well as enable/disable ability.  Otherwise a user
could pretty effectively disable logging by setting the length to 1.
The only privilege that user can gain if we drop the boolean is to 
*enable* logging parameters on error.
That gives user a little bit easier way to fill up the disk with logs, 
but they anyway can do that if they want to.

If that's okay with everyone, please see the new version attached.

Best, Alex
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 2de21903a1..3d7db57d74 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -6690,29 +6690,48 @@ log_line_prefix = '%m [%p] %q%u@%d/%a '
   
  
 
- 
-  log_parameters_on_error (boolean)
+ 
+  log_parameter_max_length (integer)
   
-   log_parameters_on_error configuration parameter
+   log_parameter_max_length configuration parameter
   
   
   

-Controls whether bind parameters are logged when a statement is logged
-as a result of .
-It adds some overhead, as PostgreSQL will
-compute and store textual representations of parameter values in
-memory for all statements, even if they eventually do not get logged.
-This setting has no effect on statements logged due to
- or
- settings, as they are always logged
-with parameters.
-The default is off.
+If greater than zero, bind parameter values reported in non-error
+statement-logging messages are trimmed to no more than this many bytes.
+If this value is specified without units, it is taken as bytes.
+Zero disables logging parameters with statements.
+-1 (the default) makes parameters logged in full.
 Only superusers can change this setting.

   
  
 
+ 
+  log_parameter_max_length_on_error (integer)
+  
+   log_parameter_max_length_on_error configuration parameter
+  
+  
+  
+   
+If greater than zero, bind parameter values reported in error messages
+are trimmed to no more than this many bytes.
+If this value is specified without units, it is taken as bytes.
+Zero (the default) disables logging parameters on error.
+-1 makes parameters logged in full.
+
+This setting only affects statements logged as a result of
+.
+Non-zero values add some overhead, as
+PostgreSQL will compute and store textual
+representations of parameter values in memory for all statements,
+even if they eventually do not get logged.
+   
+  
+ 
+
  
   log_statement (enum)
   
diff --git a/src/backend/nodes/params.c b/src/backend/nodes/params.c
index a57f9eea76..77328e17ff 100644
--- a/src/backend/nodes/params.c
+++ b/src/backend/nodes/params.c
@@ -269,7 +269,7 @@ RestoreParamList(char **start_address)
  * can contain NULLs for any unknown individual values.  NULL can be given if
  * no parameters are known.
  *
- * If maxlen is not zero, that's the maximum number of bytes of any one
+ * If maxlen is not -1, that's the maximum number of bytes of any one
  * parameter value to be printed; an ellipsis is added if the string is
  * longer.  (Added quotes are not considered in this calculation.)
  */
@@ -280,6 +280,7 @@ BuildParamLogString(ParamListInfo params, char **knownTextValues, int maxlen)
 oldCxt;
 	StringInfoData buf;
 
+	Assert(maxlen == -1 || maxlen > 0);
 	/*
 	 * NB: think not of returning params->paramValuesStr!  It may have been
 	 * generated with a different maxlen, and so be unsuitable.  Besides that,
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 5b677863b9..cbe4907e61 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -1838,7 +1838,7 @@ exec_bind_message(StringInfo input_message)
  */
 if (pstring)
 {
-	if (log_parameters_on_error)
+	if (log_parameter_max_length_on_error != 0)
 	{
 		MemoryContext	oldcxt;
 
@@ -1846,13 +1846,24 @@ exec_bind_message(StringInfo input_message)
 		if (knownTextValues == NULL)
 			knownTextValues =
 palloc0(numParams * sizeof(char *));
-		/*
-		 * Note: must copy at least two more full characters
-		 * than BuildParamLogString wants to see; otherwise it
-		 * might fail to include the ellipsis.
-		 */
-		knownTextValues[paramno] =
-			pnstrdup(pstring, 64 + 2 * MAX_MULTIBYTE_CHAR_LEN);
+
+		if (log_parameter_max_length_on_error > 0)
+		{
+			/*
+			 * We can trim the saved string, knowing that we
+			 * won't print all of it.  But we must copy at
+			 * least two more full characters than
+		

Re: control max length of parameter values logged

2020-04-01 Thread Alexey Bashtanov

Hi,

+If greater than zero, bind parameter values reported in non-error
+statement-logging messages are trimmed to no more than this many bytes.

Can I suggest to say:

"Limit bind parameter values reported by non-error statement-logging messages
to this many bytes". Or,

"The maximum length of bind parameter values to log with non-error
statement-logging messages".

Okay I'll rephrase.

Could you make zero a normal value and -1 the "special" value to disable
trimming ?

Setting to zero will avoid displaying parameters at all, setting to -1 wil
display values in full.
I can, but then for the sake of consistency I'll have to do the same for 
log_parameter_max_length.
Then we'll end up with two ways to enable/disable parameter logging on 
error:

using the primary boolean setting and setting length to 0.
One of them will require superuser privileges, the other one won't.
Do you think it's okay? I have no objections but I'm a bit worried 
someone may find it clumsy.


Best, Alex




Re: control max length of parameter values logged

2020-03-31 Thread Alexey Bashtanov



Also agreed.  It's been like it is for a long time with not that
many complaints, so the case for changing the default behavior
seems a bit weak.

Barring other opinions, I think we have consensus here on what
to do.  Alexey, will you update your patch?


Sorry for the delay, please could you have a look?

Best, Alex
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 2de21903a1..35d420e026 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -6713,6 +6713,38 @@ log_line_prefix = '%m [%p] %q%u@%d/%a '
   
  
 
+ 
+  log_parameter_max_length (integer)
+  
+   log_parameter_max_length configuration parameter
+  
+  
+  
+   
+If greater than zero, bind parameter values reported in non-error
+statement-logging messages are trimmed to no more than this many bytes.
+If this value is specified without units, it is taken as bytes.
+Zero (the default) disables trimming. Only superusers can change this setting.
+   
+  
+ 
+
+ 
+  log_parameter_max_length_on_error (integer)
+  
+   log_parameter_max_length_on_error configuration parameter
+  
+  
+  
+   
+If greater than zero, bind parameter values reported in error messages
+are trimmed to no more than this many bytes.
+If this value is specified without units, it is taken as bytes.
+Zero (the default) disables trimming.
+   
+  
+ 
+
  
   log_statement (enum)
   
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 5b677863b9..ea2fe17e0b 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -1846,13 +1846,24 @@ exec_bind_message(StringInfo input_message)
 		if (knownTextValues == NULL)
 			knownTextValues =
 palloc0(numParams * sizeof(char *));
-		/*
-		 * Note: must copy at least two more full characters
-		 * than BuildParamLogString wants to see; otherwise it
-		 * might fail to include the ellipsis.
-		 */
-		knownTextValues[paramno] =
-			pnstrdup(pstring, 64 + 2 * MAX_MULTIBYTE_CHAR_LEN);
+
+		if (log_parameter_max_length_on_error > 0)
+		{
+			/*
+			 * We can trim the saved string, knowing that we
+			 * won't print all of it.  But we must copy at
+			 * least two more full characters than
+			 * BuildParamLogString wants to use; otherwise it
+			 * might fail to include the trailing ellipsis.
+			 */
+			knownTextValues[paramno] =
+pnstrdup(pstring,
+		 log_parameter_max_length_on_error
+		 + 2 * MAX_MULTIBYTE_CHAR_LEN);
+		}
+		else
+			knownTextValues[paramno] = pstrdup(pstring);
+
 		MemoryContextSwitchTo(oldcxt);
 	}
 	if (pstring != pbuf.data)
@@ -1915,7 +1926,9 @@ exec_bind_message(StringInfo input_message)
 		 */
 		if (log_parameters_on_error)
 			params->paramValuesStr =
-BuildParamLogString(params, knownTextValues, 64);
+BuildParamLogString(params,
+	knownTextValues,
+	log_parameter_max_length_on_error);
 	}
 	else
 		params = NULL;
@@ -2404,7 +2417,7 @@ errdetail_params(ParamListInfo params)
 	{
 		char   *str;
 
-		str = BuildParamLogString(params, NULL, 0);
+		str = BuildParamLogString(params, NULL, log_parameter_max_length);
 		if (str && str[0] != '\0')
 			errdetail("parameters: %s", str);
 	}
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 79bc7ac8ca..a3903d1b16 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -544,6 +544,8 @@ int			log_min_messages = WARNING;
 int			client_min_messages = NOTICE;
 int			log_min_duration_sample = -1;
 int			log_min_duration_statement = -1;
+int			log_parameter_max_length = 0;
+int			log_parameter_max_length_on_error = 0;
 int			log_temp_files = -1;
 double		log_statement_sample_rate = 1.0;
 double		log_xact_sample_rate = 0;
@@ -2855,6 +2857,28 @@ static struct config_int ConfigureNamesInt[] =
 		NULL, NULL, NULL
 	},
 
+	{
+		{"log_parameter_max_length", PGC_SUSET, LOGGING_WHAT,
+			gettext_noop("When logging statements, limit logged parameter values to first N bytes."),
+			gettext_noop("Zero to print values in full."),
+			GUC_UNIT_BYTE
+		},
+		&log_parameter_max_length,
+		0, 0, INT_MAX / 2,
+		NULL, NULL, NULL
+	},
+
+	{
+		{"log_parameter_max_length_on_error", PGC_USERSET, LOGGING_WHAT,
+			gettext_noop("When reporting an error, limit logged parameter values to first N bytes."),
+			gettext_noop("Zero to print values in full."),
+			GUC_UNIT_BYTE
+		},
+		&log_parameter_max_length_on_error,
+		0, 0, INT_MAX / 2,
+		NULL, NULL, NULL
+	},
+
 	{
 		{"bgwriter_delay", PGC_SIGHUP, RESOURCES_BGWRITER,
 			gettext_noop("Background writer sleep time between rounds."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index e9f8ca775d..8899f174c7 100644
--- a/src/backend/utils/mis

Re: Less-silly selectivity for JSONB matching operators

2020-03-31 Thread Alexey Bashtanov

On 31/03/2020 18:53, Tom Lane wrote:

Renamed "matchsel" to "matchingsel" etc, added DEFAULT_MATCHING_SEL,
rebased over commit 911e70207.  Since that commit already created
new versions of the relevant contrib modules, I think we can just
redefine what those versions contain, rather than making yet-newer
versions.  (Of course, that assumes we're going to include this in
v13.)


Looks good to me.

Best, Alex




Re: Less-silly selectivity for JSONB matching operators

2020-03-31 Thread Alexey Bashtanov




I was wondering about DEFAULT_MATCHING_SEL.  The difference in purpose
from DEFAULT_MATCH_SEL wouldn't be too obvious, but then it probably
wouldn't be anyway.

Fine with me, especially if both new functions are renamed accordingly.

Best, Alex




Re: Less-silly selectivity for JSONB matching operators

2020-03-31 Thread Alexey Bashtanov




So that leaves us with needing
to find a better name for this new one.  Any ideas?

I'm thinking of something wide like
opersel, operjoinsel, DEFAULT_OPER_SEL
or maybe even
genericsel, genericjoinsel, DEFAULT_GENERIC_SEL

Best, Alex




Re: Less-silly selectivity for JSONB matching operators

2020-03-31 Thread Alexey Bashtanov

Quickly tested like this:

create table t(a jsonb);
insert into t select jsonb_object( array[(random() * 10)::int::text], 
'{" "}') from generate_series(1, 10);
insert into t select jsonb_object( array[(random() * 10)::int::text], 
array[(random() * 1000)::int::text]) from generate_series(1, 10);

explain analyze select * from t where a ? '1';
analyze t;
explain analyze select * from t where a ? '1';

Best, Alex




Re: Less-silly selectivity for JSONB matching operators

2020-03-31 Thread Alexey Bashtanov

Hi Tom,

The patches look entirely reasonable to me.
The second one needs to be rebased.

I like the idea of stubbing matchjoinsel for now,
as well as being careful with operators that may correlate with sort 
orderings.


The only little thing I can think of is hardcoding it as 2 * DEFAULT_EQ_SEL.
While I don't have any arguments against the value itself I think it 
should be configurable independently.

Sadly DEFAULT_MATCH_SEL name is already taken for text patterns.
Not sure if it's a reason to rename all the stuff.

Best, Alex




Small docs bugfix: make it clear what can be used in UPDATE FROM and DELETE USING

2020-02-13 Thread Alexey Bashtanov

Hello,

Currently the documentation says that one can put "a list of table 
expressions"

after FROM in UPDATE or after USING in DELETE.
However, "table expression" is defined as a complex of
FROM, WHERE, GROUP BY and HAVING clauses [1].
The thing one can list in the FROM clause in a comma-separated manner
is called a table reference [2].
SELECT reference does not use this term but explains what they could be [3].

Please could someone have a look at the patch attached?
It's not just pedantry but rather based on a real-life example of 
someone reading and being not sure

whether e.g. joins can be used in there.

Best, Alex

[1] https://www.postgresql.org/docs/devel/queries-table-expressions.html
[2] 
https://www.postgresql.org/docs/devel/queries-table-expressions.html#QUERIES-FROM

[3] https://www.postgresql.org/docs/devel/sql-select.html#SQL-FROM
diff --git a/doc/src/sgml/ref/delete.sgml b/doc/src/sgml/ref/delete.sgml
index df8cea48cf..3f9e067b47 100644
--- a/doc/src/sgml/ref/delete.sgml
+++ b/doc/src/sgml/ref/delete.sgml
@@ -120,9 +120,9 @@ DELETE FROM [ ONLY ] table_name [ *
 using_list
 
  
-  A list of table expressions, allowing columns from other tables
+  A list of table references, allowing columns from other tables
   to appear in the WHERE condition.  This is similar
-  to the list of tables that can be specified in the  of a
   SELECT statement; for example, an alias for
   the table name can be specified.  Do not repeat the target table
diff --git a/doc/src/sgml/ref/update.sgml b/doc/src/sgml/ref/update.sgml
index f58dcd8877..44815cc654 100644
--- a/doc/src/sgml/ref/update.sgml
+++ b/doc/src/sgml/ref/update.sgml
@@ -167,10 +167,10 @@ UPDATE [ ONLY ] table_name [ * ] [
 from_list
 
  
-  A list of table expressions, allowing columns from other tables
+  A list of table references, allowing columns from other tables
   to appear in the WHERE condition and the update
-  expressions. This is similar to the list of tables that can be
-  specified in the  of a SELECT
   statement.  Note that the target table must not appear in the
   from_list, unless you intend a self-join (in which


Re: improve transparency of bitmap-only heap scans

2020-02-09 Thread Alexey Bashtanov




I kinda suspect one of the ressons why this got so little attention is
that it was never added to any CF.


Thanks Tomas, I've created a CF entry 
https://commitfest.postgresql.org/27/2443/


Best, Alex





Re: improve transparency of bitmap-only heap scans

2020-02-07 Thread Alexey Bashtanov

Hello,


It took me a while to figure out what those names mean.  "unfetched",
as you call it on the code, may be more descriptive than "avoided" for
the new label.  However I think the other two are more confusing.  It
may be a good idea to change them together with this.

It'll be sad if this patch is forgotten only because of the words choice.
I've changed it all to "unfetched" for at least not to call the same 
thing differently
in the code and in the output, and also rebased it and fit in 80 lines 
width limit.


Best, Alex
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index d901dc4a50..c7200d2a21 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -2777,6 +2777,8 @@ show_tidbitmap_info(BitmapHeapScanState *planstate, ExplainState *es)
 {
 	if (es->format != EXPLAIN_FORMAT_TEXT)
 	{
+		ExplainPropertyInteger("Unfetched Heap Blocks", NULL,
+			   planstate->unfetched_pages, es);
 		ExplainPropertyInteger("Exact Heap Blocks", NULL,
 			   planstate->exact_pages, es);
 		ExplainPropertyInteger("Lossy Heap Blocks", NULL,
@@ -2784,10 +2786,14 @@ show_tidbitmap_info(BitmapHeapScanState *planstate, ExplainState *es)
 	}
 	else
 	{
-		if (planstate->exact_pages > 0 || planstate->lossy_pages > 0)
+		if (planstate->unfetched_pages > 0 || planstate->exact_pages > 0
+		   || planstate->lossy_pages > 0)
 		{
 			ExplainIndentText(es);
 			appendStringInfoString(es->str, "Heap Blocks:");
+			if (planstate->unfetched_pages > 0)
+appendStringInfo(es->str, " unfetched=%ld",
+	planstate->unfetched_pages);
 			if (planstate->exact_pages > 0)
 appendStringInfo(es->str, " exact=%ld", planstate->exact_pages);
 			if (planstate->lossy_pages > 0)
diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c
index ae8a11da30..0456f8592b 100644
--- a/src/backend/executor/nodeBitmapHeapscan.c
+++ b/src/backend/executor/nodeBitmapHeapscan.c
@@ -231,17 +231,20 @@ BitmapHeapNext(BitmapHeapScanState *node)
  * node->return_empty_tuples.
  */
 node->return_empty_tuples = tbmres->ntuples;
+node->unfetched_pages++;
 			}
 			else if (!table_scan_bitmap_next_block(scan, tbmres))
 			{
 /* AM doesn't think this block is valid, skip */
 continue;
 			}
-
-			if (tbmres->ntuples >= 0)
-node->exact_pages++;
 			else
-node->lossy_pages++;
+			{
+if (tbmres->ntuples >= 0)
+	node->exact_pages++;
+else
+	node->lossy_pages++;
+			}
 
 			/* Adjust the prefetch target */
 			BitmapAdjustPrefetchTarget(node);
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 5d5b38b879..0bd75c329c 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1564,6 +1564,7 @@ typedef struct ParallelBitmapHeapState
  *		pvmbuffer		   ditto, for prefetched pages
  *		exact_pages		   total number of exact pages retrieved
  *		lossy_pages		   total number of lossy pages retrieved
+ *		unfetched_pages	   total number of pages not retrieved due to vm
  *		prefetch_iterator  iterator for prefetching ahead of current page
  *		prefetch_pages	   # pages prefetch iterator is ahead of current
  *		prefetch_targetcurrent target prefetch distance
@@ -1588,6 +1589,7 @@ typedef struct BitmapHeapScanState
 	Buffer		pvmbuffer;
 	long		exact_pages;
 	long		lossy_pages;
+	long		unfetched_pages;
 	TBMIterator *prefetch_iterator;
 	int			prefetch_pages;
 	int			prefetch_target;


control max length of parameter values logged

2020-02-07 Thread Alexey Bashtanov

Hello

Patch ba79cb5 (for full discussion see [1]) introduces a feature to log 
bind parameter values on error,
which greatly helps to reproduce errors artificially having only server 
log -- thanks everyone who

reviewed and improved it!

However, it cuts the values, as some of the reviewers were worried log 
could fill up too quickly.
This applies both to the new case of logging parameter values and to the 
existing ones due to

log_min_duration_statement or log_statement.

This is a backwards-incompatible change, and also ruins the idea of 
reproducible errors -- sorry Tom

I failed to second this idea [2] in time, before the change was pushed.

I personally don't think that we necessarily need to cut the values, we 
can rely on the users
being careful when using this feature -- in the same way we trusted them 
use similarly dangerous
log_min_duration_statement and especially log_statement for ages. At 
least it's better than having
no option to disable it. Alvaro's opinion was different [3]. What do you 
think
of adding a parameter to limit max logged parameter length? See patch 
attached.


Best, Alex

[1] https://postgr.es/m/0146a67b-a22a-0519-9082-bc29756b9...@imap.cc
[2] https://postgr.es/m/11425.1575927321%40sss.pgh.pa.us
[3] https://postgr.es/m/20191209200531.GA19848@alvherre.pgsql
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index c1128f89ec..0f40246c2d 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -6645,6 +6645,28 @@ log_line_prefix = '%m [%p] %q%u@%d/%a '
   
  
 
+ 
+  log_parameter_max_length (boolean)
+  
+   log_parameter_max_length configuration parameter
+  
+  
+  
+   
+Controls whether to log parameters in full or cut them to a certain length.
+Bind parameters can appear in the log as a result of 
+,
+ or
+
+settings.
+Zero (the default) disables cutting.
+If this value is specified without units, it is taken as bytes.
+Due to multibyte character and ellipsis, truncated values may be slightly shorter.
+Only superusers can change this setting.
+   
+  
+ 
+
  
   log_statement (enum)
   
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 0a6f80963b..acc31899d6 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -1839,13 +1839,22 @@ exec_bind_message(StringInfo input_message)
 		if (knownTextValues == NULL)
 			knownTextValues =
 palloc0(numParams * sizeof(char *));
-		/*
-		 * Note: must copy at least two more full characters
-		 * than BuildParamLogString wants to see; otherwise it
-		 * might fail to include the ellipsis.
-		 */
-		knownTextValues[paramno] =
-			pnstrdup(pstring, 64 + 2 * MAX_MULTIBYTE_CHAR_LEN);
+
+		if (log_parameter_max_length != 0)
+		{
+			/*
+			* Note: must copy at least two more full characters
+			* than BuildParamLogString wants to see;
+			* otherwise it might fail to include the ellipsis.
+			*/
+			knownTextValues[paramno] =
+pnstrdup(pstring,
+		 log_parameter_max_length
+			+ 2 * MAX_MULTIBYTE_CHAR_LEN);
+		}
+		else
+			knownTextValues[paramno] = pstrdup(pstring);
+
 		MemoryContextSwitchTo(oldcxt);
 	}
 	if (pstring != pbuf.data)
@@ -1908,7 +1917,9 @@ exec_bind_message(StringInfo input_message)
 		 */
 		if (log_parameters_on_error)
 			params->paramValuesStr =
-BuildParamLogString(params, knownTextValues, 64);
+BuildParamLogString(params,
+	knownTextValues,
+	log_parameter_max_length);
 	}
 	else
 		params = NULL;
@@ -2397,7 +2408,7 @@ errdetail_params(ParamListInfo params)
 	{
 		char   *str;
 
-		str = BuildParamLogString(params, NULL, 0);
+		str = BuildParamLogString(params, NULL, log_parameter_max_length);
 		if (str && str[0] != '\0')
 			errdetail("parameters: %s", str);
 	}
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 8228e1f390..f27efc6c24 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -543,6 +543,7 @@ int			log_min_messages = WARNING;
 int			client_min_messages = NOTICE;
 int			log_min_duration_sample = -1;
 int			log_min_duration_statement = -1;
+int			log_parameter_max_length = 0;
 int			log_temp_files = -1;
 double		log_statement_sample_rate = 1.0;
 double		log_xact_sample_rate = 0;
@@ -2834,6 +2835,17 @@ static struct config_int ConfigureNamesInt[] =
 		NULL, NULL, NULL
 	},
 
+	{
+		{"log_parameter_max_length", PGC_SUSET, LOGGING_WHAT,
+			gettext_noop("When logging a parameter value, only log first N bytes."),
+			gettext_noop("Zero to print values in full."),
+			GUC_UNIT_BYTE
+		},
+		&log_parameter_max_length,
+		0, 0, INT_MAX,
+		NULL, NULL, NULL
+	},
+
 	{
 		{"bgwriter_delay", PGC_SIGHUP, RESOURCES_BGWRITER,
 			gettext_noop("Background writer sleep time between rounds."),
diff -

Re: log bind parameter values on error

2019-12-02 Thread Alexey Bashtanov

Patch file name is not great, changing it.

On 30/11/2019 12:35, Alexey Bashtanov wrote:
I've implemented it using error contexts, please could you have a look 
at the patch attached?


diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 4ec13f3311..b3a0d27861 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -6597,6 +6597,29 @@ log_line_prefix = '%m [%p] %q%u@%d/%a '
   
  
 
+ 
+  log_parameters_on_error (boolean)
+  
+   log_parameters_on_error configuration parameter
+  
+  
+  
+   
+Controls whether bind parameters are logged when a statement is logged
+as a result of .
+It adds some overhead, as postgres will compute and store textual
+representations of parameter values in memory for all statements,
+even if they eventually do not get logged.
+This setting has no effect on statements logged due to
+ or
+ settings, as they are always logged
+with parameters.
+The default is off.
+Only superusers can change this setting.
+   
+  
+ 
+
  
   log_statement (enum)
   
diff --git a/src/backend/nodes/params.c b/src/backend/nodes/params.c
index cf4387e40f..7c1f86f5c5 100644
--- a/src/backend/nodes/params.c
+++ b/src/backend/nodes/params.c
@@ -15,12 +15,16 @@
 
 #include "postgres.h"
 
+#include "lib/stringinfo.h"
 #include "nodes/bitmapset.h"
 #include "nodes/params.h"
 #include "storage/shmem.h"
 #include "utils/datum.h"
+#include "utils/guc.h"
 #include "utils/lsyscache.h"
+#include "utils/memutils.h"
 
+static void LogParams(void *arg);
 
 /*
  * Allocate and initialize a new ParamListInfo structure.
@@ -44,6 +48,7 @@ makeParamList(int numParams)
 	retval->paramCompileArg = NULL;
 	retval->parserSetup = NULL;
 	retval->parserSetupArg = NULL;
+	retval->logString = NULL;
 	retval->numParams = numParams;
 
 	return retval;
@@ -58,6 +63,8 @@ makeParamList(int numParams)
  * set of parameter values.  If dynamic parameter hooks are present, we
  * intentionally do not copy them into the result.  Rather, we forcibly
  * instantiate all available parameter values and copy the datum values.
+ *
+ * We also don't copy logString.
  */
 ParamListInfo
 copyParamList(ParamListInfo from)
@@ -221,6 +228,8 @@ SerializeParamList(ParamListInfo paramLI, char **start_address)
  * set of parameter values.  If dynamic parameter hooks are present, we
  * intentionally do not copy them into the result.  Rather, we forcibly
  * instantiate all available parameter values and copy the datum values.
+ *
+ * We also don't copy logString.
  */
 ParamListInfo
 RestoreParamList(char **start_address)
@@ -251,3 +260,152 @@ RestoreParamList(char **start_address)
 
 	return paramLI;
 }
+
+/*
+ * BuildParamLogString - create a string to represent parameters list for
+ * logging and save it to params. If caller already knows textual
+ * representations for some or all parameters, it can pass an array of exactly
+ * params->numParams values as knownTextValues. It can contain NULLs for the
+ * individual values not known, or, if no text text values known at all the
+ * caller may pass NULL pointer.
+ */
+void
+BuildParamLogString(ParamListInfo params, char **knownTextValues)
+{
+	MemoryContext tmpCxt,
+  oldCxt;
+	int logStringLen = 0;
+	StringInfoData logString;
+	char **textValues;
+
+	if (params->logString != NULL)
+		return;
+
+	Assert(params->paramFetch == NULL);
+
+	initStringInfo(&logString);
+
+	tmpCxt = AllocSetContextCreate(CurrentMemoryContext,
+   "BuildParamLogString",
+   ALLOCSET_DEFAULT_SIZES);
+	oldCxt = MemoryContextSwitchTo(tmpCxt);
+
+	textValues = (char **) palloc0(params->numParams * sizeof(char *));
+
+	/* calculate unknown text representations and pre-calculate byte lengths */
+	for (int paramno = 0; paramno < params->numParams; paramno++)
+	{
+		ParamExternData param = params->params[paramno];
+
+		/* reserve the space for the number of digits in paramno + 1 */
+		for (int d = paramno + 1; d > 0; d /= 10)
+			logStringLen++;
+
+		if (param.isnull)
+		{
+			logStringLen += 4; /* for "NULL" */
+		}
+		else
+		{
+			char *textValue,
+ *s;
+
+			/* find out textValues[paramno] */
+			if (knownTextValues != NULL && knownTextValues[paramno] != NULL)
+			{
+textValues[paramno] = knownTextValues[paramno];
+			}
+			else
+			{
+Oid		typoutput;
+bool	typisvarlena;
+getTypeOutputInfo(param.ptype, &typoutput, &typisvarlena);
+textValues[paramno] =
+OidOutputFunctionCall(typoutput, param.value);
+			}
+
+			/* caluclate the space needed its printed length */
+			textValue = textValues[paramno];
+			for (s = textValue; *s != '\0'; s++)
+if (*s == '\'

Re: log bind parameter values on error

2019-11-30 Thread Alexey Bashtanov


Hi,

I'm sorry for replying so late.


I don't think those really are contradictions. You can continue to have
an errdetail_params(), and but call it from the error context callback
set up in the portal code

...

Even leaving that aside, I'm *STRONGLY* against entangling elog.c with
query execution details like ParamListInfo. We should work to make
elog.c know less about different parts of the system, not more.
I've implemented it using error contexts, please could you have a look 
at the patch attached?


I did not use the PG_TRY blocks in portal code because they are not wide 
enough.

Otherwise, we wouldn't catch errors that can happen in GetCachedPlan.
Instead I added an item to error context chain in postgres.c 
exec_*_message routines.
I can't find why it could be unsafe, but I'm still a bit worried about 
it as there's no

other error context changes that early in the call stack.

I've also made string formatting more efficient, and pre-calculate
the whole string instead of individual values as suggested. Unfortunately
it still copies the error text when reporting -- I'm not sure how I 
change it

without substantial change of error logging infrastructure.

One more concern I'd like to ask for opinions. I'm using errdetail_log
for parameters logging, as it's the only way to make it appear in the 
log only

but not get sent to the client. It looks a bit awkward, as it appears like
ERROR
DETAIL
CONTEXT
STATEMENT,
where CONTEXT may in fact be something inner nested than the parameters
that appear in DETAILS.
With this regards, I'm thinking of adding some special arrangements into 
elog.c.
One idea is a econtext_log to add a log-only data into CONTEXT, however 
it'll still log

params above the statement, although directly above.
Another option is a special estatementparams (or perhaps 
efinalcontext_log?) to log

below the statement.
What do you think?

2 Alvaro:

So, if some parameters are large (they can be up to 1 GB-1, remember)
then we can bloat the log file severely.  I think we need to place an
upper limit on the strings that we're going to log -- as inspiration,
callers of ExecBuildValueDescription uses 64 chars per value maximum.
Something like that seems reasonable.  So I think you need to add some
pg_mbcliplen() calls in a couple of places in exec_bind_message.


I like the idea, but I don't think it's directly related to the change
proposed. We are already logging parameters in certain situations
with no limits on their lenghts. Given the time it takes to get the change
through (not least because of me addressing reviewers' comments
really slowly) I'd not include it in the patch. Do you find it acceptable?

Best, Alex
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 4ec13f3311..b3a0d27861 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -6597,6 +6597,29 @@ log_line_prefix = '%m [%p] %q%u@%d/%a '
   
  
 
+ 
+  log_parameters_on_error (boolean)
+  
+   log_parameters_on_error configuration parameter
+  
+  
+  
+   
+Controls whether bind parameters are logged when a statement is logged
+as a result of .
+It adds some overhead, as postgres will compute and store textual
+representations of parameter values in memory for all statements,
+even if they eventually do not get logged.
+This setting has no effect on statements logged due to
+ or
+ settings, as they are always logged
+with parameters.
+The default is off.
+Only superusers can change this setting.
+   
+  
+ 
+
  
   log_statement (enum)
   
diff --git a/src/backend/nodes/params.c b/src/backend/nodes/params.c
index cf4387e40f..7c1f86f5c5 100644
--- a/src/backend/nodes/params.c
+++ b/src/backend/nodes/params.c
@@ -15,12 +15,16 @@
 
 #include "postgres.h"
 
+#include "lib/stringinfo.h"
 #include "nodes/bitmapset.h"
 #include "nodes/params.h"
 #include "storage/shmem.h"
 #include "utils/datum.h"
+#include "utils/guc.h"
 #include "utils/lsyscache.h"
+#include "utils/memutils.h"
 
+static void LogParams(void *arg);
 
 /*
  * Allocate and initialize a new ParamListInfo structure.
@@ -44,6 +48,7 @@ makeParamList(int numParams)
 	retval->paramCompileArg = NULL;
 	retval->parserSetup = NULL;
 	retval->parserSetupArg = NULL;
+	retval->logString = NULL;
 	retval->numParams = numParams;
 
 	return retval;
@@ -58,6 +63,8 @@ makeParamList(int numParams)
  * set of parameter values.  If dynamic parameter hooks are present, we
  * intentionally do not copy them into the result.  Rather, we forcibly
  * instantiate all available parameter values and copy the datum values.
+ *
+ * We also don't copy logString.
  */
 ParamListInfo
 copyParamList(ParamListInfo from)
@@ -221,6 +228,8 @@ SerializeParamList(ParamListInfo paramLI, char **start_address)
  * set of parameter values.  If dynamic parameter hooks are present, we
  * intentionally do not copy them in

Re: log bind parameter values on error

2019-11-05 Thread Alexey Bashtanov

Hi Anders and Alvaro,

I must have missed this conversation branch when sending in v011.
Sorry about that.

> I think it might be worthwhile to cross-reference
> log_min_error_statement, as log_parameters_on_error will only take 
effect when the

> statement is logged due to log_min_error_statement.

Agreed, added some clarification.

> I don't think "cache" is the right descriptor. Usually caching implies
> trying to make repeated tasks faster, which isn't the case here.

Well, potentially it can, if we set log_min_error_statement to something
lower than error and emit tons of warnings. But it's not the primary use
case, so I just replaced it by the word "save".



What I'm suggesting is that PortalStart() would build a string
representation out of the parameter list (using
ParamExternData.textValue if set, calling the output function
otherwise), and stash that in the portal.

At the start of all the already existing PG_TRY blocks in pquery.c we
push an element onto the error_context_stack that adds the errdetail
with the parameters to the error.  Alternatively we could also add them
in in the PG_CATCH blocks, but that'd only work for elevel == ERROR
(i.e. neither FATAL nor non throwing log levels would be able to enrich
the error).


I'm a bit worried about going this way, as it makes us log
the query and its parameters too far apart in the code,
and it's trickier to make sure we never log parameters without the query.

I think logging the parameters should not be part of error_context_stack,
but rather a primary part of logging facility itself, like statement.
That's because whether we want to log parameters depends
on print_stmt in elog.c. With print_stmt being computed upon edata,
I'm not sure how to work it out nicely.


Thinking about this: I think the current approach doesn't actually
handle recursive errors correctly. Even if we fail to emit the error
message due to the parameter details requiring a lot of memory, we'd
again do so (and again fail) when handling that OOM error, because
current_top_portal afaict would still be set.  At the very least this'd
need to integrate with the recursion_depth logic in elog.c.

We handle it the same way as we do it for debug_query_string itself:

        if (in_error_recursion_trouble())
        {
            error_context_stack = NULL;
            debug_query_string = NULL;
            current_top_portal = NULL;
        }


I'm attaching the patch with docs fixes.

Best regards,
  Alexey

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 0191ec84b1..832ebe57a3 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -6492,6 +6492,29 @@ log_line_prefix = '%m [%p] %q%u@%d/%a '
   
  
 
+ 
+  log_parameters_on_error (boolean)
+  
+   log_parameters_on_error configuration parameter
+  
+  
+  
+   
+Controls whether bind parameters are logged when a statement is logged
+as a result of .
+It adds some overhead, as postgres will compute and store textual
+representations of parameter values in memory for all statements,
+even if they eventually do not get logged.
+This setting has no effect on statements logged due to
+ or
+ settings, as they are always logged
+with parameters.
+The default is off.
+Only superusers can change this setting.
+   
+  
+ 
+
  
   log_statement (enum)
   
diff --git a/src/backend/commands/prepare.c b/src/backend/commands/prepare.c
index 7e0a041fab..7692ea551b 100644
--- a/src/backend/commands/prepare.c
+++ b/src/backend/commands/prepare.c
@@ -406,7 +406,7 @@ EvaluateParams(PreparedStatement *pstmt, List *params,
 		prm->value = ExecEvalExprSwitchContext(n,
 			   GetPerTupleExprContext(estate),
 			   &prm->isnull);
-
+		prm->textValue = NULL;
 		i++;
 	}
 
diff --git a/src/backend/executor/functions.c b/src/backend/executor/functions.c
index 83337c2eed..92fcee8b19 100644
--- a/src/backend/executor/functions.c
+++ b/src/backend/executor/functions.c
@@ -925,6 +925,7 @@ postquel_sub_params(SQLFunctionCachePtr fcache,
 			prm->isnull = fcinfo->args[i].isnull;
 			prm->pflags = 0;
 			prm->ptype = fcache->pinfo->argtypes[i];
+			prm->textValue = NULL;
 		}
 	}
 	else
diff --git a/src/backend/executor/spi.c b/src/backend/executor/spi.c
index 2c0ae395ba..f6b4246b47 100644
--- a/src/backend/executor/spi.c
+++ b/src/backend/executor/spi.c
@@ -2458,6 +2458,7 @@ _SPI_convert_params(int nargs, Oid *argtypes,
 			prm->isnull = (Nulls && Nulls[i] == 'n');
 			prm->pflags = PARAM_FLAG_CONST;
 			prm->ptype = argtypes[i];
+			prm->textValue = NULL;
 		}
 	}
 	else
diff --git a/src/backend/nodes/params.c b/src/backend/nodes/params.c
index cf4387e40f..bf8e3705af 100644
--- a/src/backend/nodes/params.c
+++ b/src/backend/nodes/params.c
@@ -45,6 +45,7 @@ makeParamList(int numParams)
 	retval->parserSetup = NULL;
 	retval->parserSetupArg = NULL;
 	retval->

Re: log bind parameter values on error

2019-09-27 Thread Alexey Bashtanov

Hello Alvaro,


I didn't like abusing testlibpq3.c for your new stuff, so I moved it off
to a new file testlibpq5.c.  I cleaned up a few other cosmetics things
about this -- v10 attached.

Thanks for doing this.


I eventually noticed that this patch fails
to initialize each param's textValue to NULL,
I've added the changes to set textValue to NULL for each parameter after 
calling makeParamList.
I think it's the best option, as it's natural to populate text 
representations (albeit with NULLs)

in the same loop as we write the other parameters attributes.

It still requires us to set hasTextValues afterwards, as it doesn't seem 
practical to me to make
two loops, first null them all and next populate them: if there's a 
parsing or converting problem

midway it's out of the feature scope and already being logged elsewhere.

Attaching v11, marking as waiting for review.

Best, Alex
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 6612f95f9f..81dfee5fe7 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -6414,6 +6414,23 @@ log_line_prefix = '%m [%p] %q%u@%d/%a '
   
  
 
+ 
+  log_parameters_on_error (boolean)
+  
+   log_parameters_on_error configuration parameter
+  
+  
+  
+   
+Controls whether the statement is logged with bind parameter values. 
+It adds some overhead, as postgres will cache textual
+representations of parameter values in memory for all statements,
+even if they eventually do not get logged. The default is
+off. Only superusers can change this setting.
+   
+  
+ 
+
  
   log_statement (enum)
   
diff --git a/src/backend/commands/prepare.c b/src/backend/commands/prepare.c
index 7e0a041fab..7692ea551b 100644
--- a/src/backend/commands/prepare.c
+++ b/src/backend/commands/prepare.c
@@ -406,7 +406,7 @@ EvaluateParams(PreparedStatement *pstmt, List *params,
 		prm->value = ExecEvalExprSwitchContext(n,
 			   GetPerTupleExprContext(estate),
 			   &prm->isnull);
-
+		prm->textValue = NULL;
 		i++;
 	}
 
diff --git a/src/backend/executor/functions.c b/src/backend/executor/functions.c
index 83337c2eed..92fcee8b19 100644
--- a/src/backend/executor/functions.c
+++ b/src/backend/executor/functions.c
@@ -925,6 +925,7 @@ postquel_sub_params(SQLFunctionCachePtr fcache,
 			prm->isnull = fcinfo->args[i].isnull;
 			prm->pflags = 0;
 			prm->ptype = fcache->pinfo->argtypes[i];
+			prm->textValue = NULL;
 		}
 	}
 	else
diff --git a/src/backend/executor/spi.c b/src/backend/executor/spi.c
index 2c0ae395ba..f6b4246b47 100644
--- a/src/backend/executor/spi.c
+++ b/src/backend/executor/spi.c
@@ -2458,6 +2458,7 @@ _SPI_convert_params(int nargs, Oid *argtypes,
 			prm->isnull = (Nulls && Nulls[i] == 'n');
 			prm->pflags = PARAM_FLAG_CONST;
 			prm->ptype = argtypes[i];
+			prm->textValue = NULL;
 		}
 	}
 	else
diff --git a/src/backend/nodes/params.c b/src/backend/nodes/params.c
index cf4387e40f..bf8e3705af 100644
--- a/src/backend/nodes/params.c
+++ b/src/backend/nodes/params.c
@@ -45,6 +45,7 @@ makeParamList(int numParams)
 	retval->parserSetup = NULL;
 	retval->parserSetupArg = NULL;
 	retval->numParams = numParams;
+	retval->hasTextValues = false;
 
 	return retval;
 }
@@ -91,6 +92,9 @@ copyParamList(ParamListInfo from)
 			continue;
 		get_typlenbyval(nprm->ptype, &typLen, &typByVal);
 		nprm->value = datumCopy(nprm->value, typByVal, typLen);
+
+		/* We don't copy text values, as no caller needs that at present. */
+		nprm->textValue = NULL;
 	}
 
 	return retval;
@@ -247,6 +251,9 @@ RestoreParamList(char **start_address)
 
 		/* Read datum/isnull. */
 		prm->value = datumRestore(start_address, &prm->isnull);
+
+		/* We don't copy text values, as no caller needs that at present. */
+		prm->textValue = NULL;
 	}
 
 	return paramLI;
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index e8d8e6f828..ec1e4092f5 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -86,6 +86,12 @@
  */
 const char *debug_query_string; /* client-supplied query string */
 
+/*
+ * The top-level portal that the client is immediately working with:
+ * creating, binding, executing, or all at one using simple protocol
+ */
+Portal current_top_portal = NULL;
+
 /* Note: whereToSendOutput is initialized for the bootstrap/standalone case */
 CommandDest whereToSendOutput = DestDebug;
 
@@ -1714,6 +1720,9 @@ exec_bind_message(StringInfo input_message)
 	else
 		portal = CreatePortal(portal_name, false, false);
 
+	Assert(current_top_portal == NULL);
+	current_top_portal = portal;
+
 	/*
 	 * Prepare to copy stuff into the portal's memory context.  We do all this
 	 * copying first, because it could possibly fail (out-of-memory) and we
@@ -1751,6 +1760,9 @@ exec_bind_message(StringInfo input_message)
 	 */
 	if (numParams > 0)
 	{
+		/* GUC value can change, so we remember its state to be consistent */
+		bool need_text_values 

Re: log bind parameter values on error

2019-07-05 Thread Alexey Bashtanov

Please find the rebased patch attached.

Tested like the following.
Provided you're in the postgres checkout and you've run make in 
src/test/examples/ and connected to db=postgres:


CREATE SCHEMA testlibpq3;
SET search_path = testlibpq3;
CREATE TABLE test1_(i int4, t text, b bytea);
INSERT INTO test1_ VALUES(0, '', '');
CREATE VIEW test1 AS SELECT 1/i i, t, b FROM test1_;

-- will log only statement
\! ./src/test/examples/testlibpq3

ALTER SYSTEM SET log_parameters_on_error TO on;
SELECT pg_reload_conf();

-- will log statement with parameters
\! ./src/test/examples/testlibpq3

Best regards,
  Alexey
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index c91e3e1550..9f2e9bae33 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -6423,6 +6423,23 @@ log_line_prefix = '%m [%p] %q%u@%d/%a '
   
  
 
+ 
+  log_parameters_on_error (boolean)
+  
+   log_parameters_on_error configuration parameter
+  
+  
+  
+   
+Controls whether the statement is logged with bind parameter values. 
+It adds some overhead, as postgres will cache textual
+representations of parameter values in memory for all statements,
+even if they eventually do not get logged. The default is
+off. Only superusers can change this setting.
+   
+  
+ 
+
  
   log_statement (enum)
   
diff --git a/src/backend/nodes/params.c b/src/backend/nodes/params.c
index f5d56138ee..5e5ccf1a4f 100644
--- a/src/backend/nodes/params.c
+++ b/src/backend/nodes/params.c
@@ -45,6 +45,7 @@ makeParamList(int numParams)
 	retval->parserSetup = NULL;
 	retval->parserSetupArg = NULL;
 	retval->numParams = numParams;
+	retval->hasTextValues = false;
 
 	return retval;
 }
@@ -58,6 +59,10 @@ makeParamList(int numParams)
  * set of parameter values.  If dynamic parameter hooks are present, we
  * intentionally do not copy them into the result.  Rather, we forcibly
  * instantiate all available parameter values and copy the datum values.
+ *
+ * Since this function is never used for copying parameters into a top portal,
+ * we don't copy textual representations. Neither we set the hasTextValues
+ * flag, so noone will access them.
  */
 ParamListInfo
 copyParamList(ParamListInfo from)
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 44a59e1d4f..3559163ac3 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -86,6 +86,12 @@
  */
 const char *debug_query_string; /* client-supplied query string */
 
+/*
+ * The top-level portal that the client is immediately working with:
+ * creating, binding, executing, or all at one using simple protocol
+ */
+Portal current_top_portal = NULL;
+
 /* Note: whereToSendOutput is initialized for the bootstrap/standalone case */
 CommandDest whereToSendOutput = DestDebug;
 
@@ -1694,6 +1700,9 @@ exec_bind_message(StringInfo input_message)
 	else
 		portal = CreatePortal(portal_name, false, false);
 
+	Assert(current_top_portal == NULL);
+	current_top_portal = portal;
+
 	/*
 	 * Prepare to copy stuff into the portal's memory context.  We do all this
 	 * copying first, because it could possibly fail (out-of-memory) and we
@@ -1731,6 +1740,9 @@ exec_bind_message(StringInfo input_message)
 	 */
 	if (numParams > 0)
 	{
+		/* GUC value can change, so we remember its state to be consistent */
+		bool need_text_values = log_parameters_on_error;
+
 		params = makeParamList(numParams);
 
 		for (int paramno = 0; paramno < numParams; paramno++)
@@ -1798,9 +1810,31 @@ exec_bind_message(StringInfo input_message)
 
 pval = OidInputFunctionCall(typinput, pstring, typioparam, -1);
 
-/* Free result of encoding conversion, if any */
-if (pstring && pstring != pbuf.data)
-	pfree(pstring);
+if (pstring)
+{
+	if (need_text_values)
+	{
+		if (pstring == pbuf.data)
+		{
+			/*
+			 * Copy textual representation to portal context.
+			 */
+			params->params[paramno].textValue =
+			pstrdup(pstring);
+		}
+		else
+		{
+			/* Reuse the result of encoding conversion for it */
+			params->params[paramno].textValue = pstring;
+		}
+	}
+	else
+	{
+		/* Free result of encoding conversion */
+		if (pstring != pbuf.data)
+			pfree(pstring);
+	}
+}
 			}
 			else if (pformat == 1)	/* binary mode */
 			{
@@ -1826,6 +1860,22 @@ exec_bind_message(StringInfo input_message)
 			(errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
 			 errmsg("incorrect binary data format in bind parameter %d",
 	paramno + 1)));
+
+/*
+ * Compute textual representation for further logging. We waste
+ * some time and memory here, maybe one day we could skip
+ * certain types like built-in primitives, which are safe to get
+ * it calculated later in an aborted xact.
+ */
+if (!isNull && need_text_values)
+{
+	Oid			typoutput;
+	bool		t

Re: log bind parameter values on error

2019-04-15 Thread Alexey Bashtanov

On 05/04/2019 12:23, Peter Eisentraut wrote:

On 2019-03-29 15:55, Alexey Bashtanov wrote:

ERROR:  value "62812" is out of range for type smallint
STATEMENT:  SELECT abalance FROM pgbench_accounts WHERE aid = $1;

(In this case the error message contains the parameter value, so it's
not a very practical case, but it should work, it seems.)

I guess this error occurred /while/ binding, so the parameters probably
weren't yet all bound by the time of error reporting.
That's why the error message came without parameters.

I see.  But I think that could be fixed.  Change exec_bind_message() to
loop over the parameters twice: once to save them away, once to actually
process them.  I think the case of a faulty input value is probably very
common, so it would be confusing if that didn't work.

I understand this may sound lazy of me, however let me take this risk
and try to explain why I think logging them here in the same fashion
would be inconsistent.

The original feature is intended to log textual representations of
the bind parameter values, whereas the problem suggested to be solved
together with it is verbose logging of the part of the bind message that
failed to get converted to a Datum.

These are equivalent only under the following conditions:
1) the rest of the message was correctly formatted, so we can extract
something for each of the parameter values
2) the values have been passed in text mode
(all of them, not only the one failed to process)
3) the values have been passed in the server encoding

I think it would be a bit of inconsistent to log the parts of the message
only when we are lucky to satisfy the 3 conditions above.

If we are to log bind parameters message parsing and processing problems
more verbosely, what do you think of rather wrapping calls to
OidInputFunctionCall, OidReceiveFunctionCall and pg_client_to_server
into PG_TRY blocks and logging their arguments individually?

In case it doesn't work for any reason, I have an alternative idea on
how to log half-processed parameters.
Instead of hasTextValues store the number of parameters having
their textual representations successfully saved.
This way we will be able, in case of text mode,
save the original value and increment the counter
before the call to OidInputFunctionCall. When logging and not having values
for some parameters, we can print an ellipsis in the log to indicate there's
some more of them missing.


I think this patch needs some tests.  Manually testing it is cumbersome,
and as we are seeing now, it is not quite clear which cases it is
supposed to cover.  There are also additional cases for binary
parameters, and there are additions to the CSV output format.  We need
tests for all that so the behavior explains itself and doesn't have to
be rediscovered every time someone wants to look at this again.



I have added a section into src/test/examples/testlibpq3.c,
please see attached.

As far as I could see these tests never run on "make check" or "make 
installcheck",

hence the README change. Please correct me if I'm wrong.

I also failed to find an automatic way to test what actually gets logged
to the server log, at least not in the installcheck case.
I would appreciate if you had any suggestions about it.

Best regards,
  Alex
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 75f9471..437dc44 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -6418,6 +6418,23 @@ log_line_prefix = '%m [%p] %q%u@%d/%a '
   
  
 
+ 
+  log_parameters_on_error (boolean)
+  
+   log_parameters_on_error configuration parameter
+  
+  
+  
+   
+Controls whether the statement is logged with bind parameter values. 
+It adds some overhead, as postgres will cache textual
+representations of parameter values in memory for all statements,
+even if they eventually do not get logged. The default is
+off. Only superusers can change this setting.
+   
+  
+ 
+
  
   log_statement (enum)
   
diff --git a/src/backend/nodes/params.c b/src/backend/nodes/params.c
index f5d5613..5e5ccf1 100644
--- a/src/backend/nodes/params.c
+++ b/src/backend/nodes/params.c
@@ -45,6 +45,7 @@ makeParamList(int numParams)
 	retval->parserSetup = NULL;
 	retval->parserSetupArg = NULL;
 	retval->numParams = numParams;
+	retval->hasTextValues = false;
 
 	return retval;
 }
@@ -58,6 +59,10 @@ makeParamList(int numParams)
  * set of parameter values.  If dynamic parameter hooks are present, we
  * intentionally do not copy them into the result.  Rather, we forcibly
  * instantiate all available parameter values and copy the datum values.
+ *
+ * Since this function is never used for copying parameters into a top portal,
+ * we don't copy textual representations. Neither we set the hasTextValues
+ * flag, so noone will access them.
  */

Re: log bind parameter values on error

2019-03-29 Thread Alexey Bashtanov

Hello and sorry for weeks of silence.


Hello Anders and Peter,

Thanks for your messages.
Please see the new patch version attached.

In my testing, I couldn't get this patch to do anything.  Could you
please share your testing steps?


Sure. Provided you're in the postgres checkout and you've run make in 
src/test/examples/ this should work


CREATE SCHEMA testlibpq3;
SET search_path = testlibpq3;
CREATE TABLE test1_(i int4, t text, b bytea);
INSERT INTO test1_ VALUES(0, '', '');
CREATE VIEW test1 AS SELECT 1/i i, t, b FROM test1_;

-- will log only statement
\! ./src/test/examples/testlibpq3

ALTER SYSTEM SET log_parameters_on_error TO on;
SELECT pg_reload_conf();

-- will log statement with parameters
\! ./src/test/examples/testlibpq3


I did

postgres -D data --log-parameters-on-error=on

pgbench -i bench

alter table pgbench_accounts alter column aid type smallint;

pgbench -M extended -S -T 10 bench

This will then error out on type overflows, but I don't see any
parameters being logged:

ERROR:  value "62812" is out of range for type smallint
STATEMENT:  SELECT abalance FROM pgbench_accounts WHERE aid = $1;

(In this case the error message contains the parameter value, so it's
not a very practical case, but it should work, it seems.)
I guess this error occurred /while/ binding, so the parameters probably 
weren't yet all bound by the time of error reporting.

That's why the error message came without parameters.


Meanwhile, I have committed a patch that refactors the ParamListInfo
initialization a bit, so you don't have to initialize hasTextValues in a
bunch of places unrelated to your core code.  So please rebase your
patch on that.


Please find rebased patch attached.

I apologize for no reply before: I first thought my patch was really 
faulty and knew I wouldn't have time to fix it these days, but it seems 
to me it actually works.


Anyway, I don't suppose you manage to review it within the remaining few 
days, so I'll rebase it again in the beginning of the next CF.


Best regards,
  Alexey
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index d383de2..1442f30 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -6330,6 +6330,23 @@ log_line_prefix = '%m [%p] %q%u@%d/%a '
   
  
 
+ 
+  log_parameters_on_error (boolean)
+  
+   log_parameters_on_error configuration parameter
+  
+  
+  
+   
+Controls whether the statement is logged with bind parameter values. 
+It adds some overhead, as postgres will cache textual
+representations of parameter values in memory for all statements,
+even if they eventually do not get logged. The default is
+off. Only superusers can change this setting.
+   
+  
+ 
+
  
   log_statement (enum)
   
diff --git a/src/backend/nodes/params.c b/src/backend/nodes/params.c
index f5d5613..5e5ccf1 100644
--- a/src/backend/nodes/params.c
+++ b/src/backend/nodes/params.c
@@ -45,6 +45,7 @@ makeParamList(int numParams)
 	retval->parserSetup = NULL;
 	retval->parserSetupArg = NULL;
 	retval->numParams = numParams;
+	retval->hasTextValues = false;
 
 	return retval;
 }
@@ -58,6 +59,10 @@ makeParamList(int numParams)
  * set of parameter values.  If dynamic parameter hooks are present, we
  * intentionally do not copy them into the result.  Rather, we forcibly
  * instantiate all available parameter values and copy the datum values.
+ *
+ * Since this function is never used for copying parameters into a top portal,
+ * we don't copy textual representations. Neither we set the hasTextValues
+ * flag, so noone will access them.
  */
 ParamListInfo
 copyParamList(ParamListInfo from)
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index f9ce3d8..e097ce8 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -86,6 +86,12 @@
  */
 const char *debug_query_string; /* client-supplied query string */
 
+/*
+ * The top-level portal that the client is immediately working with:
+ * creating, binding, executing, or all at one using simple protocol
+ */
+Portal current_top_portal = NULL;
+
 /* Note: whereToSendOutput is initialized for the bootstrap/standalone case */
 CommandDest whereToSendOutput = DestDebug;
 
@@ -1694,6 +1700,9 @@ exec_bind_message(StringInfo input_message)
 	else
 		portal = CreatePortal(portal_name, false, false);
 
+	Assert(current_top_portal == NULL);
+	current_top_portal = portal;
+
 	/*
 	 * Prepare to copy stuff into the portal's memory context.  We do all this
 	 * copying first, because it could possibly fail (out-of-memory) and we
@@ -1731,6 +1740,9 @@ exec_bind_message(StringInfo input_message)
 	 */
 	if (numParams > 0)
 	{
+		/* GUC value can change, so we remember its state to be consistent */
+		bool need_text_values = log_parameters_on_error;
+
 		params = makeParamList(numParams);
 
 		for (int paramno = 0; paramno < numParams; paramno++)
@@ -1798,9 +1810,31 @@ exec_bind

Re: log bind parameter values on error

2019-02-15 Thread Alexey Bashtanov

Hello Anders and Peter,

Thanks for your messages.
Please see the new patch version attached.

> Have you analyzed how invasive it'd be to delay that till we actually
> can safely start a [sub]transaction to do that logging? Probably too
> expensive, but it seems like something that ought to be analyzed.

The fundamental problem here is that the output functions for the types
of the values to be logged may be present only in the transaction
that has just been aborted.

Also I don't like the idea of doing complex operations in the error handler,
as it increases the chances of cascading errors.

I thought of pre-calculating types' output functions instead of their 
results,

but that would work for certain built-in types only.


+ 
+  log_parameters_on_error (boolean)
+  
+   log_parameters_on_error configuration 
parameter
+  
+  
+  
+   
+Controls whether the statement is logged with bind parameter values.
+It adds some overhead, as postgres will cache textual
+representations of parameter values in memory for all statements,
+even if they eventually do not get logged. The default is
+off. Only superusers can change this setting.
+   
+  
+ 

This needs a bit of language polishing.


I would appreciate if you had any suggestions, as my English isn't great.


@@ -31,6 +31,8 @@
   * set of parameter values.  If dynamic parameter hooks are present, we
   * intentionally do not copy them into the result.  Rather, we forcibly
   * instantiate all available parameter values and copy the datum values.
+ *
+ * We don't copy textual representations here.
   */

That probably needs to be expanded on. Including a comment about what
guarantees that there are no memory lifetime issues.


What kind of memory lifetime issues do you mean?
We're not copying textual representations, so the worst can happen
is they don't get logged when appropriate. Luckily, we never use this
function when copying to a portal we use for logging, I added this to
the comment. Do you think it's any better?



So the parameters we log here haven't actually gone through the output
function? Isn't that an issue? I mean that'll cause the contents to
differ from the non-error case, no? And differs from the binary protocol
case?

I don't think it's much of a problem. Text input and output functions are
meant to match, but the CREATE TYPE spec isn't too specific about what 
it means.
Of course it does not mean that typoutput(typinput(foo)) is always 
exactly foo.

However, I really hope that at least typinput(typoutput(foo)) = foo,
where "=" is the correspondent operator registered in postgres.

If a cheeky client passes '007' as a numeric value I don't mind it being
sometimes logged as '007' and sometimes as '7', depending on the settings.
It anyway denotes the same number, and we'll know what to pass to 
reproduce the problem.
For binary protocol it'll be '7' as well, as it'll undergo the typrecv 
and then typoutput procedures.



else
{
+   /*
+* We do it for non-xact commands only, as an xact command
+* 1) shouldn't have any parameters to log;
+* 2) may have the portal dropped early.
+*/
+   Assert(current_top_portal == NULL);
+   current_top_portal = portal;
+   portalParams = NULL;
+

"it"? ISTM the comment doesn't really stand on its own?

Thanks, I fixed the comment and some code around it.


+/*
+ * get_portal_bind_parameters
+ * Get the string containing parameters data, is used for logging.
+ *
+ * Can return NULL if there are no parameters in the portal
+ * or the portal is not valid, or the text representations of the parameters 
are
+ * not available. If returning a non-NULL value, it allocates memory
+ * for the returned string in the current context, and it's the caller's
+ * responsibility to pfree() it if needed.
+ */
+char *
+get_portal_bind_parameters(ParamListInfo params)
+{
+   StringInfoData param_str;
+
+   /* No parameters to format */
+   if (!params || params->numParams == 0)
+   return NULL;
+
+   elog(WARNING, "params->hasTextValues=%d, 
IsAbortedTransactionBlockState()=%d",
+  params->hasTextValues && 
IsAbortedTransactionBlockState());

Err, huh?

This was some debugging, I threw it away now.

Best,
 Alex

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 8bd57f3..59638f8 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -6312,6 +6312,23 @@ log_line_prefix = '%m [%p] %q%u@%d/%a '
   
  
 
+ 
+  log_parameters_on_error (boolean)
+  
+   log_parameters_on_error configuration parameter
+  
+  
+  
+   
+Controls whether the statement is logged with bind parameter values. 
+It adds some overhead, as postgres will cache text

Re: log bind parameter values on error

2019-01-27 Thread Alexey Bashtanov

Hi Peter,


With your patch, with log_statement=all and log_parameters=on, you get
the same, but with log_statement=all and log_parameters=off you get

LOG:  execute : SELECT abalance FROM pgbench_accounts WHERE aid
= $1;
DETAIL:  parameters: $1 = UNKNOWN TYPE

Thanks for spotting this, I've fixed it, see the new patch attached.

This also raises the question of the new parameter name.  Parameters are
already logged.  So the name should perhaps be more like
log_parameters_on_error.

Done

Some API notes on your patch:  I think you can change
get_portal_bind_parameters() to take a ParamListInfo, since you're not
doing anything with the Portal other than grab the parameters.  And that
would allow you to keep the signature of errdetail_params() unchanged.

Done

I did some performance tests using the commands shown above and didn't
find any problems.  Obviously the default pgbench stuff isn't very
parameter-intensive.  Do you have tests with more and larger parameter
values?



I've done some tests, but they are not very reproducible:
the difference between runs is more than the difference between master 
vs feature branch

and log_parameters_on_error on vs off.

I was using a small java app, it tested the speed using only a single 
connection.

See its code and the results attached.

I'm sorry for the delay, feel free to move it to next commitfest if needed.

Best,
  Alex

import java.sql.ResultSet;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

import com.zaxxer.hikari.HikariDataSource;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.jdbc.core.RowMapper;


public class JDBCTest {
  public static void main(String[] argv) throws Exception {
Class.forName("org.postgresql.Driver");

HikariDataSource dataSource = new HikariDataSource();

dataSource.setPoolName("foo");
dataSource.setJdbcUrl("jdbc:postgresql://127.0.0.1:5432/alexey?prepareThreshold=1");
dataSource.setUsername("alexey");
dataSource.setPassword("");
dataSource.setMinimumIdle(0);
dataSource.setMaximumPoolSize(1);
dataSource.setIdleTimeout(1 * 1000L);
dataSource.setMaxLifetime(1 * 1000L);
JdbcTemplate jt = new JdbcTemplate(dataSource);

int[][] paramsets = new int[][]{
{10, 0, 0},
{1000, 1, 1},
{1000, 1000, 1000},
{1000, 100, 1},
{1000, 10, 10},
{1000, 1, 100},
{10, 1, 1}
};

for(int[] paramset: paramsets) {

int nruns = paramset[0];
int nparams = paramset[1];
int paramLength = paramset[2];

String sql = "select coalesce('' "
+ String.join("", Collections.nCopies(nparams, ", ?"))
+ ")";
String p = String.join("", Collections.nCopies(paramLength, "z"));
Object[] pp = new Object[nparams];
Arrays.fill(pp, p);

RowMapper rm = (ResultSet resultSet, int i) -> resultSet.getString(1);

long startTime = System.nanoTime();

for (int i = 0; i < nruns; i++) {
List result = jt.query(sql, pp, rm);
}

long endTime = System.nanoTime();
long duration = (endTime - startTime);
System.out.println(String.format("NPARAMS=%d PARAM_LENGTH=%d NRUNS=%d TIME=%fs",
nparams, paramLength, nruns, duration / 100. / 1000));
}
System.exit(0);
  }
}
log_min_duration_statement=-1:

feature branch, log_parameters_on_error=off:
NPARAMS=0 PARAM_LENGTH=0 NRUNS=10 TIME=5.455950s
NPARAMS=1 PARAM_LENGTH=1 NRUNS=1000 TIME=8.708911s
NPARAMS=1000 PARAM_LENGTH=1000 NRUNS=1000 TIME=9.266428s
NPARAMS=100 PARAM_LENGTH=1 NRUNS=1000 TIME=5.949109s
NPARAMS=10 PARAM_LENGTH=10 NRUNS=1000 TIME=5.086381s
NPARAMS=1 PARAM_LENGTH=100 NRUNS=1000 TIME=5.301780s
NPARAMS=1 PARAM_LENGTH=1 NRUNS=10 TIME=6.763883s

feature branch, log_parameters_on_error=on:
NPARAMS=0 PARAM_LENGTH=0 NRUNS=10 TIME=5.238674s
NPARAMS=1 PARAM_LENGTH=1 NRUNS=1000 TIME=8.927818s
NPARAMS=1000 PARAM_LENGTH=1000 NRUNS=1000 TIME=7.960569s
NPARAMS=100 PARAM_LENGTH=1 NRUNS=1000 TIME=6.398343s
NPARAMS=10 PARAM_LENGTH=10 NRUNS=1000 TIME=5.293962s
NPARAMS=1 PARAM_LENGTH=100 NRUNS=1000 TIME=5.519607s
NPARAMS=1 PARAM_LENGTH=1 NRUNS=10 TIME=7.236014s

master:
NPARAMS=0 PARAM_LENGTH=0 NRUNS=10 TIME=5.600735s
NPARAMS=1 PARAM_LENGTH=1 NRUNS=1000 TIME=11.478406s
NPARAMS=1000 PARAM_LENGTH=1000 NRUNS=1000 TIME=9.701054s
NPARAMS=100 PARAM_LENGTH=1 NRUNS=1000 TIME=6.352961s
NPARAMS=10 PARAM_LENGTH=10 NRUNS=1000 TIME=5.153303s
NPARAMS=1 PARAM_LE

Re: log bind parameter values on error

2019-01-12 Thread Alexey Bashtanov



please see attached

sorry, some unintended changes sneaked in, please see the corrected patch
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index f64402a..924a76c 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -6270,6 +6270,23 @@ log_line_prefix = '%m [%p] %q%u@%d/%a '
   
  
 
+ 
+  log_parameters (boolean)
+  
+   log_parameters configuration parameter
+  
+  
+  
+   
+Controls whether the statement is logged with bind parameter values. 
+It adds some overhead, as postgres will cache textual
+representations of parameter values in memory for all statements,
+even if they eventually do not get logged. The default is
+off. Only superusers can change this setting.
+   
+  
+ 
+
  
   log_statement (enum)
   
diff --git a/src/backend/commands/prepare.c b/src/backend/commands/prepare.c
index a98c836..9ee3954 100644
--- a/src/backend/commands/prepare.c
+++ b/src/backend/commands/prepare.c
@@ -404,6 +404,7 @@ EvaluateParams(PreparedStatement *pstmt, List *params,
 	paramLI->parserSetup = NULL;
 	paramLI->parserSetupArg = NULL;
 	paramLI->numParams = num_params;
+	paramLI->hasTextValues = false;
 
 	i = 0;
 	foreach(l, exprstates)
diff --git a/src/backend/executor/functions.c b/src/backend/executor/functions.c
index de41588..195e3c5 100644
--- a/src/backend/executor/functions.c
+++ b/src/backend/executor/functions.c
@@ -921,6 +921,7 @@ postquel_sub_params(SQLFunctionCachePtr fcache,
 			paramLI->parserSetup = NULL;
 			paramLI->parserSetupArg = NULL;
 			paramLI->numParams = nargs;
+			paramLI->hasTextValues = false;
 			fcache->paramLI = paramLI;
 		}
 		else
diff --git a/src/backend/executor/spi.c b/src/backend/executor/spi.c
index 94a53e0..398f0e3 100644
--- a/src/backend/executor/spi.c
+++ b/src/backend/executor/spi.c
@@ -2399,6 +2399,7 @@ _SPI_convert_params(int nargs, Oid *argtypes,
 		paramLI->parserSetup = NULL;
 		paramLI->parserSetupArg = NULL;
 		paramLI->numParams = nargs;
+		paramLI->hasTextValues = false;
 
 		for (i = 0; i < nargs; i++)
 		{
diff --git a/src/backend/nodes/params.c b/src/backend/nodes/params.c
index a89a25e..5aebd428 100644
--- a/src/backend/nodes/params.c
+++ b/src/backend/nodes/params.c
@@ -31,6 +31,8 @@
  * set of parameter values.  If dynamic parameter hooks are present, we
  * intentionally do not copy them into the result.  Rather, we forcibly
  * instantiate all available parameter values and copy the datum values.
+ *
+ * We don't copy textual representations here.
  */
 ParamListInfo
 copyParamList(ParamListInfo from)
@@ -53,6 +55,7 @@ copyParamList(ParamListInfo from)
 	retval->parserSetup = NULL;
 	retval->parserSetupArg = NULL;
 	retval->numParams = from->numParams;
+	retval->hasTextValues = false;
 
 	for (i = 0; i < from->numParams; i++)
 	{
@@ -229,6 +232,7 @@ RestoreParamList(char **start_address)
 	paramLI->parserSetup = NULL;
 	paramLI->parserSetupArg = NULL;
 	paramLI->numParams = nparams;
+	paramLI->hasTextValues = false;
 
 	for (i = 0; i < nparams; i++)
 	{
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 0c0891b..1f81fba 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -86,6 +86,12 @@
  */
 const char *debug_query_string; /* client-supplied query string */
 
+/*
+ * The top-level portal that the client is immediately working with:
+ * creating, binding, executing, or all at one using simple protocol
+ */
+Portal current_top_portal = NULL;
+
 /* Note: whereToSendOutput is initialized for the bootstrap/standalone case */
 CommandDest whereToSendOutput = DestDebug;
 
@@ -183,7 +189,7 @@ static void forbidden_in_wal_sender(char firstchar);
 static List *pg_rewrite_query(Query *query);
 static bool check_log_statement(List *stmt_list);
 static int	errdetail_execute(List *raw_parsetree_list);
-static int	errdetail_params(ParamListInfo params);
+static int	errdetail_params(Portal portal);
 static int	errdetail_abort(void);
 static int	errdetail_recovery_conflict(void);
 static void start_xact_command(void);
@@ -1694,6 +1700,9 @@ exec_bind_message(StringInfo input_message)
 	else
 		portal = CreatePortal(portal_name, false, false);
 
+	Assert(current_top_portal == NULL);
+	current_top_portal = portal;
+
 	/*
 	 * Prepare to copy stuff into the portal's memory context.  We do all this
 	 * copying first, because it could possibly fail (out-of-memory) and we
@@ -1731,6 +1740,9 @@ exec_bind_message(StringInfo input_message)
 	 */
 	if (numParams > 0)
 	{
+		/* GUC value can change, so we remember its state to be consistent */
+		bool need_text_values = log_parameters;
+
 		params = (ParamListInfo) palloc(offsetof(ParamListInfoData, params) +
 		numParams * sizeof(ParamExternData));
 		/* we have static list of params, so no hooks needed */
@@ -1741,6 +1753,8 @@ exec_bind_message(StringInfo input_message)
 		params->parserSetup = NULL;
 		params->parserSetu

Re: log bind parameter values on error

2019-01-12 Thread Alexey Bashtanov

Hello Peter,


Unlike SQL, parameters may spend much more memory, so I'd have them
in portal memory context to make sure the memory is released earlier
rather than later.

Having them in the portal structure is different from having it in the
portal memory context.  Although there is perhaps value in keeping them
together.

yeah, to avoid pointers to deallocated areas


Let's see how it looks.


please see attached

Best,
  Alex
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index f64402a..924a76c 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -6270,6 +6270,23 @@ log_line_prefix = '%m [%p] %q%u@%d/%a '
   
  
 
+ 
+  log_parameters (boolean)
+  
+   log_parameters configuration parameter
+  
+  
+  
+   
+Controls whether the statement is logged with bind parameter values. 
+It adds some overhead, as postgres will cache textual
+representations of parameter values in memory for all statements,
+even if they eventually do not get logged. The default is
+off. Only superusers can change this setting.
+   
+  
+ 
+
  
   log_statement (enum)
   
diff --git a/src/backend/commands/prepare.c b/src/backend/commands/prepare.c
index a98c836..9ee3954 100644
--- a/src/backend/commands/prepare.c
+++ b/src/backend/commands/prepare.c
@@ -404,6 +404,7 @@ EvaluateParams(PreparedStatement *pstmt, List *params,
 	paramLI->parserSetup = NULL;
 	paramLI->parserSetupArg = NULL;
 	paramLI->numParams = num_params;
+	paramLI->hasTextValues = false;
 
 	i = 0;
 	foreach(l, exprstates)
diff --git a/src/backend/executor/functions.c b/src/backend/executor/functions.c
index de41588..195e3c5 100644
--- a/src/backend/executor/functions.c
+++ b/src/backend/executor/functions.c
@@ -921,6 +921,7 @@ postquel_sub_params(SQLFunctionCachePtr fcache,
 			paramLI->parserSetup = NULL;
 			paramLI->parserSetupArg = NULL;
 			paramLI->numParams = nargs;
+			paramLI->hasTextValues = false;
 			fcache->paramLI = paramLI;
 		}
 		else
diff --git a/src/backend/executor/spi.c b/src/backend/executor/spi.c
index 94a53e0..398f0e3 100644
--- a/src/backend/executor/spi.c
+++ b/src/backend/executor/spi.c
@@ -2399,6 +2399,7 @@ _SPI_convert_params(int nargs, Oid *argtypes,
 		paramLI->parserSetup = NULL;
 		paramLI->parserSetupArg = NULL;
 		paramLI->numParams = nargs;
+		paramLI->hasTextValues = false;
 
 		for (i = 0; i < nargs; i++)
 		{
diff --git a/src/backend/nodes/params.c b/src/backend/nodes/params.c
index a89a25e..5aebd428 100644
--- a/src/backend/nodes/params.c
+++ b/src/backend/nodes/params.c
@@ -31,6 +31,8 @@
  * set of parameter values.  If dynamic parameter hooks are present, we
  * intentionally do not copy them into the result.  Rather, we forcibly
  * instantiate all available parameter values and copy the datum values.
+ *
+ * We don't copy textual representations here.
  */
 ParamListInfo
 copyParamList(ParamListInfo from)
@@ -53,6 +55,7 @@ copyParamList(ParamListInfo from)
 	retval->parserSetup = NULL;
 	retval->parserSetupArg = NULL;
 	retval->numParams = from->numParams;
+	retval->hasTextValues = false;
 
 	for (i = 0; i < from->numParams; i++)
 	{
@@ -229,6 +232,7 @@ RestoreParamList(char **start_address)
 	paramLI->parserSetup = NULL;
 	paramLI->parserSetupArg = NULL;
 	paramLI->numParams = nparams;
+	paramLI->hasTextValues = false;
 
 	for (i = 0; i < nparams; i++)
 	{
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 0c0891b..1f81fba 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -86,6 +86,12 @@
  */
 const char *debug_query_string; /* client-supplied query string */
 
+/*
+ * The top-level portal that the client is immediately working with:
+ * creating, binding, executing, or all at one using simple protocol
+ */
+Portal current_top_portal = NULL;
+
 /* Note: whereToSendOutput is initialized for the bootstrap/standalone case */
 CommandDest whereToSendOutput = DestDebug;
 
@@ -183,7 +189,7 @@ static void forbidden_in_wal_sender(char firstchar);
 static List *pg_rewrite_query(Query *query);
 static bool check_log_statement(List *stmt_list);
 static int	errdetail_execute(List *raw_parsetree_list);
-static int	errdetail_params(ParamListInfo params);
+static int	errdetail_params(Portal portal);
 static int	errdetail_abort(void);
 static int	errdetail_recovery_conflict(void);
 static void start_xact_command(void);
@@ -1694,6 +1700,9 @@ exec_bind_message(StringInfo input_message)
 	else
 		portal = CreatePortal(portal_name, false, false);
 
+	Assert(current_top_portal == NULL);
+	current_top_portal = portal;
+
 	/*
 	 * Prepare to copy stuff into the portal's memory context.  We do all this
 	 * copying first, because it could possibly fail (out-of-memory) and we
@@ -1731,6 +1740,9 @@ exec_bind_message(StringInfo input_message)
 	 */
 	if (numParams > 0)
 	{
+		/* GUC value can change, so we remember its state to be consistent */
+		bool 

Re: log bind parameter values on error

2019-01-02 Thread Alexey Bashtanov




That sounds like a plausible approach.  Have you done any performance
measurements?


No I haven't yet


In your patch, I would organize the changes to the portal API a bit
differently.  Don't change the signature of CreatePortal().

okay

Get rid of PortalSetCurrentTop() and PortalClearCurrentTop().


I'll remove them from Portal API, but possibly have them in postgres.c
if you don't mind, just to avoid code duplication.
Renamed and maybe even inlined.


Just have a global
variable CurrentPortal and set it directly.  And then change
GetCurrentPortalBindParameters() to take a Portal as argument.  This can
all happen inside postgres.c without changing the Portal APIs.

Okay, will do

In fact, maybe don't use the Portal structure at all and just store the
saved textualized values inside postgres.c in a static variable.


Unlike SQL, parameters may spend much more memory, so I'd have them
in portal memory context to make sure the memory is released earlier 
rather than later.

Tracking individual variable lifetime like we do with debug_query_string
sounds doable but a bit non-straightforward to me,
see e.g. the tricks we do with transaction commands.

Also, I'd like to avoid early forming of the error string, as we may 
need to combine
them differently in future, e.g. when logging in various logging formats 
or languages.

One-by-one pfree-ing doesn't look tempting either.

Do you think it would be acceptable to leave them cached in parameters 
structure?


Best,
  Alex




Re: A case for UPDATE DISTINCT attribute

2018-12-20 Thread Alexey Bashtanov

Hello Gajus,

I have observed that the following pattern is repeating in our data 
management programs:


UPDATE
  event
SET
  fuid = ${fuid},
  venue_id = ${venueId},
  url = ${url}
WHERE
  id = ${id} AND
  fuid IS != ${fuid} AND
  venue_id IS != ${venueId} AND
  url IS DISTINCT FROM ${url};


...
Meanwhile, a WHERE condition that excludes rows with matching values 
makes this into a noop in case of matching target column values.


For this to hold, you need your conditions in WHERE to be ORed, not ANDed.



UPDATE DISTINCT
  event
SET
  fuid = ${fuid},
  venue_id = ${venueId},
  url = ${url}
WHERE
  id = ${id}

would encourage greater adoption of such pattern.

Is there a technical reason this does not existing already?

ᐧ


At least a bunch of questions and concerns arise. Like these:

1) We cannot treat it as a syntactic sugar only and just expand it on 
parsing stage,
as the expression to generate the value assigned may be volatile, like 
UPDATE ... SET ... = random();
2) How should this interact with triggers? E.g. when NEW and OLD were 
the same
before BEFORE UPDATE trigger execution, but would be different after. Or 
visa versa.

Should they be included into transition tables?
3) Should RETURNING clause return the non-updated rows?
4) It must be not easy to guarantee anything if there is a FROM clause, 
a target row is present in the join more than once.
5) We need to fail correctly if one of the column data types doesn't 
have an equality operator.


Best regards,
  Alexey


log bind parameter values on error

2018-12-14 Thread Alexey Bashtanov

Hello,

I'd like to propose a patch to log bind parameter values not only when 
logging duration,
but also on error (timeout in particular) or in whatever situation the 
statement normally gets logged.
This mostly could be useful when the request originator doesn't log them 
either, so it's hard

to reproduce the problem.

Unfortunately, when enabled, the feature comes with some memory and CPU 
overhead,

as we cannot convert certain values to text when in aborted transaction.

We potentially could do the trick with built-in types, but it would need 
cautious work with composite types,
and also require more computation on the logging stage, which is a risk 
of cascading errors.
Custom types still wouldn't be loggable, even as passed by client, which 
would be not great.


So I decided to cache textual representations on bind stage,
which is especially easy if the client uses text protocol.

Best,
  Alexey
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 4a7121a..7b38aed 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -6265,6 +6265,23 @@ log_line_prefix = '%m [%p] %q%u@%d/%a '
   
  
 
+ 
+  log_parameters (boolean)
+  
+   log_parameters configuration parameter
+  
+  
+  
+   
+Controls whether the statement is logged with bind parameter values. 
+It adds some overhead, as postgres will cache textual
+representations of parameter values in memory for all statements,
+even if they eventually do not get logged. The default is
+off. Only superusers can change this setting.
+   
+  
+ 
+
  
   log_statement (enum)
   
diff --git a/src/backend/commands/portalcmds.c b/src/backend/commands/portalcmds.c
index 5684997..ba431ce 100644
--- a/src/backend/commands/portalcmds.c
+++ b/src/backend/commands/portalcmds.c
@@ -94,7 +94,7 @@ PerformCursorOpen(DeclareCursorStmt *cstmt, ParamListInfo params,
 	/*
 	 * Create a portal and copy the plan and queryString into its memory.
 	 */
-	portal = CreatePortal(cstmt->portalname, false, false);
+	portal = CreatePortal(cstmt->portalname, false, false, false);
 
 	oldContext = MemoryContextSwitchTo(portal->portalContext);
 
diff --git a/src/backend/commands/prepare.c b/src/backend/commands/prepare.c
index 6036b73..363c096 100644
--- a/src/backend/commands/prepare.c
+++ b/src/backend/commands/prepare.c
@@ -404,6 +404,7 @@ EvaluateParams(PreparedStatement *pstmt, List *params,
 	paramLI->parserSetup = NULL;
 	paramLI->parserSetupArg = NULL;
 	paramLI->numParams = num_params;
+	paramLI->hasTextValues = false;
 
 	i = 0;
 	foreach(l, exprstates)
diff --git a/src/backend/executor/functions.c b/src/backend/executor/functions.c
index fc7c605..6d047cd 100644
--- a/src/backend/executor/functions.c
+++ b/src/backend/executor/functions.c
@@ -921,6 +921,7 @@ postquel_sub_params(SQLFunctionCachePtr fcache,
 			paramLI->parserSetup = NULL;
 			paramLI->parserSetupArg = NULL;
 			paramLI->numParams = nargs;
+			paramLI->hasTextValues = false;
 			fcache->paramLI = paramLI;
 		}
 		else
diff --git a/src/backend/executor/spi.c b/src/backend/executor/spi.c
index ad72667..532a365 100644
--- a/src/backend/executor/spi.c
+++ b/src/backend/executor/spi.c
@@ -1302,7 +1302,7 @@ SPI_cursor_open_internal(const char *name, SPIPlanPtr plan,
 	else
 	{
 		/* In this path, error if portal of same name already exists */
-		portal = CreatePortal(name, false, false);
+		portal = CreatePortal(name, false, false, false);
 	}
 
 	/* Copy the plan's query string into the portal */
@@ -2399,6 +2399,7 @@ _SPI_convert_params(int nargs, Oid *argtypes,
 		paramLI->parserSetup = NULL;
 		paramLI->parserSetupArg = NULL;
 		paramLI->numParams = nargs;
+		paramLI->hasTextValues = false;
 
 		for (i = 0; i < nargs; i++)
 		{
diff --git a/src/backend/nodes/params.c b/src/backend/nodes/params.c
index 79197b1..3c3b152 100644
--- a/src/backend/nodes/params.c
+++ b/src/backend/nodes/params.c
@@ -31,6 +31,8 @@
  * set of parameter values.  If dynamic parameter hooks are present, we
  * intentionally do not copy them into the result.  Rather, we forcibly
  * instantiate all available parameter values and copy the datum values.
+ *
+ * We don't copy textual representations here.
  */
 ParamListInfo
 copyParamList(ParamListInfo from)
@@ -53,6 +55,7 @@ copyParamList(ParamListInfo from)
 	retval->parserSetup = NULL;
 	retval->parserSetupArg = NULL;
 	retval->numParams = from->numParams;
+	retval->hasTextValues = false;
 
 	for (i = 0; i < from->numParams; i++)
 	{
@@ -229,6 +232,7 @@ RestoreParamList(char **start_address)
 	paramLI->parserSetup = NULL;
 	paramLI->parserSetupArg = NULL;
 	paramLI->numParams = nparams;
+	paramLI->hasTextValues = false;
 
 	for (i = 0; i < nparams; i++)
 	{
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 5ab7d3c..2ad56a2 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -183,7 +183,7 @@ static void f

Re: proposal: plpgsql pragma statement

2018-12-05 Thread Alexey Bashtanov




You can use PERFORM as a workaround:

PERFORM 'PRAGMA', 'cmdtype', 'CREATE';

There's some overhead when executing, but probably not too much.


Thank you for tip, but I have not any idea, how it can work?



Well, I thought you were for a comment-like thing that remains there 
when compiled and can act as a hint for your static analysis extension.
Surely this PERFORM won't impose any hints on the following statement 
itself.
But maybe your extension could read the PERFORM statement preceding it 
and treat it as an annotation hint for the following statement.


Best,
  Alex


Re: proposal: plpgsql pragma statement

2018-12-05 Thread Alexey Bashtanov

Hello Pavel,



The PRAGMA statement does nothing in runtime. It works only in compile 
time, and add a pair of key, value to next non pragma statement. This 
information can be used by some plpgsql extensions.


What do you think about this proposal?



You can use PERFORM as a workaround:

PERFORM 'PRAGMA', 'cmdtype', 'CREATE';

There's some overhead when executing, but probably not too much.

Best regards,
  Alexey



Re: Pull up sublink of type 'NOT NOT (expr)'

2018-10-23 Thread Alexey Bashtanov

Hello Richard,

Currently for quals in the form of "NOT NOT (SubLink)", this SubLink 
would not

be considered when pulling up sublinks. For instance:

gpadmin=# explain select * from a where NOT NOT (a.i in (select b.i 
from b));

     QUERY PLAN
-
 Seq Scan on a (cost=51.50..85.62 rows=1005 width=8)
   Filter: (hashed SubPlan 1)
   SubPlan 1
     ->  Seq Scan on b  (cost=0.00..44.00 rows=3000 width=4)
(4 rows)


Should we give it a chance, like the attached does?


Sometimes hashed subplan is faster than hash join and than all the other 
options, as it preserves the order.

Using NOT NOT, one can control whether to use it or not:
https://pgblog.bashtanov.com/2017/12/08/double-negative-and-query-performance/ 
(test case and results in the bottom of the page).


Surely dirty tricks should not be the way to control the planner, but 
when breaking them we should probably provide a way to achieve the same 
result,

ideally making the planner choose the best plan without hints.

Best,
  Alex


Timetz comparison

2018-05-25 Thread Alexey Bashtanov

Hello,

Comparison of timetz values looks a bit weird to me, as 
'22:00+02'::timetz > '21:00+01'::timetz.

I can see this behavior introduced by commit 2792374c in 2001
with the comment "timetz was just plain broken (some possible pairs of 
values were neither < nor = nor >)".


The in-code comment is:
/*
 * If same GMT time, sort by timezone; we only want to say that two
 * timetz's are equal if both the time and zone parts are equal.
 */

However I found no specification on how timetz values are compared 
neither in postgres docs

nor in the standard (http://www.wiscorp.com/sql20nn.zip).

Was this decision made just to be definite?

What's the problem with these values to be considered equal?
Backward compatibility? Hash algorithms?

Thanks

Best regards,
  Alexey




Re: [patch] pg_attribute.attndims turns to 0 when 'create table like/as'

2018-04-16 Thread Alexey Bashtanov


Judging from the discussion in [1], attndims is deprecated. 

Good to know, thanks.


Could you describe what you are trying to achieve with it?
Mostly conformance with the documentation. Might be easier to fix the 
docs though.




[patch] pg_attribute.attndims turns to 0 when 'create table like/as'

2018-04-16 Thread Alexey Bashtanov
As reported in [1], pg_attribute.attndims turns to 0 when 'create table 
like/as'.

The patch attached is to fix it.

Best Regards,
  Alexey

[1] 
https://www.postgresql.org/message-id/20150707072942.1186.98151%40wrigleys.postgresql.org
diff --git a/src/backend/nodes/makefuncs.c b/src/backend/nodes/makefuncs.c
new file mode 100644
index 1bd2599..8fff3d2
*** a/src/backend/nodes/makefuncs.c
--- b/src/backend/nodes/makefuncs.c
*** makeTypeNameFromOid(Oid typeOid, int32 t
*** 475,480 
--- 475,498 
  	n->typeOid = typeOid;
  	n->typemod = typmod;
  	n->location = -1;
+ 
+ 	return n;
+ }
+ 
+ /*
+  * makeTypeNameFromOid -
+  *	build a TypeName node to represent a type already known by OID/typmod/ndims
+  */
+ TypeName *
+ makeTypeNameWithNdimsFromOid(Oid typeOid, int32 typmod, int32 ndims)
+ {
+ 	int i;
+ 	TypeName   *n = makeTypeNameFromOid(typeOid, typmod);
+ 
+ 	n->arrayBounds = NIL;
+ 	for (i = 0; i < ndims; i++)
+ 		n->arrayBounds = lcons_int(-1, n->arrayBounds);
+ 
  	return n;
  }
  
diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c
new file mode 100644
index c6f3628..3eeb5cd
*** a/src/backend/parser/parse_utilcmd.c
--- b/src/backend/parser/parse_utilcmd.c
*** transformTableLikeClause(CreateStmtConte
*** 981,988 
  		 */
  		def = makeNode(ColumnDef);
  		def->colname = pstrdup(attributeName);
! 		def->typeName = makeTypeNameFromOid(attribute->atttypid,
! 			attribute->atttypmod);
  		def->inhcount = 0;
  		def->is_local = true;
  		def->is_not_null = attribute->attnotnull;
--- 981,989 
  		 */
  		def = makeNode(ColumnDef);
  		def->colname = pstrdup(attributeName);
! 		def->typeName = makeTypeNameWithNdimsFromOid(attribute->atttypid,
! 	 attribute->atttypmod,
! 	 attribute->attndims);
  		def->inhcount = 0;
  		def->is_local = true;
  		def->is_not_null = attribute->attnotnull;
diff --git a/src/include/nodes/makefuncs.h b/src/include/nodes/makefuncs.h
new file mode 100644
index 57bd52f..8db4b10
*** a/src/include/nodes/makefuncs.h
--- b/src/include/nodes/makefuncs.h
*** extern RangeVar *makeRangeVar(char *sche
*** 71,76 
--- 71,77 
  extern TypeName *makeTypeName(char *typnam);
  extern TypeName *makeTypeNameFromNameList(List *names);
  extern TypeName *makeTypeNameFromOid(Oid typeOid, int32 typmod);
+ extern TypeName *makeTypeNameWithNdimsFromOid(Oid typeOid, int32 typmod, int32 ndims);
  
  extern ColumnDef *makeColumnDef(const char *colname,
  			  Oid typeOid, int32 typmod, Oid collOid);