Re: [PATCH] postgres_fdw connection caching - cause remote sessions linger till the local session exit

2021-01-16 Thread Bharath Rupireddy
On Sat, Jan 16, 2021 at 10:36 AM Bharath Rupireddy
 wrote:
> > > Please consider the v9 patch set for further review.
> >
> > Thanks for updating the patch! I reviewed only 0001 patch.

I addressed the review comments and attached the v10 patch set. Please
consider it for further review.


With Regards,
Bharath Rupireddy.
EnterpriseDB: http://www.enterprisedb.com
From 3cf7d7d4d7241460997530ed01c2a53508257786 Mon Sep 17 00:00:00 2001
From: Bharath Rupireddy 
Date: Sun, 17 Jan 2021 12:30:22 +0530
Subject: [PATCH v10 1/3] postgres_fdw function to discard cached connections

This patch introduces a new function, postgres_fdw_disconnect(). When
called with a foreign server name, it discards the cached connections
associated with that server. When called without any argument, it
discards all existing cached connections.

This patch also adds another function, postgres_fdw_get_connections(),
which returns the list of all cached connections by their corresponding
foreign server names.
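
For illustration, a hedged sketch of how the two functions might be used
(the server name "foreign_pg" is a made-up example, and the exact calling
form may differ slightly from the patch):

    -- list the foreign servers that currently have cached connections
    SELECT * FROM postgres_fdw_get_connections();

    -- discard the cached connections associated with one foreign server
    SELECT postgres_fdw_disconnect('foreign_pg');

    -- discard all cached connections
    SELECT postgres_fdw_disconnect();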
---
 contrib/postgres_fdw/Makefile |   2 +-
 contrib/postgres_fdw/connection.c | 330 +-
 .../postgres_fdw/expected/postgres_fdw.out| 414 +-
 .../postgres_fdw/postgres_fdw--1.0--1.1.sql   |  20 +
 contrib/postgres_fdw/postgres_fdw.control |   2 +-
 contrib/postgres_fdw/sql/postgres_fdw.sql | 160 +++
 doc/src/sgml/postgres-fdw.sgml|  90 
 7 files changed, 999 insertions(+), 19 deletions(-)
 create mode 100644 contrib/postgres_fdw/postgres_fdw--1.0--1.1.sql

diff --git a/contrib/postgres_fdw/Makefile b/contrib/postgres_fdw/Makefile
index ee8a80a392..c1b0cad453 100644
--- a/contrib/postgres_fdw/Makefile
+++ b/contrib/postgres_fdw/Makefile
@@ -14,7 +14,7 @@ PG_CPPFLAGS = -I$(libpq_srcdir)
 SHLIB_LINK_INTERNAL = $(libpq)
 
 EXTENSION = postgres_fdw
-DATA = postgres_fdw--1.0.sql
+DATA = postgres_fdw--1.0.sql postgres_fdw--1.0--1.1.sql
 
 REGRESS = postgres_fdw
 
diff --git a/contrib/postgres_fdw/connection.c b/contrib/postgres_fdw/connection.c
index eaedfea9f2..b6da4899ea 100644
--- a/contrib/postgres_fdw/connection.c
+++ b/contrib/postgres_fdw/connection.c
@@ -16,12 +16,14 @@
 #include "access/xact.h"
 #include "catalog/pg_user_mapping.h"
 #include "commands/defrem.h"
+#include "funcapi.h"
 #include "mb/pg_wchar.h"
 #include "miscadmin.h"
 #include "pgstat.h"
 #include "postgres_fdw.h"
 #include "storage/fd.h"
 #include "storage/latch.h"
+#include "utils/builtins.h"
 #include "utils/datetime.h"
 #include "utils/hsearch.h"
 #include "utils/inval.h"
@@ -66,6 +68,7 @@ typedef struct ConnCacheEntry
  * Connection cache (initialized on first use)
  */
 static HTAB *ConnectionHash = NULL;
+static bool conn_cache_destroyed = false;
 
 /* for assigning cursor numbers and prepared statement numbers */
 static unsigned int cursor_number = 0;
@@ -74,6 +77,12 @@ static unsigned int prep_stmt_number = 0;
 /* tracks whether any work is needed in callback functions */
 static bool xact_got_connection = false;
 
+/*
+ * SQL functions
+ */
+PG_FUNCTION_INFO_V1(postgres_fdw_get_connections);
+PG_FUNCTION_INFO_V1(postgres_fdw_disconnect);
+
 /* prototypes of private functions */
 static void make_new_connection(ConnCacheEntry *entry, UserMapping *user);
 static PGconn *connect_pg_server(ForeignServer *server, UserMapping *user);
@@ -95,6 +104,8 @@ static bool pgfdw_exec_cleanup_query(PGconn *conn, const char *query,
 static bool pgfdw_get_cleanup_result(PGconn *conn, TimestampTz endtime,
 	 PGresult **result);
 static bool UserMappingPasswordRequired(UserMapping *user);
+static bool disconnect_cached_connections(uint32 hashvalue, bool all,
+		  bool *is_in_use);
 
 /*
  * Get a PGconn which can be used to execute queries on the remote PostgreSQL
@@ -127,15 +138,20 @@ GetConnection(UserMapping *user, bool will_prep_stmt)
 	 HASH_ELEM | HASH_BLOBS);
 
 		/*
-		 * Register some callback functions that manage connection cleanup.
-		 * This should be done just once in each backend.
+		 * Register callback functions that manage connection cleanup. This
+		 * should be done just once in each backend. We don't register the
+		 * callbacks again, if the connection cache is destroyed at least once
+		 * in the backend.
 		 */
-		RegisterXactCallback(pgfdw_xact_callback, NULL);
-		RegisterSubXactCallback(pgfdw_subxact_callback, NULL);
-		CacheRegisterSyscacheCallback(FOREIGNSERVEROID,
-	  pgfdw_inval_callback, (Datum) 0);
-		CacheRegisterSyscacheCallback(USERMAPPINGOID,
-	  pgfdw_inval_callback, (Datum) 0);
+		if (!conn_cache_destroyed)
+		{
+			RegisterXactCallback(pgfdw_xact_callback, NULL);
+			RegisterSubXactCallback(pgfdw_subxact_callback, NULL);
+			CacheRegisterSyscacheCallback(FOREIGNSERVEROID,
+		  pgfdw_inval_callback, (Datum) 0);
+			CacheRegisterSyscacheCallback(USERMAPPINGOID,
+		  pgfdw_inval_callback, (Datum) 0);
+		}
 	}
 
 	/* Set flag that we did GetConnection during the current transaction */
@@ -1095,6 +,13 @@ 

Re: Test harness for regex code (to allow importing Tcl's test suite)

2021-01-16 Thread Alexander Lakhin
Hello Tom,
04.01.2021 08:47, Tom Lane wrote:
> Hm.  There isn't anything about test_regex()'s API that I would want
> to expose to end users ;-) ... it's just designed to replicate the
> test API that Henry Spencer designed a couple decades ago, which IMO
> was not too clean even by the standards of the time.
As test_regex() is not meant to be public, maybe this is of little
importance, but I've found another bug while exercising the new test module:
select * from test_regex(repeat('(x)', 32), 'a', 'c');
leads to
==00:00:00:05.736 2605072== Invalid write of size 4
==00:00:00:05.736 2605072==    at 0x4866D09: setup_test_matches
(test_regex.c:564)
==00:00:00:05.736 2605072==    by 0x4867276: test_regex (test_regex.c:105)
==00:00:00:05.736 2605072==    by 0x37B10C: ExecMakeTableFunctionResult
(execSRF.c:234)
==00:00:00:05.736 2605072==    by 0x38AAA4: FunctionNext
(nodeFunctionscan.c:95)
==00:00:00:05.736 2605072==    by 0x37BA68: ExecScanFetch (execScan.c:133)
==00:00:00:05.736 2605072==    by 0x37BB05: ExecScan (execScan.c:182)
==00:00:00:05.736 2605072==    by 0x38A9CE: ExecFunctionScan
(nodeFunctionscan.c:270)
==00:00:00:05.736 2605072==    by 0x378E5D: ExecProcNodeFirst
(execProcnode.c:450)
==00:00:00:05.736 2605072==    by 0x372CBC: ExecProcNode (executor.h:247)
==00:00:00:05.736 2605072==    by 0x372CBC: ExecutePlan (execMain.c:1542)
==00:00:00:05.736 2605072==    by 0x372E22: standard_ExecutorRun
(execMain.c:364)
==00:00:00:05.736 2605072==    by 0x372EF2: ExecutorRun (execMain.c:308)
==00:00:00:05.736 2605072==    by 0x4E2B77: PortalRunSelect (pquery.c:912)
==00:00:00:05.736 2605072==  Address 0xee4d5ec is 908 bytes inside a
block of size 1,024 alloc'd
==00:00:00:05.736 2605072==    at 0x483B7F3: malloc (in
/usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
==00:00:00:05.736 2605072==    by 0x62239C: AllocSetAlloc (aset.c:919)
==00:00:00:05.736 2605072==    by 0x629656: palloc0 (mcxt.c:995)
==00:00:00:05.736 2605072==    by 0x4866A06: setup_test_matches
(test_regex.c:450)
==00:00:00:05.736 2605072==    by 0x4867276: test_regex (test_regex.c:105)
==00:00:00:05.736 2605072==    by 0x37B10C: ExecMakeTableFunctionResult
(execSRF.c:234)
==00:00:00:05.736 2605072==    by 0x38AAA4: FunctionNext
(nodeFunctionscan.c:95)
==00:00:00:05.736 2605072==    by 0x37BA68: ExecScanFetch (execScan.c:133)
==00:00:00:05.736 2605072==    by 0x37BB05: ExecScan (execScan.c:182)
==00:00:00:05.736 2605072==    by 0x38A9CE: ExecFunctionScan
(nodeFunctionscan.c:270)
==00:00:00:05.736 2605072==    by 0x378E5D: ExecProcNodeFirst
(execProcnode.c:450)
==00:00:00:05.736 2605072==    by 0x372CBC: ExecProcNode (executor.h:247)
==00:00:00:05.736 2605072==    by 0x372CBC: ExecutePlan (execMain.c:1542)
==00:00:00:05.736 2605072==

Best regards,
Alexander




Re: Key management with tests

2021-01-16 Thread Andres Freund
Hi,

On 2021-01-17 11:54:57 +0530, Amit Kapila wrote:
> On Sun, Jan 17, 2021 at 5:38 AM Tom Kincaid  wrote:
> > Admittedly I am a novice on this topic, and the majority of the
> > PostgreSQL source code, however I am hopeful enough (those of you who
> > know me understand that I suffer from eternal optimism) that I am
> > going to attempt to help.
> >
> > Is there a design document for a Postgres feature of this size and
> > scope that people feel would serve as a good example? Alternatively,
> > is there a design document template that has been successfully used in
> > the past?
> >
> 
> We normally write the design considerations and choices we made with
> the reasons in README and code comments. Personally, I am not sure if
> there is a need for any specific document per se, but a README and
> detailed comments in the code should suffice to address what people
> are worried about here.

Right. It could be a README file, or a long comment at the start of one of
the files. It doesn't matter too much. What matters is that people who
haven't been on those phone calls can understand the design and the
implications it has.


> It is mostly from the perspective that other developers who read the
> code, want to fix bugs, or later enhance that code should be able to
> understand it.

I'd add the perspective of code reviewers as well.


> One recent example I can give is
> Peter's work on bottom-up deletion [1], which I read today, where I
> find that the design is captured via a README, appropriate comments in
> the code, and documentation. This feature is quite different and
> probably introduces a lot more new concepts, but I hope it will give
> you some clues.
> 
> [1] - 
> https://git.postgresql.org/gitweb/?p=postgresql.git;a=commitdiff;h=d168b666823b6e0bcf60ed19ce24fb5fb91b8ccf

This is a great example.

Greetings,

Andres Freund




Re: Key management with tests

2021-01-16 Thread Amit Kapila
On Sun, Jan 17, 2021 at 5:38 AM Tom Kincaid  wrote:
>
> > > > I think that's not at all acceptable. I don't mind hashing out details
> > > > on calls / off-list, but the design needs to be public, documented, and
> > > > reviewable.  And if it's something the community can't understand, then
> > > > it can't get in. We're going to have to maintain this going forward.
> > >
> > > OK, so we don't want it.  That's fine with me.
> >
> > That's not what I said...
> >
>
>
>  I think the majority of us believe that it is important we take this
> first step towards a solid TDE implementation in PostgreSQL that is
> built around the community processes, which involve general consensus.
>
> Before this feature falls into the “we will never do it because we
> will never build consensus" category and community PostgreSQL
> potentially gets locked out of more deployment scenarios that require
> this feature I would like to see if I can help with this current
> attempt at it. I will share that I am concerned that if the people who
> have been involved in this to date can’t get this in, it will never
> happen.
>
> Admittedly I am a novice on this topic, and the majority of the
> PostgreSQL source code, however I am hopeful enough (those of you who
> know me understand that I suffer from eternal optimism) that I am
> going to attempt to help.
>
> Is there a design document for a Postgres feature of this size and
> scope that people feel would serve as a good example? Alternatively,
> is there a design document template that has been successfully used in
> the past?
>

We normally write the design considerations and choices we made with
the reasons in README and code comments. Personally, I am not sure if
there is a need for any specific document per se, but a README and
detailed comments in the code should suffice to address what people
are worried about here. It is mostly from the perspective that other
developers who read the code, want to fix bugs, or later enhance that
code should be able to understand it. One recent example I can give is
Peter's work on bottom-up deletion [1], which I read today, where I
find that the design is captured via a README, appropriate comments in
the code, and documentation. This feature is quite different and
probably introduces a lot more new concepts, but I hope it will give
you some clues.

[1] - 
https://git.postgresql.org/gitweb/?p=postgresql.git;a=commitdiff;h=d168b666823b6e0bcf60ed19ce24fb5fb91b8ccf

-- 
With Regards,
Amit Kapila.




Re: Is Recovery actually paused?

2021-01-16 Thread Dilip Kumar
On Thu, Jan 14, 2021 at 6:49 PM Yugo NAGATA  wrote:
>
> On Wed, 13 Jan 2021 17:49:43 +0530
> Dilip Kumar  wrote:
>
> > On Wed, Jan 13, 2021 at 3:35 PM Dilip Kumar  wrote:
> > >
> > > On Wed, Jan 13, 2021 at 3:27 PM Yugo NAGATA  wrote:
> > > >
> > > > On Thu, 10 Dec 2020 11:25:23 +0530
> > > > Dilip Kumar  wrote:
> > > >
> > > > > > > However, I wonder users don't expect pg_is_wal_replay_paused to 
> > > > > > > wait.
> > > > > > > Especially, if max_standby_streaming_delay is -1, this will be 
> > > > > > > blocked forever,
> > > > > > > although this setting may not be usual. In addition, some users 
> > > > > > > may set
> > > > > > > recovery_min_apply_delay for a large.  If such users call 
> > > > > > > pg_is_wal_replay_paused,
> > > > > > > it could wait for a long time.
> > > > > > >
> > > > > > > At least, I think we need some descriptions on document to explain
> > > > > > > pg_is_wal_replay_paused could wait while a time.
> > > > > >
> > > > > > Ok
> > > > >
> > > > > Fixed this, added some comments in .sgml as well as in function header
> > > >
> > > > Thank you for fixing this.
> > > >
> > > > Also, is it better to fix the description of pg_wal_replay_pause from
> > > > "Pauses recovery." to "Request to pause recovery." in according with
> > > > pg_is_wal_replay_paused?
> > >
> > > Okay
> > >
> > > >
> > > > > > > Also, how about adding a new boolean argument to 
> > > > > > > pg_is_wal_replay_paused to
> > > > > > > control whether this waits for recovery to get paused or not? By 
> > > > > > > setting its
> > > > > > > default value to true or false, users can use the old format for 
> > > > > > > calling this
> > > > > > > and the backward compatibility can be maintained.
> > > > > >
> > > > > > So basically, if the wait_recovery_pause flag is false then we will
> > > > > > immediately return true if the pause is requested?  I agree that it 
> > > > > > is
> > > > > > good to have an API to know whether the recovery pause is requested 
> > > > > > or
> > > > > > not but I am not sure is it good idea to make this API serve both 
> > > > > > the
> > > > > > purpose?  Anyone else have any thoughts on this?
> > > > > >
> > > >
> > > > I think the current pg_is_wal_replay_paused() already has another 
> > > > purpose;
> > > > this waits recovery to actually get paused. If we want to limit this 
> > > > API's
> > > > purpose only to return the pause state, it seems better to fix this to 
> > > > return
> > > > the actual state at the cost of lacking the backward compatibility. If 
> > > > we want
> > > > to know whether pause is requested, we may add a new API like
> > > > pg_is_wal_replay_pause_requested(). Also, if we want to wait for recovery 
> > > > to actually
> > > > get paused, we may add an option to pg_wal_replay_pause() for this 
> > > > purpose.
> > > >
> > > > However, this might be a bikeshedding. If anyone don't care that
> > > > pg_is_wal_replay_paused() can make user wait for a long time, I don't 
> > > > care either.
> > >
> > > I don't think that it will be blocked ever, because
> > > pg_wal_replay_pause is sending the WakeupRecovery() which means the
> > > recovery process will not be stuck on waiting for the WAL.
>
> Yes, it does not get stuck waiting for WAL. However, it can get stuck
> while resolving a recovery conflict. The process could wait for
> max_standby_streaming_delay or max_standby_archive_delay at most before
> recovery gets completely paused.

Okay, I agree that it is possible, so for handling this we have a
couple of options:

1. pg_is_wal_replay_paused() will wait for recovery to actually get
paused, but the user has an option to cancel that.  So I agree that
there is currently no way to just know that a recovery pause has been
requested without waiting for it to actually get paused.  One option is
to provide another interface, as you mentioned,
pg_is_wal_replay_pause_requested(), which can just return the request
status.  I am not sure how useful it is.

2. Pass an option to pg_is_wal_replay_paused() specifying whether to
wait for recovery to actually get paused or not.

3. Pass an option to pg_wal_replay_pause() specifying whether to wait
for the recovery pause or just request it and return.

I like option 1; any other opinions on this?
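
For reference, a hedged sketch of how these options would look from SQL
(option 2's argument is a hypothetical signature under discussion, not an
existing API):

    -- request a pause; recovery may not have stopped applying WAL yet
    SELECT pg_wal_replay_pause();

    -- option 1: with the patch, pg_is_wal_replay_paused() waits until
    -- recovery is actually paused
    SELECT pg_is_wal_replay_paused();

    -- option 2 (hypothetical): let the caller choose whether to wait
    -- SELECT pg_is_wal_replay_paused(wait_recovery_pause => false);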

> Also, it could wait for recovery_min_apply_delay if it has a valid value. It 
> is possible
> that a user set this parameter to a large value, so it could wait for a long 
> time. However,
> this will be avoided by calling recoveryPausesHere() or 
> CheckAndSetRecoveryPause() in
> recoveryApplyDelay().

Right

> > > > > > > As another comment, while pg_is_wal_replay_paused is blocking, I 
> > > > > > > can not cancel
> > > > > > > the query. I think CHECK_FOR_INTERRUPTS() is necessary in the 
> > > > > > > waiting loop.
> > > >
> > > > How about this fix? I think users may want to cancel 
> > > > pg_is_wal_replay_paused() during
> > > > this is blocking.
> > >
> > > Yeah, we can do this.  I will send the updated patch 

Re: Deleting older versions in unique indexes to avoid page splits

2021-01-16 Thread Amit Kapila
On Wed, Jan 13, 2021 at 11:16 PM Peter Geoghegan  wrote:
>
> On Mon, Jan 11, 2021 at 9:26 PM Peter Geoghegan  wrote:
> > I'm going to proceed with committing the original version of the patch
> > -- I feel that this settles it.
>
> Pushed both patches from the patch series just now.
>

Nice work! I briefly read the commits and have a few questions.

Do we do this optimization (bottom-up deletion) only when the last
item which can lead to a page split has indexUnchanged set to true? If
so, what if the last tuple doesn't have indexUnchanged but the
previous ones do?

Why are we using the terminology "bottom-up deletion" rather than
simply "duplicate version deletion" or something along those lines?

There is the following comment in the code:
'Index AM/tableam coordination is central to the design of bottom-up
index deletion.  The index AM provides hints about where to look to
the tableam by marking some entries as "promising".  Index AM does
this with duplicate index tuples that are strongly suspected to be old
versions left behind by UPDATEs that did not logically modify indexed
values.'

How do we distinguish between version duplicate tuples (added because
some other indexed column was updated) and ordinary duplicate tuples
(added because a user inserted a row with a duplicate value) for the
purpose of bottom-up deletion?  I think we need to distinguish between
them because in the former case we can remove the old version of the
tuple in the index but not in the latter. Or is it the case that the
index AM doesn't differentiate between them but relies on the heap AM
to give the correct answer?
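
To make the question concrete, a hedged illustration of the two kinds of
duplicates being asked about (the table, index, and data are made up):

    CREATE TABLE orders (id int PRIMARY KEY, status text);
    CREATE INDEX orders_status_idx ON orders (status);
    INSERT INTO orders VALUES (1, 'pending');

    -- version duplicate: "status" is not logically changed, but because
    -- another indexed column ("id") is updated the change cannot be HOT,
    -- so orders_status_idx gets a second physical entry for the same
    -- logical row's status value
    UPDATE orders SET id = 1001 WHERE id = 1;

    -- ordinary duplicates: distinct logical rows that merely share a value
    INSERT INTO orders VALUES (2, 'pending'), (3, 'pending');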

-- 
With Regards,
Amit Kapila.




Re: Is Recovery actually paused?

2021-01-16 Thread Dilip Kumar
On Sun, Jan 17, 2021 at 3:52 AM Masahiko Sawada  wrote:
>
> On Sat, Jan 16, 2021 at 12:28 PM Masahiko Sawada  
> wrote:
> >
> > ---
> > +   /* test for recovery pause if user has requested the pause */
> > +   if (((volatile XLogCtlData *) XLogCtl)->recoveryPause)
> > +   recoveryPausesHere(false);
> > +
> > +   now = GetCurrentTimestamp();
> > +
> >
> > Hmm, if the recovery pauses here, the wal receiver isn't launched even
> > when wal_retrieve_retry_interval has passed, which seems not good. I
> > think we want the recovery to be paused but want the wal receiver to
> > continue receiving WAL.
>
> I had misunderstood the code and the patch, please ignore this
> comment. Pausing the recovery here is fine with me.

Thanks for the review Sawada-San,  I will work on your other comments
and post the patch.

-- 
Regards,
Dilip Kumar
EnterpriseDB: http://www.enterprisedb.com




Re: PoC/WIP: Extended statistics on expressions

2021-01-16 Thread Justin Pryzby
On Sun, Jan 17, 2021 at 01:23:39AM +0100, Tomas Vondra wrote:
> diff --git a/doc/src/sgml/ref/create_statistics.sgml 
> b/doc/src/sgml/ref/create_statistics.sgml
> index 4363be50c3..5b8eb8d248 100644
> --- a/doc/src/sgml/ref/create_statistics.sgml
> +++ b/doc/src/sgml/ref/create_statistics.sgml
> @@ -21,9 +21,13 @@ PostgreSQL documentation
>  
>   
>  
> +CREATE STATISTICS [ IF NOT EXISTS ]  class="parameter">statistics_name
> +ON ( expression )
> +FROM table_name

>  CREATE STATISTICS [ IF NOT EXISTS ]  class="parameter">statistics_name
>  [ ( statistics_kind [, ... 
> ] ) ]
> -ON column_name, 
> column_name [, ...]
> +ON { column_name | ( 
> expression ) } [, ...]
>  FROM table_name
>  

I think this part is wrong, since it's not possible to specify a single column
in form#2.

If I'm right, the current way is:

 - form#1 allows expression statistics of a single expression, and doesn't
   allow specifying "kinds";

 - form#2 allows specifying "kinds", but always computes expression statistics,
   and requires multiple columns/expressions.

So it'd need to be column_name|expression, column_name|expression [,...]
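
For example, under that reading of the two forms in the patch (table and
column names here are made up):

    -- form #1: a single expression, no statistics kinds may be specified
    CREATE STATISTICS s1 ON (lower(a)) FROM t;

    -- form #2: statistics kinds may be specified, but at least two
    -- columns/expressions are required
    CREATE STATISTICS s2 (ndistinct, dependencies) ON a, (a + b) FROM t;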

> @@ -39,6 +43,16 @@ CREATE STATISTICS [ IF NOT EXISTS ]  class="parameter">statistics_na
> database and will be owned by the user issuing the command.
>
>  
> +  
> +   The CREATE STATISTICS command has two basic forms. The
> +   simple variant allows building statistics for a single expression, does
> +   not allow specifying any statistics kinds and provides benefits similar
> +   to an expression index. The full variant allows defining statistics 
> objects
> +   on multiple columns and expressions, and selecting which statistics kinds 
> will
> +   be built. The per-expression statistics are built automatically when there
> +   is at least one expression.
> +  

> +   
> +expression
> +
> + 
> +  The expression to be covered by the computed statistics. In this case
> +  only a single expression is required, in which case only the expression
> +  statistics kind is allowed. The order of expressions is insignificant.

I think this part is wrong now ?
I guess there's no user-facing expression "kind" left in the CREATE command.
I guess "in which case" means "if only one expr is specified".
"expression" could be either form#1 or form#2.

Maybe it should just say:
> +  An expression to be covered by the computed statistics.

Maybe somewhere else, say:
> In the second form of the command, the order of expressions is insignificant.

-- 
Justin




RE: libpq debug log

2021-01-16 Thread tsunakawa.ta...@fujitsu.com
From: iwata@fujitsu.com 
> This patch includes these items:

Is there anything else in this revision that is not handled?

Below are my comments.

Also, why don't you try running the regression tests with a temporary 
modification to PQtrace() to output the trace to a file?  The sole purpose is 
to confirm that this patch makes the test crash (core dump).


(1)
-   conn->Pfdebug = debug_port;
+   if (pqTraceInit(conn))
+   {
+   conn->Pfdebug = debug_port;
+   if (conn->Pfdebug != NULL)
+   setlinebuf(conn->Pfdebug);
+   }
+   else
+   {
+   /* XXX report ENOMEM? */
+   fprintf(conn->Pfdebug, "Failed to initialize trace support\n");
+   fflush(conn->Pfdebug);
+   }
 }

* When the passed debug_port is NULL, the function should return after calling 
PQuntrace().

* Is setlinebuf() available on Windows?  Maybe you should use setvbuf() instead.

* I don't see the need for a separate pqTraceInit() function, because it is only 
called here.


(2)
+bool
+pqTraceInit(PGconn *conn)
+{
+   /* already done? */
+   if (conn->be_msg != NULL)
+   {

What's this code for?  I think it's sufficient that PQuntrace() frees be_msg and 
PQtrace() always allocates be_msg.


(3)
+   conn->fe_msg = malloc(sizeof(pqFrontendMessage) +
+   MAX_FRONTEND_MSGS * 
sizeof(pqFrontendMessageField));
+   conn->fe_msg->field_num = 0;
+   if (conn->fe_msg == NULL)
+   {
+   free(conn->be_msg);
+   /* NULL out for the case that fe_msg malloc fails */
+   conn->be_msg = NULL;
+   return false;
+   }
+   conn->fe_msg->field_num = 0;

The memory for the fields array is one entry larger than necessary.  The 
allocation code would be:

+   conn->fe_msg = malloc(offsetof(pqFrontendMessage, fields) +
+   MAX_FRONTEND_MSGS * 
sizeof(pqFrontendMessageField));

Also, the line with "field_num = 0" appears twice.  The first one should be 
removed.


(4)
+static void
+pqLogMessageByte1(PGconn *conn, char v, PGCommSource commsource)
+{
...
+   if (PG_PROTOCOL_MAJOR(conn->pversion) >= 3)
+   {

I think you can remove the protocol version check in various logging functions, 
because PQtrace() disables logging when the protocol version is 2.


(5)
@@ -966,10 +966,6 @@ pqSaveParameterStatus(PGconn *conn, const char *name, 
const char *value)
pgParameterStatus *pstatus;
pgParameterStatus *prev;
 
-   if (conn->Pfdebug)
-   fprintf(conn->Pfdebug, "pqSaveParameterStatus: '%s' = '%s'\n",
-   name, value);
-

Where is this information output instead?


(6)
+ * Set up state so that we can trace. NB -- this might be called mutiple

mutiple -> multiple


(7)
+   LOG_FIRST_BYTE, /* logging the first byte 
identifing the
+  protocol 
message type */

identifing -> identifying


(8)
+#define pqLogMsgByte1(CONN, CH, SOURCE) \
+((CONN)->Pfdebug ? pqLogMessageByte1(CONN, CH, SOURCE) : 0)

For functions that return void, follow the style of CHECK_FOR_INTERRUPTS() 
defined in miscadmin.h.
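
For what it's worth, a hedged sketch of that style, modeled on
CHECK_FOR_INTERRUPTS() (the macro and function names are taken from the
patch; this is only an illustration, not the patch code):

/* statement-like macro, since pqLogMessageByte1() returns void */
#define pqLogMsgByte1(CONN, CH, SOURCE) \
	do { \
		if ((CONN)->Pfdebug) \
			pqLogMessageByte1(CONN, CH, SOURCE); \
	} while (0)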


(9)
+   currtime = time(NULL);
+   tmp = localtime();
+   strftime(timestr, sizeof(timestr), "%Y-%m-%d 
%H:%M:%S %Z", tmp);
+
+   fprintf(conn->Pfdebug, "%s %s ", timestr, 
message_direction);

It's better to have microsecond accuracy, because knowing the accumulation of 
such fine-grained timings may help to troubleshoot heavily-loaded client cases. 
 You can mimic setup_formatted_log_time() in src/backend/utils/error/elog.c.  
This is used for the %m marker in log_line_prefix.
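
For instance, a hedged sketch of such a formatting helper using plain C
library calls (this is not the actual setup_formatted_log_time() code, just
an illustration of the idea):

#include <stdio.h>
#include <sys/time.h>
#include <time.h>

/* format the current time with microsecond precision into buf */
static void
format_trace_time(char *buf, size_t buflen)
{
	struct timeval tv;
	char		sec[64];

	gettimeofday(&tv, NULL);
	/* seconds part via strftime(), then append the microseconds */
	strftime(sec, sizeof(sec), "%Y-%m-%d %H:%M:%S", localtime(&tv.tv_sec));
	snprintf(buf, buflen, "%s.%06d", sec, (int) tv.tv_usec);
}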


(10)
I think it's better to use tabs (or any other character that is less likely to 
appear in the log field) as a field separator, because it makes it easier to 
process the log with a spreadsheet or some other tools.


(11)
+   /*
+* True type of message tagged '\0' is known 
when next 4 bytes is
+* checked. So we delay printing message type 
to pqLogMsgInt()
+*/
+   if (v != '\0')
+   fprintf(conn->Pfdebug, "%s ",
+   
pqGetProtocolMsgType((unsigned char) v, commsource));

In what cases does '\0' appear as a message type?


(12)
+static const char *
+pqGetProtocolMsgType(unsigned char c, PGCommSource commsource)
+{
+   if (commsource == FROM_BACKEND && c < lengthof(protocol_message_type_b))
+   return protocol_message_type_b[c];
+   else if (commsource == FROM_FRONTEND && c < 

RE: Disable WAL logging to speed up data loading

2021-01-16 Thread tsunakawa.ta...@fujitsu.com
From: Osumi, Takamichi/大墨 昂道 
> I updated my patch to take in those feedbacks !
> Have a look at the latest patch.

Looks good to me (the status remains Ready for Committer).


Regards
Takayuki Tsunakawa





Re: PoC/WIP: Extended statistics on expressions

2021-01-16 Thread Zhihong Yu
Hi,
+* Check that only the base rel is mentioned.  (This should be dead code
+* now that add_missing_from is history.)
+*/
+   if (list_length(pstate->p_rtable) != 1)

If it is dead code, it can be removed, right ?

For statext_mcv_clauselist_selectivity:

+   // bms_free(list_attnums[listidx]);

The commented line can be removed.

+bool
+examine_clause_args2(List *args, Node **exprp, Const **cstp, bool
*expronleftp)

It would be better to add a comment for examine_clause_args2() since
examine_clause_args() already exists.

+   if (!ok || stats->compute_stats == NULL || stats->minrows <= 0)

When would stats->minrows have negative value ?

For serialize_expr_stats():

+   sd = table_open(StatisticRelationId, RowExclusiveLock);
+
+   /* lookup OID of composite type for pg_statistic */
+   typOid = get_rel_type_id(StatisticRelationId);
+   if (!OidIsValid(typOid))
+   ereport(ERROR,

Looks like the table_open() call can be made after the typOid check.

+   Datum   values[Natts_pg_statistic];
+   boolnulls[Natts_pg_statistic];
+   HeapTuple   stup;
+
+   if (!stats->stats_valid)

It seems the local arrays can be declared after the validity check.

+   if (enabled[i] == STATS_EXT_NDISTINCT)
+   ndistinct_enabled = true;
+   if (enabled[i] == STATS_EXT_DEPENDENCIES)
+   dependencies_enabled = true;
+   if (enabled[i] == STATS_EXT_MCV)

The second and third if statements should be preceded by 'else'.

+ReleaseDummy(HeapTuple tuple)
+{
+   pfree(tuple);

Since ReleaseDummy() is just a single pfree call, maybe you don't need this
method - call pfree in its place.

Cheers

On Sat, Jan 16, 2021 at 4:24 PM Tomas Vondra 
wrote:

> On 1/17/21 12:22 AM, Justin Pryzby wrote:
> > On Sat, Jan 16, 2021 at 05:48:43PM +0100, Tomas Vondra wrote:
> >> +  
> >> +   expr text
> >> +  
> >> +  
> >> +   Expression the extended statistics is defined on
> >> +  
> >
> > Expression the extended statistics ARE defined on
> > Or maybe say "on which the extended statistics are defined"
> >
>
> I'm pretty sure "is" is correct because "expression" is singular.
>
> >> +  
> >> +   The CREATE STATISTICS command has two basic
> forms. The
> >> +   simple variant allows to build statistics for a single expression,
> does
> >
> > .. ALLOWS BUILDING statistics for a single expression, AND does (or BUT
> does)
> >
> >> +   Expression statistics are per-expression and are similar to
> creating an
> >> +   index on the expression, except that they avoid the overhead of the
> index.
> >
> > Maybe say "overhead of index maintenance"
> >
>
> Yeah, that sounds better.
>
> >> +   All functions and operators used in a statistics definition must be
> >> +   immutable, that is, their results must depend only on
> >> +   their arguments and never on any outside influence (such as
> >> +   the contents of another table or the current time).  This
> restriction
> >
> > say "outside factor" or "external factor"
> >
>
> In fact, we've removed the immutability restriction, so this paragraph
> should have been removed.
>
> >> +   results of those expression, and uses default estimates as
> illustrated
> >> +   by the first query.  The planner also does not realize the value of
> the
> >
> > realize THAT
> >
>
> OK, changed.
>
> >> +   second column fully defines the value of the other column, because
> date
> >> +   truncated to day still identifies the month. Then expression and
> >> +   ndistinct statistics are built on those two columns:
> >
> > I got an error doing this:
> >
> > CREATE TABLE t AS SELECT generate_series(1,9) AS i;
> > CREATE STATISTICS s ON (i+1) ,(i+1+0) FROM t;
> > ANALYZE t;
> > SELECT i+1 FROM t GROUP BY 1;
> > ERROR:  corrupt MVNDistinct entry
> >
>
> Thanks. There was a thinko in estimate_multivariate_ndistinct, resulting
> in mismatching the ndistinct coefficient items. The attached patch fixes
> that, but I've realized the way we pick the "best" statistics may need
> some improvements (I added an XXX comment about that).
>
>
> regards
>
> --
> Tomas Vondra
> EnterpriseDB: http://www.enterprisedb.com
> The Enterprise PostgreSQL Company
>


Calculation of relids (pull_varnos result) for PlaceHolderVars

2021-01-16 Thread Tom Lane
I've been looking into the planner failure reported at [1].
The given test case is comparable to this query in the
regression database:

regression=# select i8.*, ss.v, t.unique2
  from int8_tbl i8
left join int4_tbl i4 on i4.f1 = 1
left join lateral (select i4.f1 + 1 as v) as ss on true
left join tenk1 t on t.unique2 = ss.v
where q2 = 456;
ERROR:  failed to assign all NestLoopParams to plan nodes

The core of the problem turns out to be that pull_varnos() returns
an incorrect result for the PlaceHolderVar that represents the ss.v
output.  Because the only Var within the PHV's expression is i4.f1,
pull_varnos() returns just the relid set (2), implying that the
value can be calculated after having scanned only the i4 relation.
But that's wrong: i4.f1 here represents an outer-join output, so it
can only be computed after forming the join (1 2) of i8 and i4.
In this example, the erroneous calculation leads the planner to
construct a plan with an invalid join order, which triggers a
sanity-check failure in createplan.c.

The relid set (2) is the minimum possible join level at which such
a PHV could be evaluated; (1 2) is the maximum level, corresponding
to the PHV's syntactic position above the i8/i4 outer join.  After
thinking about this I've realized that what pull_varnos() ideally
ought to use is the PHV's ph_eval_at level, which is the join level
we actually intend to evaluate it at.  There are a couple of
problems, one that's not too awful and one that's a pain in the
rear:

1. pull_varnos() can be used before we've calculated ph_eval_at, as
well as during deconstruct_jointree() which can change ph_eval_at.
This doesn't seem fatal.  We can fall back to the conservative
assumption of using the syntactic level if the PlaceHolderInfo isn't
there yet.  Once it is (i.e., within deconstruct_jointree()) I think
we are okay, because any given PHV's ph_eval_at should have reached
its final value before we consider any qual involving that PHV.

2. pull_varnos() is not passed the planner "root" data structure,
so it can't get at the PlaceHolderInfo list.  We can change its
API of course, but that propagates to dozens of places.

The 0001 patch attached goes ahead and makes those API changes.
I think this is perfectly reasonable to do in HEAD, but it most
likely is an unacceptable API/ABI break for the back branches.
There's one change needed in contrib/postgres_fdw, and other
extensions likely call one or more of the affected functions too.

As an alternative back-branch fix, we could consider the 0002
patch attached, which simply changes pull_varnos() to make the
most conservative assumption that ph_eval_at could wind up as
the PHV's syntactic level (phrels).  The trouble with this is
that we'll lose some valid optimizations.  There is only one
visible plan change in the regression tests, but it's kind of
unpleasant: we fail to remove a join that we did remove before.
So I'm not sure how much of a problem this'd be in the field.

A third way is to preserve the existing pull_varnos() API in
the back branches, changing all the internal calls to use a
new function that has the additional "root" parameter.  This
seems feasible but I've not attempted to code it yet.

(We might be able to get rid of a lot of this mess if I ever
finish the changes I have in mind to represent outer-join outputs
more explicitly.  That seems unlikely to happen for v14 at this
point, and it'd certainly never be back-patchable.)

One loose end is that I'm not sure how far to back-patch.  The
given test case only fails back to v12.  I've not bisected, but
I suspect that the difference is the v12-era changes to collapse out
trivial Result RTEs (4be058fe9 and follow-ons), such as the lateral
sub-select in this test case.  The pull_varnos() calculation is
surely just as wrong for a long time before that, but perhaps it's
only a latent bug before that?  I've not managed to construct a test
case that fails in v11, but I don't have a lot of confidence that
there isn't one.

Thoughts?

regards, tom lane

[1] 
https://www.postgresql.org/message-id/237d2b72-6dd0-7b24-3a6f-94288cd44...@bfw-online.de
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 2f2d4d171c..48c2f23ae0 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -5709,7 +5709,8 @@ foreign_grouping_ok(PlannerInfo *root, RelOptInfo *grouped_rel,
 			 * RestrictInfos, so we must make our own.
 			 */
 			Assert(!IsA(expr, RestrictInfo));
-			rinfo = make_restrictinfo(expr,
+			rinfo = make_restrictinfo(root,
+	  expr,
 	  true,
 	  false,
 	  false,
diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c
index 4f5b870d1b..d263ecf082 100644
--- a/src/backend/optimizer/path/clausesel.c
+++ b/src/backend/optimizer/path/clausesel.c
@@ -227,7 +227,7 @@ clauselist_selectivity_ext(PlannerInfo *root,
 			}
 			else
 		

Re: list of extended statistics on psql

2021-01-16 Thread Tomas Vondra

On 1/17/21 2:41 AM, Shinoda, Noriyoshi (PN Japan FSIP) wrote:

Hi, hackers.

I tested this committed feature.
It doesn't seem to be available to non-superusers due to the inability to 
access pg_statistic_ext_data.
Is this the expected behavior?



Hmmm, that's a good point. Bummer we haven't noticed that earlier.

I wonder what the right fix should be - presumably we could do something 
like pg_stats_ext (we can't use that view directly, because it formats 
the data, so the sizes are different).


But should it list just the stats the user has access to, or should it 
list everything and leave the inaccessible fields NULL?



regards

--
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company




Re: Why does create_gather_merge_plan need make_sort?

2021-01-16 Thread Tomas Vondra
For the record, this got committed as 6bc27698324 in December, along 
with the other incremental sort fixes and improvements. I forgot to mark 
it as committed in the app, so I've done that now.



regards

--
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company




RE: list of extended statistics on psql

2021-01-16 Thread Shinoda, Noriyoshi (PN Japan FSIP)
Hi, hackers.

I tested this committed feature. 
It doesn't seem to be available to non-superusers due to the inability to 
access pg_statistic_ext_data. 
Is this the expected behavior?

--- operation ---
postgres=> CREATE STATISTICS stat1_data1 ON c1, c2 FROM data1;
CREATE STATISTICS
postgres=> ANALYZE data1;
ANALYZE
postgres=> SELECT * FROM pg_statistic_ext;
  oid  | stxrelid |   stxname   | stxnamespace | stxowner | stxstattarget | stxkeys | stxkind
-------+----------+-------------+--------------+----------+---------------+---------+---------
 16393 |    16385 | stat1_data1 |         2200 |    16384 |            -1 | 1 2     | {d,f,m}
(1 row)

postgres=> \dX
ERROR:  permission denied for table pg_statistic_ext_data
postgres=>
postgres=> \connect postgres postgres
You are now connected to database "postgres" as user "postgres".
postgres=#
postgres=# \dX
                        List of extended statistics
 Schema |    Name     |    Definition     | Ndistinct | Dependencies |    MCV
--------+-------------+-------------------+-----------+--------------+-----------
 public | stat1_data1 | c1, c2 FROM data1 | built     | built        | requested
(1 row)

--- operation ---

Regards,
Noriyoshi Shinoda

-Original Message-
From: Tomas Vondra [mailto:tomas.von...@enterprisedb.com] 
Sent: Sunday, January 17, 2021 8:32 AM
To: Julien Rouhaud ; Tatsuro Yamada 

Cc: Alvaro Herrera ; Tomas Vondra 
; Michael Paquier ; Pavel 
Stehule ; PostgreSQL Hackers 

Subject: Re: list of extended statistics on psql



On 1/15/21 5:19 PM, Tomas Vondra wrote:
> 
> 
> On 1/15/21 9:47 AM, Julien Rouhaud wrote:
>> On Wed, Jan 13, 2021 at 10:22:05AM +0900, Tatsuro Yamada wrote:
>>> Hi Tomas,
>>>
>>> On 2021/01/13 7:48, Tatsuro Yamada wrote:
 On 2021/01/12 20:08, Tomas Vondra wrote:
> On 1/12/21 2:57 AM, Tatsuro Yamada wrote:
>> On 2021/01/09 9:01, Tomas Vondra wrote:
> ...>
>>> While working on that, I realized that 'defined' might be a bit 
>>> ambiguous, I initially thought it means 'NOT NULL' (which it does not).
>>> I propose to change it to 'requested' instead. Tatsuro, do you 
>>> agree, or do you think 'defined' is better?
>>
>> Regarding the status of extended stats, I think the followings:
>>
>>    - "defined": it shows the extended stats defined only. We 
>> can't know
>>     whether it needs to analyze or not. I agree this 
>> name was
>>      ambiguous. Therefore we should replace it with a 
>> more suitable
>>     name.
>>    - "requested": it shows the extended stats needs something. Of 
>> course,
>>     we know it needs to ANALYZE because we can create the 
>> patch.
>>     However, I feel there is a little ambiguity for DBA.
>>     To solve this, it would be better to write an 
>> explanation of
>>     the status in the document. For example,
>>
>> ==
>> The column of the kind of extended stats (e. g. Ndistinct) shows some 
>> statuses.
>> "requested" means that it needs to gather data by ANALYZE. 
>> "built" means ANALYZE
>>    was finished, and the planner can use it. NULL means that it doesn't 
>> exists.
>> ==
>>
>> What do you think? :-D
>>
>
> Yes, that seems reasonable to me. Will you provide an updated patch?


 Sounds good. I'll send the updated patch today.
>>>
>>>
>>>
>>> I updated the patch to add the explanation of the extended stats' statuses.
>>> Please feel free to modify the patch to improve it more clearly.
>>>
>>> The attached files are:
>>> 0001: Add psql \dx and the fixed document
>>> 0002: Regression test for psql \dX
>>> app-psql.html: Created by "make html" command (You can check the
>>>explanation of the statuses easily, probably)
>>
>> Hello Yamada-san,
>>
>> I reviewed the patch and don't have specific complaints, it all looks good!
>>
>> I'm however thinking about the "requested" status.  I'm wondering if 
>> it could lead people to think that an ANALYZE is scheduled and will happen 
>> soon.
>> Maybe "defined" or "declared" might be less misleading, or even 
>> "waiting for analyze"?
>>
> 
> Well, the "defined" option is not great either, because it can be 
> interpreted as "NOT NULL" - that's why I proposed "requested". Not 
> sure about "declared" - I wouldn't use it in this context, but I'm not 
> a native speaker so maybe it's OK.
> 

I've pushed this, keeping the "requested". If we decide that some other term is 
a better choice, we can tweak that later of course.

Thanks Tatsuro-san for the patience!


regards

--
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company




Re: Alter timestamp without timezone to with timezone rewrites rows

2021-01-16 Thread Noah Misch
On Sat, Jan 16, 2021 at 12:03:19PM -0500, Tom Lane wrote:
> Noah Misch  writes:
> > On Wed, Jan 13, 2021 at 10:28:26AM -0500, Tom Lane wrote:
> >> So a non-rewriting conversion would only be possible if local time is
> >> identical to UTC; which is true for few enough people that nobody has
> >> bothered with attempting the optimization.
> 
> > PostgreSQL 12 and later do have that optimization.  Example DDL:
> > https://git.postgresql.org/gitweb/?p=postgresql.git;a=commitdiff;h=3c59263#patch4
> 
> Ah, I'd forgotten about that patch (obviously).  It is a kluge, without
> a doubt, since it has to hard-wire knowledge about the behavior of two
> specific conversions into what ought to be general-purpose code.  But
> I guess it is useful often enough to justify its existence.
> 
> I wonder whether it'd be worth moving this logic into a planner support
> function attached to those conversion functions?  I wouldn't bother
> right now, but if somebody else comes along with a similar proposal,
> we should think hard about that.

Yes.
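
For reference, a hedged sketch of the case that optimization covers (table
and column names are made up):

    -- With the session TimeZone set to UTC, PostgreSQL 12+ can skip the
    -- table rewrite for this conversion; with any other setting the table
    -- is rewritten.
    SET timezone = 'UTC';
    ALTER TABLE t ALTER COLUMN ts TYPE timestamptz;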




Re: Key management with tests

2021-01-16 Thread Tom Kincaid
> > > I think that's not at all acceptable. I don't mind hashing out details
> > > on calls / off-list, but the design needs to be public, documented, and
> > > reviewable.  And if it's something the community can't understand, then
> > > it can't get in. We're going to have to maintain this going forward.
> >
> > OK, so we don't want it.  That's fine with me.
>
> That's not what I said...
>


 I think the majority of us believe that it is important we take this
first step towards a solid TDE implementation in PostgreSQL that is
built around the community processes, which involve general consensus.

Before this feature falls into the “we will never do it because we
will never build consensus" category and community PostgreSQL
potentially gets locked out of more deployment scenarios that require
this feature I would like to see if I can help with this current
attempt at it. I will share that I am concerned that if the people who
have been involved in this to date can’t get this in, it will never
happen.

Admittedly I am a novice on this topic, and the majority of the
PostgreSQL source code, however I am hopeful enough (those of you who
know me understand that I suffer from eternal optimism) that I am
going to attempt to help.

Is there a design document for a Postgres feature of this size and
scope that people feel would serve as a good example? Alternatively,
is there a design document template that has been successfully used in
the past? I could guess based on things I have observed reading this
list for many years. However, if there is something that those who are
deeply involved in the development effort feel would suffice as an
example of a "good design document" or a "good design template",
sharing it would be greatly appreciated.




Re: list of extended statistics on psql

2021-01-16 Thread Tomas Vondra




On 1/15/21 5:19 PM, Tomas Vondra wrote:



On 1/15/21 9:47 AM, Julien Rouhaud wrote:

On Wed, Jan 13, 2021 at 10:22:05AM +0900, Tatsuro Yamada wrote:

Hi Tomas,

On 2021/01/13 7:48, Tatsuro Yamada wrote:

On 2021/01/12 20:08, Tomas Vondra wrote:

On 1/12/21 2:57 AM, Tatsuro Yamada wrote:

On 2021/01/09 9:01, Tomas Vondra wrote:

...>

While working on that, I realized that 'defined' might be a bit
ambiguous, I initially thought it means 'NOT NULL' (which it does not).
I propose to change it to 'requested' instead. Tatsuro, do you agree, or
do you think 'defined' is better?


Regarding the status of extended stats, I think the followings:

   - "defined": it shows the extended stats defined only. We can't know
    whether it needs to analyze or not. I agree this name was
     ambiguous. Therefore we should replace it with a more suitable
    name.
   - "requested": it shows the extended stats needs something. Of course,
    we know it needs to ANALYZE because we can create the patch.
    However, I feel there is a little ambiguity for DBA.
    To solve this, it would be better to write an explanation of
    the status in the document. For example,

==
The column of the kind of extended stats (e. g. Ndistinct) shows some statuses.
"requested" means that it needs to gather data by ANALYZE. "built" means ANALYZE
   was finished, and the planner can use it. NULL means that it doesn't exists.
==

What do you think? :-D



Yes, that seems reasonable to me. Will you provide an updated patch?



Sounds good. I'll send the updated patch today.




I updated the patch to add the explanation of the extended stats' statuses.
Please feel free to modify the patch to improve it more clearly.

The attached files are:
0001: Add psql \dx and the fixed document
0002: Regression test for psql \dX
app-psql.html: Created by "make html" command (You can check the
   explanation of the statuses easily, probably)


Hello Yamada-san,

I reviewed the patch and don't have specific complaints, it all looks good!

I'm however thinking about the "requested" status.  I'm wondering if it could
lead people to think that an ANALYZE is scheduled and will happen soon.
Maybe "defined" or "declared" might be less misleading, or even "waiting for
analyze"?



Well, the "defined" option is not great either, because it can be
interpreted as "NOT NULL" - that's why I proposed "requested". Not sure
about "declared" - I wouldn't use it in this context, but I'm not a
native speaker so maybe it's OK.



I've pushed this, keeping the "requested". If we decide that some other 
term is a better choice, we can tweak that later of course.


Thanks Tatsuro-san for the patience!


regards

--
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company




Re: PoC/WIP: Extended statistics on expressions

2021-01-16 Thread Justin Pryzby
On Sat, Jan 16, 2021 at 05:48:43PM +0100, Tomas Vondra wrote:
> +  
> +   expr text
> +  
> +  
> +   Expression the extended statistics is defined on
> +  

Expression the extended statistics ARE defined on
Or maybe say "on which the extended statistics are defined"

> +  
> +   The CREATE STATISTICS command has two basic forms. The
> +   simple variant allows to build statistics for a single expression, does

.. ALLOWS BUILDING statistics for a single expression, AND does (or BUT does)

> +   Expression statistics are per-expression and are similar to creating an
> +   index on the expression, except that they avoid the overhead of the index.

Maybe say "overhead of index maintenance"

> +   All functions and operators used in a statistics definition must be
> +   immutable, that is, their results must depend only on
> +   their arguments and never on any outside influence (such as
> +   the contents of another table or the current time).  This restriction

say "outside factor" or "external factor"

> +   results of those expression, and uses default estimates as illustrated
> +   by the first query.  The planner also does not realize the value of the

realize THAT

> +   second column fully defines the value of the other column, because date
> +   truncated to day still identifies the month. Then expression and
> +   ndistinct statistics are built on those two columns:

I got an error doing this:

CREATE TABLE t AS SELECT generate_series(1,9) AS i;
CREATE STATISTICS s ON (i+1) ,(i+1+0) FROM t;
ANALYZE t;
SELECT i+1 FROM t GROUP BY 1;
ERROR:  corrupt MVNDistinct entry

-- 
Justin




Re: New Table Access Methods for Multi and Single Inserts

2021-01-16 Thread Jeff Davis


> If we agree on removing heap_multi_insert_v2 API and embed that logic
> inside heap_insert_v2, then we can do this - pass the required
> information and the functions ExecInsertIndexTuples and
> ExecARInsertTriggers as callbacks so that, whether or not
> heap_insert_v2 choses single or multi inserts, it can callback these
> functions with the required information passed after the flush. We
> can
> add the callback and required information into TableInsertState. But,
> I'm not quite sure, we would make ExecInsertIndexTuples and
> ExecARInsertTriggers.

How should the API interact with INSERT INTO ... SELECT? Right now it
doesn't appear to be integrated at all, but that seems like a fairly
important path for bulk inserts.

Regards,
Jeff Davis






Re: Is Recovery actually paused?

2021-01-16 Thread Masahiko Sawada
On Sat, Jan 16, 2021 at 12:28 PM Masahiko Sawada  wrote:
>
> ---
> +   /* test for recovery pause if user has requested the pause */
> +   if (((volatile XLogCtlData *) XLogCtl)->recoveryPause)
> +   recoveryPausesHere(false);
> +
> +   now = GetCurrentTimestamp();
> +
>
> Hmm, if the recovery pauses here, the wal receiver isn't launched even
> when wal_retrieve_retry_interval has passed, which seems not good. I
> think we want the recovery to be paused but want the wal receiver to
> continue receiving WAL.

I had misunderstood the code and the patch, please ignore this
comment. Pausing the recovery here is fine with me.

Regards,

-- 
Masahiko Sawada
EnterpriseDB:  https://www.enterprisedb.com/




Re: COPY FREEZE and setting PD_ALL_VISIBLE/visibility map bits

2021-01-16 Thread Tomas Vondra

On 1/16/21 4:11 PM, Anastasia Lubennikova wrote:



> ...


As Pavan correctly figured out before, the problem is that 
RelationGetBufferForTuple() moves to the next page, losing free space in 
the block:


 > ... I see that a relcache invalidation arrives
 > after 1st and then after every 32672th block is filled. That clears the
 > rel->rd_smgr field and we lose the information about the saved target
 > block. The code then moves to extend the relation again and thus 
skips the
 > previously less-than-half filled block, losing the free space in that 
block.


The reason for this cache invalidation is the vm_extend() call, which happens 
every 32672 blocks.


RelationGetBufferForTuple() tries to use the last page, but for some 
reason this code is under the 'use_fsm' check. And COPY FROM doesn't use the 
FSM (see TABLE_INSERT_SKIP_FSM).



         /*
          * If the FSM knows nothing of the rel, try the last page 
before we
          * give up and extend.  This avoids one-tuple-per-page syndrome 
during

          * bootstrapping or in a recently-started system.
          */
         if (targetBlock == InvalidBlockNumber)
         {
             BlockNumber nblocks = RelationGetNumberOfBlocks(relation);
             if (nblocks > 0)
                 targetBlock = nblocks - 1;
         }


I think we can use this code regardless of 'use_fsm'. With this 
change, the number of toast relation pages is correct. The patch is attached.




Thanks for the updated patch, this version looks OK to me - I've marked 
it as RFC. I'll do a bit more testing, review, and then I'll get it 
committed.


regards

--
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company




Re: Outdated replication protocol error?

2021-01-16 Thread Jeff Davis
On Fri, 2021-01-15 at 13:55 -0800, Andres Freund wrote:
> Has anybody dug out the thread leading to the commit?


https://www.postgresql.org/message-id/CAMsr%2BYEN04ztb%2BSDb_UfD72Kg5M3F%2BCpad31QBKT2rRjysmxRg%40mail.gmail.com

Regards,
Jeff Davis






Re: Printing backtrace of postgres processes

2021-01-16 Thread Tom Lane
vignesh C  writes:
> On Sat, Jan 16, 2021 at 1:40 AM Andres Freund  wrote:
>> Why is a full signal needed? Seems the procsignal infrastructure should
>> suffice?

> Most of the processes have access to ProcSignal; for these processes,
> printing of the call stack is handled by using ProcSignal. The pgstat
> process and the syslogger process do not have access to ProcSignal, so
> multiplexing with SIGUSR1 is not possible for them. So I handled
> printing of the call stack for the pgstat process and syslogger using
> the SIGUSR2 signal.

I'd argue that backtraces for those processes aren't really essential,
and indeed that trying to make the syslogger report its own backtrace
is damn dangerous.

(Personally, I think this whole patch fails the safety-vs-usefulness
tradeoff, but I expect I'll get shouted down.)

regards, tom lane




Re: trailing junk in numeric literals

2021-01-16 Thread Tom Lane
Vik Fearing  writes:
> With respect, you are looking at a 10-year-old document and I am not.
> 5.3  has since been modified.

Is a newer version of the spec available online?

regards, tom lane




Re: LET clause

2021-01-16 Thread Joel Jacobson
On Sat, Jan 16, 2021, at 17:21, Vik Fearing wrote:
> I agree on the conciseness, but I'm wondering what performance problem
> you think there is with the CROSS JOIN LATERAL VALUES technique.  Have
> you tried running an EXPLAIN (ANALYZE, VERBOSE) on that?

Yes, I've tried, it results in the same problem due to flattening:

PREPARE calc_easter_day_for_year AS
SELECT make_date($1::integer, easter_month, easter_day)
FROM (VALUES ($1::integer % 19, $1::integer / 100)) AS step1(g,c)
CROSS JOIN LATERAL (VALUES ((c - c/4 - (8*c + 13)/25 + 19*g + 15) % 30)) AS step2(h)
CROSS JOIN LATERAL (VALUES (h - (h/28)*(1 - (h/28)*(29/(h + 1))*((21 - g)/11)))) AS step3(i)
CROSS JOIN LATERAL (VALUES (($1::integer + $1::integer/4 + i + 2 - c + c/4) % 7)) AS step4(j)
CROSS JOIN LATERAL (VALUES (i - j)) AS step5(p)
CROSS JOIN LATERAL (VALUES (3 + (p + 26)/30, 1 + (p + 27 + (p + 6)/40) % 31)) AS step6(easter_month, easter_day)
;

SET plan_cache_mode = 'force_generic_plan';

EXPLAIN (ANALYZE, VERBOSE) EXECUTE calc_easter_day_for_year(2021);

Result  (cost=0.00..1.14 rows=1 width=4) (actual time=1.612..1.616 rows=1 
loops=1)
   Output: make_date($1, (3 + (($1 / 100) - (($1 / 100) / 4)) - (((8 * 
($1 / 100)) + 13) / 25)) + (19 * ($1 % 19))) + 15) % 30) - $1 / 100) - 
(($1 / 100) / 4)) - (((8 * ($1 / 100)) + 13) / 25)) + (19 * ($1 % 19))) + 15) % 
30) / 28) * (1 - ($1 / 100) - (($1 / 100) / 4)) - (((8 * ($1 / 100)) + 
13) / 25)) + (19 * ($1 % 19))) + 15) % 30) / 28) * (29 / ((($1 / 100) - 
(($1 / 100) / 4)) - (((8 * ($1 / 100)) + 13) / 25)) + (19 * ($1 % 19))) + 15) % 
30) + 1))) * ((21 - ($1 % 19)) / 11) - (($1 + ($1 / 4)) + ((($1 / 
100) - (($1 / 100) / 4)) - (((8 * ($1 / 100)) + 13) / 25)) + (19 * ($1 % 19))) 
+ 15) % 30) - $1 / 100) - (($1 / 100) / 4)) - (((8 * ($1 / 100)) + 13) 
/ 25)) + (19 * ($1 % 19))) + 15) % 30) / 28) * (1 - ($1 / 100) - (($1 / 
100) / 4)) - (((8 * ($1 / 100)) + 13) / 25)) + (19 * ($1 % 19))) + 15) % 30) / 
28) * (29 / ((($1 / 100) - (($1 / 100) / 4)) - (((8 * ($1 / 100)) + 13) / 
25)) + (19 * ($1 % 19))) + 15) % 30) + 1))) * ((21 - ($1 % 19)) / 11)) + 2) 
- ($1 / 100)) + (($1 / 100) / 4)) % 7)) + 26) / 30)), (1 + ((($1 / 100) 
- (($1 / 100) / 4)) - (((8 * ($1 / 100)) + 13) / 25)) + (19 * ($1 % 19))) + 15) 
% 30) - $1 / 100) - (($1 / 100) / 4)) - (((8 * ($1 / 100)) + 13) / 25)) 
+ (19 * ($1 % 19))) + 15) % 30) / 28) * (1 - ($1 / 100) - (($1 / 100) / 
4)) - (((8 * ($1 / 100)) + 13) / 25)) + (19 * ($1 % 19))) + 15) % 30) / 28) * 
(29 / ((($1 / 100) - (($1 / 100) / 4)) - (((8 * ($1 / 100)) + 13) / 25)) + 
(19 * ($1 % 19))) + 15) % 30) + 1))) * ((21 - ($1 % 19)) / 11) - (($1 + 
($1 / 4)) + ((($1 / 100) - (($1 / 100) / 4)) - (((8 * ($1 / 100)) + 13) / 
25)) + (19 * ($1 % 19))) + 15) % 30) - $1 / 100) - (($1 / 100) / 4)) - 
(((8 * ($1 / 100)) + 13) / 25)) + (19 * ($1 % 19))) + 15) % 30) / 28) * (1 - 
($1 / 100) - (($1 / 100) / 4)) - (((8 * ($1 / 100)) + 13) / 25)) + (19 
* ($1 % 19))) + 15) % 30) / 28) * (29 / ((($1 / 100) - (($1 / 100) / 4)) - 
(((8 * ($1 / 100)) + 13) / 25)) + (19 * ($1 % 19))) + 15) % 30) + 1))) * ((21 - 
($1 % 19)) / 11)) + 2) - ($1 / 100)) + (($1 / 100) / 4)) % 7)) + 27) + 
(($1 / 100) - (($1 / 100) / 4)) - (((8 * ($1 / 100)) + 13) / 25)) + (19 
* ($1 % 19))) + 15) % 30) - $1 / 100) - (($1 / 100) / 4)) - (((8 * ($1 
/ 100)) + 13) / 25)) + (19 * ($1 % 19))) + 15) % 30) / 28) * (1 - ($1 / 
100) - (($1 / 100) / 4)) - (((8 * ($1 / 100)) + 13) / 25)) + (19 * ($1 % 19))) 
+ 15) % 30) / 28) * (29 / ((($1 / 100) - (($1 / 100) / 4)) - (((8 * ($1 / 
100)) + 13) / 25)) + (19 * ($1 % 19))) + 15) % 30) + 1))) * ((21 - ($1 % 19)) / 
11) - (($1 + ($1 / 4)) + ((($1 / 100) - (($1 / 100) / 4)) - (((8 * 
($1 / 100)) + 13) / 25)) + (19 * ($1 % 19))) + 15) % 30) - $1 / 100) - 
(($1 / 100) / 4)) - (((8 * ($1 / 100)) + 13) / 25)) + (19 * ($1 % 19))) + 15) % 
30) / 28) * (1 - ($1 / 100) - (($1 / 100) / 4)) - (((8 * ($1 / 100)) + 
13) / 25)) + (19 * ($1 % 19))) + 15) % 30) / 28) * (29 / ((($1 / 100) - 
(($1 / 100) / 4)) - (((8 * ($1 / 100)) + 13) / 25)) + (19 * ($1 % 19))) + 15) % 
30) + 1))) * ((21 - ($1 % 19)) / 11)) + 2) - ($1 / 100)) + (($1 / 100) / 
4)) % 7)) + 6) / 40)) % 31)))
Planning Time: 6.132 ms
Execution Time: 2.094 ms
(4 rows)
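
(For comparison — a minimal sketch, not taken from the thread's attachments,
of the work-around mentioned upthread: forcing materialization with
MATERIALIZED CTEs so the steps are not flattened into one huge output
expression. Step names and formulas mirror the query above; the statement
name is made up.)

PREPARE calc_easter_day_for_year_cte AS
WITH step1 AS MATERIALIZED (
    SELECT $1::integer % 19 AS g, $1::integer / 100 AS c
), step2 AS MATERIALIZED (
    SELECT g, c, (c - c/4 - (8*c + 13)/25 + 19*g + 15) % 30 AS h FROM step1
), step3 AS MATERIALIZED (
    SELECT g, c, h - (h/28)*(1 - (h/28)*(29/(h + 1))*((21 - g)/11)) AS i FROM step2
), step4 AS MATERIALIZED (
    SELECT c, i, ($1::integer + $1::integer/4 + i + 2 - c + c/4) % 7 AS j FROM step3
), step5 AS MATERIALIZED (
    SELECT i - j AS p FROM step4
), step6 AS MATERIALIZED (
    SELECT 3 + (p + 26)/30 AS easter_month,
           1 + (p + 27 + (p + 6)/40) % 31 AS easter_day FROM step5
)
SELECT make_date($1::integer, easter_month, easter_day) FROM step6;

EXECUTE calc_easter_day_for_year_cte(2021);  -- expected: 2021-04-04

Each CTE is evaluated once, so the generic plan stays small, at the cost of
materializing the intermediate steps.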


Re: WIP: System Versioned Temporal Table

2021-01-16 Thread Vik Fearing
On 1/16/21 7:39 PM, Surafel Temesgen wrote:
> On Fri, Jan 15, 2021 at 8:02 PM Simon Riggs 
> wrote:
> 
>>
>> There are no existing applications, so for PostgreSQL, it wouldn't be an
>> issue.
>>
>>
> Yes, we don't have any yet, but the main purpose of ALTER TABLE foo ADD
> SYSTEM VERSIONING is to add system versioning functionality to existing
> applications.

I haven't looked at this patch in a while, but I hope that ALTER TABLE t
ADD SYSTEM VERSIONING is not adding any columns.  That is a bug if it does.
-- 
Vik Fearing




Re: WIP: System Versioned Temporal Table

2021-01-16 Thread Surafel Temesgen
On Fri, Jan 15, 2021 at 8:02 PM Simon Riggs 
wrote:

>
> There are no existing applications, so for PostgreSQL, it wouldn't be an
> issue.
>
>
Yes, we don't have any yet, but the main purpose of ALTER TABLE foo ADD
SYSTEM VERSIONING is to add system versioning functionality to existing
applications.

regards
Surafel


Re: Printing backtrace of postgres processes

2021-01-16 Thread Andres Freund
Hi,

On Sat, Jan 16, 2021, at 09:34, vignesh C wrote:
> On Sat, Jan 16, 2021 at 1:40 AM Andres Freund  wrote:
> >
> > On 2021-01-15 09:53:05 +0100, Peter Eisentraut wrote:
> > > On 2020-12-08 10:38, vignesh C wrote:
> > > > I have implemented printing of backtrace based on handling it in
> > > > CHECK_FOR_INTERRUPTS. This patch also includes the change to allow
> > > > getting backtrace of any particular process based on the suggestions.
> > > > Attached patch has the implementation for the same.
> > > > Thoughts?
> > >
> > > Are we willing to use up a signal for this?
> >
> > Why is a full signal needed? Seems the procsignal infrastructure should
> > suffice?
> 
> Most processes have access to ProcSignal, and for those the printing of
> the call stack is handled through ProcSignal. The pgstat and syslogger
> processes do not have access to ProcSignal, so multiplexing with SIGUSR1
> is not possible for them. So I handled the printing of the call stack for
> the pgstat and syslogger processes using the SIGUSR2 signal.
> This is because shared memory is detached before the pgstat and syslogger
> processes are started, via the calls below:
> /* Drop our connection to postmaster's shared memory, as well */
> dsm_detach_all();
> PGSharedMemoryDetach();

Sure. But why is it important enough to support those that we are willing to 
dedicate a signal to the task? Their backtraces aren't often interesting, so I 
think we should just ignore them here.

Andres




Re: Printing backtrace of postgres processes

2021-01-16 Thread vignesh C
On Sat, Jan 16, 2021 at 1:40 AM Andres Freund  wrote:
>
> On 2021-01-15 09:53:05 +0100, Peter Eisentraut wrote:
> > On 2020-12-08 10:38, vignesh C wrote:
> > > I have implemented printing of backtrace based on handling it in
> > > CHECK_FOR_INTERRUPTS. This patch also includes the change to allow
> > > getting backtrace of any particular process based on the suggestions.
> > > Attached patch has the implementation for the same.
> > > Thoughts?
> >
> > Are we willing to use up a signal for this?
>
> Why is a full signal needed? Seems the procsignal infrastructure should
> suffice?

Most processes have access to ProcSignal, and for those the printing of
the call stack is handled through ProcSignal. The pgstat and syslogger
processes do not have access to ProcSignal, so multiplexing with SIGUSR1
is not possible for them. So I handled the printing of the call stack for
the pgstat and syslogger processes using the SIGUSR2 signal.
This is because shared memory is detached before the pgstat and syslogger
processes are started, via the calls below:
/* Drop our connection to postmaster's shared memory, as well */
dsm_detach_all();
PGSharedMemoryDetach();

Regards,
Vignesh
EnterpriseDB: http://www.enterprisedb.com




Re: trailing junk in numeric literals

2021-01-16 Thread Vik Fearing
On 1/16/21 6:10 PM, Tom Lane wrote:
> Vik Fearing  writes:
>> On 1/16/21 4:32 PM, Andreas Karlsson wrote:
>>> On 1/16/21 2:02 PM, Vik Fearing wrote:
 I am in favor of such a change so that we can also accept 1_000_000
 which currently parses as "1 AS _000_000" (which also isn't compliant
 because identifiers cannot start with an underscore, but I don't want to
 take it that far).
 It would also allow us to have 0xdead_beef, 0o_777, and 0b1010__1110
 without most of it being interpreted as an alias.
> 
>>> That would be a nice feature. Is it part of the SQL standard?
> 
>> Yes, all of that is in the standard.
> 
> Really?  Please cite chapter and verse.  AFAICS in SQL:2011 5.3 <literal>,
> a numeric literal can't contain any extraneous characters, just sign,
> digits, optional decimal point, and optional exponent.  Hex and octal
> literals are certainly not there either.

With respect, you are looking at a 10-year-old document and I am not.

5.3 <literal> has since been modified.
-- 
Vik Fearing




Re: trailing junk in numeric literals

2021-01-16 Thread Tom Lane
Vik Fearing  writes:
> On 1/16/21 4:32 PM, Andreas Karlsson wrote:
>> On 1/16/21 2:02 PM, Vik Fearing wrote:
>>> I am in favor of such a change so that we can also accept 1_000_000
>>> which currently parses as "1 AS _000_000" (which also isn't compliant
>>> because identifiers cannot start with an underscore, but I don't want to
>>> take it that far).
>>> It would also allow us to have 0xdead_beef, 0o_777, and 0b1010__1110
>>> without most of it being interpreted as an alias.

>> That would be a nice feature. Is it part of the SQL standard?

> Yes, all of that is in the standard.

Really?  Please cite chapter and verse.  AFAICS in SQL:2011 5.3 <literal>,
a numeric literal can't contain any extraneous characters, just sign,
digits, optional decimal point, and optional exponent.  Hex and octal
literals are certainly not there either.

regards, tom lane




Re: Alter timestamp without timezone to with timezone rewrites rows

2021-01-16 Thread Tom Lane
Noah Misch  writes:
> On Wed, Jan 13, 2021 at 10:28:26AM -0500, Tom Lane wrote:
>> So a non-rewriting conversion would only be possible if local time is
>> identical to UTC; which is true for few enough people that nobody has
>> bothered with attempting the optimization.

> PostgreSQL 12 and later do have that optimization.  Example DDL:
> https://git.postgresql.org/gitweb/?p=postgresql.git;a=commitdiff;h=3c59263#patch4

Ah, I'd forgotten about that patch (obviously).  It is a kluge, without
a doubt, since it has to hard-wire knowledge about the behavior of two
specific conversions into what ought to be general-purpose code.  But
I guess it is useful often enough to justify its existence.

I wonder whether it'd be worth moving this logic into a planner support
function attached to those conversion functions?  I wouldn't bother
right now, but if somebody else comes along with a similar proposal,
we should think hard about that.
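
(For illustration — a minimal sketch of the optimized case being discussed,
assuming a v12+ server and a throwaway table; the conversion should skip the
rewrite only while the session TimeZone makes local time identical to UTC.)

SET timezone = 'UTC';
CREATE TABLE ts_demo (t timestamp without time zone);
-- With TimeZone = 'UTC', v12+ can change the column type without rewriting
-- the table; with any other setting the rows are rewritten as before.
ALTER TABLE ts_demo ALTER COLUMN t TYPE timestamp with time zone;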

regards, tom lane




Re: trailing junk in numeric literals

2021-01-16 Thread Vik Fearing
On 1/16/21 4:32 PM, Andreas Karlsson wrote:
> On 1/16/21 2:02 PM, Vik Fearing wrote:
>> I am in favor of such a change so that we can also accept 1_000_000
>> which currently parses as "1 AS _000_000" (which also isn't compliant
>> because identifiers cannot start with an underscore, but I don't want to
>> take it that far).
>>
>> It would also allow us to have 0xdead_beef, 0o_777, and 0b1010__1110
>> without most of it being interpreted as an alias.
> 
> That would be a nice feature. Is it part of the SQL standard?

Yes, all of that is in the standard.
-- 
Vik Fearing




Re: recovering from "found xmin ... from before relfrozenxid ..."

2021-01-16 Thread Andrey Borodin
Hi hackers!

Does anyone maintain an open-source pg_surgery analog for released versions of PG?
It seems to me I'll have to use something like this, and I just thought that I
should consider pg_surgery in favour of our pg_dirty_hands.
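
(For context — a hedged sketch of the interface pg_surgery exposes on master,
as far as I recall it; double-check the function names against the tree, and
note that the table name and TIDs below are made up.)

CREATE EXTENSION pg_surgery;
-- forcibly freeze a damaged tuple identified by its ctid
SELECT heap_force_freeze('mytable'::regclass, ARRAY['(0,1)']::tid[]);
-- or forcibly remove it
SELECT heap_force_kill('mytable'::regclass, ARRAY['(0,2)']::tid[]);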

Thanks!

Best regards, Andrey Borodin.



Re: trailing junk in numeric literals

2021-01-16 Thread Andreas Karlsson

On 1/16/21 2:02 PM, Vik Fearing wrote:

I am in favor of such a change so that we can also accept 1_000_000
which currently parses as "1 AS _000_000" (which also isn't compliant
because identifiers cannot start with an underscore, but I don't want to
take it that far).

It would also allow us to have 0xdead_beef, 0o_777, and 0b1010__1110
without most of it being interpreted as an alias.


That would be a nice feature. Is it part of the SQL standard?

Andreas





Re: COPY FREEZE and setting PD_ALL_VISIBLE/visibility map bits

2021-01-16 Thread Anastasia Lubennikova

On 12.01.2021 22:30, Tomas Vondra wrote:
Thanks. These patches seem to resolve the TOAST table issue, freezing 
it as expected. I think the code duplication is not an issue, but I 
wonder why heap_insert uses this condition:


    /*
     * ...
     *
     * No need to update the visibilitymap if it had all_frozen bit set
     * before this insertion.
     */
    if (all_frozen_set && ((vmstatus & VISIBILITYMAP_ALL_FROZEN) == 0))

while heap_multi_insert only does this:

    if (all_frozen_set) { ... }

I haven't looked at the details, but shouldn't both do the same thing?



I decided to add this check for heap_insert() to avoid unneeded calls of
visibilitymap_set(). If we insert tuples one by one, this ensures we call
it only once per page.
In my understanding, heap_multi_insert() inserts tuples in batches, so 
it doesn't need this optimization.





However, I've also repeated the test counting all-frozen pages in both 
the main table and TOAST table, and I get this:


patched
=======

select count(*) from pg_visibility((select reltoastrelid from pg_class 
where relname = 't'));


 count 
-------
    12
(1 row)


select count(*) from pg_visibility((select reltoastrelid from pg_class 
where relname = 't')) where not all_visible;


 count 
-------
     0
(1 row)

That is - all TOAST pages are frozen (as expected, which is good). But 
now there are 12 pages, not just 10 pages. That is, we're now 
creating 2 extra pages, for some reason. I recall Pavan reported a
similar issue with every 32768-th page not being properly filled, but
I'm not sure if that's the same issue.



regards



As Pavan correctly figured out before, the problem is that
RelationGetBufferForTuple() moves to the next page, losing the free space in
the block:


> ... I see that a relcache invalidation arrives
> after 1st and then after every 32672th block is filled. That clears the
> rel->rd_smgr field and we lose the information about the saved target
> block. The code then moves to extend the relation again and thus skips the
> previously less-than-half filled block, losing the free space in that
> block.


The reason for this cache invalidation is the vm_extend() call, which happens
every 32672 blocks.


RelationGetBufferForTuple() tries to use the last page, but for some
reason this code is under the 'use_fsm' check, and COPY FROM doesn't use
the FSM (see TABLE_INSERT_SKIP_FSM).



        /*
         * If the FSM knows nothing of the rel, try the last page before we
         * give up and extend.  This avoids one-tuple-per-page syndrome during
         * bootstrapping or in a recently-started system.
         */
        if (targetBlock == InvalidBlockNumber)
        {
            BlockNumber nblocks = RelationGetNumberOfBlocks(relation);

            if (nblocks > 0)
                targetBlock = nblocks - 1;
        }


I think we can use this code without regard to 'use_fsm'. With this 
change, the number of toast rel pages is correct. The patch is attached.


--
Anastasia Lubennikova
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company

diff --git a/contrib/pg_visibility/expected/pg_visibility.out b/contrib/pg_visibility/expected/pg_visibility.out
index ca4b6e186b..0017e3415c 100644
--- a/contrib/pg_visibility/expected/pg_visibility.out
+++ b/contrib/pg_visibility/expected/pg_visibility.out
@@ -179,6 +179,69 @@ select pg_truncate_visibility_map('test_partition');
  
 (1 row)
 
+-- test copy freeze
+create table copyfreeze (a int, b char(1500));
+-- load all rows via COPY FREEZE and ensure that all pages are set all-visible
+-- and all-frozen.
+begin;
+truncate copyfreeze;
+copy copyfreeze from stdin freeze;
+commit;
+select * from pg_visibility_map('copyfreeze');
+ blkno | all_visible | all_frozen 
+-------+-------------+------------
+     0 | t           | t
+     1 | t           | t
+     2 | t           | t
+(3 rows)
+
+select * from pg_check_frozen('copyfreeze');
+ t_ctid 
+--------
+(0 rows)
+
+-- load half the rows via regular COPY and rest via COPY FREEZE. The pages
+-- which are touched by regular COPY must not be set all-visible/all-frozen. On
+-- the other hand, pages allocated by COPY FREEZE should be marked
+-- all-frozen/all-visible.
+begin;
+truncate copyfreeze;
+copy copyfreeze from stdin;
+copy copyfreeze from stdin freeze;
+commit;
+select * from pg_visibility_map('copyfreeze');
+ blkno | all_visible | all_frozen 
+-------+-------------+------------
+     0 | f           | f
+     1 | f           | f
+     2 | t           | t
+(3 rows)
+
+select * from pg_check_frozen('copyfreeze');
+ t_ctid 
+--------
+(0 rows)
+
+-- Try a mix of regular COPY and COPY FREEZE.
+begin;
+truncate copyfreeze;
+copy copyfreeze from stdin freeze;
+copy copyfreeze from stdin;
+copy copyfreeze from stdin freeze;
+commit;
+select * from pg_visibility_map('copyfreeze');
+ blkno | all_visible | all_frozen 
+-------+-------------+------------
+     0 | t           | t
+     1 | f           | f
+ 2 | t   | 

Re: WIP: System Versioned Temporal Table

2021-01-16 Thread Vik Fearing
On 1/14/21 6:42 PM, Surafel Temesgen wrote:
> Hi Simon,
> Thank you for all the work you does
> 
> On Mon, Jan 11, 2021 at 5:02 PM Simon Riggs 
> wrote:
> 
>>
>>
>> * Anomalies around use of CURRENT_TIMESTAMP are not discussed or resolved.
>> Probably need to add a test that end_timestamp > start_timestamp or ERROR,
>> which effectively enforces serializability.
>>
>>
> 
> This scenario doesn't happen.

It *does* happen and the standard even provides a specific error code
for it (2201H).

Please look at my extension for this feature which implements all the
requirements of the standard (except syntax grammar, of course).
https://github.com/xocolatl/periods/
-- 
Vik Fearing




Re: WIP: System Versioned Temporal Table

2021-01-16 Thread Vik Fearing
On 1/14/21 10:22 PM, Simon Riggs wrote:
> On Thu, Jan 14, 2021 at 5:46 PM Surafel Temesgen  
> wrote:
> 
>> On Fri, Jan 8, 2021 at 7:50 PM Ryan Lambert  wrote:
>>>
>>> I prefer to have them hidden by default.  This was mentioned up-thread with 
>>> no decision, it seems the standard is ambiguous.  MS SQL appears to have 
>>> flip-flopped on this decision [1].
> 
> I think the default should be like this:
> 
> SELECT * FROM foo FOR SYSTEM_TIME AS OF ...
> should NOT include the Start and End timestamp columns
> because this acts like a normal query just with a different snapshot timestamp
> 
> SELECT * FROM foo FOR SYSTEM_TIME BETWEEN x AND y
> SHOULD include the Start and End timestamp columns
> since this form of query can include multiple row versions for the
> same row, so it makes sense to see the validity times


I don't read the standard as being ambiguous about this at all.  The
columns should be shown just like any other column of the table.

I am not opposed to being able to set an attribute on columns allowing
them to be excluded from "*" but that is irrelevant to this patch.
-- 
Vik Fearing




Re: LET clause

2021-01-16 Thread Vik Fearing
On 1/3/21 1:12 PM, Joel Jacobson wrote:
> Hi hackers,
> 
> I just learned about a feature called "LET clause".
> 
> It's not part of the SQL standard, but it's supported by Oracle [1], 
> Couchbase [2] and AsterixDB [3].
> 
> I searched the pgsql-hackers archives and couldn't find any matches on "LET 
> clause",
> so I thought I should share this with you in case some people didn't know
> about it, like me.
> 
> "LET clauses can be useful when a (complex) expression is used several times 
> within a query, allowing it to be written once to make the query more 
> concise." [3]
> 
> In the mentioned other databases you can do this with the LET keyword, which 
> "creates a new variable and initializes it with the result of the expression 
> you supply".
> 
> Without the LET clause, your complex queries would need to be divided into 
> two separate queries:
> 
> * One query to get a particular value (or set of values), and
> * One query to use the value (or values) from the first query.
> 
> The example below computes the Easter month and day for a given year:
> 
> Work-around using CROSS JOIN LATERAL:
> 
> CREATE FUNCTION compute_easter_day_for_year(year integer)
> RETURNS date
> LANGUAGE sql
> AS $$
> SELECT make_date(year, easter_month, easter_day)
> FROM (VALUES (year % 19, year / 100)) AS step1(g,c)
> CROSS JOIN LATERAL (VALUES ((c - c/4 - (8*c + 13)/25 + 19*g + 15) % 30)) AS 
> step2(h)
> CROSS JOIN LATERAL (VALUES (h - (h/28)*(1 - (h/28)*(29/(h + 1))*((21 - g)/11)))) AS step3(i)
> CROSS JOIN LATERAL (VALUES ((year + year/4 + i + 2 - c + c/4) % 7)) AS 
> step4(j)
> CROSS JOIN LATERAL (VALUES (i - j)) AS step5(p)
> CROSS JOIN LATERAL (VALUES (3 + (p + 26)/30, 1 + (p + 27 + (p + 6)/40) % 31)) 
> AS step6(easter_month, easter_day)
> $$;
> 
> (Other possible work arounds: Use MATERIALIZED CTEs or sub-queries with 
> OFFSET 0 to prevent sub-query flattening.)
> 
> If we instead would have LET clauses in PostgreSQL, we could do:
> 
> CREATE FUNCTION compute_easter_day_for_year(year integer)
> RETURNS date
> LANGUAGE sql
> AS $$
> SELECT make_date(year, easter_month, easter_day)
> LET
>   g = year % 19,
>   c = year / 100,
>   h = (c - c/4 - (8*c + 13)/25 + 19*g + 15) % 30,
>   i = h - (h/28)*(1 - (h/28)*(29/(h + 1))*((21 - g)/11)),
>   j = (year + year/4 + i + 2 - c + c/4) % 7,
>   p = i - j,
>   easter_month = 3 + (p + 26)/30,
>   easter_day = 1 + (p + 27 + (p + 6)/40) % 31
> $$;
> 
> Without LET clauses, SQL isn't terribly well suited to execute fundamentally 
> stepwise imperative algorithms like this one.
> 
> The work-around is either to sacrifice performance and conciseness and use a
> hack (CROSS JOIN LATERAL or CTE), or to leave the SQL realm and use a PL like
> plpgsql to get good performance and conciseness.


I agree on the conciseness, but I'm wondering what performance problem
you think there is with the CROSS JOIN LATERAL VALUES technique.  Have
you tried running an EXPLAIN (ANALYZE, VERBOSE) on that?


> I have no opinion if this is something for PostgreSQL,
> since I have no idea on how complicated this would be to implement,
> which means I can't estimate if the increased complication of an 
> implementation
> would outweigh the possible added convenience/performance/conciseness gains.
> 
> I just wanted to share this in case this idea was unknown to some people here.
> 
> [1] 
> https://docs.oracle.com/cd/E93962_01/bigData.Doc/eql_onPrem/src/reql_statement_let.html
> [2] 
> https://docs.couchbase.com/server/current/n1ql/n1ql-language-reference/let.html
> [3] https://asterixdb.apache.org/docs/0.9.3/sqlpp/manual.html#Let_clauses
> 
> Kind regards,
> 
> Joel
> 


-- 
Vik Fearing




Re: Dump public schema ownership & seclabels

2021-01-16 Thread Vik Fearing
On 12/30/20 12:59 PM, Noah Misch wrote:
> On Tue, Dec 29, 2020 at 05:49:24AM -0800, Noah Misch wrote:
>> https://postgr.es/m/20201031163518.gb4039...@rfd.leadboat.com gave $SUBJECT 
>> as
>> one of the constituent projects for changing the public schema default ACL.
>> This ended up being simple.  Attached.
> 
> This is defective; it fails to reproduce nspacl after "ALTER SCHEMA public
> OWNER TO pg_write_server_files; REVOKE ALL ON SCHEMA public FROM
> pg_write_server_files;".  I will try again later.

Could I ask you to also include COMMENTs when you try again, please?
-- 
Vik Fearing




Re: trailing junk in numeric literals

2021-01-16 Thread Vik Fearing
On 12/29/20 10:18 AM, Peter Eisentraut wrote:
> On 2020-12-28 21:54, Tom Lane wrote:
>> Peter Eisentraut  writes:
>>> I was surprised to find that this doesn't error:
>>> => select 100a;
>>>     a
>>> -----
>>>    100
>>
>>> I suspect this and similar cases used to error before aliases without AS
>>> were introduced.  But now this seems possibly problematic.  Should we
>>> try to handle this better?
>>
>> Meh.  I think you'd get more brickbats than kudos if you start insisting
>> on a space there.
>>
>> I'm too lazy to try to decipher the SQL spec right now, but ISTR that
>> it insists on whitespace between a numeric literal and an identifier.
> 
> Yeah, non-delimiter tokens are supposed to be separated by delimiter
> tokens.
> 
>> So strictly speaking this SQL code is nonstandard anyway.  But our
>> lexer has always been forgiving about not requiring space if it's
>> not logically necessary to separate tokens.  I doubt trying to
>> change that would improve matters.
> 
> Well, the idea is to diagnose potential typos better.  But if there is
> no interest, then that's fine.


I am in favor of such a change so that we can also accept 1_000_000
which currently parses as "1 AS _000_000" (which also isn't compliant
because identifiers cannot start with an underscore, but I don't want to
take it that far).

It would also allow us to have 0xdead_beef, 0o_777, and 0b1010__1110
without most of it being interpreted as an alias.
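
(A quick illustration of the current behaviour — a sketch; as described
above, the underscore run becomes an implicit column alias.)

SELECT 1_000_000;
--  _000_000
-- ----------
--         1
-- (1 row)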
-- 
Vik Fearing




Re: Logical Replication - behavior of ALTER PUBLICATION .. DROP TABLE and ALTER SUBSCRIPTION .. REFRESH PUBLICATION

2021-01-16 Thread Bharath Rupireddy
On Sat, Jan 16, 2021 at 12:21 PM Bharath Rupireddy
 wrote:
> > In the test, can we have multiple publications for the subscription
> > because that is how we discovered that the originally proposed patch
> > was not correct? Also, is it possible to extend some of the existing
> > tests in 001_rep_changes.pl or anywhere so that we can avoid some cost
> > of the replication setup.
>
> Sure, I will add the multiple publications use case provided by japin
> and also see if I can move the tests from 100_bugs.pl to
> 001_rep_changes.pl.

Attaching the v5 patch set. 0001 - no change. 0002 - I moved the tests to
001_rep_changes.pl so that we can avoid creating separate subscriber and
publisher nodes and other setup. I also added the test case with multiple
publications for a subscription, which was failing with the v2-0001 patch.
Note that I had to create a subscriber and publications for this test
case, because I couldn't reproduce the regression (on the v2-0001 patch)
with any of the existing test cases, and I'm dropping them after the test
so that they will not harm any of the following tests. Hope that's okay.
With v5-0002 and v2-0001, the test case with multiple publications for a
subscription fails.
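
(To make the scenario concrete — a minimal SQL-level sketch of what the new
test exercises, reusing the table and publication names from
001_rep_changes.pl; the inserted value is made up.)

-- on the publisher
ALTER PUBLICATION tap_pub_ins_only DROP TABLE tab_ins;
INSERT INTO tab_ins VALUES (1003);
-- on the subscriber: with the 0001 fix the counts stay unchanged, because the
-- walsender no longer publishes tab_ins; without it the new row still arrives.
SELECT count(*), min(a), max(a) FROM tab_ins;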

Please consider the v5 patch set for further review.

With Regards,
Bharath Rupireddy.
EnterpriseDB: http://www.enterprisedb.com
From 7dc6f91ca7225119b4fe6b3f0869385519721d2c Mon Sep 17 00:00:00 2001
From: Bharath Rupireddy 
Date: Sat, 16 Jan 2021 11:30:09 +0530
Subject: [PATCH v5 1/1] Fix ALTER PUBLICATION...DROP TABLE behaviour

Currently, in logical replication, the publisher/walsender keeps
publishing tables even though they are no longer part of the publication,
i.e. after they have been dropped from it. Because of this,
ALTER PUBLICATION ... DROP TABLE doesn't work as expected.

To fix the above issue, when an entry gets invalidated in
rel_sync_cache_publication_cb(), reset its pubactions to false and let
get_rel_sync_entry() recalculate them.
---
 src/backend/replication/pgoutput/pgoutput.c | 13 +
 1 file changed, 13 insertions(+)

diff --git a/src/backend/replication/pgoutput/pgoutput.c b/src/backend/replication/pgoutput/pgoutput.c
index 2f01137b42..478cd1f9f5 100644
--- a/src/backend/replication/pgoutput/pgoutput.c
+++ b/src/backend/replication/pgoutput/pgoutput.c
@@ -1179,5 +1179,18 @@ rel_sync_cache_publication_cb(Datum arg, int cacheid, uint32 hashvalue)
 	 */
 	hash_seq_init(&status, RelationSyncCache);
 	while ((entry = (RelationSyncEntry *) hash_seq_search(&status)) != NULL)
+	{
 		entry->replicate_valid = false;
+
+		/*
+		 * Some relations might have been dropped from the publication, and we
+		 * do not need to publish changes for them. Since we cannot get the
+		 * dropped relations (see above), we reset pubactions for all
+		 * entries.
+		 */
+		entry->pubactions.pubinsert = false;
+		entry->pubactions.pubupdate = false;
+		entry->pubactions.pubdelete = false;
+		entry->pubactions.pubtruncate = false;
+	}
 }
-- 
2.25.1

From 95f4855a1f7ecaa8e2897a154e9347945770e76c Mon Sep 17 00:00:00 2001
From: Bharath Rupireddy 
Date: Sat, 16 Jan 2021 17:13:06 +0530
Subject: [PATCH v5 2/2] Test behaviour of ALTER PUBLICATION ... DROP TABLE

---
 src/test/subscription/t/001_rep_changes.pl | 95 +-
 1 file changed, 94 insertions(+), 1 deletion(-)

diff --git a/src/test/subscription/t/001_rep_changes.pl b/src/test/subscription/t/001_rep_changes.pl
index 0680f44a1a..215ccf40c9 100644
--- a/src/test/subscription/t/001_rep_changes.pl
+++ b/src/test/subscription/t/001_rep_changes.pl
@@ -3,7 +3,7 @@ use strict;
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 23;
+use Test::More tests => 27;
 
 # Initialize publisher node
 my $node_publisher = get_new_node('publisher');
@@ -153,6 +153,99 @@ is($result, qq(20|-20|-1),
 $node_publisher->safe_psql('postgres',
 	"INSERT INTO tab_full SELECT generate_series(1,10)");
 
+# Test behaviour of ALTER PUBLICATION ... DROP TABLE
+
+# When the publisher drops a table from a publication, it should also stop
+# sending its changes to the subscriber. We check on the subscriber whether it
+# receives the row that is inserted into the table on the publisher after the
+# table is dropped from the publication.
+
+$result = $node_subscriber->safe_psql('postgres',
+	"SELECT count(*), min(a), max(a) FROM tab_ins");
+is($result, qq(1052|1|1002), 'check rows on subscriber before table drop from publication');
+
+# Drop the table from the publication
+$node_publisher->safe_psql('postgres',
+	"ALTER PUBLICATION tap_pub_ins_only DROP TABLE tab_ins");
+
+# Insert a row on the publisher; the publisher will not send this row to the subscriber
+$node_publisher->safe_psql('postgres', "INSERT INTO tab_ins VALUES()");
+
+$node_publisher->wait_for_catchup('tap_sub');
+
+# The subscriber will not receive the row inserted after the table is dropped
+# from the publication, so the row count should remain the same.
+$result = $node_subscriber->safe_psql('postgres',
+	"SELECT count(*), min(a), max(a) FROM tab_ins");
+is($result, qq(1052|1|1002), 

Re: Alter timestamp without timezone to with timezone rewrites rows

2021-01-16 Thread Noah Misch
On Wed, Jan 13, 2021 at 10:28:26AM -0500, Tom Lane wrote:
> So a non-rewriting conversion would only be possible if local time is
> identical to UTC; which is true for few enough people that nobody has
> bothered with attempting the optimization.

PostgreSQL 12 and later do have that optimization.  Example DDL:
https://git.postgresql.org/gitweb/?p=postgresql.git;a=commitdiff;h=3c59263#patch4