Re: Memory leak fix in psql

2022-07-19 Thread Junwang Zhao
Got it, thanks!

On Wed, Jul 20, 2022 at 1:14 PM Michael Paquier  wrote:
>
> On Wed, Jul 20, 2022 at 12:51:24PM +0800, Junwang Zhao wrote:
> > Though the patch looks good, I myself think the patch should be edited
> > and submitted by Tang.
> > It's easy to attach a fixed patch based on the comments of the thread,
> > but credit should be given to Tang since he was the first one to find
> > the memory leak.
>
> Please note that I sometimes make slight edits to the patches that I
> merge into the tree; the author listed in the commit log remains the
> original one, and I usually don't list myself.  Credit goes where it is
> due, and Tang is the one here who authored this patch.
> --
> Michael



-- 
Regards
Junwang Zhao




Re: Remove fls(), use pg_bitutils.h facilities instead?

2022-07-19 Thread Thomas Munro
On Wed, Jul 20, 2022 at 5:26 PM Thomas Munro  wrote:
> On Wed, Jul 20, 2022 at 4:52 PM Tom Lane  wrote:
> > I think we could probably just drop fls() entirely.  It doesn't look
> > to me like any of the existing callers expect a zero argument, so they
> > could be converted to use pg_leftmost_one_pos32() pretty trivially.
> > I don't see that fls() is buying us anything that is worth requiring
> > readers to know yet another nonstandard function.
>
> That was not true for the case in contiguous_pages_to_segment_bin(),
> in dsa.c.  If it's just one place like that (and, hrrm, curiously
> there is an open issue about binning quality on my to do list...),
> then perhaps we should just open code it there.  The attached doesn't
> trigger the assertion that work != 0 in a simple make check.

That double-evaluation macro wasn't nice.  This time with a static inline function.
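For context, the open-coded replacement being described would look roughly
like this (a sketch only; pg_leftmost_one_pos64() is the existing
pg_bitutils.h facility, and the attached patch's exact shape may differ):

/* Zero-safe, fls()-free binning for dsa.c (illustrative sketch). */
static inline size_t
contiguous_pages_to_segment_bin(size_t n)
{
	size_t		bin;

	if (n == 0)
		bin = 0;
	else
		bin = pg_leftmost_one_pos64((uint64) n) + 1;

	return Min(bin, DSA_NUM_SEGMENT_BINS - 1);
}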
From f352e9f7c887dbf173a8de37f3c377ad72d8b66d Mon Sep 17 00:00:00 2001
From: Thomas Munro 
Date: Tue, 19 Jul 2022 17:47:10 +1200
Subject: [PATCH v3] Remove fls function.

Commit 4f658dc8 provided the traditional BSD fls() function so it could
be used in several places.  Later we added a bunch of similar sorts of
things in pg_bitutils.h.  Let's drop fls.c and the configure probe, and
reuse the newer code.

Reviewed-by: David Rowley 
Reviewed-by: Tom Lane 
Discussion: https://postgr.es/m/CA%2BhUKG%2B7dSX1XF8yFGmYk-%3D48dbjH2kmzZj16XvhbrWP-9BzRg%40mail.gmail.com
---
 configure  | 13 --
 configure.ac   |  1 -
 src/backend/access/hash/hashutil.c |  2 +-
 src/backend/optimizer/path/allpaths.c  |  5 +-
 src/backend/optimizer/prep/prepunion.c |  2 +-
 src/backend/utils/mmgr/dsa.c   | 14 +-
 src/include/pg_config.h.in |  3 --
 src/include/port.h |  4 --
 src/port/fls.c | 64 --
 src/tools/msvc/Mkvcbuild.pm|  2 +-
 src/tools/msvc/Solution.pm |  1 -
 11 files changed, 19 insertions(+), 92 deletions(-)
 delete mode 100644 src/port/fls.c

diff --git a/configure b/configure
index 59fa82b8d7..33a425562f 100755
--- a/configure
+++ b/configure
@@ -16771,19 +16771,6 @@ esac
 
 fi
 
-ac_fn_c_check_func "$LINENO" "fls" "ac_cv_func_fls"
-if test "x$ac_cv_func_fls" = xyes; then :
-  $as_echo "#define HAVE_FLS 1" >>confdefs.h
-
-else
-  case " $LIBOBJS " in
-  *" fls.$ac_objext "* ) ;;
-  *) LIBOBJS="$LIBOBJS fls.$ac_objext"
- ;;
-esac
-
-fi
-
 ac_fn_c_check_func "$LINENO" "getopt" "ac_cv_func_getopt"
 if test "x$ac_cv_func_getopt" = xyes; then :
   $as_echo "#define HAVE_GETOPT 1" >>confdefs.h
diff --git a/configure.ac b/configure.ac
index 612dabf698..5295cb5806 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1911,7 +1911,6 @@ fi
 AC_REPLACE_FUNCS(m4_normalize([
 	dlopen
 	explicit_bzero
-	fls
 	getopt
 	getpeereid
 	getrusage
diff --git a/src/backend/access/hash/hashutil.c b/src/backend/access/hash/hashutil.c
index fe37bc47cb..32822dbb6b 100644
--- a/src/backend/access/hash/hashutil.c
+++ b/src/backend/access/hash/hashutil.c
@@ -436,7 +436,7 @@ _hash_get_oldblock_from_newbucket(Relation rel, Bucket new_bucket)
 	 * started.  Masking the most significant bit of new bucket would give us
 	 * old bucket.
 	 */
-	mask = (((uint32) 1) << (fls(new_bucket) - 1)) - 1;
+	mask = (((uint32) 1) << pg_leftmost_one_pos32(new_bucket)) - 1;
 	old_bucket = new_bucket & mask;
 
 	metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 028d9e1680..63f0f6b79c 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -47,6 +47,7 @@
 #include "parser/parsetree.h"
 #include "partitioning/partbounds.h"
 #include "partitioning/partprune.h"
+#include "port/pg_bitutils.h"
 #include "rewrite/rewriteManip.h"
 #include "utils/lsyscache.h"
 
@@ -1491,7 +1492,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
 		if (enable_parallel_append)
 		{
 			parallel_workers = Max(parallel_workers,
-   fls(list_length(live_childrels)));
+   pg_leftmost_one_pos32(list_length(live_childrels)) + 1);
 			parallel_workers = Min(parallel_workers,
    max_parallel_workers_per_gather);
 		}
@@ -1542,7 +1543,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
 		 * the planned number of parallel workers.
 		 */
 		parallel_workers = Max(parallel_workers,
-			   fls(list_length(live_childrels)));
+			   pg_leftmost_one_pos32(list_length(live_childrels)) + 1);
 		parallel_workers = Min(parallel_workers,
 			   max_parallel_workers_per_gather);
 		Assert(parallel_workers > 0);
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index 2214920dea..043181b586 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -675,7 +675,7 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,

Re: Expose last replayed timeline ID along with last replayed LSN

2022-07-19 Thread Bharath Rupireddy
On Wed, Jul 20, 2022 at 7:06 AM Kyotaro Horiguchi
 wrote:
>
> At Tue, 19 Jul 2022 14:28:40 +0530, Bharath Rupireddy 
>  wrote in
> > Hi,
> >
> > At times it's useful to know the last replayed WAL record's timeline
> > ID (especially on the standbys that are lagging in applying WAL while
> > failing over - for reporting, logging and debugging purposes). AFAICS,
> > there's no function that exposes the last replayed TLI. We can either
> > change the existing pg_last_wal_replay_lsn() to report the TLI along with
> > the LSN, which might break compatibility, or introduce a new
> > function pg_last_wal_replay_info() that emits both LSN and TLI. I'm
> > fine with either of the approaches, but for now, I'm attaching a WIP
> > patch that adds a new function pg_last_wal_replay_info().
> >
> > Thoughts?
>
> There was a more comprehensive discussion [1], which went nowhere..
>
> [1] https://www.postgresql.org/message-id/20191211052002.GK72921%40paquier.xyz

Thanks Kyotaro-san for pointing to that thread. In fact, I did think
about having a new set of info functions: pg_current_wal_info,
pg_current_wal_insert_info, pg_current_wal_flush_info,
pg_last_wal_receive_info and pg_last_wal_replay_info - IMO, these are
the APIs we would want to keep in the code going forward. Although they
introduce some more code for now, it eventually makes sense to delete
pg_current_wal_lsn, pg_current_wal_insert_lsn, pg_current_wal_flush_lsn,
pg_last_wal_receive_lsn and pg_last_wal_replay_lsn, perhaps in future
versions of PG.
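For illustration, a sketch of the shape such a function could take.
GetXLogReplayRecPtr() is the existing API that already reports the replay
TLI; everything else below is illustrative and not taken from the attached
WIP patch:

#include "postgres.h"

#include "access/xlog.h"
#include "funcapi.h"
#include "utils/pg_lsn.h"

PG_FUNCTION_INFO_V1(pg_last_wal_replay_info);

Datum
pg_last_wal_replay_info(PG_FUNCTION_ARGS)
{
	TupleDesc	tupdesc;
	Datum		values[2];
	bool		nulls[2] = {false, false};
	TimeLineID	tli;
	XLogRecPtr	recptr;

	/* last replayed LSN and the timeline it belongs to */
	recptr = GetXLogReplayRecPtr(&tli);

	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
		elog(ERROR, "return type must be a row type");

	if (XLogRecPtrIsInvalid(recptr))
		nulls[0] = nulls[1] = true;
	else
	{
		values[0] = LSNGetDatum(recptr);
		values[1] = Int32GetDatum((int32) tli);
	}

	PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values,
													  nulls)));
}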

Thoughts?

Regards,
Bharath Rupireddy.




Re: Add proper planner support for ORDER BY / DISTINCT aggregates

2022-07-19 Thread David Rowley
On Thu, 4 Nov 2021 at 20:59, Ronan Dunklau  wrote:
> I took some time to toy with this again.
>
> At first I thought that charging a discount in foreign grouping paths for
> Aggref targets (since they are computed remotely) would be a good idea,
> similar to what is done for the grouping keys.

I've been working on this patch again. There was a bit of work to do
to rebase it atop db0d67db2.  The problem there was that since this
patch appends pathkeys to suit ORDER BY / DISTINCT aggregates to the
query's group_pathkeys, db0d67db2 goes and tries to rearrange those,
but fails to find the SortGroupClause corresponding to the PathKey in
group_pathkeys. I wish the code I came up with to make that work was a
bit nicer, but what's there at least seems to work. It makes a few more
copies of Lists than I'd like.

I've also gone and added LLVM support to make JIT work with the new
DISTINCT expression evaluation step types.

Also, James mentioned in [1] about the Merge Join plan change that
this patch was causing in an existing test.  I looked into that and
found the cause.  The plan change is not really the fault of this
patch, so I've proposed a fix to make that work more efficiently
in [2]. The basics there are that select_outer_pathkeys_for_merge()
pre-dates Incremental Sorts and didn't consider prefixes of the
query_pathkeys after matching all the join quals. The patch on that
thread relaxes that rule and makes this patch produce an Incremental
Sort plan for the query in question.

Another annoying part of this patch is that I've added an
"aggpresorted" field to Aggref, which the planner sets.  That's a
parse node type and it would be nicer not to have the planner mess
around with those. We could maybe wrap up the Aggrefs in some planner
struct and pass those to the executor instead.  That would require a
bit more churn than what I've got in the attached.

I've attached the v7 patch.

David

[1] 
https://www.postgresql.org/message-id/CAAaqYe-yxXkXVPJkRw1nDA=CJBw28jvhACRyDcU10dAOcdSj=q...@mail.gmail.com
[2] 
https://www.postgresql.org/message-id/caaphdvrtzu0phvfdpfm4yx3jnr2wuwosv+t2zqa7lrhhbr2...@mail.gmail.com
From f586a426d1bb3b32aa3be8dec94b17944353600e Mon Sep 17 00:00:00 2001
From: David Rowley 
Date: Mon, 3 May 2021 16:31:29 +1200
Subject: [PATCH v7] Add planner support for ORDER BY aggregates

ORDER BY aggregates have, since they were implemented in Postgres, been executed by
always performing a sort in nodeAgg.c to sort the tuples in the current
group into the correct order before calling the transition function on the
sorted results.  This was not great as often there might be an index that
could have provided pre-sorted input and allowed the transition functions
to be called as the rows come in, rather than having to store them in a
tuplestore in order to sort them later.

Here we get the planner on-board with picking a plan that provides
pre-sorted inputs to ORDER BY aggregates.

Since there can be many ORDER BY aggregates in any given query level, it's
very possible that we can't find an order that suits all aggregates.  The
sort order the planner chooses is simply the one that suits the most
aggregate functions.  We take the most strictly sorted variation of each
order and see how many aggregate functions can use that, then we try again
with the order of the remaining aggregates to see if another order would
suit more aggregate functions.  For example:

SELECT agg(a ORDER BY a),agg2(a ORDER BY a,b) ...

would request the sort order to be {a, b} because {a} is a subset of the
sort order of {a,b}, but:

SELECT agg(a ORDER BY a),agg2(a ORDER BY c) ...

would just pick a plan ordered by {a}.

SELECT agg(a ORDER BY a),agg2(a ORDER BY b),agg3(a ORDER BY b) ...

would choose to order by {b} since two aggregates suit that vs just one
that requires order by {a}.

Discussion: 
https://postgr.es/m/CAApHDvpHzfo92%3DR4W0%2BxVua3BUYCKMckWAmo-2t_KiXN-wYH%3Dw%40mail.gmail.com
---
 .../postgres_fdw/expected/postgres_fdw.out|  46 +--
 src/backend/executor/execExpr.c   |  52 +++-
 src/backend/executor/execExprInterp.c | 102 ++
 src/backend/executor/nodeAgg.c|  34 +-
 src/backend/jit/llvm/llvmjit_expr.c   |  48 +++
 src/backend/jit/llvm/llvmjit_types.c  |   2 +
 src/backend/optimizer/path/pathkeys.c |  44 ++-
 src/backend/optimizer/plan/planagg.c  |   2 +-
 src/backend/optimizer/plan/planner.c  | 294 --
 src/backend/optimizer/prep/prepagg.c  |   7 +-
 src/include/executor/execExpr.h   |  17 +
 src/include/executor/nodeAgg.h|   9 +
 src/include/nodes/pathnodes.h |  16 +-
 src/include/nodes/primnodes.h |   7 +
 src/include/optimizer/paths.h |   4 +-
 src/test/regress/expected/aggregates.out  |  80 -
 .../regress/expected/partition_aggregate.out  | 118 +++
 src/test/regress/expected/sqljson.out |  12 +-
 

Re: Remove fls(), use pg_bitutils.h facilities instead?

2022-07-19 Thread Thomas Munro
On Wed, Jul 20, 2022 at 4:52 PM Tom Lane  wrote:
> I think we could probably just drop fls() entirely.  It doesn't look
> to me like any of the existing callers expect a zero argument, so they
> could be converted to use pg_leftmost_one_pos32() pretty trivially.
> I don't see that fls() is buying us anything that is worth requiring
> readers to know yet another nonstandard function.

That was not true for the case in contiguous_pages_to_segment_bin(),
in dsa.c.  If it's just one place like that (and, hrrm, curiously
there is an open issue about binning quality on my to do list...),
then perhaps we should just open code it there.  The attached doesn't
trigger the assertion that work != 0 in a simple make check.
From 19a3b2aa631a5291023dcf7185f5cee474fae6b5 Mon Sep 17 00:00:00 2001
From: Thomas Munro 
Date: Tue, 19 Jul 2022 17:47:10 +1200
Subject: [PATCH v2] Remove fls function.

Commit 4f658dc8 provided the traditional BSD fls() function so it could
be used in several places.  Later we added a bunch of similar sorts of
things in pg_bitutils.h.  Let's drop fls.c and the configure probe, and
reuse the newer code.

Reviewed-by: David Rowley 
Reviewed-by: Tom Lane 
Discussion: https://postgr.es/m/CA%2BhUKG%2B7dSX1XF8yFGmYk-%3D48dbjH2kmzZj16XvhbrWP-9BzRg%40mail.gmail.com
---
 configure  | 13 --
 configure.ac   |  1 -
 src/backend/access/hash/hashutil.c |  2 +-
 src/backend/optimizer/path/allpaths.c  |  5 +-
 src/backend/optimizer/prep/prepunion.c |  2 +-
 src/backend/utils/mmgr/dsa.c   |  4 +-
 src/include/pg_config.h.in |  3 --
 src/include/port.h |  4 --
 src/port/fls.c | 64 --
 src/tools/msvc/Mkvcbuild.pm|  2 +-
 src/tools/msvc/Solution.pm |  1 -
 11 files changed, 9 insertions(+), 92 deletions(-)
 delete mode 100644 src/port/fls.c

diff --git a/configure b/configure
index 59fa82b8d7..33a425562f 100755
--- a/configure
+++ b/configure
@@ -16771,19 +16771,6 @@ esac
 
 fi
 
-ac_fn_c_check_func "$LINENO" "fls" "ac_cv_func_fls"
-if test "x$ac_cv_func_fls" = xyes; then :
-  $as_echo "#define HAVE_FLS 1" >>confdefs.h
-
-else
-  case " $LIBOBJS " in
-  *" fls.$ac_objext "* ) ;;
-  *) LIBOBJS="$LIBOBJS fls.$ac_objext"
- ;;
-esac
-
-fi
-
 ac_fn_c_check_func "$LINENO" "getopt" "ac_cv_func_getopt"
 if test "x$ac_cv_func_getopt" = xyes; then :
   $as_echo "#define HAVE_GETOPT 1" >>confdefs.h
diff --git a/configure.ac b/configure.ac
index 612dabf698..5295cb5806 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1911,7 +1911,6 @@ fi
 AC_REPLACE_FUNCS(m4_normalize([
 	dlopen
 	explicit_bzero
-	fls
 	getopt
 	getpeereid
 	getrusage
diff --git a/src/backend/access/hash/hashutil.c b/src/backend/access/hash/hashutil.c
index fe37bc47cb..32822dbb6b 100644
--- a/src/backend/access/hash/hashutil.c
+++ b/src/backend/access/hash/hashutil.c
@@ -436,7 +436,7 @@ _hash_get_oldblock_from_newbucket(Relation rel, Bucket new_bucket)
 	 * started.  Masking the most significant bit of new bucket would give us
 	 * old bucket.
 	 */
-	mask = (((uint32) 1) << (fls(new_bucket) - 1)) - 1;
+	mask = (((uint32) 1) << pg_leftmost_one_pos32(new_bucket)) - 1;
 	old_bucket = new_bucket & mask;
 
 	metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 028d9e1680..63f0f6b79c 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -47,6 +47,7 @@
 #include "parser/parsetree.h"
 #include "partitioning/partbounds.h"
 #include "partitioning/partprune.h"
+#include "port/pg_bitutils.h"
 #include "rewrite/rewriteManip.h"
 #include "utils/lsyscache.h"
 
@@ -1491,7 +1492,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
 		if (enable_parallel_append)
 		{
 			parallel_workers = Max(parallel_workers,
-   fls(list_length(live_childrels)));
+   pg_leftmost_one_pos32(list_length(live_childrels)) + 1);
 			parallel_workers = Min(parallel_workers,
    max_parallel_workers_per_gather);
 		}
@@ -1542,7 +1543,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
 		 * the planned number of parallel workers.
 		 */
 		parallel_workers = Max(parallel_workers,
-			   fls(list_length(live_childrels)));
+			   pg_leftmost_one_pos32(list_length(live_childrels)) + 1);
 		parallel_workers = Min(parallel_workers,
 			   max_parallel_workers_per_gather);
 		Assert(parallel_workers > 0);
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index 2214920dea..043181b586 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -675,7 +675,7 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
 		if (enable_parallel_append)
 		{
 			parallel_workers = Max(parallel_workers,
-   

Re: [BUG] Logical replication failure "ERROR: could not map filenode "base/13237/442428" to relation OID" with catalog modifying txns

2022-07-19 Thread Amit Kapila
On Wed, Jul 20, 2022 at 9:01 AM Masahiko Sawada  wrote:
>
> On Wed, Jul 20, 2022 at 12:11 PM Amit Kapila  wrote:
> >
> > On Tue, Jul 19, 2022 at 7:28 PM Masahiko Sawada  
> > wrote:
> >
> > Why do you think we can't remove
> > ReorderBufferInitialXactsSetCatalogChanges() from the back branch
> > patch? I think we don't need to change the existing function
> > ReorderBufferXidHasCatalogChanges() but instead can have a wrapper
> > like SnapBuildXidHasCatalogChanges() similar to master branch patch.
>
> IIUC we need to change SnapBuildCommitTxn() but it's exposed.
>
> Currently, we call like DecodeCommit() -> SnapBuildCommitTxn() ->
> ReorderBufferXidHasCatalogChanges(). If we have a wrapper function, we
> call like DecodeCommit() -> SnapBuildCommitTxn() ->
> SnapBuildXidHasCatalogChanges() ->
> ReorderBufferXidHasCatalogChanges(). In
> SnapBuildXidHasCatalogChanges(), we need to check if the transaction
> has XACT_XINFO_HAS_INVALS, which means DecodeCommit() needs to pass
> either parsed->xinfo or ((parsed->xinfo & XACT_XINFO_HAS_INVALS) != 0)
> down to SnapBuildXidHasCatalogChanges(). However, since
> SnapBuildCommitTxn(), between DecodeCommit() and
> SnapBuildXidHasCatalogChanges(), is exposed, we cannot change it.
>

Agreed.

> Another idea would be to have functions, say
> SnapBuildCommitTxnWithXInfo() and SnapBuildCommitTxn_ext(). The latter
> does actual work of handling transaction commits and both
> SnapBuildCommitTxn() and SnapBuildCommit() call
> SnapBuildCommitTxnWithXInfo() with different arguments.
>

Do you want to say DecodeCommit() instead of SnapBuildCommit() in the
above para? Yet another idea could be to have another flag,
RBTXN_HAS_INVALS, which would be set by DecodeCommit() for the top-level
TXN. Then, we can retrieve it even for each of the subtxns if and when
required.
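For what it's worth, a rough sketch of that flag idea (the bit value and
macro name here are illustrative only, not from any posted patch):

/* Set by DecodeCommit() when the commit record carries invalidation
 * messages, implying the transaction modified the catalog. */
#define RBTXN_HAS_INVALS	0x0200	/* bit position illustrative */

#define rbtxn_has_invals(txn) \
	(((txn)->txn_flags & RBTXN_HAS_INVALS) != 0)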

-- 
With Regards,
Amit Kapila.




Re: Memory leak fix in psql

2022-07-19 Thread Michael Paquier
On Wed, Jul 20, 2022 at 12:51:24PM +0800, Junwang Zhao wrote:
> Though the patch looks good, I myself think the patch should be edited
> and submitted by Tang.
> It's easy to attach a fixed patch based on the comments of the thread,
> but credit should be given to Tang since he was the first one to find
> the memory leak.

Please note that I sometimes make slight edits to the patches that I
merge into the tree; the author listed in the commit log remains the
original one, and I usually don't list myself.  Credit goes where it is
due, and Tang is the one here who authored this patch.
--
Michael




Re: fix stats_fetch_consistency value in postgresql.conf.sample

2022-07-19 Thread Justin Pryzby
On Tue, Jul 19, 2022 at 03:04:27PM +0900, Kyotaro Horiguchi wrote:
> At Wed, 13 Jul 2022 18:54:45 -0500, Justin Pryzby  
> wrote in 
> > On Thu, Jul 14, 2022 at 08:46:02AM +0900, Michael Paquier wrote:
> > > On Wed, Jul 13, 2022 at 12:30:00PM -0500, Justin Pryzby wrote:
> > > > How did you make this list?  Was it by excluding things that failed
> > > > for you?
> 
> Yes. I didn't confirm each variable. They are the variables that differ
> on RHEL-family OSes.  io_concurrency differs according to
> USE_PREFETCH. Regarding the effects of macro definitions, I searched
> guc.c for non-GUC_NOT_IN_SAMPLE variables with macro-affected defaults.

I think you'd also need to handle the ones which are changed by initdb.c.

This patch takes Andres' suggestion.

The list of GUCs I flagged is probably incomplete, maybe inaccurate, and at
least up for discussion.

BTW I still think it might have been better to leave pg_settings_get_flags()
deliberately undocumented.

-- 
Justin
>From 5f9a56a9c0d73a221e55b2c02e006d3104f4937b Mon Sep 17 00:00:00 2001
From: Justin Pryzby 
Date: Tue, 19 Jul 2022 08:31:56 -0500
Subject: [PATCH 1/2] wip: add DYNAMIC_DEFAULT for settings whose defaults vary
 by configure flags, platform, or initdb

---
 doc/src/sgml/func.sgml   |  4 
 src/backend/utils/misc/guc.c | 36 +++-
 src/include/utils/guc.h  |  6 --
 3 files changed, 31 insertions(+), 15 deletions(-)

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index e3e24e185a4..0fd6234eaf2 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -24977,6 +24977,10 @@ SELECT currval(pg_get_serial_sequence('sometable', 'id'));
 an empty array if the GUC exists but there are no flags to show.
 Only the most useful flags are exposed, as of the following:
 
+ 
+  DYNAMIC_DEFAULT: parameters whose default varies by
+  platform, or compile-time options, or initdb.
+ 
  
   EXPLAIN: parameters included in
   EXPLAIN (SETTINGS) commands.
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 0328029d430..dbbadc5a475 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -1435,7 +1435,7 @@ static struct config_bool ConfigureNamesBool[] =
 		{"debug_assertions", PGC_INTERNAL, PRESET_OPTIONS,
 			gettext_noop("Shows whether the running server has assertion checks enabled."),
 			NULL,
-			GUC_NOT_IN_SAMPLE | GUC_DISALLOW_IN_FILE
+			GUC_NOT_IN_SAMPLE | GUC_DISALLOW_IN_FILE | GUC_DYNAMIC_DEFAULT
 		},
 		&assert_enabled,
 #ifdef USE_ASSERT_CHECKING
@@ -1611,7 +1611,8 @@ static struct config_bool ConfigureNamesBool[] =
 	{
 		{"update_process_title", PGC_SUSET, PROCESS_TITLE,
 			gettext_noop("Updates the process title to show the active SQL command."),
-			gettext_noop("Enables updating of the process title every time a new SQL command is received by the server.")
+			gettext_noop("Enables updating of the process title every time a new SQL command is received by the server."),
+			GUC_DYNAMIC_DEFAULT
 		},
 		&update_process_title,
 #ifdef WIN32
@@ -2362,6 +2363,7 @@ static struct config_int ConfigureNamesInt[] =
 			gettext_noop("Sets the maximum number of concurrent connections."),
 			NULL
 		},
+
 		&MaxConnections,
 		100, 1, MAX_BACKENDS,
 		check_maxconnections, NULL, NULL
@@ -2397,7 +2399,7 @@ static struct config_int ConfigureNamesInt[] =
 		{"shared_buffers", PGC_POSTMASTER, RESOURCES_MEM,
 			gettext_noop("Sets the number of shared memory buffers used by the server."),
 			NULL,
-			GUC_UNIT_BLOCKS
+			GUC_UNIT_BLOCKS | GUC_DYNAMIC_DEFAULT
 		},
 		&NBuffers,
 		16384, 16, INT_MAX / 2,
@@ -3142,7 +3144,7 @@ static struct config_int ConfigureNamesInt[] =
 			RESOURCES_ASYNCHRONOUS,
 			gettext_noop("Number of simultaneous requests that can be handled efficiently by the disk subsystem."),
 			NULL,
-			GUC_EXPLAIN
+			GUC_EXPLAIN|GUC_DYNAMIC_DEFAULT
 		},
 		&effective_io_concurrency,
 #ifdef USE_PREFETCH
@@ -3160,7 +3162,7 @@ static struct config_int ConfigureNamesInt[] =
 			RESOURCES_ASYNCHRONOUS,
 			gettext_noop("A variant of effective_io_concurrency that is used for maintenance work."),
 			NULL,
-			GUC_EXPLAIN
+			GUC_EXPLAIN|GUC_DYNAMIC_DEFAULT
 		},
 		&maintenance_io_concurrency,
 #ifdef USE_PREFETCH
@@ -3327,9 +3329,11 @@ static struct config_int ConfigureNamesInt[] =
 			gettext_noop("Shows the size of write ahead log segments."),
 			NULL,
 			GUC_UNIT_BYTE | GUC_NOT_IN_SAMPLE | GUC_DISALLOW_IN_FILE | GUC_RUNTIME_COMPUTED
+
 		},
 		&wal_segment_size,
 		DEFAULT_XLOG_SEG_SIZE,
+
 		WalSegMinSize,
 		WalSegMaxSize,
 		NULL, NULL, NULL
@@ -3523,6 +3527,7 @@ static struct config_int ConfigureNamesInt[] =
 			GUC_UNIT_BLOCKS | GUC_EXPLAIN,
 		},
 		&effective_cache_size,
+
 		DEFAULT_EFFECTIVE_CACHE_SIZE, 1, INT_MAX,
 		NULL, NULL, NULL
 	},
@@ -3620,7 +3625,7 @@ static struct config_int ConfigureNamesInt[] =
 		{"debug_discard_caches", PGC_SUSET, DEVELOPER_OPTIONS,
 			gettext_noop("Aggressively flush system caches for 

Re: Handle infinite recursion in logical replication setup

2022-07-19 Thread Peter Smith
On Tue, Jul 19, 2022 at 11:34 PM Amit Kapila  wrote:
>
> On Mon, Jul 18, 2022 at 9:46 PM vignesh C  wrote:
> >
> > I have updated the patch to handle the origin value case
> > insensitively. The attached patch has the changes for the same.
> >
>
> Thanks, the patch looks mostly good to me. I have made a few changes
> in the 0001 patch, which are as follows: (a) made the comparison of
> origin names in maybe_reread_subscription similar to that of slot names,
> as in future we may support origin names other than 'any' and 'none',
> (b) made comment changes at a few places and a minor change in one of
> the error messages, (c) ran pgindent and slightly changed the commit
> message.
>
> I am planning to push this day after tomorrow unless there are any
> comments/suggestions.

FYI, the function name in the comment is not the same as the function name here:

+/*
+ * IsReservedName
+ * True iff name is either "none" or "any".
+ */
+static bool
+IsReservedOriginName(const char *name)
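i.e., the suggested fix is simply to make the header comment match the
function name:

+/*
+ * IsReservedOriginName
+ * True iff name is either "none" or "any".
+ */
+static bool
+IsReservedOriginName(const char *name)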

--
Kind Regards,
Peter Smith.
Fujitsu Australia




Re: Backup command and functions can cause assertion failure and segmentation fault

2022-07-19 Thread Fujii Masao




On 2022/07/16 11:36, Michael Paquier wrote:

I was thinking about doing that only on HEAD.  One thing interesting
about this patch is that it can also be used as a point of reference
for other future things.


Ok, here are review comments:

+my $connstr =
+  $node->connstr('postgres') . " replication=database dbname=postgres";

Since the result of connstr() includes "dbname=postgres", you don't need to add 
"dbname=postgres" again.

+# The psql command should fail on pg_stop_backup().

Typo: s/pg_stop_backup/pg_backup_stop/

I reported two trouble cases; they are the cases where BASE_BACKUP is canceled 
and terminated, respectively. But you added the test only for one of them. Is 
this intentional?


Since one of them did not apply cleanly to v14 and earlier, I also
created the patch for those back branches. So I attached three
patches.


Fine by me.


I pushed these bugfix patches first. Thanks!

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION




Re: Remove fls(), use pg_bitutils.h facilities instead?

2022-07-19 Thread Tom Lane
Thomas Munro  writes:
> Back in commit 4f658dc8 we gained src/port/fls.c.  As anticipated by
> its commit message, we later finished up with something better in
> src/include/port/pg_bitutils.h.  fls() ("find last set") is an
> off-by-one cousin of pg_leftmost_one_pos32().  I don't know why ffs()
> ("find first set", the rightmost variant) made it into POSIX while
> fls() did not, other than perhaps its more amusing name.  fls() is
> present on *BSD, Macs and maybe more, but not everywhere, hence the
> configure test.  Let's just do it with pg_bitutils.h instead, and drop
> some cruft?  Open to better ideas on whether we need a new function,

I think we could probably just drop fls() entirely.  It doesn't look
to me like any of the existing callers expect a zero argument, so they
could be converted to use pg_leftmost_one_pos32() pretty trivially.
I don't see that fls() is buying us anything that is worth requiring
readers to know yet another nonstandard function.

regards, tom lane




Re: Memory leak fix in psql

2022-07-19 Thread Junwang Zhao
-1

Though the patch looks good, I myself think the patch should be edited
and submitted by Tang.
It's easy to attach a fixed patch based on the comments of the thread,
but credit should be given to Tang since he was the first one to find
the memory leak.

No offense, but that's how I think open source works ;)

On Wed, Jul 20, 2022 at 11:51 AM Michael Paquier  wrote:
>
> On Wed, Jul 20, 2022 at 03:14:35AM +, tanghy.f...@fujitsu.com wrote:
> > Your fix LGTM so please allow me to merge it in the attached patch.
> > Based on your rebased version, now this new patch version is V3.
>
> What about the argument of upthread where we could use a goto in
> functions where there are multiple pattern validation checks?  Per se
> v4 attached.
> --
> Michael



-- 
Regards
Junwang Zhao




Re: [PATCH] Use indexes on the subscriber when REPLICA IDENTITY is full on the publisher

2022-07-19 Thread Amit Kapila
On Tue, Jul 19, 2022 at 1:46 PM Önder Kalacı  wrote:
>
> Hi, thanks for your reply.
>
> Amit Kapila , on Mon, 18 Jul 2022 at 08:29, wrote:
>>
>> >
>>
>> IIUC, this proposal is to optimize cases where users can't have a
>> unique/primary key for a relation on the subscriber and those
>> relations receive lots of updates or deletes?
>
>
> Yes, that is right.
>
> From a similar perspective, I see this patch as useful for relaxing the
> "use primary key/unique index" requirement to "use any index" for
> reasonably performant logical replication with updates/deletes.
>

Agreed. BTW, have you seen any such requirements from users where this
will be useful for them?

>>
>>
>> It seems that in favorable cases it will improve performance but we
>> should consider unfavorable cases as well. Two things that come to
>> mind in that regard are (a) while choosing index/seq. scan paths, the
>> patch doesn't account for the cost of tuples_equal() which needs to be
>> performed for index scans, (b) it appears to me that the patch decides
>> which index to use the first time it opens the rel (or if the rel gets
>> invalidated) on the subscriber and then uses the same index for all
>> subsequent operations. It is quite possible that after some more
>> operations on the table, using the same index will actually be
>> costlier than a sequential scan or some other index scan
>
>
> Regarding (b), yes that is a concern I share. And, I was actually considering 
> sending another patch regarding this.
>
> Currently, I can see two options, and I am happy to hear your take on
> these (or maybe another idea?)
>
> - Add a new class of invalidation callbacks: Today, if we do ALTER TABLE or
> CREATE INDEX on a table, CacheRegisterRelcacheCallback helps us to
> re-create the cache entries. In this case, as far as I can see, we need a
> callback that is called when a table is "ANALYZE"d, because that is when
> the statistics change, and that is the time when picking a new index makes
> sense. However, that seems like adding another dimension to this patch,
> which I can try, but I also see that committing becomes even harder.
>

This idea sounds worth investigating. I see that this will require
more work but OTOH, we can't allow the existing system to regress
especially because depending on workload it might regress badly. We
can create a patch for this atop the base patch for easier review/test
but I feel we need some way to address this point.

> So, please see the next idea as well.
>
> - Ask users to manually pick the index they want to use: Currently, the main 
> complexity of the patch comes with the planner related code. In fact, if you 
> look into the logical replication related changes, those are relatively 
> modest changes. If we can drop the feature that Postgres picks the index, and 
> provide a user interface to set the indexes per table in the subscription, we 
> can probably have an easier patch to review & test. For example, we could add 
> `ALTER SUBSCRIPTION sub ALTER TABLE t USE INDEX i` type of an API. This also
> needs some coding, but it is probably much simpler than the current code. And,
> obviously, this raises the question of whether users can pick the right index.
>

I think picking the right index is one point; another is that if
the subscription has many tables (say 10K or more), doing this for
individual tables per subscription won't be fun. Also, users need to
identify which tables belong to a particular subscription; they
can find that out via pg_subscription_rel or some other way, but doing
so won't be straightforward. So, my inclination would be
to pick the right index automatically rather than getting the input
from the user.

Now, your point related to planner code in the patch bothers me as
well but I haven't studied the patch in detail to provide any
alternatives at this stage. Do you have any other ideas to make it
simpler or solve this problem in some other way?

-- 
With Regards,
Amit Kapila.




Re: Windows now has fdatasync()

2022-07-19 Thread Thomas Munro
On Wed, Jul 20, 2022 at 4:14 PM Thomas Munro  wrote:
> On Wed, Jul 20, 2022 at 4:08 PM David Rowley  wrote:
> > On Wed, 20 Jul 2022 at 15:22, Thomas Munro  wrote:
> > > Ok, I've pushed the Windows patch.  I'll watch the build farm to see
> > > if I've broken any of the frankentoolchain Windows animals.
> >
> > Just to get in there before the farm does... I just got a boatload of
> > redefinition of HAVE_FDATASYNC warnings.  I see it already gets
> > defined in pg_config.h
> >
> > All compiles cleanly with the attached.
>
> Oops.  Thanks, pushed.

... and here's a rebase of the patch that removes that macro stuff,
since cfbot is watching this thread.
From c0c3e67637abf67d9b946a08ddc5c597b37b40a1 Mon Sep 17 00:00:00 2001
From: Thomas Munro 
Date: Wed, 20 Jul 2022 12:32:26 +1200
Subject: [PATCH v2 1/2] Remove O_FSYNC and associated macros.

O_FSYNC was a pre-standard way of spelling O_SYNC.  It's not needed on
any modern system.  We can just use O_SYNC directly, if it exists, and
get rid of our OPEN_SYNC_FLAG macro.

Likewise for O_DSYNC, we can just use it directly, if it exists, and get
rid of our OPEN_DATASYNC_FLAG macro.  The only complication is that we
still want to avoid choosing open_datasync as a default if O_DSYNC has
the same value as O_SYNC, so there is no change in behavior.
---
 src/backend/access/transam/xlog.c | 20 ++--
 src/bin/pg_test_fsync/pg_test_fsync.c | 12 ++--
 src/include/access/xlogdefs.h | 19 +--
 3 files changed, 17 insertions(+), 34 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 9854b51c6a..1da3b8eb2e 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -171,10 +171,10 @@ const struct config_enum_entry sync_method_options[] = {
 #ifdef HAVE_FDATASYNC
 	{"fdatasync", SYNC_METHOD_FDATASYNC, false},
 #endif
-#ifdef OPEN_SYNC_FLAG
+#ifdef O_SYNC
 	{"open_sync", SYNC_METHOD_OPEN, false},
 #endif
-#ifdef OPEN_DATASYNC_FLAG
+#ifdef O_DSYNC
 	{"open_datasync", SYNC_METHOD_OPEN_DSYNC, false},
 #endif
 	{NULL, 0, false}
@@ -7894,10 +7894,10 @@ get_sync_bit(int method)
 
 	/*
 	 * Optimize writes by bypassing kernel cache with O_DIRECT when using
-	 * O_SYNC/O_FSYNC and O_DSYNC.  But only if archiving and streaming are
-	 * disabled, otherwise the archive command or walsender process will read
-	 * the WAL soon after writing it, which is guaranteed to cause a physical
-	 * read if we bypassed the kernel cache. We also skip the
+	 * O_SYNC and O_DSYNC.  But only if archiving and streaming are disabled,
+	 * otherwise the archive command or walsender process will read the WAL
+	 * soon after writing it, which is guaranteed to cause a physical read if
+	 * we bypassed the kernel cache. We also skip the
 	 * posix_fadvise(POSIX_FADV_DONTNEED) call in XLogFileClose() for the same
 	 * reason.
 	 *
@@ -7921,13 +7921,13 @@ get_sync_bit(int method)
 		case SYNC_METHOD_FSYNC_WRITETHROUGH:
 		case SYNC_METHOD_FDATASYNC:
 			return 0;
-#ifdef OPEN_SYNC_FLAG
+#ifdef O_SYNC
 		case SYNC_METHOD_OPEN:
-			return OPEN_SYNC_FLAG | o_direct_flag;
+			return O_SYNC | o_direct_flag;
 #endif
-#ifdef OPEN_DATASYNC_FLAG
+#ifdef O_DSYNC
 		case SYNC_METHOD_OPEN_DSYNC:
-			return OPEN_DATASYNC_FLAG | o_direct_flag;
+			return O_DSYNC | o_direct_flag;
 #endif
 		default:
 			/* can't happen (unless we are out of sync with option array) */
diff --git a/src/bin/pg_test_fsync/pg_test_fsync.c b/src/bin/pg_test_fsync/pg_test_fsync.c
index f7bc199a30..6739214eb8 100644
--- a/src/bin/pg_test_fsync/pg_test_fsync.c
+++ b/src/bin/pg_test_fsync/pg_test_fsync.c
@@ -300,7 +300,7 @@ test_sync(int writes_per_op)
 	printf(LABEL_FORMAT, "open_datasync");
 	fflush(stdout);
 
-#ifdef OPEN_DATASYNC_FLAG
+#ifdef O_DSYNC
 	if ((tmpfile = open_direct(filename, O_RDWR | O_DSYNC | PG_BINARY, 0)) == -1)
 	{
 		printf(NA_FORMAT, _("n/a*"));
@@ -407,8 +407,8 @@ test_sync(int writes_per_op)
 	printf(LABEL_FORMAT, "open_sync");
 	fflush(stdout);
 
-#ifdef OPEN_SYNC_FLAG
-	if ((tmpfile = open_direct(filename, O_RDWR | OPEN_SYNC_FLAG | PG_BINARY, 0)) == -1)
+#ifdef O_SYNC
+	if ((tmpfile = open_direct(filename, O_RDWR | O_SYNC | PG_BINARY, 0)) == -1)
 	{
 		printf(NA_FORMAT, _("n/a*"));
 		fs_warning = true;
@@ -466,7 +466,7 @@ test_open_syncs(void)
 static void
 test_open_sync(const char *msg, int writes_size)
 {
-#ifdef OPEN_SYNC_FLAG
+#ifdef O_SYNC
 	int			tmpfile,
 ops,
 writes;
@@ -475,8 +475,8 @@ test_open_sync(const char *msg, int writes_size)
 	printf(LABEL_FORMAT, msg);
 	fflush(stdout);
 
-#ifdef OPEN_SYNC_FLAG
-	if ((tmpfile = open_direct(filename, O_RDWR | OPEN_SYNC_FLAG | PG_BINARY, 0)) == -1)
+#ifdef O_SYNC
+	if ((tmpfile = open_direct(filename, O_RDWR | O_SYNC | PG_BINARY, 0)) == -1)
 		printf(NA_FORMAT, _("n/a*"));
 	else
 	{
diff --git a/src/include/access/xlogdefs.h b/src/include/access/xlogdefs.h
index a47e3eeb1f..a720169b17 100644
--- a/src/include/access/xlogdefs.h
+++ 

Re: Remove fls(), use pg_bitutils.h facilities instead?

2022-07-19 Thread David Rowley
On Wed, 20 Jul 2022 at 16:21, Thomas Munro  wrote:
> Back in commit 4f658dc8 we gained src/port/fls.c.  As anticipated by
> its commit message, we later finished up with something better in
> src/include/port/pg_bitutils.h.  fls() ("find last set") is an
> off-by-one cousin of pg_leftmost_one_pos32().

Seems like a good idea to me.

One thing I noticed was that pg_leftmost_one_pos32() expects a uint32
but the new function passes it an int. Is it worth mentioning that's
ok in a comment somewhere or maybe adding an explicit cast?
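For example, an explicit cast at the call site would at least document the
conversion (sketch):

	/* the int argument is known nonzero here */
	return pg_leftmost_one_pos32((uint32) word) + 1;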

David




Re: Use "WAL segment" instead of "log segment" consistently in user-facing messages

2022-07-19 Thread Bharath Rupireddy
On Wed, Jul 20, 2022 at 6:55 AM Kyotaro Horiguchi
 wrote:
>
> At Tue, 19 Jul 2022 09:58:28 -0700, Nathan Bossart  
> wrote in
> > On Tue, Jul 19, 2022 at 02:43:59PM +0530, Bharath Rupireddy wrote:
> > > Done. PSA v5 patch set.
> >
> > LGTM.  I've marked this as ready-for-committer.
>
> I find the following sentence in config.sgml: "Specifies the minimum
> size of past log file segments kept in the pg_wal directory"
>
> postgresql.conf.sample contains "logfile segment" in a few lines.

Done. PSA v6 patch set.

Regards,
Bharath Rupireddy.


v6-0001-Use-WAL-segment-instead-of-log-segment.patch
Description: Binary data


v6-0002-Consistently-use-WAL-file-s-in-docs.patch
Description: Binary data


Remove fls(), use pg_bitutils.h facilities instead?

2022-07-19 Thread Thomas Munro
Hi,

Back in commit 4f658dc8 we gained src/port/fls.c.  As anticipated by
its commit message, we later finished up with something better in
src/include/port/pg_bitutils.h.  fls() ("find last set") is an
off-by-one cousin of pg_leftmost_one_pos32().  I don't know why ffs()
("find first set", the rightmost variant) made it into POSIX while
fls() did not, other than perhaps its more amusing name.  fls() is
present on *BSD, Macs and maybe more, but not everywhere, hence the
configure test.  Let's just do it with pg_bitutils.h instead, and drop
some cruft?  Open to better ideas on whether we need a new function,
or there is some way to use the existing facilities directly without
worrying about undefined behaviour for 0, etc.

Noticed while looking for configure stuff to cull.  Mentioning
separately because this isn't a simple
no-longer-needed-crutch-for-prestandard-system case like the others in
a nearby thread.
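In essence, the pg_fls() the patch below adds to pg_bitutils.h behaves like
this (a sketch; the exact zero handling in the attached patch may differ):

/* 1-based position of the most significant set bit, or 0 for zero input. */
static inline int
pg_fls(uint32 word)
{
	return word == 0 ? 0 : pg_leftmost_one_pos32(word) + 1;
}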
From 0260f65533bfdff024c5af6c0b14692484305b06 Mon Sep 17 00:00:00 2001
From: Thomas Munro 
Date: Tue, 19 Jul 2022 17:47:10 +1200
Subject: [PATCH] Remove replacement fls function.

Commit 4f658dc8 provided the traditional BSD fls() function so it could
be used in several places.  Later we added a bunch of similar sorts of
things in pg_bitutils.h.  Let's drop fls.c and the configure probe, and
reuse the newer code.
---
 configure  | 13 --
 configure.ac   |  1 -
 src/backend/access/hash/hashutil.c |  2 +-
 src/backend/optimizer/path/allpaths.c  |  4 +-
 src/backend/optimizer/prep/prepunion.c |  2 +-
 src/backend/utils/mmgr/dsa.c   |  3 +-
 src/include/pg_config.h.in |  3 --
 src/include/port.h |  4 --
 src/include/port/pg_bitutils.h | 15 ++
 src/port/fls.c | 64 --
 src/tools/msvc/Mkvcbuild.pm|  2 +-
 src/tools/msvc/Solution.pm |  1 -
 12 files changed, 22 insertions(+), 92 deletions(-)
 delete mode 100644 src/port/fls.c

diff --git a/configure b/configure
index a4f4d321fb..4db38683e5 100755
--- a/configure
+++ b/configure
@@ -16771,19 +16771,6 @@ esac
 
 fi
 
-ac_fn_c_check_func "$LINENO" "fls" "ac_cv_func_fls"
-if test "x$ac_cv_func_fls" = xyes; then :
-  $as_echo "#define HAVE_FLS 1" >>confdefs.h
-
-else
-  case " $LIBOBJS " in
-  *" fls.$ac_objext "* ) ;;
-  *) LIBOBJS="$LIBOBJS fls.$ac_objext"
- ;;
-esac
-
-fi
-
 ac_fn_c_check_func "$LINENO" "getopt" "ac_cv_func_getopt"
 if test "x$ac_cv_func_getopt" = xyes; then :
   $as_echo "#define HAVE_GETOPT 1" >>confdefs.h
diff --git a/configure.ac b/configure.ac
index 5bd29a4d2f..b826823e50 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1911,7 +1911,6 @@ fi
 AC_REPLACE_FUNCS(m4_normalize([
 	dlopen
 	explicit_bzero
-	fls
 	getopt
 	getpeereid
 	getrusage
diff --git a/src/backend/access/hash/hashutil.c b/src/backend/access/hash/hashutil.c
index fe37bc47cb..856462b641 100644
--- a/src/backend/access/hash/hashutil.c
+++ b/src/backend/access/hash/hashutil.c
@@ -436,7 +436,7 @@ _hash_get_oldblock_from_newbucket(Relation rel, Bucket new_bucket)
 	 * started.  Masking the most significant bit of new bucket would give us
 	 * old bucket.
 	 */
-	mask = (((uint32) 1) << (fls(new_bucket) - 1)) - 1;
+	mask = (((uint32) 1) << (pg_fls(new_bucket) - 1)) - 1;
 	old_bucket = new_bucket & mask;
 
 	metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 358bb2aed6..6f19721751 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -1491,7 +1491,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
 		if (enable_parallel_append)
 		{
 			parallel_workers = Max(parallel_workers,
-   fls(list_length(live_childrels)));
+   pg_fls(list_length(live_childrels)));
 			parallel_workers = Min(parallel_workers,
    max_parallel_workers_per_gather);
 		}
@@ -1542,7 +1542,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
 		 * the planned number of parallel workers.
 		 */
 		parallel_workers = Max(parallel_workers,
-			   fls(list_length(live_childrels)));
+			   pg_fls(list_length(live_childrels)));
 		parallel_workers = Min(parallel_workers,
 			   max_parallel_workers_per_gather);
 		Assert(parallel_workers > 0);
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index f004fad1d9..3af7b55e7d 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -668,7 +668,7 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
 		if (enable_parallel_append)
 		{
 			parallel_workers = Max(parallel_workers,
-   fls(list_length(partial_pathlist)));
+   pg_fls(list_length(partial_pathlist)));
 			parallel_workers = Min(parallel_workers,
    max_parallel_workers_per_gather);
 		}
diff --git 

Re: Windows now has fdatasync()

2022-07-19 Thread Thomas Munro
On Wed, Jul 20, 2022 at 4:08 PM David Rowley  wrote:
> On Wed, 20 Jul 2022 at 15:22, Thomas Munro  wrote:
> > Ok, I've pushed the Windows patch.  I'll watch the build farm to see
> > if I've broken any of the frankentoolchain Windows animals.
>
> Just to get in there before the farm does... I just got a boatload of
> redefinition of HAVE_FDATASYNC warnings.  I see it already gets
> defined in pg_config.h
>
> All compiles cleanly with the attached.

Oops.  Thanks, pushed.




Re: Windows now has fdatasync()

2022-07-19 Thread David Rowley
On Wed, 20 Jul 2022 at 15:22, Thomas Munro  wrote:
> Ok, I've pushed the Windows patch.  I'll watch the build farm to see
> if I've broken any of the frankentoolchain Windows animals.

Just to get in there before the farm does... I just got a boatload of
redefinition of HAVE_FDATASYNC warnings.  I see it already gets
defined in pg_config.h.

All compiles cleanly with the attached.

David
diff --git a/src/include/port/win32_port.h b/src/include/port/win32_port.h
index 5ea66528fa..4de5bf3bf6 100644
--- a/src/include/port/win32_port.h
+++ b/src/include/port/win32_port.h
@@ -87,7 +87,9 @@
  * We have a replacement for fdatasync() in src/port/fdatasync.c, which is
  * unconditionally used by MSVC and Mingw builds.
  */
+#ifndef HAVE_FDATASYNC
 #define HAVE_FDATASYNC
+#endif
 
 #define USES_WINSOCK
 


Re: Memory leak fix in psql

2022-07-19 Thread Japin Li


On Wed, 20 Jul 2022 at 11:51, Michael Paquier  wrote:
> On Wed, Jul 20, 2022 at 03:14:35AM +, tanghy.f...@fujitsu.com wrote:
>> Your fix LGTM so please allow me to merge it in the attached patch. 
>> Based on your rebased version, now this new patch version is V3.
>
> What about the argument upthread where we could use a goto in
> functions where there are multiple pattern validation checks?  Please
> see v4 attached.

+1. LGTM.

-- 
Regards,
Japin Li.
ChengDu WenWu Information Technology Co.,Ltd.




Re: Is select_outer_pathkeys_for_merge() too strict now we have Incremental Sorts?

2022-07-19 Thread David Rowley
On Wed, 20 Jul 2022 at 15:02, David Rowley  wrote:
> I've attached a patch for this and it changes the plan for the above query to:

Looks like I based that patch on the wrong branch.

Here's another version of the patch that's based on master.

David
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index b5d6c977ee..5f0ead2db8 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -1890,11 +1890,13 @@ find_mergeclauses_for_outer_pathkeys(PlannerInfo *root,
  * Since we assume here that a sort is required, there is no particular use
  * in matching any available ordering of the outerrel.  (joinpath.c has an
  * entirely separate code path for considering sort-free mergejoins.)  Rather,
- * it's interesting to try to match the requested query_pathkeys so that a
- * second output sort may be avoided; and failing that, we try to list "more
- * popular" keys (those with the most unmatched EquivalenceClass peers)
- * earlier, in hopes of making the resulting ordering useful for as many
- * higher-level mergejoins as possible.
+ * it's interesting to try to match, or match a prefix of the requested
+ * query_pathkeys so that a second output sort may be avoided or an
+ * incremental sort may be done instead.  We can get away with just a prefix
+ * of the query_pathkeys when that prefix covers the entire join condition.
+ * Failing that, we try to list "more popular" keys  (those with the most
+ * unmatched EquivalenceClass peers) earlier, in hopes of making the resulting
+ * ordering useful for as many higher-level mergejoins as possible.
  */
 List *
 select_outer_pathkeys_for_merge(PlannerInfo *root,
@@ -1964,11 +1966,16 @@ select_outer_pathkeys_for_merge(PlannerInfo *root,
 
/*
 * Find out if we have all the ECs mentioned in query_pathkeys; if so we
-* can generate a sort order that's also useful for final output. There is
-* no percentage in a partial match, though, so we have to have 'em all.
+* can generate a sort order that's also useful for final output. If we
+* only have a prefix of the query_pathkeys, and that prefix is the entire
+* join condition, then it's useful to use the prefix as the pathkeys as
+* this increases the chances that an incremental sort will be able to be
+* used by the upper planner.
 */
if (root->query_pathkeys)
{
+   int matches = 0;
+
foreach(lc, root->query_pathkeys)
{
PathKey*query_pathkey = (PathKey *) lfirst(lc);
@@ -1981,6 +1988,8 @@ select_outer_pathkeys_for_merge(PlannerInfo *root,
}
if (j >= necs)
break;  /* didn't find match */
+
+   matches++;
}
/* if we got to the end of the list, we have them all */
if (lc == NULL)
@@ -2003,6 +2012,23 @@ select_outer_pathkeys_for_merge(PlannerInfo *root,
}
}
}
+
+   /*
+* If we didn't find a full match, but matched all of the join clauses
+* then we'll make use of these as partially sorted input is better
+* than nothing for the upper planner as it may lead to incremental
+* sorts instead of full sorts.
+*/
+   else if (matches == nClauses)
+   {
+   pathkeys = list_copy_head(root->query_pathkeys, matches);
+
+   /* we have all of the join pathkeys, so nothing more to do */
+   pfree(ecs);
+   pfree(scores);
+
+   return pathkeys;
+   }
}
 
/*
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index e1d9d971d6..e83c552159 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -2437,6 +2437,37 @@ select count(*) from
  1
 (1 row)
 
+set enable_hashjoin = 0;
+set enable_nestloop = 0;
+set enable_hashagg = 0;
+--
+-- check that we use the pathkeys from a prefix of the group by / order by
+-- clause for the join pathkeys when that prefix covers all join quals.  We
+-- expect this to lead to an incremental sort for the group by / order by.
+--
+explain (costs off)
+select x.thousand, x.twothousand, count(*)
+from tenk1 x inner join tenk1 y on x.thousand = y.thousand
+group by x.thousand, x.twothousand
+order by x.thousand desc, x.twothousand;
+                            QUERY PLAN
+--------------------------------------------------------------
+ GroupAggregate
+   Group Key: x.thousand, x.twothousand
+   ->  Incremental Sort
+ Sort Key: x.thousand 

Re: Memory leak fix in psql

2022-07-19 Thread Michael Paquier
On Wed, Jul 20, 2022 at 03:14:35AM +, tanghy.f...@fujitsu.com wrote:
> Your fix LGTM so please allow me to merge it in the attached patch. 
> Based on your rebased version, now this new patch version is V3.

What about the argument upthread where we could use a goto in
functions where there are multiple pattern validation checks?  Please
see v4 attached.
--
Michael
From ccc8bb83baf618ea1a481193403a9009cd4a24a5 Mon Sep 17 00:00:00 2001
From: Michael Paquier 
Date: Wed, 20 Jul 2022 12:47:52 +0900
Subject: [PATCH v4] fix the memory leak in psql describe

---
 src/bin/psql/describe.c | 239 +++-
 1 file changed, 212 insertions(+), 27 deletions(-)

diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c
index 88d92a08ae..1c40cc6d59 100644
--- a/src/bin/psql/describe.c
+++ b/src/bin/psql/describe.c
@@ -112,7 +112,10 @@ describeAggregates(const char *pattern, bool verbose, bool showSystem)
 "n.nspname", "p.proname", NULL,
 "pg_catalog.pg_function_is_visible(p.oid)",
 NULL, 3))
+	{
+		termPQExpBuffer(&buf);
 		return false;
+	}
 
 	appendPQExpBufferStr(&buf, "ORDER BY 1, 2, 4;");
 
@@ -182,7 +185,10 @@ describeAccessMethods(const char *pattern, bool verbose)
 NULL, "amname", NULL,
 NULL,
 NULL, 1))
+	{
+		termPQExpBuffer(&buf);
 		return false;
+	}
 
 	appendPQExpBufferStr(&buf, "ORDER BY 1;");
 
@@ -244,7 +250,10 @@ describeTablespaces(const char *pattern, bool verbose)
 NULL, "spcname", NULL,
 NULL,
 NULL, 1))
+	{
+		termPQExpBuffer(&buf);
 		return false;
+	}
 
 	appendPQExpBufferStr(&buf, "ORDER BY 1;");
 
@@ -534,7 +543,9 @@ describeFunctions(const char *functypes, const char *func_pattern,
 "n.nspname", "p.proname", NULL,
 "pg_catalog.pg_function_is_visible(p.oid)",
 NULL, 3))
-		return false;
+	{
+		goto error_return;
+	}
 
 	for (int i = 0; i < num_arg_patterns; i++)
 	{
@@ -561,7 +572,9 @@ describeFunctions(const char *functypes, const char *func_pattern,
 		true, false,
 		nspname, typname, ft, tiv,
 		NULL, 3))
-return false;
+			{
+goto error_return;
+			}
 		}
 		else
 		{
@@ -599,6 +612,10 @@ describeFunctions(const char *functypes, const char *func_pattern,
 
 	PQclear(res);
 	return true;
+
+error_return:
+	termPQExpBuffer(&buf);
+	return false;
 }
 
 
@@ -682,7 +699,10 @@ describeTypes(const char *pattern, bool verbose, bool showSystem)
 "pg_catalog.format_type(t.oid, NULL)",
 "pg_catalog.pg_type_is_visible(t.oid)",
 NULL, 3))
+	{
+		termPQExpBuffer(&buf);
 		return false;
+	}
 
 	appendPQExpBufferStr(&buf, "ORDER BY 1, 2;");
 
@@ -836,7 +856,9 @@ describeOperators(const char *oper_pattern,
 "n.nspname", "o.oprname", NULL,
 "pg_catalog.pg_operator_is_visible(o.oid)",
 NULL, 3))
-		return false;
+	{
+		goto error_return;
+	}
 
 	if (num_arg_patterns == 1)
 		appendPQExpBufferStr(&buf, "  AND o.oprleft = 0\n");
@@ -866,7 +888,9 @@ describeOperators(const char *oper_pattern,
 		true, false,
 		nspname, typname, ft, tiv,
 		NULL, 3))
-return false;
+			{
+goto error_return;
+			}
 		}
 		else
 		{
@@ -890,6 +914,10 @@ describeOperators(const char *oper_pattern,
 
 	PQclear(res);
 	return true;
+
+error_return:
+	termPQExpBuffer(&buf);
+	return false;
 }
 
 
@@ -953,7 +981,10 @@ listAllDbs(const char *pattern, bool verbose)
 		if (!validateSQLNamePattern(&buf, pattern, false, false,
 	NULL, "d.datname", NULL, NULL,
 	NULL, 1))
+		{
+			termPQExpBuffer(&buf);
 			return false;
+		}
 
 	appendPQExpBufferStr(&buf, "ORDER BY 1;");
 	res = PSQLexec(buf.data);
@@ -1106,7 +1137,10 @@ permissionsList(const char *pattern)
 "n.nspname", "c.relname", NULL,
 "n.nspname !~ '^pg_' AND pg_catalog.pg_table_is_visible(c.oid)",
 NULL, 3))
+	{
+		termPQExpBuffer(&buf);
 		return false;
+	}
 
 	appendPQExpBufferStr(&buf, "ORDER BY 1, 2;");
 
@@ -1177,16 +1211,15 @@ listDefaultACLs(const char *pattern)
 "pg_catalog.pg_get_userbyid(d.defaclrole)",
 NULL,
 NULL, 3))
-		return false;
+	{
+		goto error_return;
+	}
 
 	appendPQExpBufferStr(&buf, "ORDER BY 1, 2, 3;");
 
 	res = PSQLexec(buf.data);
 	if (!res)
-	{
-		termPQExpBuffer(&buf);
-		return false;
-	}
+		goto error_return;
 
 	myopt.nullPrint = NULL;
 	printfPQExpBuffer(&title, _("Default access privileges"));
@@ -1200,6 +1233,10 @@ listDefaultACLs(const char *pattern)
 	termPQExpBuffer(&buf);
 	PQclear(res);
 	return true;
+
+error_return:
+	termPQExpBuffer(&buf);
+	return false;
 }
 
 
@@ -1253,7 +1290,9 @@ objectDescription(const char *pattern, bool showSystem)
 false, "n.nspname", "pgc.conname", NULL,
 "pg_catalog.pg_table_is_visible(c.oid)",
 NULL, 3))
-		return false;
+	{
+		goto error_return;
+	}
 
 	/* Domain constraint descriptions */
 	appendPQExpBuffer(&buf,
@@ -1277,7 +1316,9 @@ objectDescription(const char *pattern, bool showSystem)
 false, "n.nspname", "pgc.conname", NULL,
 

Re: [BUG] Logical replication failure "ERROR: could not map filenode "base/13237/442428" to relation OID" with catalog modifying txns

2022-07-19 Thread Masahiko Sawada
On Wed, Jul 20, 2022 at 12:11 PM Amit Kapila  wrote:
>
> On Tue, Jul 19, 2022 at 7:28 PM Masahiko Sawada  wrote:
> >
> > On Tue, Jul 19, 2022 at 9:25 PM Amit Kapila  wrote:
> > >
> > > On Tue, Jul 19, 2022 at 1:10 PM Masahiko Sawada  
> > > wrote:
> > > >
> > >
> > > > BTW on backbranches, I think that the reason why we add
> > > > initial_running_xacts stuff to ReorderBuffer is that we cannot modify
> > > > SnapBuild that could be serialized. Can we add a (private) array for
> > > > the initial running xacts in snapbuild.c instead of adding new
> > > > variables to ReorderBuffer?
> > > >
> > >
> > > While thinking about this, I wonder if the current patch for back
> > > branches can lead to an ABI break as it changes the exposed structure?
> > > If so, it may be another reason to change it in some other way,
> > > probably as you are suggesting.
> >
> > Yeah, it changes the size of ReorderBuffer, which is not good.
> >
>
> So, are you planning to give a try with your idea of making a private
> array for the initial running xacts?

Yes.

>  I am not sure but I guess you are
> proposing to add it to the SnapBuild structure; if so, that seems safe as
> that structure is not exposed.

We cannot add it to SnapBuild as it gets serialized to disk.

>
> > Changing the function names and arguments would also break ABI. So
> > probably we cannot do the above idea of removing
> > ReorderBufferInitialXactsSetCatalogChanges() as well.
> >
>
> Why do you think we can't remove
> ReorderBufferInitialXactsSetCatalogChanges() from the back branch
> patch? I think we don't need to change the existing function
> ReorderBufferXidHasCatalogChanges() but instead can have a wrapper
> like SnapBuildXidHasCatalogChanges() similar to master branch patch.

IIUC we need to change SnapBuildCommitTxn() but it's exposed.

Currently, the call chain is DecodeCommit() -> SnapBuildCommitTxn() ->
ReorderBufferXidHasCatalogChanges(). If we have a wrapper function, it
becomes DecodeCommit() -> SnapBuildCommitTxn() ->
SnapBuildXidHasCatalogChanges() ->
ReorderBufferXidHasCatalogChanges(). In
SnapBuildXidHasCatalogChanges(), we need to check if the transaction
has XACT_XINFO_HAS_INVALS, which means DecodeCommit() needs to pass
either parsed->xinfo or ((parsed->xinfo & XACT_XINFO_HAS_INVALS) != 0)
down to SnapBuildXidHasCatalogChanges(). However, since
SnapBuildCommitTxn(), which sits between DecodeCommit() and
SnapBuildXidHasCatalogChanges(), is exposed, we cannot change it.

Another idea would be to have functions, say
SnapBuildCommitTxnWithXInfo() and SnapBuildCommitTxn_ext(). The latter
does the actual work of handling transaction commits, and both
SnapBuildCommitTxn() and SnapBuildCommitTxnWithXInfo() call
SnapBuildCommitTxn_ext() with different arguments.
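
A minimal sketch of that shape (the function bodies and exact signatures
here are my illustrative assumptions, not the final patch):

/* New internal worker; the exposed entry points keep their ABI. */
static void
SnapBuildCommitTxn_ext(SnapBuild *builder, XLogRecPtr lsn, TransactionId xid,
                       int nsubxacts, TransactionId *subxacts, uint32 xinfo)
{
    /* ... actual commit handling, now able to look at xinfo ... */
}

void
SnapBuildCommitTxn(SnapBuild *builder, XLogRecPtr lsn, TransactionId xid,
                   int nsubxacts, TransactionId *subxacts)
{
    SnapBuildCommitTxn_ext(builder, lsn, xid, nsubxacts, subxacts, 0);
}

void
SnapBuildCommitTxnWithXInfo(SnapBuild *builder, XLogRecPtr lsn,
                            TransactionId xid, int nsubxacts,
                            TransactionId *subxacts, uint32 xinfo)
{
    SnapBuildCommitTxn_ext(builder, lsn, xid, nsubxacts, subxacts, xinfo);
}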

Regards,

-- 
Masahiko Sawada
EDB:  https://www.enterprisedb.com/




Re: Windows now has fdatasync()

2022-07-19 Thread Thomas Munro
Ok, I've pushed the Windows patch.  I'll watch the build farm to see
if I've broken any of the frankentoolchain Windows animals.

Mikael kindly upgraded conchuela, so that leaves just prairiedog
without fdatasync.  I've attached a patch to drop the configure probe
for that once prairiedog's host is reassigned to new duties, if we're
agreed on that.

While in this part of the code I noticed another anachronism that
could be cleaned up: our handling of the old pre-standard BSD O_FSYNC
flag.  Pulling on that I noticed I could remove a bunch of associated
macrology.
From b78c07a573c9f7e634eb9d33f57b07e9c37bfb47 Mon Sep 17 00:00:00 2001
From: Thomas Munro 
Date: Wed, 20 Jul 2022 12:32:26 +1200
Subject: [PATCH 1/2] Remove O_FSYNC and associated macros.

O_FSYNC was a pre-standard way of spelling O_SYNC.  It's not needed on
any modern system.  We can just use O_SYNC directly, if it exists, and
get rid of our OPEN_SYNC_FLAG macro.

Likewise for O_DSYNC, we can just use it directly, if it exists, and get
rid of our OPEN_DATASYNC_FLAG macro.  The only complication is that we
still want to avoid choosing open_datasync as a default if O_DSYNC has
the same value as O_SYNC, so there is no change in behavior.
---
 src/backend/access/transam/xlog.c | 20 ++--
 src/bin/pg_test_fsync/pg_test_fsync.c | 12 ++--
 src/include/access/xlogdefs.h | 19 +--
 3 files changed, 17 insertions(+), 34 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 9854b51c6a..1da3b8eb2e 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -171,10 +171,10 @@ const struct config_enum_entry sync_method_options[] = {
 #ifdef HAVE_FDATASYNC
 	{"fdatasync", SYNC_METHOD_FDATASYNC, false},
 #endif
-#ifdef OPEN_SYNC_FLAG
+#ifdef O_SYNC
 	{"open_sync", SYNC_METHOD_OPEN, false},
 #endif
-#ifdef OPEN_DATASYNC_FLAG
+#ifdef O_DSYNC
 	{"open_datasync", SYNC_METHOD_OPEN_DSYNC, false},
 #endif
 	{NULL, 0, false}
@@ -7894,10 +7894,10 @@ get_sync_bit(int method)
 
 	/*
 	 * Optimize writes by bypassing kernel cache with O_DIRECT when using
-	 * O_SYNC/O_FSYNC and O_DSYNC.  But only if archiving and streaming are
-	 * disabled, otherwise the archive command or walsender process will read
-	 * the WAL soon after writing it, which is guaranteed to cause a physical
-	 * read if we bypassed the kernel cache. We also skip the
+	 * O_SYNC and O_DSYNC.  But only if archiving and streaming are disabled,
+	 * otherwise the archive command or walsender process will read the WAL
+	 * soon after writing it, which is guaranteed to cause a physical read if
+	 * we bypassed the kernel cache. We also skip the
 	 * posix_fadvise(POSIX_FADV_DONTNEED) call in XLogFileClose() for the same
 	 * reason.
 	 *
@@ -7921,13 +7921,13 @@ get_sync_bit(int method)
 		case SYNC_METHOD_FSYNC_WRITETHROUGH:
 		case SYNC_METHOD_FDATASYNC:
 			return 0;
-#ifdef OPEN_SYNC_FLAG
+#ifdef O_SYNC
 		case SYNC_METHOD_OPEN:
-			return OPEN_SYNC_FLAG | o_direct_flag;
+			return O_SYNC | o_direct_flag;
 #endif
-#ifdef OPEN_DATASYNC_FLAG
+#ifdef O_DSYNC
 		case SYNC_METHOD_OPEN_DSYNC:
-			return OPEN_DATASYNC_FLAG | o_direct_flag;
+			return O_DSYNC | o_direct_flag;
 #endif
 		default:
 			/* can't happen (unless we are out of sync with option array) */
diff --git a/src/bin/pg_test_fsync/pg_test_fsync.c b/src/bin/pg_test_fsync/pg_test_fsync.c
index f7bc199a30..6739214eb8 100644
--- a/src/bin/pg_test_fsync/pg_test_fsync.c
+++ b/src/bin/pg_test_fsync/pg_test_fsync.c
@@ -300,7 +300,7 @@ test_sync(int writes_per_op)
 	printf(LABEL_FORMAT, "open_datasync");
 	fflush(stdout);
 
-#ifdef OPEN_DATASYNC_FLAG
+#ifdef O_DSYNC
 	if ((tmpfile = open_direct(filename, O_RDWR | O_DSYNC | PG_BINARY, 0)) == -1)
 	{
 		printf(NA_FORMAT, _("n/a*"));
@@ -407,8 +407,8 @@ test_sync(int writes_per_op)
 	printf(LABEL_FORMAT, "open_sync");
 	fflush(stdout);
 
-#ifdef OPEN_SYNC_FLAG
-	if ((tmpfile = open_direct(filename, O_RDWR | OPEN_SYNC_FLAG | PG_BINARY, 0)) == -1)
+#ifdef O_SYNC
+	if ((tmpfile = open_direct(filename, O_RDWR | O_SYNC | PG_BINARY, 0)) == -1)
 	{
 		printf(NA_FORMAT, _("n/a*"));
 		fs_warning = true;
@@ -466,7 +466,7 @@ test_open_syncs(void)
 static void
 test_open_sync(const char *msg, int writes_size)
 {
-#ifdef OPEN_SYNC_FLAG
+#ifdef O_SYNC
 	int			tmpfile,
 ops,
 writes;
@@ -475,8 +475,8 @@ test_open_sync(const char *msg, int writes_size)
 	printf(LABEL_FORMAT, msg);
 	fflush(stdout);
 
-#ifdef OPEN_SYNC_FLAG
-	if ((tmpfile = open_direct(filename, O_RDWR | OPEN_SYNC_FLAG | PG_BINARY, 0)) == -1)
+#ifdef O_SYNC
+	if ((tmpfile = open_direct(filename, O_RDWR | O_SYNC | PG_BINARY, 0)) == -1)
 		printf(NA_FORMAT, _("n/a*"));
 	else
 	{
diff --git a/src/include/access/xlogdefs.h b/src/include/access/xlogdefs.h
index a47e3eeb1f..a720169b17 100644
--- a/src/include/access/xlogdefs.h
+++ b/src/include/access/xlogdefs.h
@@ -70,27 +70,10 @@ typedef uint16 RepOriginId;
  * default method.  

Re: System column support for partitioned tables using heap

2022-07-19 Thread Morris de Oryx
On Tue, Jul 19, 2022 at 10:38 PM Robert Haas  wrote:


> For MERGE itself, I wonder if some information about this should be
> included in the command tag. It looks like MERGE already includes some
> sort of row count in the command tag, but I guess perhaps it doesn't
> distinguish between inserts and updates. I don't know why we couldn't
> expose multiple values this way, though.

It would be great to get some sort of feedback from MERGE accessible
through SQL results, even if that doesn't come in the form of a RETURNING
list.

> I wonder whether you could just have the CTEs bubble up 1 or 0 and
> then sum them at some stage, instead of relying on xmax. Presumably
> your UPSERT simulation knows which thing it did in each case.

It might help if I show a sample insert handling function. The issue is
with the line at the end of the top CTE, inserted_rows:

returning xmax as inserted_transaction_id),

That's what fails on partitions. Is there an alternative way to test what
happened to the row(s)? Here's the full function. I wrote a code
generator, so I don't have to hand-code all of these bits for each
table+version:

-- Create a function to accept an array of rows formatted as item_type_v1
-- for UPSERT into item_type.
DROP FUNCTION IF EXISTS types_plus.insert_item_type_v1 (types_plus.item_type_v1[]);

CREATE OR REPLACE FUNCTION types_plus.insert_item_type_v1 (data_in types_plus.item_type_v1[])

RETURNS TABLE (
    insert_count            integer,
    estimated_update_count  integer,
    transaction_id          text)

LANGUAGE SQL

BEGIN ATOMIC

-- The CTE below is a roundabout way of returning an insertion count from a
-- pure SQL function in Postgres.
WITH
inserted_rows as (
    INSERT INTO item_type (
        id,
        marked_for_deletion,
        name_)

    SELECT
        rows_in.id,
        rows_in.marked_for_deletion,
        rows_in.name_

    FROM unnest(data_in) as rows_in

    ON CONFLICT(id) DO UPDATE SET
        marked_for_deletion = EXCLUDED.marked_for_deletion,
        name_               = EXCLUDED.name_

    returning xmax as inserted_transaction_id),

status_data AS (
    select count(*) FILTER (where inserted_transaction_id =  0) AS insert_count,
           count(*) FILTER (where inserted_transaction_id != 0) AS estimated_update_count,
           pg_current_xact_id_if_assigned()::text               AS transaction_id

    from inserted_rows),

insert_log_entry AS (
    INSERT INTO insert_log (
        data_file_id,
        ib_version,
        job_run_id,

        schema_name,
        table_name,
        records_submitted,
        insert_count,
        estimated_update_count)

    SELECT
        coalesce_session_variable(
            'data_file_id',
            '')::uuid,

        coalesce_session_variable('ib_version'),  -- Default result is ''

        coalesce_session_variable(
            'job_run_id',
            '')::uuid,

        'ascendco',
        'item_type',
        (select cardinality(data_in)),
        insert_count,
        estimated_update_count

    FROM status_data
)

-- Final output/result.
select insert_count,
       estimated_update_count,
       transaction_id

  from status_data;

END;
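
For context, this is the xmax trick the function relies on, in isolation
(a minimal sketch against a hypothetical table t with a unique id; on an
ordinary table, xmax = 0 in RETURNING marks a freshly inserted row and a
non-zero xmax an updated one, and it is exactly this that has no
counterpart on partitioned tables):

INSERT INTO t (id, val)
VALUES (1, 'x')
ON CONFLICT (id) DO UPDATE SET val = EXCLUDED.val
RETURNING (xmax = 0) AS inserted;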


RE: Memory leak fix in psql

2022-07-19 Thread tanghy.f...@fujitsu.com
On Tuesday, July 19, 2022 7:41 PM, Japin Li  wrote:
> After looking around, I found psql/describe.c also has some memory
> leaks,
> attached a patch to fix these leaks.

Thanks for your check and improvement.
Your fix LGTM, so please allow me to merge it into the attached patch.
Based on your rebased version, this new patch version is V3.

Regards,
Tang



v3-0001-fix-the-memory-leak-in-psql-describe.patch
Description: v3-0001-fix-the-memory-leak-in-psql-describe.patch


Re: [BUG] Logical replication failure "ERROR: could not map filenode "base/13237/442428" to relation OID" with catalog modifying txns

2022-07-19 Thread Amit Kapila
On Tue, Jul 19, 2022 at 7:28 PM Masahiko Sawada  wrote:
>
> On Tue, Jul 19, 2022 at 9:25 PM Amit Kapila  wrote:
> >
> > On Tue, Jul 19, 2022 at 1:10 PM Masahiko Sawada  
> > wrote:
> > >
> >
> > > BTW on backbranches, I think that the reason why we add
> > > initial_running_xacts stuff to ReorderBuffer is that we cannot modify
> > > SnapBuild that could be serialized. Can we add a (private) array for
> > > the initial running xacts in snapbuild.c instead of adding new
> > > variables to ReorderBuffer?
> > >
> >
> > While thinking about this, I wonder if the current patch for back
> > branches can lead to an ABI break as it changes the exposed structure?
> > If so, it may be another reason to change it to some other way
> > probably as you are suggesting.
>
> Yeah, it changes the size of ReorderBuffer, which is not good.
>

So, are you planning to give your idea of making a private array for
the initial running xacts a try? I am not sure, but I guess you are
proposing to add it to the SnapBuild structure; if so, that seems safe as
that structure is not exposed.

> Changing the function names and arguments would also break ABI. So
> probably we cannot do the above idea of removing
> ReorderBufferInitialXactsSetCatalogChanges() as well.
>

Why do you think we can't remove
ReorderBufferInitialXactsSetCatalogChanges() from the back branch
patch? I think we don't need to change the existing function
ReorderBufferXidHasCatalogChanges() but instead can have a wrapper
like SnapBuildXidHasCatalogChanges() similar to master branch patch.
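
A rough sketch of such a wrapper (the signature and exact check are my
assumptions, for illustration only):

/* snapbuild.c */
bool
SnapBuildXidHasCatalogChanges(SnapBuild *builder, TransactionId xid,
                              uint32 xinfo)
{
    if (ReorderBufferXidHasCatalogChanges(builder->reorder, xid))
        return true;

    /* Treat transactions carrying invalidations as catalog-modifying. */
    return (xinfo & XACT_XINFO_HAS_INVALS) != 0;
}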

-- 
With Regards,
Amit Kapila.




Is select_outer_pathkeys_for_merge() too strict now we have Incremental Sorts?

2022-07-19 Thread David Rowley
Hackers,

Currently, if we have a query such as:

SELECT a,b, COUNT(*)
FROM a
INNER JOIN b on a.a = b.x
GROUP BY a,b
ORDER BY a DESC, b;

With enable_hashagg = off, we get the following plan:

  QUERY PLAN
---
 GroupAggregate
   Group Key: a.a, a.b
   ->  Sort
 Sort Key: a.a DESC, a.b
 ->  Merge Join
   Merge Cond: (a.a = b.x)
   ->  Sort
 Sort Key: a.a
 ->  Seq Scan on a
   ->  Sort
 Sort Key: b.x
 ->  Seq Scan on b

We can see that the merge join chose to sort its input on a.a rather
than a.a DESC.  This is due to the way
select_outer_pathkeys_for_merge() only picks the query_pathkeys as a
prefix of the join pathkeys if we can find all of the join pathkeys in
the query_pathkeys.

I think we can relax this now that we have incremental sort.  I think
a better way to limit this is to allow a prefix of the query_pathkeys,
provided that prefix covers *all* of the join pathkeys.  That way, for the
above query, it leaves it open for the planner to do the Merge Join by
sorting by a.a DESC then just do an Incremental Sort to get the
GroupAggregate input sorted by a.b.
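
For anyone wanting to reproduce this, a minimal setup is (the column
types are my assumption, and some data plus an ANALYZE may be needed
before the planner picks a merge join):

CREATE TABLE a (a int, b int);
CREATE TABLE b (x int);
SET enable_hashagg = off;
EXPLAIN (COSTS OFF)
SELECT a, b, COUNT(*)
FROM a
INNER JOIN b ON a.a = b.x
GROUP BY a, b
ORDER BY a DESC, b;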

I've attached a patch for this and it changes the plan for the above query to:

   QUERY PLAN

 GroupAggregate
   Group Key: a.a, a.b
   ->  Incremental Sort
 Sort Key: a.a DESC, a.b
 Presorted Key: a.a
 ->  Merge Join
   Merge Cond: (a.a = b.x)
   ->  Sort
 Sort Key: a.a DESC
 ->  Seq Scan on a
   ->  Sort
 Sort Key: b.x DESC
 ->  Seq Scan on b

The current behaviour is causing me a bit of trouble in plan
regression for the ORDER BY / DISTINCT aggregate improvement patch
that I have pending.

Is there any reason that we shouldn't do this?

David
diff --git a/src/backend/optimizer/path/pathkeys.c 
b/src/backend/optimizer/path/pathkeys.c
index 11de286e60..cf08fcac51 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -1924,11 +1924,13 @@ find_mergeclauses_for_outer_pathkeys(PlannerInfo *root,
  * Since we assume here that a sort is required, there is no particular use
  * in matching any available ordering of the outerrel.  (joinpath.c has an
  * entirely separate code path for considering sort-free mergejoins.)  Rather,
- * it's interesting to try to match the requested query_pathkeys so that a
- * second output sort may be avoided; and failing that, we try to list "more
- * popular" keys (those with the most unmatched EquivalenceClass peers)
- * earlier, in hopes of making the resulting ordering useful for as many
- * higher-level mergejoins as possible.
+ * it's interesting to try to match, or match a prefix of the requested
+ * query_pathkeys so that a second output sort may be avoided.  We can get
+ * away with just a prefix of the query_pathkeys when that prefix covers the
+ * entire join condition.  Failing that, we try to list "more popular" keys
+ *  (those with the most unmatched EquivalenceClass peers) earlier, in hopes
+ * of making the resulting ordering useful for as many higher-level mergejoins
+ * as possible.
  */
 List *
 select_outer_pathkeys_for_merge(PlannerInfo *root,
@@ -1998,11 +2000,16 @@ select_outer_pathkeys_for_merge(PlannerInfo *root,
 
/*
 * Find out if we have all the ECs mentioned in query_pathkeys; if so we
-* can generate a sort order that's also useful for final output. There is
-* no percentage in a partial match, though, so we have to have 'em all.
+* can generate a sort order that's also useful for final output. If we
+* only have a prefix of the query_pathkeys, and that prefix is the entire
+* join condition, then it's useful to use the prefix as the pathkeys as
+* this increases the chances that an incremental sort will be able to be
+* used by the upper planner.
 */
if (root->query_pathkeys)
{
+   int matches = 0;
+
foreach(lc, root->query_pathkeys)
{
PathKey*query_pathkey = (PathKey *) lfirst(lc);
@@ -2015,6 +2022,8 @@ select_outer_pathkeys_for_merge(PlannerInfo *root,
}
if (j >= necs)
break;  /* didn't find match */
+
+   matches++;
}
/* if we got to the end of the list, we have them all */
if (lc == NULL)
@@ -2037,6 +2046,22 @@ select_outer_pathkeys_for_merge(PlannerInfo *root,
}
}
}
+
+   /*
+* If we didn't find a full match, but matched all of the join 
clauses

Re: [BUG] Logical replication failure "ERROR: could not map filenode "base/13237/442428" to relation OID" with catalog modifying txns

2022-07-19 Thread Masahiko Sawada
On Wed, Jul 20, 2022 at 9:58 AM Kyotaro Horiguchi
 wrote:
>
> At Tue, 19 Jul 2022 17:31:07 +0900, Masahiko Sawada  
> wrote in
> > On Tue, Jul 19, 2022 at 4:35 PM Kyotaro Horiguchi
> >  wrote:
> > > At Tue, 19 Jul 2022 10:17:15 +0530, Amit Kapila  
> > > wrote in
> > > > Good work. I wonder without comments this may create a problem in the
> > > > future. OTOH, I don't see adding a check "catchange.xcnt > 0" before
> > > > freeing the memory any less robust. Also, for consistency, we can use
> > > > a similar check based on xcnt in the SnapBuildRestore to free the
> > > > memory in the below code:
> > > > + /* set catalog modifying transactions */
> > > > + if (builder->catchange.xip)
> > > > + pfree(builder->catchange.xip);
> > >
> > > But xip must be positive there.  We can add a comment explaining that.
> > >
> >
> > Yes, if we add the comment for it, we probably need to explain a gcc
> > optimization, but that seems to be too much to me.
>
> Ah, sorry. I confused it with another place, in SnapBuildPurgeCommittedTxn.
> I agree with you that we don't need an additional comment *there*.
>
> > > +   catchange_xip = 
> > > ReorderBufferGetCatalogChangesXacts(builder->reorder);
> > >
> > > catchange_xip is allocated in the current context, but ondisk is
> > > allocated in builder->context.  I see it kind of inconsistent (even if
> > > the current context is same with build->context).
> >
> > Right. I thought that since the lifetime of catchange_xip is short,
> > until the end of SnapBuildSerialize() function we didn't need to
> > allocate it in builder->context. But given ondisk, we need to do that
> > for catchange_xip as well. Will fix it.
>
> Thanks.
>
> > > +   if (builder->committed.xcnt > 0)
> > > +   {
> > >
> > It seems to me committed.xip is always non-null, so we don't need this.
> > > I don't strongly object to do that, though.
> >
> > But committed.xcnt could be 0, right? We don't need to copy anything
> > by calling memcpy with size = 0 in this case. Also, it looks more
> > consistent with what we do for catchange_xcnt.
>
> Mmm. The patch changed that behavior. AllocateSnapshotBuilder always
> allocates the array with a fixed size. SnapBuildAddCommittedTxn still
> assumes builder->committed.xip is non-NULL.  SnapBuildRestore *kept*
> ondisk.builder.committed.xip populated with a valid array pointer. But
> the patch allows committed.xip to be NULL, thus in that case,
> SnapBuildAddCommittedTxn calls repalloc(NULL), which triggers an
> assertion failure.

IIUC the patch doesn't allow committed.xip to be NULL since we don't
overwrite it if builder->committed.xcnt is 0 (i.e.,
ondisk.builder.committed.xip is NULL):

builder->committed.xcnt = ondisk.builder.committed.xcnt;
/* We only allocated/stored xcnt, not xcnt_space xids ! */
/* don't overwrite preallocated xip, if we don't have anything here */
if (builder->committed.xcnt > 0)
{
pfree(builder->committed.xip);
builder->committed.xcnt_space = ondisk.builder.committed.xcnt;
builder->committed.xip = ondisk.builder.committed.xip;
}

Regards,

-- 
Masahiko Sawada
EDB:  https://www.enterprisedb.com/




Re: Expose last replayed timeline ID along with last replayed LSN

2022-07-19 Thread Kyotaro Horiguchi
At Tue, 19 Jul 2022 14:28:40 +0530, Bharath Rupireddy 
 wrote in 
> Hi,
> 
> At times it's useful to know the last replayed WAL record's timeline
> ID (especially on the standbys that are lagging in applying WAL while
> failing over - for reporting, logging and debugging purposes). AFICS,
> there's no function that exposes the last replayed TLI. We can either
> change the existing pg_last_wal_replay_lsn() to report TLI along with
> the LSN which might break the compatibility or introduce a new
> function pg_last_wal_replay_info() that emits both LSN and TLI. I'm
> fine with either of the approaches, but for now, I'm attaching a WIP
> patch that adds a new function pg_last_wal_replay_info().
> 
> Thoughts?

There was a more comprehensive discussion [1], which went nowhere.

[1] https://www.postgresql.org/message-id/20191211052002.GK72921%40paquier.xyz

regards.

-- 
Kyotaro Horiguchi
NTT Open Source Software Center




Re: errdetail/errhint style

2022-07-19 Thread Michael Paquier
On Tue, Jul 19, 2022 at 09:51:13PM +0900, Michael Paquier wrote:
> +1.

I have looked at that and added the extra periods, and applied it.
Thanks, Justin.
--
Michael


signature.asc
Description: PGP signature


Re: Use "WAL segment" instead of "log segment" consistently in user-facing messages

2022-07-19 Thread Kyotaro Horiguchi
At Tue, 19 Jul 2022 09:58:28 -0700, Nathan Bossart  
wrote in 
> On Tue, Jul 19, 2022 at 02:43:59PM +0530, Bharath Rupireddy wrote:
> > Done. PSA v5 patch set.
> 
> LGTM.  I've marked this as ready-for-committer.

I find the following sentence in config.sgml: "Specifies the minimum
size of past log file segments kept in the pg_wal directory"

postgresql.conf.sample contains "logfile segment" in a few lines.

regards.

-- 
Kyotaro Horiguchi
NTT Open Source Software Center




Re: [BUG] Logical replication failure "ERROR: could not map filenode "base/13237/442428" to relation OID" with catalog modifying txns

2022-07-19 Thread Kyotaro Horiguchi
At Tue, 19 Jul 2022 17:31:07 +0900, Masahiko Sawada  
wrote in 
> On Tue, Jul 19, 2022 at 4:35 PM Kyotaro Horiguchi
>  wrote:
> > At Tue, 19 Jul 2022 10:17:15 +0530, Amit Kapila  
> > wrote in
> > > Good work. I wonder without comments this may create a problem in the
> > > future. OTOH, I don't see adding a check "catchange.xcnt > 0" before
> > > freeing the memory any less robust. Also, for consistency, we can use
> > > a similar check based on xcnt in the SnapBuildRestore to free the
> > > memory in the below code:
> > > + /* set catalog modifying transactions */
> > > + if (builder->catchange.xip)
> > > + pfree(builder->catchange.xip);
> >
> > But xip must be positive there.  We can add a comment explaining that.
> >
> 
> Yes, if we add the comment for it, we probably need to explain a gcc
> optimization, but that seems to be too much to me.

Ah, sorry. I confused it with another place, in SnapBuildPurgeCommittedTxn.
I agree with you that we don't need an additional comment *there*.

> > +   catchange_xip = 
> > ReorderBufferGetCatalogChangesXacts(builder->reorder);
> >
> > catchange_xip is allocated in the current context, but ondisk is
> > allocated in builder->context.  I see it kind of inconsistent (even if
> > the current context is same with build->context).
> 
> Right. I thought that since the lifetime of catchange_xip is short,
> until the end of SnapBuildSerialize() function we didn't need to
> allocate it in builder->context. But given ondisk, we need to do that
> for catchange_xip as well. Will fix it.

Thanks.

> > +   if (builder->committed.xcnt > 0)
> > +   {
> >
> > It seems to me committed.xip is always non-null, so we don't need this.
> > I don't strongly object to do that, though.
> 
> But committed.xcnt could be 0, right? We don't need to copy anything
> by calling memcpy with size = 0 in this case. Also, it looks more
> consistent with what we do for catchange_xcnt.

Mmm. The patch changed that behavior. AllocateSnapshotBuilder always
allocates the array with a fixed size. SnapBuildAddCommittedTxn still
assumes builder->committed.xip is non-NULL.  SnapBuildRestore *kept*
ondisk.builder.committed.xip populated with a valid array pointer. But
the patch allows committed.xip to be NULL, thus in that case,
SnapBuildAddCommittedTxn calls repalloc(NULL), which triggers an
assertion failure.

> > +   Assert((xcnt > 0) && (xcnt == rb->catchange_ntxns));
> >
> > (xcnt > 0) is obvious here (otherwise dlist_foreach is broken).
> > (xcnt == rb->catchange_ntxns) is not what should be checked here. The
> > assert just requires that catchange_txns and catchange_ntxns are
> > consistent, so it should be checked just after dlist_empty, I think.
> >
> 
> If we want to check if catchange_txns and catchange_ntxns are
> consistent, should we check (xcnt == rb->catchange_ntxns) as well, no?
> This function requires the caller to use rb->catchange_ntxns as the
> length of the returned array. I think this assertion ensures that the
> actual length of the array is consistent with the length we
> pre-calculated.

Sorry again. Please forget the comment about xcnt == rb->catchange_ntxns.

regards.

-- 
Kyotaro Horiguchi
NTT Open Source Software Center




Re: Add a test for "cannot truncate foreign table"

2022-07-19 Thread Yugo NAGATA
On Wed, 20 Jul 2022 09:38:17 +0900
Fujii Masao  wrote:

> 
> 
> On 2022/07/15 16:52, Fujii Masao wrote:
> > 
> > 
> > On 2022/07/08 17:03, Etsuro Fujita wrote:
> >> Rather than doing so, I'd vote for adding a test case to file_fdw, as
> >> proposed in the patch, because that would be much simpler and much
> >> less expensive.
> > 
> > So ISTM that most agreed to push Nagata-san's patch adding the test for 
> > TRUNCATE on foreign table with file_fdw. So barring any objection, I will 
> > commit the patch.
> 
> Pushed. Thanks!

Thanks!

> 
> Regards,
> 
> -- 
> Fujii Masao
> Advanced Computing Technology Center
> Research and Development Headquarters
> NTT DATA CORPORATION


-- 
Yugo NAGATA 




Re: Add a test for "cannot truncate foreign table"

2022-07-19 Thread Fujii Masao




On 2022/07/15 16:52, Fujii Masao wrote:
> On 2022/07/08 17:03, Etsuro Fujita wrote:
>> Rather than doing so, I'd vote for adding a test case to file_fdw, as
>> proposed in the patch, because that would be much simpler and much
>> less expensive.
>
> So ISTM that most agreed to push Nagata-san's patch adding the test for
> TRUNCATE on foreign table with file_fdw. So barring any objection, I will
> commit the patch.

Pushed. Thanks!

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION




Re: Memory leak fix in psql

2022-07-19 Thread Michael Paquier
On Tue, Jul 19, 2022 at 09:43:21AM -0700, Mark Dilger wrote:
> This looks ok, but comments down-thread seem reasonable, so I
> suspect a new patch will be needed.  Would you like to author it, or
> would you prefer that I, as the guilty party, do so? 

If any of you could update the patch, that would be great.  Thanks!
--
Michael


signature.asc
Description: PGP signature


Re: Memory leak fix in psql

2022-07-19 Thread Michael Paquier
On Tue, Jul 19, 2022 at 12:36:31PM -0400, Tom Lane wrote:
> Yeah, it's old school, but please let's not have a few functions that
> do it randomly differently from all their neighbors.

True enough.  And it is not like we should free the PQExpBuffer given
by the caller in validateSQLNamePattern().
--
Michael


signature.asc
Description: PGP signature


Re: [Commitfest 2022-07] Begins Now

2022-07-19 Thread Joe Conway

On 7/18/22 02:53, Alvaro Herrera wrote:
> On 2022-Jul-18, Aleksander Alekseev wrote:
>> Hi hackers,
>>
>>> If someone put a lot of review into a patchset a few months ago, they
>>> absolutely deserve credit. But if that entry has been sitting with no
>>> feedback this month, why is it useful to keep that Reviewer around?
>>
>> As I recall, several committers reported before that they use
>> Reviewers field in the CF application when writing the commit message.
>> I would argue that this is the reason.
>
> Maybe we need two separate reviewer columns -- one for credits
> (historical tracking) and one for people currently reviewing a patch.
> So we definitely expect an email "soon" from someone in the second
> column, but not from somebody who is only in the first column.

+1


--
Joe Conway
RDS Open Source Databases
Amazon Web Services: https://aws.amazon.com




Re: Convert planner's AggInfo and AggTransInfo to Nodes

2022-07-19 Thread Tom Lane
I wrote:
> * WRITE_OID_ARRAY and WRITE_BOOL_ARRAY needed extension to handle a null
> array pointer.  I think we should make all the WRITE_FOO_ARRAY macros
> work alike, so I added that to all of them.  I first tried to make the
> rest work like WRITE_INDEX_ARRAY, but that failed because readfuncs.c
> isn't expecting "<>" for an empty array; it's expecting nothing at
> all.  (Note there is no readfuncs equivalent to WRITE_INDEX_ARRAY.)
> What I've done here is to change WRITE_INDEX_ARRAY to work like the
> others and print nothing for an empty array, but I wonder if now
> wouldn't be a good time to redefine the serialized representation
> to be more robust.  I'm imagining "<>" for a NULL array pointer and
> "(item item item)" otherwise, allowing a cross-check that we're
> getting the right number of items.

Concretely, about like this.

regards, tom lane

diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 2b85f97f39..fe97d559b6 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -16,6 +16,7 @@
 
 #include 
 
+#include "access/attnum.h"
 #include "lib/stringinfo.h"
 #include "miscadmin.h"
 #include "nodes/bitmapset.h"
@@ -96,48 +97,30 @@ static void outChar(StringInfo str, char c);
 	(appendStringInfoString(str, " :" CppAsString(fldname) " "), \
 	 outBitmapset(str, node->fldname))
 
+/* Write a variable-length array of AttrNumber */
 #define WRITE_ATTRNUMBER_ARRAY(fldname, len) \
-	do { \
-		appendStringInfoString(str, " :" CppAsString(fldname) " "); \
-		for (int i = 0; i < len; i++) \
-			appendStringInfo(str, " %d", node->fldname[i]); \
-	} while(0)
+	(appendStringInfoString(str, " :" CppAsString(fldname) " "), \
+	 writeAttrNumberCols(str, node->fldname, len))
 
+/* Write a variable-length array of Oid */
 #define WRITE_OID_ARRAY(fldname, len) \
-	do { \
-		appendStringInfoString(str, " :" CppAsString(fldname) " "); \
-		for (int i = 0; i < len; i++) \
-			appendStringInfo(str, " %u", node->fldname[i]); \
-	} while(0)
+	(appendStringInfoString(str, " :" CppAsString(fldname) " "), \
+	 writeOidCols(str, node->fldname, len))
 
-/*
- * This macro supports the case that the field is NULL.  For the other array
- * macros, that is currently not needed.
- */
+/* Write a variable-length array of Index */
 #define WRITE_INDEX_ARRAY(fldname, len) \
-	do { \
-		appendStringInfoString(str, " :" CppAsString(fldname) " "); \
-		if (node->fldname) \
-			for (int i = 0; i < len; i++) \
-appendStringInfo(str, " %u", node->fldname[i]); \
-		else \
-			appendStringInfoString(str, "<>"); \
-	} while(0)
+	(appendStringInfoString(str, " :" CppAsString(fldname) " "), \
+	 writeIndexCols(str, node->fldname, len))
 
+/* Write a variable-length array of int */
 #define WRITE_INT_ARRAY(fldname, len) \
-	do { \
-		appendStringInfoString(str, " :" CppAsString(fldname) " "); \
-		for (int i = 0; i < len; i++) \
-			appendStringInfo(str, " %d", node->fldname[i]); \
-	} while(0)
+	(appendStringInfoString(str, " :" CppAsString(fldname) " "), \
+	 writeIntCols(str, node->fldname, len))
 
+/* Write a variable-length array of bool */
 #define WRITE_BOOL_ARRAY(fldname, len) \
-	do { \
-		appendStringInfoString(str, " :" CppAsString(fldname) " "); \
-		for (int i = 0; i < len; i++) \
-			appendStringInfo(str, " %s", booltostr(node->fldname[i])); \
-	} while(0)
-
+	(appendStringInfoString(str, " :" CppAsString(fldname) " "), \
+	 writeBoolCols(str, node->fldname, len))
 
 #define booltostr(x)  ((x) ? "true" : "false")
 
@@ -196,6 +179,37 @@ outChar(StringInfo str, char c)
 	outToken(str, in);
 }
 
+/*
+ * common implementation for scalar-array-writing functions
+ *
+ * The data format is either "<>" for a NULL pointer or "(item item item)".
+ * fmtstr must include a leading space.
+ * convfunc can be empty, or the name of a conversion macro or function.
+ */
+#define WRITE_SCALAR_ARRAY(fnname, datatype, fmtstr, convfunc) \
+static void \
+fnname(StringInfo str, const datatype *arr, int len) \
+{ \
+	if (arr != NULL) \
+	{ \
+		appendStringInfoChar(str, '('); \
+		for (int i = 0; i < len; i++) \
+			appendStringInfo(str, fmtstr, convfunc(arr[i])); \
+		appendStringInfoChar(str, ')'); \
+	} \
+	else \
+		appendStringInfoString(str, "<>"); \
+}
+
+WRITE_SCALAR_ARRAY(writeAttrNumberCols, AttrNumber, " %d",)
+WRITE_SCALAR_ARRAY(writeOidCols, Oid, " %u",)
+WRITE_SCALAR_ARRAY(writeIndexCols, Index, " %u",)
+WRITE_SCALAR_ARRAY(writeIntCols, int, " %d",)
+WRITE_SCALAR_ARRAY(writeBoolCols, bool, " %s", booltostr)
+
+/*
+ * Print a List.
+ */
 static void
 _outList(StringInfo str, const List *node)
 {
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
index a2391280be..c77c3984ca 100644
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -502,97 +502,45 @@ readDatum(bool typbyval)
 }
 
 /*
- * readAttrNumberCols
- */
-AttrNumber *
-readAttrNumberCols(int numCols)
-{
-	int			tokenLength,
-				i;
-	const char *token;
-	

Re: pgsql: Default to hidden visibility for extension libraries where possi

2022-07-19 Thread Andres Freund
Hi,

On 2022-07-19 08:31:49 -0700, Andres Freund wrote:
> But I think we probably should err on the side of adding more
> declarations, rather than the oposite.

To see if there are other declarations that should be added, I used
https://codesearch.debian.net/search?q=load_external_function=1=1

which shows plpgsql_check and hstore_pllua. All the hstore symbols for
the latter are exported already.

Greetings,

Andres Freund




Re: pgsql: Default to hidden visibility for extension libraries where possi

2022-07-19 Thread Andres Freund
Hi,

On 2022-07-19 16:28:07 +0200, Alvaro Herrera wrote:
> Anyway, the minimal patch that makes plpgsql_check tests pass is
> attached.

Do you want to commit that or should I?

Greetings,

Andres Freund




Re: [PATCH] Log details for client certificate failures

2022-07-19 Thread Andres Freund
Hi,

On 2022-07-19 15:08:38 -0700, Jacob Champion wrote:
> v2 adds escaping to pg_clean_ascii(). My original attempt used
> StringInfo allocation, but that didn't play well with guc_malloc(), so
> I switched to a two-pass API where the caller allocates. Let me know
> if I'm missing something obvious; this way is more verbose than I'd
> like...

Hm, that's pretty awkward. Perhaps we can have a better API for
everything but guc.c?

Or alternatively, perhaps we can just make pg_clean_ascii() return NULL
if allocation failed and then guc_strdup() the result in guc.c?

If we end up needing a two phase approach, why use the same function for
both phases? That seems quite awkward.
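
For illustration, a single-pass shape along those lines (a rough sketch;
the name, allocator, and exact signature are my assumptions):

static char *
clean_ascii_copy(const char *str)
{
    StringInfoData buf;

    initStringInfo(&buf);
    for (const char *p = str; *p != '\0'; p++)
    {
        if (*p < 32 || *p > 126)
            appendStringInfo(&buf, "\\x%02x", (unsigned char) *p);
        else
            appendStringInfoChar(&buf, *p);
    }
    /* guc.c could guc_strdup() buf.data and pfree() the buffer afterwards */
    return buf.data;
}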

Greetings,

Andres Freund




Re: [PATCH] Log details for client certificate failures

2022-07-19 Thread Jacob Champion
On Tue, Jul 19, 2022 at 10:09 AM Andres Freund  wrote:
> On 2022-07-19 12:39:43 -0400, Tom Lane wrote:
> > Having said that, I struggle to see why we are panicking about badly
> > encoded log data from this source while blithely ignoring the problems
> > posed by non-ASCII role names, database names, and tablespace names.
>
> I think we should fix these as well. I'm not as concerned about post-auth
> encoding issues (i.e. tablespace name) as about pre-auth data (role name,
> database name) - obviously being allowed to log in already is a pretty good
> filter...

v2 adds escaping to pg_clean_ascii(). My original attempt used
StringInfo allocation, but that didn't play well with guc_malloc(), so
I switched to a two-pass API where the caller allocates. Let me know
if I'm missing something obvious; this way is more verbose than I'd
like...

Thanks,
--Jacob
From d99b59652f0b8479a5df0bc50357b5c4617f9fc2 Mon Sep 17 00:00:00 2001
From: Jacob Champion 
Date: Tue, 19 Jul 2022 10:48:58 -0700
Subject: [PATCH v2 1/2] pg_clean_ascii(): escape bytes rather than lose them

Rather than replace each unprintable byte with a '?' character, replace
it with a hex escape instead. The API is now two-pass (one pass to get
the escaped length of the string, the second pass to perform the
escaping), in order to allow the use of guc_malloc'd buffers.
---
 src/backend/postmaster/postmaster.c |  4 +--
 src/backend/utils/misc/guc.c| 10 ++--
 src/common/string.c | 38 ++---
 src/include/common/string.h |  2 +-
 4 files changed, 40 insertions(+), 14 deletions(-)

diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 1c25457526..8f5cdf4380 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -2284,9 +2284,9 @@ retry1:
  */
 if (strcmp(nameptr, "application_name") == 0)
 {
-	char	   *tmp_app_name = pstrdup(valptr);
+	char	   *tmp_app_name = palloc(pg_clean_ascii(valptr, NULL));
 
-	pg_clean_ascii(tmp_app_name);
+	pg_clean_ascii(valptr, tmp_app_name);
 
 	port->application_name = tmp_app_name;
 }
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 0328029d43..2e1a7af315 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -12480,7 +12480,10 @@ static bool
 check_application_name(char **newval, void **extra, GucSource source)
 {
 	/* Only allow clean ASCII chars in the application name */
-	pg_clean_ascii(*newval);
+	char *buf = guc_malloc(ERROR, pg_clean_ascii(*newval, NULL));
+
+	pg_clean_ascii(*newval, buf);
+	*newval = buf;
 
 	return true;
 }
@@ -12496,7 +12499,10 @@ static bool
 check_cluster_name(char **newval, void **extra, GucSource source)
 {
 	/* Only allow clean ASCII chars in the cluster name */
-	pg_clean_ascii(*newval);
+	char *buf = guc_malloc(ERROR, pg_clean_ascii(*newval, NULL));
+
+	pg_clean_ascii(*newval, buf);
+	*newval = buf;
 
 	return true;
 }
diff --git a/src/common/string.c b/src/common/string.c
index 16940d1fa7..82a8afa4a9 100644
--- a/src/common/string.c
+++ b/src/common/string.c
@@ -22,6 +22,7 @@
 #endif
 
 #include "common/string.h"
+#include "lib/stringinfo.h"
 
 
 /*
@@ -59,9 +60,11 @@ strtoint(const char *pg_restrict str, char **pg_restrict endptr, int base)
 
 
 /*
- * pg_clean_ascii -- Replace any non-ASCII chars with a '?' char
+ * pg_clean_ascii -- Replace any non-ASCII chars with a "\xXX" string
  *
- * Modifies the string passed in which must be '\0'-terminated.
+ * Puts an escaped copy into the dst buffer, which must be at least as big as
+ * the return value of pg_clean_ascii(src, NULL). The input string must be
+ * '\0'-terminated.
  *
  * This function exists specifically to deal with filtering out
  * non-ASCII characters in a few places where the client can provide an almost
@@ -73,22 +76,39 @@ strtoint(const char *pg_restrict str, char **pg_restrict endptr, int base)
  * In general, this function should NOT be used- instead, consider how to handle
  * the string without needing to filter out the non-ASCII characters.
  *
- * Ultimately, we'd like to improve the situation to not require stripping out
- * all non-ASCII but perform more intelligent filtering which would allow UTF or
+ * Ultimately, we'd like to improve the situation to not require replacing all
+ * non-ASCII but perform more intelligent filtering which would allow UTF or
  * similar, but it's unclear exactly what we should allow, so stick to ASCII only
  * for now.
  */
-void
-pg_clean_ascii(char *str)
+size_t
+pg_clean_ascii(const char *str, char *dst)
 {
-	/* Only allow clean ASCII chars in the string */
-	char	   *p;
+	const char *p;
+	size_t		i = 0;
 
 	for (p = str; *p != '\0'; p++)
 	{
+		/* Only allow clean ASCII chars in the string */
 		if (*p < 32 || *p > 126)
-			*p = '?';
+		{
+			if (dst)
+				snprintf(&dst[i], 5, "\\x%02x", (unsigned char) *p);
+			i += 4;
+		}
+		else
+		{
+			if (dst)
+dst[i] 

Re: pg_parameter_aclcheck() and trusted extensions

2022-07-19 Thread Nathan Bossart
On Tue, Jul 19, 2022 at 04:27:08PM -0400, Tom Lane wrote:
> Nathan Bossart  writes:
>> However, I wonder if a
>> better way to fix this is to provide a way to stop set_config_option() from
>> throwing errors (e.g., setting elevel to -1).  That way, we could remove
>> the manual permissions checks in favor of always using the real ones, which
>> might help prevent similar bugs in the future.
> 
> I thought about that for a bit.  You could almost do it today if you
> passed elevel == DEBUG5; the ensuing log chatter for failures would be
> down in the noise compared to everything else you would see with
> min_messages cranked down that far.  However,
> 
> (1) As things stand, set_config_option()'s result does not distinguish
> no-permissions failures from other problems, so we'd need some rejiggering
> of its API anyway.
> 
> (2) As you mused upthread, it's possible that ACL_SET isn't what we should
> be checking here, but some more-specific privilege.  So I'd just as soon
> keep this privilege check separate from set_config_option's.

I think we'd also need to keep the manual permissions checks for
placeholders, so it wouldn't save much, anyway.

> I'll push ahead with fixing it like this.

Sounds good.

-- 
Nathan Bossart
Amazon Web Services: https://aws.amazon.com




Re: Slow standby snapshot

2022-07-19 Thread Michail Nikolaev
Hello, Andrey.

> I've looked into v5.
Thanks!

Patch is updated accordingly your remarks.

Best regards,
Michail.
From 1301a262dea7f541c11092851e82f10932150ee3 Mon Sep 17 00:00:00 2001
From: Michail Nikolaev 
Date: Tue, 19 Jul 2022 23:50:53 +0300
Subject: [PATCH v6] Currently KnownAssignedXidsGetAndSetXmin requires an
 iterative loop through the KnownAssignedXids array, including xids marked as
 invalid. The performance impact is especially noticeable in the presence of
 long (few seconds) transactions on the primary, a big number (few thousands)
 of max_connections, and a high read workload on the standby. Most of the CPU
 is spent looping through KnownAssignedXids while almost all xids are invalid
 anyway. KnownAssignedXidsCompress removes invalid xids from time to time,
 but performance is still affected.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To increase performance, we lazily maintain an offset in the KnownAssignedXidsNextOffset array to skip xids
known to be invalid. KnownAssignedXidsNextOffset does not always point to the "next" valid xid; it is just
some offset that is safe to skip (known to be filled with only invalid xids).
---
 src/backend/storage/ipc/procarray.c | 57 -
 1 file changed, 48 insertions(+), 9 deletions(-)

diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index dadaa958a8..f613ae2f09 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -271,6 +271,7 @@ static TransactionId cachedXidIsNotInProgress = InvalidTransactionId;
  */
 static TransactionId *KnownAssignedXids;
 static bool *KnownAssignedXidsValid;
+static int32 *KnownAssignedXidsNextOffset;
 static TransactionId latestObservedXid = InvalidTransactionId;
 
 /*
@@ -450,6 +451,12 @@ CreateSharedProcArray(void)
 			ShmemInitStruct("KnownAssignedXidsValid",
 			mul_size(sizeof(bool), TOTAL_MAX_CACHED_SUBXIDS),
			&found);
+		KnownAssignedXidsNextOffset = (int32 *)
+ShmemInitStruct("KnownAssignedXidsNextOffset",
+mul_size(sizeof(int32), TOTAL_MAX_CACHED_SUBXIDS),
+				&found);
+		for (int i = 0; i < TOTAL_MAX_CACHED_SUBXIDS; i++)
+			KnownAssignedXidsNextOffset[i] = 1;
 	}
 }
 
@@ -4539,7 +4546,15 @@ ExpireOldKnownAssignedTransactionIds(TransactionId xid)
  * XID entry itself.  This preserves the property that the XID entries are
  * sorted, so we can do binary searches easily.  Periodically we compress
  * out the unused entries; that's much cheaper than having to compress the
- * array immediately on every deletion.
+ * array immediately on every deletion.  Also, we lazily maintain an offset
+ * in the KnownAssignedXidsNextOffset[] array to skip xids known to be
+ * invalid.  This helps to skip the gaps and can significantly increase
+ * performance in the case of long transactions on the primary.
+ * KnownAssignedXidsNextOffset is updated while taking a snapshot.  An entry
+ * holds not the exact offset to the next valid xid, just a number that
+ * tends toward that offset.  KnownAssignedXidsNextOffset[] values are read
+ * and updated without additional locking because four-byte reads and writes
+ * are assumed to be atomic.
  *
  * The actually valid items in KnownAssignedXids[] and KnownAssignedXidsValid[]
  * are those with indexes tail <= i < head; items outside this subscript range
@@ -4577,7 +4592,7 @@ ExpireOldKnownAssignedTransactionIds(TransactionId xid)
  *		must happen)
  *	* Compressing the array is O(S) and requires exclusive lock
  *	* Removing an XID is O(logS) and requires exclusive lock
- *	* Taking a snapshot is O(S) and requires shared lock
+ *	* Taking a snapshot is O(S), amortized O(N) next call; requires shared lock
  *	* Checking for an XID is O(logS) and requires shared lock
  *
  * In comparison, using a hash table for KnownAssignedXids would mean that
@@ -4637,12 +4652,13 @@ KnownAssignedXidsCompress(bool force)
 	 * re-aligning data to 0th element.
 	 */
 	compress_index = 0;
-	for (i = tail; i < head; i++)
+	for (i = tail; i < head; i += KnownAssignedXidsNextOffset[i])
 	{
 		if (KnownAssignedXidsValid[i])
 		{
 			KnownAssignedXids[compress_index] = KnownAssignedXids[i];
 			KnownAssignedXidsValid[compress_index] = true;
+			KnownAssignedXidsNextOffset[compress_index] = 1;
 			compress_index++;
 		}
 	}
@@ -4745,6 +4761,7 @@ KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid,
 	{
 		KnownAssignedXids[head] = next_xid;
 		KnownAssignedXidsValid[head] = true;
+		KnownAssignedXidsNextOffset[head] = 1;
 		TransactionIdAdvance(next_xid);
 		head++;
 	}
@@ -4960,7 +4977,7 @@ KnownAssignedXidsRemovePreceding(TransactionId removeXid)
 	tail = pArray->tailKnownAssignedXids;
 	head = pArray->headKnownAssignedXids;
 
-	for (i = tail; i < head; i++)
+	for (i = tail; i < head; i += KnownAssignedXidsNextOffset[i])
 	{
 		if (KnownAssignedXidsValid[i])
 		{
@@ -4983,7 +5000,7 @@ KnownAssignedXidsRemovePreceding(TransactionId removeXid)
 	/*
 	 * 

Re: First draft of the PG 15 release notes

2022-07-19 Thread Bruce Momjian
On Tue, Jul 19, 2022 at 01:13:07PM -0500, Justin Pryzby wrote:
> On Tue, Jul 19, 2022 at 01:24:30PM -0400, Bruce Momjian wrote:
> > Well, I put the --no-synchronized-snapshots item in incompatibilities
> > since it is a user-visible change that might require script adjustments.
> > However, I put the limit of pg_dump to 9.2 and greater into the pg_dump
> > section.  Are you suggesting I move the--no-synchronized-snapshots item
> > down there?  That doesn't match with the way I have listed other
> > incompatibilities so I am resistant to do that.
> 
> I'd rather see the "limit support to v9.2" be moved or added to the
> "incompatibilities" section, maybe with "remove --no-synchronized-snapshots"
> as a secondary sentence.

Is removing support for an older version an incompatibility --- I didn't
think so.

> > > > 0. Add support for LZ4 and Zstandard compression of server-side base 
> > > > backups (Jeevan Ladhe, Robert Haas)
> > > > 1. Allow pg_basebackup to use LZ4 and Zstandard compression on 
> > > > server-side base backup files (Dipesh Pandit, Jeevan Ladhe)
> > > > 2. Allow pg_basebackup's --compress option to control the compression 
> > > > method and options (Michael Paquier, Robert Haas)
> > > >New options include server-gzip (gzip on the server), client-gzip 
> > > > (same as gzip).
> > > > 3. Allow pg_basebackup to compress on the server side and decompress on 
> > > > the client side before storage (Dipesh Pandit)
> > > >This is accomplished by specifying compression on the server side 
> > > > and plain output format.
> > > 
> > > I still think these expose the incremental development rather than the
> > > user-facing change.
> > 
> > > 1. It seems wrong to say "server-side" since client-side compression with
> > > LZ4/zstd is also supported.
> > 
> > Agreed.  I changed it to:
> > 
> >Allow pg_basebackup to do LZ4 and Zstandard server-side compression
> >on base backup files (Dipesh Pandit, Jeevan Ladhe)
> 
> This still misses the point that those compression algs are also supported on
> the client side, so it seems misleading to mention "server-side" support.

I reworked that paragraph in the attached patch.  What we did was to add
server-side gzip/LZ4/Zstandard, and client-side LZ4/Zstandard.  (We already had
client-side gzip.)  Hopefully the new text is clearer.  You can see the
new output here:

https://momjian.us/pgsql_docs/release-15.html

> > > > Allow custom scan provders to indicate if they support projections 
> > > > (Sven Klemm)
> > > > The default is now that custom scan providers can't support 
> > > > projections, so they need to be updated for this release.
> > > 
> > > Per the commit message, they don't "need" to be updated.
> > > I think this should say "The default now assumes that a custom scan 
> > > provider
> > > does not support projections; to retain optimal performance, they should 
> > > be
> > > updated to indicate whether that's supported.
> > 
> > Okay, I went with this text:
> > 
> >   The default is now that custom scan providers are assumed to not
> >   support projections;  those that do need to be updated for this
> >   release.
> 
> I'd say "those that do *will need to be updated" otherwise the sentence can
> sound like it means "those that need to be updated [will] ..."

Oh, good point, done.

Cumulative patch attached.

-- 
  Bruce Momjian  https://momjian.us
  EDB  https://enterprisedb.com

  Indecision is a decision.  Inaction is an action.  Mark Batterson

diff --git a/doc/src/sgml/release-15.sgml b/doc/src/sgml/release-15.sgml
new file mode 100644
index dfa3c74..1cf6375
*** a/doc/src/sgml/release-15.sgml
--- b/doc/src/sgml/release-15.sgml
*** Author: Tom Lane 
*** 544,551 
  
   
The default is now that custom scan providers are assumed to not
!   support projections;  those that do need to be updated for this
!   release.
   
  
  
--- 544,551 
  
   
The default is now that custom scan providers are assumed to not
!   support projections;  those that do will need to be updated for
!   this release.
   
  
  
*** Author: Robert Haas 
  2022-03-08 [7cf085f07] Add support for zstd base backup compression.
  -->
  
   

!Allow pg_basebackup to do LZ4 and
!Zstandard server-side compression on base backup files (Dipesh
!Pandit, Jeevan Ladhe)

-  
- 
- 
  
-  

!Allow pg_basebackup's
!--compress option to control the compression
!method and options (Michael Paquier, Robert Haas)

   
  
--- 2495,2515 
  2022-02-11 [751b8d23b] pg_basebackup: Allow client-side LZ4 (de)compression.
  Author: Robert Haas 
  2022-03-08 [7cf085f07] Add support for zstd base backup compression.
+ Author: Robert Haas 
+ 2022-01-24 [0ad803291] Server-side gzip compression.
  -->
  
   

!Allow 

Re: pg_parameter_aclcheck() and trusted extensions

2022-07-19 Thread Tom Lane
Nathan Bossart  writes:
>> I think this is because GUCArrayReset() is the only caller of
>> validate_option_array_item() that sets skipIfNoPermissions to true.  The
>> others fall through to set_config_option(), which does a
>> pg_parameter_aclcheck().  So, you are right.

> Here's a small patch that seems to fix this case.

Yeah, this is more or less what I was thinking of.

> However, I wonder if a
> better way to fix this is to provide a way to stop set_config_option() from
> throwing errors (e.g., setting elevel to -1).  That way, we could remove
> the manual permissions checks in favor of always using the real ones, which
> might help prevent similar bugs in the future.

I thought about that for a bit.  You could almost do it today if you
passed elevel == DEBUG5; the ensuing log chatter for failures would be
down in the noise compared to everything else you would see with
min_messages cranked down that far.  However,

(1) As things stand, set_config_option()'s result does not distinguish
no-permissions failures from other problems, so we'd need some rejiggering
of its API anyway.

(2) As you mused upthread, it's possible that ACL_SET isn't what we should
be checking here, but some more-specific privilege.  So I'd just as soon
keep this privilege check separate from set_config_option's.

I'll push ahead with fixing it like this.

regards, tom lane




Re: [PATCH] Introduce array_shuffle() and array_sample()

2022-07-19 Thread Martin Kalcher

Am 19.07.22 um 16:20 schrieb Tom Lane:


So I withdraw my original position.  These functions should just
shuffle or select in the array's first dimension, preserving
subarrays.  Or else be lazy and reject more-than-one-D arrays;
but it's probably not that hard to handle them.



Here is a patch with dimension-aware array_shuffle() and array_sample().

If you think array_flatten() is desirable, I can take a look at it.
Maybe a second parameter would be nice to specify the target dimension:


  select array_flatten(array[[[1,2],[3,4]],[[5,6],[7,8]]], 1);
  ---
   {1,2,3,4,5,6,7,8}

  select array_flatten(array[[[1,2],[3,4]],[[5,6],[7,8]]], 2);
  ---
   {{1,2,3,4},{5,6,7,8}}
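
In the meantime, a full flatten can be approximated in plain SQL, as a
sketch relying on unnest() expanding a multidimensional array in storage
order (the exact aggregation order is an assumption here):

  select array_agg(x) from unnest(array[[[1,2],[3,4]],[[5,6],[7,8]]]) as t(x);
  ---
   {1,2,3,4,5,6,7,8}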

MartinFrom 2aa6d72ff0f4d8835ee2f09f8cdf16b7e8005e56 Mon Sep 17 00:00:00 2001
From: Martin Kalcher 
Date: Sun, 17 Jul 2022 18:06:04 +0200
Subject: [PATCH] Introduce array_shuffle() and array_sample()

* array_shuffle() shuffles the elements of an array.
* array_sample() chooses n elements from an array by random.

Shuffling of arrays can already be accomplished with SQL
using unnest() and array_agg(order by random()). But that is
very slow compared to the new functions. In addition the new functions
are dimension aware.
---
 doc/src/sgml/func.sgml   |  35 +
 src/backend/utils/adt/arrayfuncs.c   | 189 +++
 src/include/catalog/pg_proc.dat  |   6 +
 src/test/regress/expected/arrays.out |  60 +
 src/test/regress/sql/arrays.sql  |  17 +++
 5 files changed, 307 insertions(+)

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index b6783b7ad0..6b96897244 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -19395,6 +19395,41 @@ SELECT NULLIF(value, '(none)') ...

   
 
+  
+   
+
+ array_sample
+
+array_sample ( array anyarray, n integer )
+anyarray
+   
+   
+Returns n randomly chosen elements from array.
+The order of the elements in resulting array is unspecified.
+   
+   
+array_sample(ARRAY[[1,2],[3,4],[5,6]], 2)
+{{5,6},{3,4}}
+   
+  
+
+  
+   
+
+ array_shuffle
+
+array_shuffle ( anyarray )
+anyarray
+   
+   
+Shuffles the first dimension of the array.
+   
+   
+array_shuffle(ARRAY[[1,2],[3,4],[5,6]])
+{{5,6},{3,4},{1,2}}
+   
+  
+
   

 
diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index fb167f226a..3de1b0db30 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -6872,3 +6872,192 @@ trim_array(PG_FUNCTION_ARGS)
 
 	PG_RETURN_DATUM(result);
 }
+
+/*
+ * Produce array with max n random items from given array in random order.
+ *
+ * array: array object to examine (must not be NULL)
+ * n: max number of items
+ * elmtyp, elmlen, elmbyval, elmalign: info for the datatype of the items
+ *
+ * NOTE: it would be cleaner to look up the elmlen/elmbval/elmalign info
+ * from the system catalogs, given the elmtype.  However, the caller is
+ * in a better position to cache this info across multiple uses, or even
+ * to hard-wire values if the element type is hard-wired.
+ */
+static ArrayType *
+array_shuffle_n(ArrayType *array, int n,
+Oid elmtyp, int elmlen, bool elmbyval, char elmalign)
+{
+	ArrayType  *result;
+	int			ndim,
+			   *dims,
+			   *lbs,
+rdims[MAXDIM],
+nelm,
+nitem,
+i,
+j,
+k;
+	Datum		elm,
+			   *elms,
+			   *relms;
+	bool		nul,
+			   *nuls,
+			   *rnuls;
+
+	ndim = ARR_NDIM(array);
+	dims = ARR_DIMS(array);
+	lbs = ARR_LBOUND(array);
+
+	if (ndim < 1 || dims[0] < 1 || n < 1)
+		return construct_empty_array(elmtyp);
+
+	deconstruct_array(array, elmtyp, elmlen, elmbyval, elmalign,
+	  , , );
+
+	/* Calculate number of elements per item. */
+	nelm = (ndim > 1) ? ArrayGetNItems(ndim - 1, dims + 1) : 1;
+	elms = relms;
+	nuls = rnuls;
+	nitem = dims[0];
+	n = Min(n, nitem);
+
+	/*
+	 * Shuffle array using the Fisher-Yates algorithm. Swap head with a randomly
+	 * chosen item and increment head.
+	 */
+	for (i = 0; i < n; i++)
+	{
+		k = (rand() % (nitem - i)) * nelm;
+
+		/* Swap all elements in head with elements in item k. */
+		for (j = 0; j < nelm; j++)
+		{
+			elm = *elms;
+			nul = *nuls;
+			*elms = elms[k];
+			*nuls = nuls[k];
+			elms[k] = elm;
+			nuls[k] = nul;
+			elms++;
+			nuls++;
+		}
+	}
+
+	memcpy(rdims, dims, ndim * sizeof(int));
+	rdims[0] = n;
+
+	result = construct_md_array(relms, rnuls, ndim, rdims, lbs,
+elmtyp, elmlen, elmbyval, elmalign);
+
+	pfree(relms);
+	pfree(rnuls);
+
+	return result;
+}
+
+/*
+ * Shuffle the elements of an array.
+ */
+Datum
+array_shuffle(PG_FUNCTION_ARGS)
+{
+	ArrayType  *array,
+			   *result;
+	int16		elmlen;
+	bool		elmbyval;
+	char		elmalign;
+	Oid			elmtyp;
+	TypeCacheEntry *typentry;
+	int			n;
+
+	

Re: postgres_fdw: using TABLESAMPLE to collect remote sample

2022-07-19 Thread Tom Lane
Tomas Vondra  writes:
> If we want to improve sampling for partitioned cases (where the foreign
> table is just one of many partitions), I think we'd have to rework how
> we determine sample size for each partition. Now we simply calculate
> that from relpages, which seems quite fragile (different amounts of
> bloat, different tuple densities) and somewhat strange for FDW servers
> that don't use the same "page" concept.

> So it may easily happen that we determine bogus sample sizes for each
> partition. The difficulties in calculating the sample_frac are just a
> secondary issue.

> OTOH the concept of a "row" seems way more general, so perhaps
> acquire_inherited_sample_rows should use reltuples, and if we want to do
> correction it should happen at this stage already.

Yeah, there's definitely something to be said for changing that to be
based on rowcount estimates instead of physical size.  I think it's
a matter for a different patch though, and not a reason to hold up
this one.

regards, tom lane




Re: Convert planner's AggInfo and AggTransInfo to Nodes

2022-07-19 Thread Tom Lane
Peter Eisentraut  writes:
> On 18.07.22 18:08, Tom Lane wrote:
>> I'm kind of tempted to mount an effort to get rid of as many of
>> pathnodes.h's "read_write_ignore" annotations as possible.  Some are
>> necessary to prevent infinite recursion, and others represent considered
>> judgments that they'd bloat node dumps more than they're worth --- but
>> I think quite a lot of them arose from plain laziness about updating
>> outfuncs.c.  With the infrastructure we have now, that's no longer
>> a good reason.

> That was my impression as well, and I agree it would be good to sort 
> that out.

I had a go at doing this, and ended up with something that seems
reasonable for now (attached).  The thing that'd have to be done to
make additional progress is to convert a lot of partitioning-related
structs into full Nodes.  That seems like it might possibly be
worth doing, but I don't feel like doing it.  I doubt that making
planner node dumps smarter is a sufficient excuse for that anyway.
(But possibly if we then larded related code with castNode() and
sibling macros, there'd be enough of a gain in type-safety to
justify it?)

I learned a couple of interesting things along the way:

* I'd thought we already had outfuncs support for writing an array
of node pointers.  We don't, but it's easily added.  I chose to
write the array with parenthesis decoration, mainly because that
eases moving around it in emacs.

* WRITE_OID_ARRAY and WRITE_BOOL_ARRAY needed extension to handle a null
array pointer.  I think we should make all the WRITE_FOO_ARRAY macros
work alike, so I added that to all of them.  I first tried to make the
rest work like WRITE_INDEX_ARRAY, but that failed because readfuncs.c
isn't expecting "<>" for an empty array; it's expecting nothing at
all.  (Note there is no readfuncs equivalent to WRITE_INDEX_ARRAY.)
What I've done here is to change WRITE_INDEX_ARRAY to work like the
others and print nothing for an empty array, but I wonder if now
wouldn't be a good time to redefine the serialized representation
to be more robust.  I'm imagining "<>" for a NULL array pointer and
"(item item item)" otherwise, allowing a cross-check that we're
getting the right number of items.

* gen_node_support.pl was being insufficiently careful about parsing
type names, so I tightened its regexes a bit.

regards, tom lane

diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl
index f3309c3000..bee48696c7 100644
--- a/src/backend/nodes/gen_node_support.pl
+++ b/src/backend/nodes/gen_node_support.pl
@@ -441,6 +441,8 @@ foreach my $infile (@ARGV)
 	$type =~ s/\s*$//;
 	# strip space between type and "*" (pointer) */
 	$type =~ s/\s+\*$/*/;
+	# strip space between type and "**" (array of pointers) */
+	$type =~ s/\s+\*\*$/**/;
 
 	die
 	  "$infile:$lineno: cannot parse data type in \"$line\"\n"
@@ -745,8 +747,8 @@ _equal${n}(const $n *a, const $n *b)
   unless $equal_ignore || $t eq 'CoercionForm';
 			}
 		}
-		# scalar type pointer
-		elsif ($t =~ /(\w+)\*/ and elem $1, @scalar_types)
+		# arrays of scalar types
+		elsif ($t =~ /^(\w+)\*$/ and elem $1, @scalar_types)
 		{
 			my $tt = $1;
 			if (!defined $array_size_field)
@@ -780,13 +782,14 @@ _equal${n}(const $n *a, const $n *b)
 			print $eff "\tCOMPARE_SCALAR_FIELD($f);\n" unless $equal_ignore;
 		}
 		# node type
-		elsif ($t =~ /(\w+)\*/ and elem $1, @node_types)
+		elsif (($t =~ /^(\w+)\*$/ or $t =~ /^struct\s+(\w+)\*$/)
+			and elem $1, @node_types)
 		{
 			print $cff "\tCOPY_NODE_FIELD($f);\n"unless $copy_ignore;
 			print $eff "\tCOMPARE_NODE_FIELD($f);\n" unless $equal_ignore;
 		}
 		# array (inline)
-		elsif ($t =~ /\w+\[/)
+		elsif ($t =~ /^\w+\[\w+\]$/)
 		{
 			print $cff "\tCOPY_ARRAY_FIELD($f);\n"unless $copy_ignore;
 			print $eff "\tCOMPARE_ARRAY_FIELD($f);\n" unless $equal_ignore;
@@ -894,11 +897,16 @@ _read${n}(void)
 		my @a = @{ $node_type_info{$n}->{field_attrs}{$f} };
 
 		# extract per-field attributes
-		my $read_write_ignore = 0;
+		my $array_size_field;
 		my $read_as_field;
+		my $read_write_ignore = 0;
 		foreach my $a (@a)
 		{
-			if ($a =~ /^read_as\(([\w.]+)\)$/)
+			if ($a =~ /^array_size\(([\w.]+)\)$/)
+			{
+				$array_size_field = $1;
+			}
+			elsif ($a =~ /^read_as\(([\w.]+)\)$/)
 			{
+				$read_as_field = $1;
 			}
@@ -1015,19 +1023,10 @@ _read${n}(void)
 			print $off "\tWRITE_ENUM_FIELD($f, $t);\n";
 			print $rff "\tREAD_ENUM_FIELD($f, $t);\n" unless $no_read;
 		}
-		# arrays
-		elsif ($t =~ /(\w+)(\*|\[)/ and elem $1, @scalar_types)
+		# arrays of scalar types
+		elsif ($t =~ /^(\w+)(\*|\[\w+\])$/ and elem $1, @scalar_types)
 		{
 			my $tt = uc $1;
-			my $array_size_field;
-			foreach my $a (@a)
-			{
-				if ($a =~ /^array_size\(([\w.]+)\)$/)
-				{
-					$array_size_field = $1;
-					last;
-				}
-			}
 			if (!defined $array_size_field)
 			{
 die "no array size defined for $n.$f of type $t\n";
@@ -1080,11 +1079,38 @@ 

Re: postgres_fdw: using TABLESAMPLE to collect remote sample

2022-07-19 Thread Tomas Vondra



On 7/18/22 20:45, Tom Lane wrote:
> Tomas Vondra  writes:
>> Thanks. I'll switch this to "needs review" now.
> 
> OK, I looked through this, and attach some review suggestions in the
> form of a delta patch.  (0001 below is your two patches merged, 0002
> is my delta.)  A lot of the delta is comment-smithing, but not all.
> 

Thanks!

> After reflection I think that what you've got, ie use reltuples but
> don't try to sample if reltuples <= 0, is just fine.  The remote
> would only have reltuples <= 0 in a never-analyzed table, which
> shouldn't be a situation that persists for long unless the table
> is tiny.  Also, if reltuples is in error, the way to bet is that
> it's too small thanks to the table having been enlarged.  But
> an error in that direction doesn't hurt us: we'll overestimate
> the required sample_frac and pull back more data than we need,
> but we'll still end up with a valid sample of the right size.
> So I doubt it's worth the complication to try to correct based
> on relpages etc.  (Note that any such correction would almost
> certainly end in increasing our estimate of reltuples.  But
> it's safer to have an underestimate than an overestimate.)
> 

I mostly agree, particularly for the non-partitioned case.

If we want to improve sampling for partitioned cases (where the foreign
table is just one of many partitions), I think we'd have to rework how
we determine sample size for each partition. Now we simply calculate
that from relpages, which seems quite fragile (different amounts of
bloat, different tuple densities) and somewhat strange for FDW servers
that don't use the same "page" concept.

So it may easily happen that we determine bogus sample sizes for each
partition. The difficulties in calculating the sample_frac are just a
secondary issue.

OTOH the concept of a "row" seems way more general, so perhaps
acquire_inherited_sample_rows should use reltuples, and if we want to do
correction it should happen at this stage already.


> I messed around with the sample_frac choosing logic slightly,
> to make it skip pointless calculations if we decide right off
> the bat to disable sampling.  That way we don't need to worry
> about avoiding zero divide, nor do we have to wonder if any
> of the later calculations could misbehave.
> 

Thanks.

> I left your logic about "disable if saving fewer than 100 rows"
> alone, but I have to wonder if using an absolute threshold rather
> than a relative one is well-conceived.  Sampling at a rate of
> 99.9 percent seems pretty pointless, but this code is perfectly
> capable of accepting that if reltuples is big enough.  So
> personally I'd do that more like
> 
>   if (sample_frac > 0.95)
>   method = ANALYZE_SAMPLE_OFF;
> 
> which is simpler and would also eliminate the need for the previous
> range-clamp step.  I'm not sure what the right cutoff is, but
> your "100 tuples" constant is just as arbitrary.
> 

I agree there probably is not much difference between a threshold on
sample_frac directly and one on the number of rows, at least in general.
My reasoning for switching to "100 rows" is that in most cases the network
transfer is probably more costly than the "local costs", and 5% may be
quite a few rows (particularly with a higher statistics target). I guess
the proper approach would be to do some simple costing, but that seems
like overkill.

> I rearranged the docs patch too.  Where you had it, analyze_sampling
> was between fdw_startup_cost/fdw_tuple_cost and the following para
> discussing them, which didn't seem to me to flow well at all.  I ended
> up putting analyze_sampling in its own separate list.  You could almost
> make a case for giving it its own , but I concluded that was
> probably overkill.
> 

Thanks.

> One thing I'm not happy about, but did not touch here, is the expense
> of the test cases you added.  On my machine, that adds a full 10% to
> the already excessively long runtime of postgres_fdw.sql --- and I
> do not think it's buying us anything.  It is not this module's job
> to test whether bernoulli sampling works on partitioned tables.
> I think you should just add enough to make sure we exercise the
> relevant code paths in postgres_fdw itself.
> 

Right, I should have commented on that. The purpose of those tests was
verifying that if we change the sampling method on server/table, the
generated query changes accordingly, etc. But that's a bit futile
because we don't have a good way of verifying what query was used - it
worked during development, as I added elog(WARNING).


regards

-- 
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company




Re: First draft of the PG 15 release notes

2022-07-19 Thread Justin Pryzby
On Tue, Jul 19, 2022 at 01:24:30PM -0400, Bruce Momjian wrote:
> > > Remove pg_dump's --no-synchronized-snapshots option since all supported 
> > > server versions support synchronized snapshots (Tom Lane)
> > 
> > It'd be better to put that after the note about dropping support for 
> > upgrading
> > clusters older than v9.2 in psql/pg_dump/pg_upgrade.
> 
> Well, I put the --no-synchronized-snapshots item in incompatibilities
> since it is a user-visible change that might require script adjustments.
> However, I put the limit of pg_dump to 9.2 and greater into the pg_dump
> section.  Are you suggesting I move the --no-synchronized-snapshots item
> down there?  That doesn't match the way I have listed other
> incompatibilities, so I am resistant to doing that.

I'd rather see the "limit support to v9.2" be moved or added to the
"incompatibilities" section, maybe with "remove --no-synchronized-snapshots"
as a secondary sentence.

> > > 0. Add support for LZ4 and Zstandard compression of server-side base 
> > > backups (Jeevan Ladhe, Robert Haas)
> > > 1. Allow pg_basebackup to use LZ4 and Zstandard compression on 
> > > server-side base backup files (Dipesh Pandit, Jeevan Ladhe)
> > > 2. Allow pg_basebackup's --compress option to control the compression 
> > > method and options (Michael Paquier, Robert Haas)
> > >New options include server-gzip (gzip on the server), client-gzip 
> > > (same as gzip).
> > > 3. Allow pg_basebackup to compress on the server side and decompress on 
> > > the client side before storage (Dipesh Pandit)
> > >This is accomplished by specifying compression on the server side and 
> > > plain output format.
> > 
> > I still think these expose the incremental development rather than the
> > user-facing change.
> 
> > 1. It seems wrong to say "server-side" since client-side compression with
> > LZ4/zstd is also supported.
> 
> Agreed.  I changed it to:
> 
>Allow pg_basebackup to do LZ4 and Zstandard server-side compression
>on base backup files (Dipesh Pandit, Jeevan Ladhe)

This still misses the point that those compression algs are also supported on
the client side, so it seems misleading to mention "server-side" support.

> > > Allow custom scan providers to indicate if they support projections (Sven 
> > > Klemm)
> > > The default is now that custom scan providers can't support projections, 
> > > so they need to be updated for this release.
> > 
> > Per the commit message, they don't "need" to be updated.
> > I think this should say "The default now assumes that a custom scan provider
> > does not support projections; to retain optimal performance, they should be
> > updated to indicate whether that's supported.
> 
> Okay, I went with this text:
> 
>   The default is now that custom scan providers are assumed to not
>   support projections;  those that do need to be updated for this
>   release.

I'd say "those that do *will* need to be updated"; otherwise the sentence
can sound like it means "those that need to be updated [will] ..."

Thanks,
-- 
Justin




Re: System catalog documentation chapter

2022-07-19 Thread Bruce Momjian
On Mon, Jul 18, 2022 at 09:22:24PM -0400, Tom Lane wrote:
> Bruce Momjian  writes:
> > On Sat, Jul 16, 2022 at 10:53:17AM +0200, Peter Eisentraut wrote:
> >> Maybe this whole notion that "system views" is one thing is not suitable.
> 
> > Are you thinking we should just call the chapter "System Catalogs and
> > Views" and just place them alphabetically in a single chapter?
> 
> I didn't think that was Peter's argument at all.  He's complaining
> that "system views" isn't a monolithic category, which is a reasonable
> point, especially since we have a bunch of built-in views that appear
> in other chapters.  But to then also confuse them with catalogs isn't
> improving the situation.

I think I see now --- system _tables_ are really not for user consumption
but system views often are.  I am thinking the best approach is to move
most of the system views out of the system views section and into the
sections where they make sense.

> The views that are actually reinterpretations of catalog contents should
> probably be documented near the catalogs.  But a lot of stuff in that
> chapter is no such thing.  For example, it's really unclear why

Right.

> pg_backend_memory_contexts is documented here and not somewhere near
> the stats views.  We also have stuff like pg_available_extensions,

Right.

> pg_file_settings, and pg_timezone_names, which are reporting ground truth
> of some sort that didn't come from the catalogs.  I'm not sure if those
> belong near the catalogs or not.

I am thinking some of those need to be in the Server Configuration
chapter.

> The larger point, perhaps, is that this whole area is underneath
> "Part VII: Internals", and that being the case what you would expect
> to find here is stuff that we don't intend people to interact with
> in day-to-day usage.  Most of the "system views" are specifically
> intended for day-to-day use, maybe only by DBAs, but nonetheless they
> are user-facing in a way that the catalogs aren't.
> 
> Maybe we should move them all to Part IV, in a chapter or chapters
> adjacent to the Information Schema chapter.  Or maybe try to separate
> "user" views from "DBA" views, and put user views in Part IV while
> DBA views go into a new chapter in Part III, near the stats views.

I am going to look at moving system views that make sense into the
chapters where their contents are mentioned.  I don't think having a
central list of views is really helping us because we expect the views
to be used in ways the system catalogs would not be.

I will develop a proposed patch.

-- 
  Bruce Momjian  https://momjian.us
  EDB  https://enterprisedb.com

  Indecision is a decision.  Inaction is an action.  Mark Batterson





Re: First draft of the PG 15 release notes

2022-07-19 Thread Bruce Momjian
On Mon, Jul 18, 2022 at 08:23:23PM -0500, Justin Pryzby wrote:
> > Increase hash_mem_multiplier default to 2.0 (Peter Geoghegan)
> > This allows query hash operations to use double the amount of work_mem 
> > memory as other operations.
> 
> I wonder if it's worth pointing out that a query may end up using not just 2x
> more memory (since work_mem*hash_mem_multiplier is per node), but 2**N more,
> for N nodes.

Uh, I said "per operation" so people might realize there can be multiple
work_mem memory operations per query.  I don't think I can do more in
this text.  I can't think of a way to improve it without making it more
confusing.

> > Remove pg_dump's --no-synchronized-snapshots option since all supported 
> > server versions support synchronized snapshots (Tom Lane)
> 
> It'd be better to put that after the note about dropping support for upgrading
> clusters older than v9.2 in psql/pg_dump/pg_upgrade.

Well, I put the --no-synchronized-snapshots item in incompatibilities
since it is a user-visible change that might require script adjustments.
However, I put the limit of pg_dump to 9.2 and greater into the pg_dump
section.  Are you suggesting I move the --no-synchronized-snapshots item
down there?  That doesn't match the way I have listed other
incompatibilities, so I am resistant to doing that.

> > Enable system and TOAST btree indexes to efficiently store duplicates 
> > (Peter Geoghegan)
> 
> Say "btree indexes on system [and TOAST] tables"

Okay, updated to:

Allow btree indexes on system and TOAST tables to efficiently
store duplicates (Peter Geoghegan)

> > Prevent changes to columns only indexed by BRIN indexes from disabling HOT 
> > updates (Josef Simanek)
> 
> This was reverted

Ah, yes, removed.

> > Generate periodic log message during slow server starts (Nitin Jadhav, 
> > Robert Haas)
> messages plural
> 
> > Messages report the cause of the delay. The time interval for notification 
> > is controlled by the new server variable log_startup_progress_interval.
> *The messages

Ah, yes, fixed.

> > Add server variable shared_memory_size to report the size of allocated 
> > shared memory (Nathan Bossart)
> > Add server variable shared_memory_size_in_huge_pages to report the number 
> > of huge memory pages required (Nathan Bossart)
> 
> Maybe these should say server *parameter* since they're not really "variable".

Uh, I think of parameters as something passed.  We do call them server
variables, or read-only server variables.  I can add "read-only" but it
seems odd.

> > 0. Add support for LZ4 and Zstandard compression of server-side base 
> > backups (Jeevan Ladhe, Robert Haas)
> > 1. Allow pg_basebackup to use LZ4 and Zstandard compression on server-side 
> > base backup files (Dipesh Pandit, Jeevan Ladhe)
> > 2. Allow pg_basebackup's --compress option to control the compression 
> > method and options (Michael Paquier, Robert Haas)
> >New options include server-gzip (gzip on the server), client-gzip (same 
> > as gzip).
> > 3. Allow pg_basebackup to compress on the server side and decompress on the 
> > client side before storage (Dipesh Pandit)
> >This is accomplished by specifying compression on the server side and 
> > plain output format.
> 
> I still think these expose the incremental development rather than the
> user-facing change.

Well, they are in different parts of the system, though they are clearly
all related.  I am afraid merging them would be even more confusing.

> 1. It seems wrong to say "server-side" since client-side compression with
> LZ4/zstd is also supported.

Agreed.  I changed it to:

   Allow pg_basebackup to do LZ4 and Zstandard server-side compression
   on base backup files (Dipesh Pandit, Jeevan Ladhe)

> 2. It's confusing to say that the new options are server-gzip and client-gzip,
> since it just mentioned new algorithms;

I see your point since there will be new options for LZ4 and Zstandard
too, so I just removed that paragraph.

> 3. I'm not sure this needs to be mentioned at all; maybe it should be a
> "detail" following the item about server-side compression.

See my concerns above --- it seems too complex to merge into something
else.  However, I am open to an entire rewrite of these items.

> > Tables added to the listed schemas in the future will also be replicated.
> 
> "Tables later added" is clearer.  Otherwise "in the future" sounds like maybe
> in v16 or v17 we'll start replicating those tables.

Agreed, new wording:

Tables added later to the listed schemas will also be replicated.

> > Allow subscribers to stop logical replication application on error (Osumi 
> > Takamichi, Mark Dilger)
> "application" sounds off.

Agreed.  New text is:

Allow subscribers to stop the application of logical replication
changes on error

> > Add new default WAL-logged method for database creation (Dilip Kumar)
> "New default" sounds off.  Say "Add new WAL-logged method for database 
> creation, used by 

Re: [PATCH] Log details for client certificate failures

2022-07-19 Thread Andres Freund
Hi,

On 2022-07-19 12:39:43 -0400, Tom Lane wrote:
> Having said that, I struggle to see why we are panicking about badly
> encoded log data from this source while blithely ignoring the problems
> posed by non-ASCII role names, database names, and tablespace names.

I think we should fix these as well. I'm not as concerned about post-auth
encoding issues (i.e. tablespace name) as about pre-auth data (role name,
database name) - obviously being allowed to log in already is a pretty good
filter...

Greetings,

Andres Freund




Re: Use "WAL segment" instead of "log segment" consistently in user-facing messages

2022-07-19 Thread Nathan Bossart
On Tue, Jul 19, 2022 at 02:43:59PM +0530, Bharath Rupireddy wrote:
> Done. PSA v5 patch set.

LGTM.  I've marked this as ready-for-committer.

-- 
Nathan Bossart
Amazon Web Services: https://aws.amazon.com




StrategyAM for IndexAMs

2022-07-19 Thread Simon Riggs
I'm preparing the way for a later patch that would allow unique hash
indexes to be primary keys. There are various parts to the problem. I
was surprised at how many times we hardcode BTREE_AM_OID and
associated BT Strategy Numbers in many parts of the code, so have been
looking for ways to reconcile how Hash and Btree strategies work and
potentially remove hardcoding. There are various comments that say we
need a way to be able to define which Strategy Numbers are used by
indexams.

I came up with a rather simple way: the indexam just says "I'm like a
btree", which allows you to avoid adding hundreds of operator classes
for the new index, since that is cumbersome and insecure.

Specifically, we add a "strategyam" field to the IndexAmRoutine that
allows an indexam to declare whether it is like a btree, like a hash
index or another am. This then allows us to KEEP the hardcoded
BTREE_AM_OID tests, but point them at the strategyam rather than the
relam, which can be cached in various places as needed. No catalog
changes needed.
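
A minimal sketch of the idea (the strategyam field is the proposed
addition; the handler below is hypothetical):

    Datum
    myhash_handler(PG_FUNCTION_ARGS)
    {
        IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);

        amroutine->amstrategies = BTMaxStrategyNumber;
        /* proposed field: my strategy numbers mean what btree's do */
        amroutine->strategyam = BTREE_AM_OID;
        /* ... remaining fields filled in as usual ... */

        PG_RETURN_POINTER(amroutine);
    }

Call sites that currently test rel->rd_rel->relam == BTREE_AM_OID would
consult the routine's strategyam instead.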

I've coded this up and it works fine.

The attached patch is still incomplete because we use this in a few
places and they all need to be checked. So before I do that, it seems
sensible to agree the approach.

(Obviously, there are hundreds of places where BTEqualStrategyNumber
is hardcoded, and this doesn't change that at all, in case that wasn't
clear).

Comments welcome on this still WIP patch.

-- 
Simon Riggs        http://www.EnterpriseDB.com/


strategyam.v1.patch
Description: Binary data


Re: Allow placeholders in ALTER ROLE w/o superuser

2022-07-19 Thread Nathan Bossart
On Tue, Jul 19, 2022 at 12:55:14AM -0500, Steve Chavez wrote:
> Taking your options into consideration, for me the correct behaviour should
> be:
> 
> - The ALTER ROLE placeholder should always be stored with a PGC_USERSET
> GucContext. It's a placeholder anyway, so it should be the least
> restrictive one. If the user wants to define it as PGC_SUSET or something
> else, this should be done through a custom extension.
> - When an extension claims the placeholder, we should check the
> DefineCustomXXXVariable GucContext with PGC_USERSET. If there's a match,
> then the value gets applied, otherwise WARN or ERR.
>   The role GUCs get applied at login time right? So at this point we can
> WARN or ERR about the defined role GUCs.
> 
> What do you think?

Hm.  I would expect ALTER ROLE to store the PGC_SUSET context when executed
by a superuser or a role with privileges via pg_parameter_acl.  Storing all
placeholder GUC settings as PGC_USERSET would make things more restrictive
than they are today.  For example, it would no longer be possible to apply
any ALTER ROLE settings from superusers for placeholders that later become
custom GUCS.
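
To illustrate the scenario (my_ext and its GUC are hypothetical):

    -- a superuser stores a setting for a placeholder GUC
    ALTER ROLE alice SET my_ext.audit_level = 'full';

    -- later, my_ext is installed and defines my_ext.audit_level via
    -- DefineCustomStringVariable(..., PGC_SUSET, ...); if the stored
    -- value had been recorded as PGC_USERSET, it could no longer be
    -- applied at login, even though a superuser created it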

-- 
Nathan Bossart
Amazon Web Services: https://aws.amazon.com




Re: Autovacuum worker spawning strategy

2022-07-19 Thread Nathan Bossart
On Tue, Jul 19, 2022 at 12:40:06PM -0300, Rafael Thofehrn Castro wrote:
> PG prioritizes databases that need to be frozen, and since a temporary
> table can't be frozen by a process other than the session that created
> it, that DB will remain a priority until the table is dropped.
> 
> I acknowledge that having a temp table exist for long enough to reach
> `autovacuum_freeze_max_age` is a problem in itself, as the table will
> never be frozen, and if the age reaches 2 billion the instance will shut
> down. That being said, perhaps there is room for improvement in the AV
> worker spawning strategy to avoid leaving other DBs in the dark.
> 
> The database where I spotted the problem is from a customer that consumes
> 100m xacts/hour and makes extensive use of temp tables to load data, so
> that scenario can actually happen.

I wonder if it's worth tracking a separate datfrozenxid that does not
include stuff that is beyond autovacuum's control, like temporary tables.
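
(For reference, the per-database value autovacuum keys off today can be
inspected with

    SELECT datname, age(datfrozenxid) FROM pg_database ORDER BY 2 DESC;

and a long-lived temp table pins it for its whole database even though
autovacuum can never advance it.)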

-- 
Nathan Bossart
Amazon Web Services: https://aws.amazon.com




Re: Memory leak fix in psql

2022-07-19 Thread Mark Dilger



> On Jul 19, 2022, at 2:02 AM, tanghy.f...@fujitsu.com wrote:
> 
> I think there is a newly introduced memory leak in your patch d2d3547.

I agree.  Thanks for noticing, and for the patch!

> Try to fix it in the attached patch. 
> Kindly to have a check.

This looks ok, but comments down-thread seem reasonable, so I suspect a new 
patch will be needed.  Would you like to author it, or would you prefer that I, 
as the guilty party, do so?

—
Mark Dilger
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company







Re: [PATCH] Log details for client certificate failures

2022-07-19 Thread Tom Lane
Jacob Champion  writes:
> On 7/19/22 09:14, Andres Freund wrote:
>> IMO this should replace the existing ascii escape function instead.

> That will affect the existing behavior of application_name and
> cluster_name; is that acceptable?

I think Andres' point is exactly that these should all act alike.

Having said that, I struggle to see why we are panicking about badly
encoded log data from this source while blithely ignoring the problems
posed by non-ASCII role names, database names, and tablespace names.

regards, tom lane




Re: Memory leak fix in psql

2022-07-19 Thread Tom Lane
Andres Freund  writes:
> On 2022-07-19 21:08:53 +0800, Japin Li wrote:
>> +{
>> +termPQExpBuffer();
>>  return false;
>> +}

> Adding copy over copy of this same block doesn't seem great. Can we instead
> add a helper for it or such?

The usual style in these files is something like

    if (bad things happened)
        goto fail;

    ...

fail:
    termPQExpBuffer(&buf);
    return false;

Yeah, it's old school, but please let's not have a few functions that
do it randomly differently from all their neighbors.
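
For instance, a self-contained sketch of the idiom using libpq directly
(names are illustrative, not taken from the patch):

    static bool
    describeExample(PGconn *conn)
    {
        PQExpBufferData buf;
        PGresult   *res;

        initPQExpBuffer(&buf);
        appendPQExpBufferStr(&buf, "SELECT 1;");

        res = PQexec(conn, buf.data);
        if (PQresultStatus(res) != PGRES_TUPLES_OK)
            goto fail;

        PQclear(res);
        termPQExpBuffer(&buf);
        return true;

    fail:
        /* PQclear(NULL) is a no-op, so this is safe even if PQexec failed */
        PQclear(res);
        termPQExpBuffer(&buf);
        return false;
    }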

regards, tom lane




Re: Convert planner's AggInfo and AggTransInfo to Nodes

2022-07-19 Thread Tom Lane
=?utf-8?Q?Dagfinn_Ilmari_Manns=C3=A5ker?=  writes:
> It seems like a reasonable idea, but I don't know enough to judge the
> wider ramifications of it.  But one thing that the patch should also do,
> is switch to using the l*_node() functions instead of manual casting.

Hm, I didn't bother with that on the grounds that there's no question
what should be in those two lists.  But I guess it's not much extra
code, so pushed that way.

regards, tom lane




Re: [PATCH] Log details for client certificate failures

2022-07-19 Thread Jacob Champion
[resending to list]

On 7/19/22 09:14, Andres Freund wrote:
> IMO this should replace the existing ascii escape function instead.
That will affect the existing behavior of application_name and
cluster_name; is that acceptable?

--Jacob




Re: Memory leak fix in psql

2022-07-19 Thread Andres Freund
Hi,

On 2022-07-19 21:08:53 +0800, Japin Li wrote:
> From b2bcc3a1bac67b8b414f2025607f8dd35e096289 Mon Sep 17 00:00:00 2001
> From: Japin Li 
> Date: Tue, 19 Jul 2022 18:27:25 +0800
> Subject: [PATCH v2 1/1] Fix the memory leak in psql describe
> 
> ---
>  src/bin/psql/describe.c | 168 
>  1 file changed, 168 insertions(+)
> 
> diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c
> index 0ce38e4b4c..7a070a6cd0 100644
> --- a/src/bin/psql/describe.c
> +++ b/src/bin/psql/describe.c
> @@ -112,7 +112,10 @@ describeAggregates(const char *pattern, bool verbose, bool showSystem)
>   "n.nspname", "p.proname", NULL,
>   "pg_catalog.pg_function_is_visible(p.oid)",
>   NULL, 3))
> + {
> + termPQExpBuffer(&buf);
>   return false;
> + }
>  
>   appendPQExpBufferStr(&buf, "ORDER BY 1, 2, 4;");

Adding copy over copy of this same block doesn't seem great. Can we instead
add a helper for it or such?

Greetings,

Andres Freund




Re: [PATCH] Log details for client certificate failures

2022-07-19 Thread Andres Freund
Hi,

On 2022-07-19 09:07:31 -0700, Jacob Champion wrote:
> On Fri, Jul 15, 2022 at 4:45 PM Andres Freund  wrote:
> > On 2022-07-15 14:51:38 -0700, Jacob Champion wrote:
> > > That seems much worse than escaping for this particular patch; if your
> > > cert's Common Name is in (non-ASCII) UTF-8 then all you'll see is
> > > "CN=?" in the log lines that were supposed to be helping you
> > > root-cause. Escaping would be much more helpful in this case.
> >
> > I'm doubtful that's all that common.
> 
> Probably not, but the more systems that support it without weird
> usability bugs, the more common it will hopefully become.
> 
> > But either way, I suggest a separate patch to deal with that...
> 
> Proposed fix attached, which uses \x-escaping for bytes outside of
> printable ASCII.

I don't think this should be open coded in the ssl part of the code. IMO this
should replace the existing ascii escape function instead. I strongly oppose
open coding this functionality in prepare_cert_name().

Greetings,

Andres Freund




Re: [PATCH] Log details for client certificate failures

2022-07-19 Thread Jacob Champion
On Fri, Jul 15, 2022 at 4:45 PM Andres Freund  wrote:
> On 2022-07-15 14:51:38 -0700, Jacob Champion wrote:
> > That seems much worse than escaping for this particular patch; if your
> > cert's Common Name is in (non-ASCII) UTF-8 then all you'll see is
> > "CN=?" in the log lines that were supposed to be helping you
> > root-cause. Escaping would be much more helpful in this case.
>
> I'm doubtful that's all that common.

Probably not, but the more systems that support it without weird
usability bugs, the more common it will hopefully become.

> But either way, I suggest a separate patch to deal with that...

Proposed fix attached, which uses \x-escaping for bytes outside of
printable ASCII.

Thanks,
--Jacob
From 3c8e910a75c74f85b640f7d728205c276b1d1c51 Mon Sep 17 00:00:00 2001
From: Jacob Champion 
Date: Mon, 18 Jul 2022 15:01:19 -0700
Subject: [PATCH] Don't reflect unescaped cert data to the logs

Commit 3a0e385048 introduced a new path for unauthenticated bytes from
the client certificate to be printed unescaped to the logs. There are a
handful of these already, but it doesn't make sense to keep making the
problem worse. \x-escape any unprintable bytes.

The test case introduces a revoked UTF-8 certificate. This requires the
addition of the `-utf8` flag to `openssl req`. Since the existing
certificates all use an ASCII subset, this won't modify the existing
certificates' subjects if/when they get regenerated; this was verified
experimentally with

$ make sslfiles-clean
$ make sslfiles

Unfortunately the test can't be run as-is yet due to a test timing
issue; see 55828a6b60.
---
 src/backend/libpq/be-secure-openssl.c | 74 ---
 src/test/ssl/conf/client-revoked-utf8.config  | 13 
 src/test/ssl/ssl/client-crldir/9bb9e3c3.r0| 19 ++---
 src/test/ssl/ssl/client-revoked-utf8.crt  | 18 +
 src/test/ssl/ssl/client-revoked-utf8.key  | 27 +++
 src/test/ssl/ssl/client.crl   | 19 ++---
 .../ssl/ssl/root+client-crldir/9bb9e3c3.r0| 19 ++---
 src/test/ssl/ssl/root+client.crl  | 19 ++---
 src/test/ssl/sslfiles.mk  | 10 ++-
 src/test/ssl/t/001_ssltests.pl| 13 
 src/test/ssl/t/SSL/Backend/OpenSSL.pm |  3 +-
 11 files changed, 167 insertions(+), 67 deletions(-)
 create mode 100644 src/test/ssl/conf/client-revoked-utf8.config
 create mode 100644 src/test/ssl/ssl/client-revoked-utf8.crt
 create mode 100644 src/test/ssl/ssl/client-revoked-utf8.key

diff --git a/src/backend/libpq/be-secure-openssl.c b/src/backend/libpq/be-secure-openssl.c
index 9cec6866a3..ad0d79a511 100644
--- a/src/backend/libpq/be-secure-openssl.c
+++ b/src/backend/libpq/be-secure-openssl.c
@@ -35,6 +35,7 @@
 #include "storage/fd.h"
 #include "storage/latch.h"
 #include "tcop/tcopprot.h"
+#include "utils/builtins.h"
 #include "utils/memutils.h"
 
 /*
@@ -1082,16 +1083,17 @@ dummy_ssl_passwd_cb(char *buf, int size, int rwflag, void *userdata)
 }
 
 /*
- * Examines the provided certificate name, and if it's too long to log, modifies
- * and truncates it. The return value is NULL if no truncation was needed; it
- * otherwise points into the middle of the input string, and should not be
- * freed.
+ * Examines the provided certificate name, and if it's too long to log or
+ * contains unprintable ASCII, escapes and truncates it. The return value is
+ * always a new palloc'd string.
  */
 static char *
-truncate_cert_name(char *name)
+prepare_cert_name(char *name)
 {
 	size_t		namelen = strlen(name);
-	char	   *truncated;
+	char	   *truncated = name;
+	char	   *escaped;
+	int			i;
 
 	/*
 	 * Common Names are 64 chars max, so for a common case where the CN is the
@@ -1101,19 +1103,37 @@ truncate_cert_name(char *name)
 	 */
 #define MAXLEN 71
 
-	if (namelen <= MAXLEN)
-		return NULL;
-
-	/*
-	 * Keep the end of the name, not the beginning, since the most specific
-	 * field is likely to give users the most information.
-	 */
-	truncated = name + namelen - MAXLEN;
-	truncated[0] = truncated[1] = truncated[2] = '.';
+	if (namelen > MAXLEN)
+	{
+		/*
+		 * Keep the end of the name, not the beginning, since the most specific
+		 * field is likely to give users the most information.
+		 */
+		truncated = name + namelen - MAXLEN;
+		truncated[0] = truncated[1] = truncated[2] = '.';
+		namelen = MAXLEN;
+	}
 
 #undef MAXLEN
 
-	return truncated;
+	escaped = palloc(namelen * 4 + 1); /* one byte becomes four: "\xCC" */
+	i = 0;
+
+	for (char *c = truncated; *c; c++)
+	{
+		/* Keep printable characters including space. Escape everything else. */
+		if (*c >= 0x20 && *c <= 0x7E)
+			escaped[i++] = *c;
+		else
+		{
+			escaped[i++] = '\\';
+			escaped[i++] = 'x';
+			i += hex_encode(c, 1, &escaped[i]);
+		}
+	}
+
+	escaped[i] = '\0';
+	return escaped;
 }
 
 /*
@@ -1156,21 +1176,24 @@ verify_cb(int ok, X509_STORE_CTX *ctx)
 	{
 		char	   *subject,
 				   *issuer;
-		char	   *sub_truncated,
-				   *iss_truncated;
+		char	   *sub_prepared,
+			

Re: NAMEDATALEN increase because of non-latin languages

2022-07-19 Thread Andres Freund
Hi,

On 2022-07-19 14:30:34 +0700, John Naylor wrote:
> I wrote:
> 
> > On Mon, Jul 18, 2022 at 9:58 AM Andres Freund  wrote:
> > > Hm. Wouldn't it make sense to just use the normal tuple deforming
> routines and
> > > then map the results to the structs?
> >
> > I wasn't sure if they'd be suitable for this, but if they are, that'd
> make this easier and more maintainable. I'll look into it.
> 
> This would seem to have its own problems: heap_deform_tuple writes to
> passed arrays of datums and bools. The lower level parts like fetchatt and
> nocachegetattr return datums, so still need some generated boilerplate.
> Some of these also assume they can write cached offsets on a passed tuple
> descriptor.

Sure. But that'll just be a form of conversion we do all over, rather than
encoding low-level data layout details. Basically
struct->member1 = DatumGetInt32(values[0]);
struct->member2 = DatumGetChar(values[1]);

etc.
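
Spelled out slightly more (a sketch only; pg_foo and its Natts_/Anum_
macros stand in for a real catalog):

    Datum       values[Natts_pg_foo];
    bool        isnull[Natts_pg_foo];

    heap_deform_tuple(tuple, RelationGetDescr(rel), values, isnull);

    form->foorelid = DatumGetObjectId(values[Anum_pg_foo_foorelid - 1]);
    form->fookind  = DatumGetChar(values[Anum_pg_foo_fookind - 1]);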


> I'm thinking where the first few attributes are fixed length, not null, and
> (because of AIX) not double-aligned, we can do a single memcpy on multiple
> columns at once. That will still be a common pattern after namedata is
> varlen. Otherwise, use helper functions/macros similar to the above but
> instead of passing a tuple descriptor, use info we have at compile time.

I think that might be over-optimizing things. I don't think we do these
conversions at a rate that's high enough to warrant it - the common stuff
should be in relcache etc.  It's possible that we might want to optimize the
catcache case specifically - but that'd be more optimizing memory usage than
"conversion" imo.


Greetings,

Andres Freund




Re: Convert planner's AggInfo and AggTransInfo to Nodes

2022-07-19 Thread Peter Eisentraut

On 18.07.22 18:08, Tom Lane wrote:

I'm kind of tempted to mount an effort to get rid of as many of
pathnodes.h's "read_write_ignore" annotations as possible.  Some are
necessary to prevent infinite recursion, and others represent considered
judgments that they'd bloat node dumps more than they're worth --- but
I think quite a lot of them arose from plain laziness about updating
outfuncs.c.  With the infrastructure we have now, that's no longer
a good reason.


That was my impression as well, and I agree it would be good to sort 
that out.





Autovacuum worker spawning strategy

2022-07-19 Thread Rafael Thofehrn Castro
Hello all,

While investigating a problem in a PG14 instance I noticed that autovacuum
workers stop processing other databases when a database has a temporary
table with age older than `autovacuum_freeze_max_age`. To test that I
added a custom log line showing which database the about-to-be-spawned
autovacuum worker will target. Here are the details:

```
test=# select oid,datname from pg_database;
  oid  |  datname
---+---
 13757 | postgres
 32850 | test
 1 | template1
 13756 | template0
(4 rows)
```

Here are the loglines under normal circumstances:

```
2022-07-19 11:24:29.406 -03 [18627] WARNING:  AUTOVACUUM WORKER SPAWNED
(db=13756)
2022-07-19 11:24:44.406 -03 [18627] WARNING:  AUTOVACUUM WORKER SPAWNED
(db=1)
2022-07-19 11:24:59.406 -03 [18627] WARNING:  AUTOVACUUM WORKER SPAWNED
(db=32850)
2022-07-19 11:25:14.406 -03 [18627] WARNING:  AUTOVACUUM WORKER SPAWNED
(db=13757)
2022-07-19 11:25:29.417 -03 [18627] WARNING:  AUTOVACUUM WORKER SPAWNED
(db=13756)
2022-07-19 11:25:44.417 -03 [18627] WARNING:  AUTOVACUUM WORKER SPAWNED
(db=1)
2022-07-19 11:25:59.418 -03 [18627] WARNING:  AUTOVACUUM WORKER SPAWNED
(db=32850)
2022-07-19 11:26:14.417 -03 [18627] WARNING:  AUTOVACUUM WORKER SPAWNED
(db=13757)
2022-07-19 11:26:29.429 -03 [18627] WARNING:  AUTOVACUUM WORKER SPAWNED
(db=13756)
2022-07-19 11:26:44.430 -03 [18627] WARNING:  AUTOVACUUM WORKER SPAWNED
(db=1)
2022-07-19 11:26:59.432 -03 [18627] WARNING:  AUTOVACUUM WORKER SPAWNED
(db=32850)
2022-07-19 11:27:14.429 -03 [18627] WARNING:  AUTOVACUUM WORKER SPAWNED
(db=13757)
2022-07-19 11:27:29.442 -03 [18627] WARNING:  AUTOVACUUM WORKER SPAWNED
(db=13756)
2022-07-19 11:27:44.441 -03 [18627] WARNING:  AUTOVACUUM WORKER SPAWNED
(db=1)
2022-07-19 11:27:59.446 -03 [18627] WARNING:  AUTOVACUUM WORKER SPAWNED
(db=32850)
2022-07-19 11:28:14.442 -03 [18627] WARNING:  AUTOVACUUM WORKER SPAWNED
(db=13757)
2022-07-19 11:28:29.454 -03 [18627] WARNING:  AUTOVACUUM WORKER SPAWNED
(db=13756)
2022-07-19 11:28:44.454 -03 [18627] WARNING:  AUTOVACUUM WORKER SPAWNED
(db=1)
2022-07-19 11:28:59.458 -03 [18627] WARNING:  AUTOVACUUM WORKER SPAWNED
(db=32850)
2022-07-19 11:29:14.443 -03 [18627] WARNING:  AUTOVACUUM WORKER SPAWNED
(db=13757)
2022-07-19 11:29:29.465 -03 [18627] WARNING:  AUTOVACUUM WORKER SPAWNED
(db=32850)
2022-07-19 11:29:44.485 -03 [18627] WARNING:  AUTOVACUUM WORKER SPAWNED
(db=1)
2022-07-19 11:29:59.499 -03 [18627] WARNING:  AUTOVACUUM WORKER SPAWNED
(db=13756)
```

But when I create a temp table and make it older than
`autovacuum_freeze_max_age`
I get this:

```
2022-07-19 11:30:14.496 -03 [18627] WARNING:  AUTOVACUUM WORKER SPAWNED
(db=13757)
2022-07-19 11:30:29.495 -03 [18627] WARNING:  AUTOVACUUM WORKER SPAWNED
(db=13757)
2022-07-19 11:30:44.507 -03 [18627] WARNING:  AUTOVACUUM WORKER SPAWNED
(db=13757)
2022-07-19 11:30:59.522 -03 [18627] WARNING:  AUTOVACUUM WORKER SPAWNED
(db=13757)
2022-07-19 11:31:14.536 -03 [18627] WARNING:  AUTOVACUUM WORKER SPAWNED
(db=13757)
2022-07-19 11:31:29.551 -03 [18627] WARNING:  AUTOVACUUM WORKER SPAWNED
(db=13757)
2022-07-19 11:31:44.565 -03 [18627] WARNING:  AUTOVACUUM WORKER SPAWNED
(db=13757)
2022-07-19 11:31:59.579 -03 [18627] WARNING:  AUTOVACUUM WORKER SPAWNED
(db=13757)
2022-07-19 11:32:14.591 -03 [18627] WARNING:  AUTOVACUUM WORKER SPAWNED
(db=13757)
2022-07-19 11:32:29.606 -03 [18627] WARNING:  AUTOVACUUM WORKER SPAWNED
(db=13757)
2022-07-19 11:32:44.619 -03 [18627] WARNING:  AUTOVACUUM WORKER SPAWNED
(db=13757)
2022-07-19 11:32:59.631 -03 [18627] WARNING:  AUTOVACUUM WORKER SPAWNED
(db=13757)
2022-07-19 11:33:14.643 -03 [18627] WARNING:  AUTOVACUUM WORKER SPAWNED
(db=13757)
2022-07-19 11:33:29.655 -03 [18627] WARNING:  AUTOVACUUM WORKER SPAWNED
(db=13757)
2022-07-19 11:33:44.667 -03 [18627] WARNING:  AUTOVACUUM WORKER SPAWNED
(db=13757)
2022-07-19 11:33:59.679 -03 [18627] WARNING:  AUTOVACUUM WORKER SPAWNED
(db=13757)
2022-07-19 11:34:14.694 -03 [18627] WARNING:  AUTOVACUUM WORKER SPAWNED
(db=13757)
2022-07-19 11:34:29.707 -03 [18627] WARNING:  AUTOVACUUM WORKER SPAWNED
(db=13757)
2022-07-19 11:34:44.719 -03 [18627] WARNING:  AUTOVACUUM WORKER SPAWNED
(db=13757)
2022-07-19 11:34:59.732 -03 [18627] WARNING:  AUTOVACUUM WORKER SPAWNED
(db=13757)
2022-07-19 11:35:14.743 -03 [18627] WARNING:  AUTOVACUUM WORKER SPAWNED
(db=13757)
```

This is actually expected behavior judging by the code logic:
https://github.com/postgres/postgres/blob/master/src/backend/postmaster/autovacuum.c#L1201

PG prioritizes databases that need to be frozen, and since a temporary
table can't be frozen by a process other than the session that created
it, that DB will remain a priority until the table is dropped.
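
(For anyone reproducing this, the temp tables in question can be spotted
from within their database with

    SELECT oid::regclass, age(relfrozenxid)
    FROM pg_class
    WHERE relpersistence = 't' AND relkind = 'r'
    ORDER BY 2 DESC;

since relpersistence = 't' marks temporary relations.)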

I acknowledge that having a temp table exist for long enough to reach
`autovacuum_freeze_max_age` is a problem in itself, as the table will
never be frozen, and if the age reaches 2 billion the instance will shut
down. That being said, perhaps there is room for improvement in the AV
worker spawning strategy to avoid leaving other DBs in the dark.

Re: pgsql: Default to hidden visibility for extension libraries where possi

2022-07-19 Thread Andres Freund
Hi,

On 2022-07-19 17:37:11 +0200, Pavel Stehule wrote:
> On Tue, Jul 19, 2022 at 5:31 PM Andres Freund wrote:
> 
> > Hi,
> >
> > On 2022-07-19 16:28:07 +0200, Alvaro Herrera wrote:
> > > Anyway, the minimal patch that makes plpgsql_check tests pass is
> > > attached.  This seems a bit random.  Maybe it'd be better to have a
> > > plpgsql_internal.h with functions that are exported only for plpgsql
> > > itself, and keep plpgsql.h with a set of functions, all marked
> > > PGDLLEXPORT, that are for external use.
> >
> > It does seem a bit random. But I think we probably should err on the side
> > of
> > adding more declarations, rather than the oposite.
> >
> 
> This list can be extended. I think plpgsql_check may be the only extension
> that uses code from another extension directly. This is really not common
> usage.

There's a few more use cases, e.g. transform modules. Hence exposing e.g. many
plpython helpers.


> I don't have any problem with it or with the exports.txt file.

Just to be clear, there shouldn't be any use of exports.txt here, just a few
PGDLLEXPORTs.

Greetings,

Andres Freund




Re: pgsql: Default to hidden visibility for extension libraries where possi

2022-07-19 Thread Pavel Stehule
Hi

On Tue, Jul 19, 2022 at 5:31 PM Andres Freund wrote:

> Hi,
>
> On 2022-07-19 16:28:07 +0200, Alvaro Herrera wrote:
> > Anyway, the minimal patch that makes plpgsql_check tests pass is
> > attached.  This seems a bit random.  Maybe it'd be better to have a
> > plpgsql_internal.h with functions that are exported only for plpgsql
> > itself, and keep plpgsql.h with a set of functions, all marked
> > PGDLLEXPORT, that are for external use.
>
> It does seem a bit random. But I think we probably should err on the side
> of
> adding more declarations, rather than the opposite.
>

This list can be extended. I think plpgsql_check may be the only extension
that uses code from another extension directly. This is really not common
usage.


>
> I like the plpgsql_internal.h idea, but probably done separately?
>

can be

I don't have any problem with it or with the exports.txt file.



> Greetings,
>
> Andres Freund
>


Re: PG 15 (and to a smaller degree 14) regression due to ExprEvalStep size

2022-07-19 Thread Andres Freund
Hi,

On 2022-07-19 20:40:11 +0900, Amit Langote wrote:
> About that, I was wondering if the blocks in llvm_compile_expr() need
> to be hand-coded to match what's added in ExecInterpExpr() or if I've
> missed some tool that can be used instead?

The easiest way is to just call an external function for the implementation of
the step. But yes, otherwise you need to handcraft it.

Greetings,

Andres Freund




Re: pgsql: Default to hidden visibility for extension libraries where possi

2022-07-19 Thread Andres Freund
Hi,

On 2022-07-19 16:28:07 +0200, Alvaro Herrera wrote:
> Anyway, the minimal patch that makes plpgsql_check tests pass is
> attached.  This seems a bit random.  Maybe it'd be better to have a
> plpgsql_internal.h with functions that are exported only for plpgsql
> itself, and keep plpgsql.h with a set of functions, all marked
> PGDLLEXPORT, that are for external use.

It does seem a bit random. But I think we probably should err on the side of
adding more declarations, rather than the opposite.

I like the plpgsql_internal.h idea, but probably done separately?

Greetings,

Andres Freund




Re: Costing elided SubqueryScans more nearly correctly

2022-07-19 Thread Tom Lane
Alvaro Herrera  writes:
> Thanks, pushed.

Pushed the original patch now too.

regards, tom lane




Re: pgsql: Default to hidden visibility for extension libraries where possi

2022-07-19 Thread Pavel Stehule
Hi

On Tue, Jul 19, 2022 at 4:28 PM Alvaro Herrera wrote:

> [ Redirecting thread to -hackers from -committers ]
>
> On 2022-Jul-19, Tom Lane wrote:
>
> > Alvaro Herrera  writes:
>
> > > Do you just need to send a patch to add an exports.txt file to
> > > src/pl/plpgsql/src/ for these functions?
> >
> > The precedent of plpython says that PGDLLEXPORT markers are sufficient.
> > But yeah, we need a list of exactly which functions need to be
> > re-exposed.  I imagine pldebugger has its own needs.
>
> A reasonable guess.  I went as far as downloading pldebugger and
> compiling it, but it doesn't have a test suite of its own, so I couldn't
> verify anything about it.  I did notice that plpgsql_check is calling
> function load_external_function(), and that doesn't appear in
> pldebugger.  I wonder if the find_rendezvous_variable business is at
> play.
>
> Anyway, the minimal patch that makes plpgsql_check tests pass is
> attached.  This seems a bit random.  Maybe it'd be better to have a
> plpgsql_internal.h with functions that are exported only for plpgsql
> itself, and keep plpgsql.h with a set of functions, all marked
> PGDLLEXPORT, that are for external use.
>
>
I can confirm that the attached patch fixes plpgsql_check.

Thank you

Pavel




>
> ... oh, and:
>
> $ postmaster -c shared_preload_libraries=plugin_debugger
> 2022-07-19 16:27:24.006 CEST [742142] FATAL:  cannot request additional
> shared memory outside shmem_request_hook
>
> --
> Álvaro Herrera        Breisgau, Deutschland  —
> https://www.EnterpriseDB.com/
>


Re: pgsql: Default to hidden visibility for extension libraries where possi

2022-07-19 Thread Julien Rouhaud
On Tue, Jul 19, 2022 at 04:28:07PM +0200, Alvaro Herrera wrote:
> ... oh, and:
>
> $ postmaster -c shared_preload_libraries=plugin_debugger
> 2022-07-19 16:27:24.006 CEST [742142] FATAL:  cannot request additional 
> shared memory outside shmem_request_hook

This has been reported multiple times (including on one of my own projects),
see
https://www.postgresql.org/message-id/flat/81f82c00-8818-91f3-96fa-47976f94662b%40pm.me
for the last report.
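
For anyone hitting this, the fix on the extension side is mechanical; a
sketch, with MySharedState standing in for the extension's real state:

    static shmem_request_hook_type prev_shmem_request_hook = NULL;

    static void
    my_shmem_request(void)
    {
        if (prev_shmem_request_hook)
            prev_shmem_request_hook();
        RequestAddinShmemSpace(MAXALIGN(sizeof(MySharedState)));
    }

    void
    _PG_init(void)
    {
        prev_shmem_request_hook = shmem_request_hook;
        shmem_request_hook = my_shmem_request;
    }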




Re: pgsql: Default to hidden visibility for extension libraries where possi

2022-07-19 Thread Alvaro Herrera
[ Redirecting thread to -hackers from -committers ]

On 2022-Jul-19, Tom Lane wrote:

> Alvaro Herrera  writes:

> > Do you just need to send a patch to add an exports.txt file to
> > src/pl/plpgsql/src/ for these functions?
> 
> The precedent of plpython says that PGDLLEXPORT markers are sufficient.
> But yeah, we need a list of exactly which functions need to be
> re-exposed.  I imagine pldebugger has its own needs.

A reasonable guess.  I went as far as downloading pldebugger and
compiling it, but it doesn't have a test suite of its own, so I couldn't
verify anything about it.  I did notice that plpgsql_check is calling
function load_external_function(), and that doesn't appear in
pldebugger.  I wonder if the find_rendezvous_variable business is at
play.
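
(Roughly, that call pattern is

    typedef PLpgSQL_function *(*plpgsql_compile_t) (FunctionCallInfo, bool);

    plpgsql_compile_t compile_fn = (plpgsql_compile_t)
        load_external_function("$libdir/plpgsql", "plpgsql_compile",
                               true, NULL);

so any symbol resolved that way has to stay visible; the symbol shown here
is illustrative.)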

Anyway, the minimal patch that makes plpgsql_check tests pass is
attached.  This seems a bit random.  Maybe it'd be better to have a
plpgsql_internal.h with functions that are exported only for plpgsql
itself, and keep plpgsql.h with a set of functions, all marked
PGDLLEXPORT, that are for external use.


... oh, and:

$ postmaster -c shared_preload_libraries=plugin_debugger
2022-07-19 16:27:24.006 CEST [742142] FATAL:  cannot request additional shared 
memory outside shmem_request_hook

-- 
Álvaro Herrera        Breisgau, Deutschland  —  https://www.EnterpriseDB.com/
>From e0759245de3e0fcad7a6b2c3e9a637d3bdffe2a4 Mon Sep 17 00:00:00 2001
From: Alvaro Herrera 
Date: Tue, 19 Jul 2022 16:19:03 +0200
Subject: [PATCH] add PGDLLEXPORTS to plpgsql

for plpgsql_check benefit
---
 src/pl/plpgsql/src/plpgsql.h | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/pl/plpgsql/src/plpgsql.h b/src/pl/plpgsql/src/plpgsql.h
index 1914160272..20dd5ec060 100644
--- a/src/pl/plpgsql/src/plpgsql.h
+++ b/src/pl/plpgsql/src/plpgsql.h
@@ -1231,10 +1231,10 @@ extern PLpgSQL_plugin **plpgsql_plugin_ptr;
 /*
  * Functions in pl_comp.c
  */
-extern PLpgSQL_function *plpgsql_compile(FunctionCallInfo fcinfo,
+extern PGDLLEXPORT PLpgSQL_function *plpgsql_compile(FunctionCallInfo fcinfo,
 		 bool forValidator);
 extern PLpgSQL_function *plpgsql_compile_inline(char *proc_source);
-extern void plpgsql_parser_setup(struct ParseState *pstate,
+extern PGDLLEXPORT void plpgsql_parser_setup(struct ParseState *pstate,
  PLpgSQL_expr *expr);
 extern bool plpgsql_parse_word(char *word1, const char *yytxt, bool lookup,
 			   PLwdatum *wdatum, PLword *word);
@@ -1246,7 +1246,7 @@ extern PLpgSQL_type *plpgsql_parse_wordtype(char *ident);
 extern PLpgSQL_type *plpgsql_parse_cwordtype(List *idents);
 extern PLpgSQL_type *plpgsql_parse_wordrowtype(char *ident);
 extern PLpgSQL_type *plpgsql_parse_cwordrowtype(List *idents);
-extern PLpgSQL_type *plpgsql_build_datatype(Oid typeOid, int32 typmod,
+extern PGDLLEXPORT PLpgSQL_type *plpgsql_build_datatype(Oid typeOid, int32 typmod,
 			Oid collation,
 			TypeName *origtypname);
 extern PLpgSQL_variable *plpgsql_build_variable(const char *refname, int lineno,
@@ -1257,7 +1257,7 @@ extern PLpgSQL_rec *plpgsql_build_record(const char *refname, int lineno,
 		 bool add2namespace);
 extern PLpgSQL_recfield *plpgsql_build_recfield(PLpgSQL_rec *rec,
 const char *fldname);
-extern int	plpgsql_recognize_err_condition(const char *condname,
+extern PGDLLEXPORT int	plpgsql_recognize_err_condition(const char *condname,
 			bool allow_sqlstate);
 extern PLpgSQL_condition *plpgsql_parse_err_condition(char *condname);
 extern void plpgsql_adddatum(PLpgSQL_datum *newdatum);
@@ -1280,7 +1280,7 @@ extern void plpgsql_exec_event_trigger(PLpgSQL_function *func,
 extern void plpgsql_xact_cb(XactEvent event, void *arg);
 extern void plpgsql_subxact_cb(SubXactEvent event, SubTransactionId mySubid,
 			   SubTransactionId parentSubid, void *arg);
-extern Oid	plpgsql_exec_get_datum_type(PLpgSQL_execstate *estate,
+extern PGDLLEXPORT Oid	plpgsql_exec_get_datum_type(PLpgSQL_execstate *estate,
 		PLpgSQL_datum *datum);
 extern void plpgsql_exec_get_datum_type_info(PLpgSQL_execstate *estate,
 			 PLpgSQL_datum *datum,
@@ -1296,7 +1296,7 @@ extern void plpgsql_ns_push(const char *label,
 extern void plpgsql_ns_pop(void);
 extern PLpgSQL_nsitem *plpgsql_ns_top(void);
 extern void plpgsql_ns_additem(PLpgSQL_nsitem_type itemtype, int itemno, const char *name);
-extern PLpgSQL_nsitem *plpgsql_ns_lookup(PLpgSQL_nsitem *ns_cur, bool localmode,
+extern PGDLLEXPORT PLpgSQL_nsitem *plpgsql_ns_lookup(PLpgSQL_nsitem *ns_cur, bool localmode,
 		 const char *name1, const char *name2,
 		 const char *name3, int *names_used);
 extern PLpgSQL_nsitem *plpgsql_ns_lookup_label(PLpgSQL_nsitem *ns_cur,
@@ -1306,7 +1306,7 @@ extern PLpgSQL_nsitem *plpgsql_ns_find_nearest_loop(PLpgSQL_nsitem *ns_cur);
 /*
  * Other functions in pl_funcs.c
  */
-extern const char *plpgsql_stmt_typename(PLpgSQL_stmt *stmt);
+extern PGDLLEXPORT const char *plpgsql_stmt_typename(PLpgSQL_stmt 

Re: [PATCH] Introduce array_shuffle() and array_sample()

2022-07-19 Thread Tom Lane
Robert Haas  writes:
> On Tue, Jul 19, 2022 at 9:53 AM Andrew Dunstan  wrote:
>> Why not have an optional second parameter for array_shuffle that
>> indicates whether or not to flatten? e.g. array_shuffle(my_array,
>> flatten => true)

> IMHO, if we think that's something many people are going to want, it
> would be better to add an array_flatten() function, because that could
> be used for a variety of purposes, whereas an option to this function
> can only be used for this function.

Agreed.  Whether it's really needed, I dunno --- I don't recall the
issue having come up before.

After taking a second look through
https://www.postgresql.org/docs/current/functions-array.html
it seems like the things that treat arrays as flat even when they
are multi-D are just

* unnest(), which is more or less forced into that position by our
type system: it has to be anyarray returning anyelement, not
anyarray returning anyelement-or-anyarray-depending.

* array_to_string(), which maybe could have done it differently but
can reasonably be considered a variant of unnest().

* The containment/overlap operators, which are kind of their own
thing anyway.  Arguably they got this wrong, though I suppose it's
too late to rethink that.
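
To make the first two concrete:

    SELECT unnest('{{1,2},{3,4}}'::int[]);                -- rows 1, 2, 3, 4
    SELECT array_to_string('{{1,2},{3,4}}'::int[], ',');  -- '1,2,3,4'

Both ignore the dimensional structure entirely.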

Everything else either explicitly rejects more-than-one-D arrays
or does something that is compatible with thinking of them as
arrays-of-arrays.

So I withdraw my original position.  These functions should just
shuffle or select in the array's first dimension, preserving
subarrays.  Or else be lazy and reject more-than-one-D arrays;
but it's probably not that hard to handle them.

regards, tom lane




Re: [BUG] Logical replication failure "ERROR: could not map filenode "base/13237/442428" to relation OID" with catalog modifying txns

2022-07-19 Thread Masahiko Sawada
On Tue, Jul 19, 2022 at 9:25 PM Amit Kapila  wrote:
>
> On Tue, Jul 19, 2022 at 1:10 PM Masahiko Sawada  wrote:
> >
> > On Mon, Jul 18, 2022 at 8:49 PM Amit Kapila  wrote:
> > >
> > > On Sun, Jul 17, 2022 at 6:29 PM Masahiko Sawada  
> > > wrote:
> > > >
> > > > On Fri, Jul 15, 2022 at 3:32 PM shiy.f...@fujitsu.com
> > > >  wrote:
> > > > >
> > > >
> > > > I've attached patches for all supported branches including the master.
> > > >
> > >
> > > For back branch patches,
> > > * Wouldn't it be better to move the purge logic into the
> > > SnapBuildPurge* function for the sake of consistency?
> >
> > Agreed.
> >
> > > * Do we really need ReorderBufferInitialXactsSetCatalogChanges()?
> > > Can't we instead have a function similar to
> > > SnapBuildXidHasCatalogChanges() as we have for the master branch? That
> > > will avoid calling it when the snapshot
> > > state is SNAPBUILD_START or SNAPBUILD_BUILDING_SNAPSHOT
> >
> > Seems a good idea. We would need to pass the information about
> > (parsed->xinfo & XACT_XINFO_HAS_INVALS) to the function but probably
> > we can change ReorderBufferXidHasCatalogChanges() so that it checks
> > the RBTXN_HAS_CATALOG_CHANGES flag and then the initial running xacts
> > array.
> >
>
> Let's try to keep this as much similar to the master branch patch as possible.
>
> > BTW on backbranches, I think that the reason why we add
> > initial_running_xacts stuff to ReorderBuffer is that we cannot modify
> > SnapBuild that could be serialized. Can we add a (private) array for
> > the initial running xacts in snapbuild.c instead of adding new
> > variables to ReorderBuffer?
> >
>
> While thinking about this, I wonder if the current patch for back
> branches can lead to an ABI break as it changes the exposed structure?
> If so, it may be another reason to change it to some other way
> probably as you are suggesting.

Yeah, it changes the size of ReorderBuffer, which is not good.
Changing the function names and arguments would also break ABI. So
probably we cannot do the above idea of removing
ReorderBufferInitialXactsSetCatalogChanges() as well.

Regards,

-- 
Masahiko Sawada
EDB:  https://www.enterprisedb.com/
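
To illustrate the ABI concern in general terms (this is a made-up
struct, not the real ReorderBuffer definition): once extensions have
been compiled against a back branch's headers, adding a member changes
the struct's size and the offsets of every member after it, so a binary
built against the old layout allocates too little memory and reads the
wrong offsets until it is recompiled.

/* Hypothetical example only. */
typedef struct Example
{
	int			nentries;
	void	   *entries;

	/*
	 * Inserting a new member here in a minor release changes
	 * sizeof(Example) and the offset of n_initial; code compiled against
	 * the old header would still use the old size and offsets.
	 */
	TransactionId *initial_running_xacts;	/* hypothetical addition */
	int			n_initial;
} Example;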




Re: [PATCH] Introduce array_shuffle() and array_sample()

2022-07-19 Thread Robert Haas
On Tue, Jul 19, 2022 at 9:53 AM Andrew Dunstan  wrote:
> > Having thought about it, I would go with (2). It gives the user the
> > ability to decide whether or not array-of-arrays behavior is desired.
> > If he wants the behavior of (1) he can flatten the array before
> > applying array_shuffle(). Unfortunately there is no array_flatten()
> > function (at the moment) and the user would have to work around it
> > with unnest() and array_agg().
>
> Why not have an optional second parameter for array_shuffle that
> indicates whether or not to flatten? e.g. array_shuffle(my_array,
> flatten => true)

IMHO, if we think that's something many people are going to want, it
would be better to add an array_flatten() function, because that could
be used for a variety of purposes, whereas an option to this function
can only be used for this function.

-- 
Robert Haas
EDB: http://www.enterprisedb.com




Re: [PATCH] Introduce array_shuffle() and array_sample()

2022-07-19 Thread Andrew Dunstan


On 2022-07-19 Tu 07:15, Martin Kalcher wrote:
> On 18.07.22 at 23:48, Martin Kalcher wrote:
>>
>> If we go with (1) array_shuffle() and array_sample() should shuffle
>> each element individually and always return a one-dimensional array.
>>
>>    select array_shuffle('{{1,2},{3,4},{5,6}}');
>>    ---
>>     {1,4,3,5,6,2}
>>
>>    select array_sample('{{1,2},{3,4},{5,6}}', 3);
>>    --
>>     {1,4,3}
>>
>> If we go with (2) both functions should only operate on the first
>> dimension and shuffle whole subarrays and keep the dimensions intact.
>>
>>    select array_shuffle('{{1,2},{3,4},{5,6}}');
>>    -
>>     {{3,4},{1,2},{5,6}}
>>
>>    select array_sample('{{1,2},{3,4},{5,6}}', 2);
>>    ---
>>     {{3,4},{1,2}}
>>
>
> Having thought about it, I would go with (2). It gives the user the
> ability to decide whether or not array-of-arrays behavior is desired.
> If he wants the behavior of (1) he can flatten the array before
> applying array_shuffle(). Unfortunately there is no array_flatten()
> function (at the moment) and the user would have to work around it
> with unnest() and array_agg().
>
>


Why not have an optional second parameter for array_shuffle that
indicates whether or not to flatten? e.g. array_shuffle(my_array,
flatten => true)


cheers


andrew

--
Andrew Dunstan
EDB: https://www.enterprisedb.com





Re: Windows now has fdatasync()

2022-07-19 Thread Tom Lane
Thomas Munro  writes:
> The reason we need it for macOS is that they have had fdatasync
> function for many years now, and configure detects it, but they
> haven't ever declared it in a header, so we (accidentally?) do it in
> c.h.  We didn't set that up for Apple!  The commit that added it was
> 33cc5d8a, which was about a month before Apple shipped the first
> version of OS X (and long before they defined the function).  So there
> must have been another Unix with that problem, lost in the mists of
> time.

It might have just been paranoia, but I doubt it.  Back then we
were still dealing with lots of systems that didn't have every
function described in SUS v2.

If you poked around in the mail archives you could likely find some
associated discussion, but I'm too lazy for that ...

regards, tom lane




Re: Memory leak fix in psql

2022-07-19 Thread Japin Li

On Tue, 19 Jul 2022 at 20:32, Michael Paquier  wrote:
> On Tue, Jul 19, 2022 at 06:41:13PM +0800, Japin Li wrote:
>> After looking around, I found psql/describe.c also has some memory leaks,
>> attached a patch to fix these leaks.
>
> Indeed.  There are quite a bit of them, so let's fix all that.  You
> have missed a couple of code paths in objectDescription().

Thanks for reviewing. Attached fix the memory leak in objectDescription().
Please consider v2 for further review.

-- 
Regards,
Japin Li.
ChengDu WenWu Information Technology Co.,Ltd.

From b2bcc3a1bac67b8b414f2025607f8dd35e096289 Mon Sep 17 00:00:00 2001
From: Japin Li 
Date: Tue, 19 Jul 2022 18:27:25 +0800
Subject: [PATCH v2 1/1] Fix the memory leak in psql describe

---
 src/bin/psql/describe.c | 168 
 1 file changed, 168 insertions(+)

diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c
index 0ce38e4b4c..7a070a6cd0 100644
--- a/src/bin/psql/describe.c
+++ b/src/bin/psql/describe.c
@@ -112,7 +112,10 @@ describeAggregates(const char *pattern, bool verbose, bool showSystem)
 								"n.nspname", "p.proname", NULL,
 								"pg_catalog.pg_function_is_visible(p.oid)",
 								NULL, 3))
+	{
+		termPQExpBuffer(&buf);
 		return false;
+	}
 
 	appendPQExpBufferStr(&buf, "ORDER BY 1, 2, 4;");
 
@@ -182,7 +185,10 @@ describeAccessMethods(const char *pattern, bool verbose)
 								NULL, "amname", NULL,
 								NULL,
 								NULL, 1))
+	{
+		termPQExpBuffer(&buf);
 		return false;
+	}
 
 	appendPQExpBufferStr(&buf, "ORDER BY 1;");
 
@@ -244,7 +250,10 @@ describeTablespaces(const char *pattern, bool verbose)
 								NULL, "spcname", NULL,
 								NULL,
 								NULL, 1))
+	{
+		termPQExpBuffer(&buf);
 		return false;
+	}
 
 	appendPQExpBufferStr(&buf, "ORDER BY 1;");
 
@@ -534,7 +543,10 @@ describeFunctions(const char *functypes, const char *func_pattern,
 								"n.nspname", "p.proname", NULL,
 								"pg_catalog.pg_function_is_visible(p.oid)",
 								NULL, 3))
+	{
+		termPQExpBuffer(&buf);
 		return false;
+	}
 
 	for (int i = 0; i < num_arg_patterns; i++)
 	{
@@ -561,7 +573,10 @@ describeFunctions(const char *functypes, const char *func_pattern,
 										true, false,
 										nspname, typname, ft, tiv,
 										NULL, 3))
+			{
+				termPQExpBuffer(&buf);
 				return false;
+			}
 		}
 		else
 		{
@@ -682,7 +697,10 @@ describeTypes(const char *pattern, bool verbose, bool showSystem)
 								"pg_catalog.format_type(t.oid, NULL)",
 								"pg_catalog.pg_type_is_visible(t.oid)",
 								NULL, 3))
+	{
+		termPQExpBuffer(&buf);
 		return false;
+	}
 
 	appendPQExpBufferStr(&buf, "ORDER BY 1, 2;");
 
@@ -836,7 +854,10 @@ describeOperators(const char *oper_pattern,
 								"n.nspname", "o.oprname", NULL,
 								"pg_catalog.pg_operator_is_visible(o.oid)",
 								NULL, 3))
+	{
+		termPQExpBuffer(&buf);
 		return false;
+	}
 
 	if (num_arg_patterns == 1)
 		appendPQExpBufferStr(&buf, "  AND o.oprleft = 0\n");
@@ -866,7 +887,10 @@ describeOperators(const char *oper_pattern,
 										true, false,
 										nspname, typname, ft, tiv,
 										NULL, 3))
+			{
+				termPQExpBuffer(&buf);
 				return false;
+			}
 		}
 		else
 		{
@@ -953,7 +977,10 @@ listAllDbs(const char *pattern, bool verbose)
 		if (!validateSQLNamePattern(&buf, pattern, false, false,
 									NULL, "d.datname", NULL, NULL,
 									NULL, 1))
+		{
+			termPQExpBuffer(&buf);
 			return false;
+		}
 
 	appendPQExpBufferStr(&buf, "ORDER BY 1;");
 	res = PSQLexec(buf.data);
@@ -1106,7 +1133,10 @@ permissionsList(const char *pattern)
 								"n.nspname", "c.relname", NULL,
 								"n.nspname !~ '^pg_' AND pg_catalog.pg_table_is_visible(c.oid)",
 								NULL, 3))
+	{
+		termPQExpBuffer(&buf);
 		return false;
+	}
 
 	appendPQExpBufferStr(&buf, "ORDER BY 1, 2;");
 
@@ -1177,7 +1207,10 @@ listDefaultACLs(const char *pattern)
 								"pg_catalog.pg_get_userbyid(d.defaclrole)",
 								NULL,
 								NULL, 3))
+	{
+		termPQExpBuffer(&buf);
 		return false;
+	}
 
 	appendPQExpBufferStr(&buf, "ORDER BY 1, 2, 3;");
 
@@ -1253,7 +1286,10 @@ objectDescription(const char *pattern, bool showSystem)
 								false, "n.nspname", "pgc.conname", NULL,
 								"pg_catalog.pg_table_is_visible(c.oid)",
 								NULL, 3))
+	{
+		termPQExpBuffer(&buf);
 		return false;
+	}
 
 	/* Domain constraint descriptions */
 	appendPQExpBuffer(&buf,
@@ -1277,7 +1313,10 @@ objectDescription(const char *pattern, bool showSystem)
 								false, "n.nspname", "pgc.conname", NULL,
 								"pg_catalog.pg_type_is_visible(t.oid)",
 								NULL, 3))
+	{
+		termPQExpBuffer(&buf);
 		return false;
+	}
 
 	/* Operator class descriptions */
 	appendPQExpBuffer(&buf,
@@ -1301,7 +1340,10 @@ objectDescription(const char *pattern, bool showSystem)
 								"n.nspname", "o.opcname", NULL,
 								"pg_catalog.pg_opclass_is_visible(o.oid)",
 								NULL, 3))
+	{
+		termPQExpBuffer(&buf);
 		return false;
+	}
 
 	/* Operator family descriptions */
 	appendPQExpBuffer(&buf,
@@ -1325,7 +1367,10 @@ objectDescription(const char *pattern, bool showSystem)
 

Re: [PATCH] Introduce array_shuffle() and array_sample()

2022-07-19 Thread Robert Haas
On Mon, Jul 18, 2022 at 6:43 PM Tom Lane  wrote:
> Um ... why is "the order in which the elements were chosen" a concept
> we want to expose?  ISTM sample() is a black box in which notionally
> the decisions could all be made at once.

I agree with that. But I also think it's fine for the elements to be
returned in a shuffled order rather than the original order.

> > I really think this function needs to grow an algorithm argument that can
> > be used to specify stuff like ordering, replacement/without-replacement,
> > etc...just some enums separated by commas that can be added to the call.
>
> I think you might run out of gold paint somewhere around here.  I'm
> still not totally convinced we should bother with the sample() function
> at all, let alone that it needs algorithm variants.  At some point we
> say to the user "here's a PL, write what you want for yourself".

I don't know what gold paint has to do with anything here, but I agree
that David's proposal seems to be moving the goalposts a very long
way.

The thing is, as Martin points out, these functions already exist in a
bunch of other systems. For one example I've used myself, see
https://underscorejs.org/

I probably wouldn't have called a function to put a list into a random
order "shuffle" in a vacuum, but it seems to be common nomenclature
these days. I believe that if you don't make reference to Fisher-Yates
in the documentation, they kick you out of the cool programmers club.

-- 
Robert Haas
EDB: http://www.enterprisedb.com
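
For reference, the Fisher-Yates shuffle being alluded to is a single
backwards pass with random swaps, O(n) and uniform given a uniform
random source. A minimal generic sketch in C (using rand() for brevity
where PostgreSQL code would use its own PRNG):

#include <stdlib.h>

/* Shuffle an int array in place with Fisher-Yates. */
static void
fisher_yates(int *a, int n)
{
	for (int i = n - 1; i > 0; i--)
	{
		int			j = rand() % (i + 1);	/* uniform in [0, i] */
		int			tmp = a[i];

		a[i] = a[j];
		a[j] = tmp;
	}
}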




Re: errdetail/errhint style

2022-07-19 Thread Michael Paquier
On Tue, Jul 19, 2022 at 02:31:28PM +0200, Alvaro Herrera wrote:
> Hmm, +1, though a few of these are still missing ending periods after
> your patch.

+1.
--
Michael




Re: [BUG] Logical replication failure "ERROR: could not map filenode "base/13237/442428" to relation OID" with catalog modifying txns

2022-07-19 Thread Amit Kapila
On Tue, Jul 19, 2022 at 2:01 PM Masahiko Sawada  wrote:
>
> On Tue, Jul 19, 2022 at 4:35 PM Kyotaro Horiguchi
>  wrote:
>
> >
> >
> > +   Assert((xcnt > 0) && (xcnt == rb->catchange_ntxns));
> >
> > (xcnt > 0) is obvious here (otherwise dlist_foreach is broken).
> > (xcnt == rb->catchange_ntxns) is not what should be checked here. The
> > assert just requires that catchange_txns and catchange_ntxns are
> > consistent, so it should be checked just after dlist_empty, I think.
> >
>
> If we want to check if catchange_txns and catchange_ntxns are
> consistent, should we check (xcnt == rb->catchange_ntxns) as well, no?
> This function requires the caller to use rb->catchange_ntxns as the
> length of the returned array. I think this assertion ensures that the
> actual length of the array is consistent with the length we
> pre-calculated.
>

Right, so I think it is better to keep this assertion but remove the
(xcnt > 0) part, as pointed out by Horiguchi-san.

-- 
With Regards,
Amit Kapila.




Re: POC PATCH: copy from ... exceptions to: (was Re: VLDB Features)

2022-07-19 Thread Damir Belyalov
Hi!

I improved my patch by adding block subtransactions.
The block size is determined by the REPLAY_BUFFER_SIZE parameter.
The idea is to accumulate tuples in a buffer: if we read
REPLAY_BUFFER_SIZE rows without errors, the subtransaction is
committed.
If we hit an error, the subtransaction is rolled back and the tuples
held in the buffer are replayed.

In the patch REPLAY_BUFFER_SIZE equals 3, but it can be changed to any
other number (for example 1000).
There is an idea to turn it into a GUC parameter, and perhaps to add
another GUC limiting the number of WARNINGs emitted for rows with
errors.

For CIM_MULTI and CIM_MULTI_CONDITIONAL cases the buffer is not needed.
It is needed for the CIM_SINGLE case.
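
As a self-contained toy model of that control flow (plain C; try_insert()
stands in for the per-row insert, and the printf calls stand in for the
subtransaction commit/rollback/replay that the real patch performs):

#include <stdio.h>
#include <stdbool.h>

#define REPLAY_BUFFER_SIZE 3

/* Stand-in for inserting one row; "fails" for negative values. */
static bool
try_insert(int row)
{
	return row >= 0;
}

int
main(void)
{
	int			rows[] = {1, 2, -1, 3, 4, 5, 6, -2, 7};
	int			buffer[REPLAY_BUFFER_SIZE];
	int			nbuffered = 0;

	for (size_t i = 0; i < sizeof(rows) / sizeof(rows[0]); i++)
	{
		if (try_insert(rows[i]))
		{
			buffer[nbuffered++] = rows[i];
			if (nbuffered == REPLAY_BUFFER_SIZE)
			{
				/* whole block was clean: commit it and start a new one */
				printf("commit subtransaction (%d clean rows)\n", nbuffered);
				nbuffered = 0;
			}
		}
		else
		{
			/* error: roll back the block, re-insert the known-good rows */
			printf("error on row %d: rollback, replay %d buffered rows\n",
				   rows[i], nbuffered);
			for (int k = 0; k < nbuffered; k++)
				printf("  re-insert %d\n", buffer[k]);
			nbuffered = 0;
		}
	}
	if (nbuffered > 0)
		printf("commit final subtransaction (%d rows)\n", nbuffered);
	return 0;
}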

Tests:

-- CIM_MULTI case
CREATE TABLE check_ign_err (n int, m int, k int);
COPY check_ign_err FROM STDIN WITH IGNORE_ERRORS;
WARNING:  COPY check_ign_err, line 2: "2 2 2 2"
WARNING:  COPY check_ign_err, line 3: "3 3"
WARNING:  COPY check_ign_err, line 4, column n: "a"
WARNING:  COPY check_ign_err, line 5, column m: "b"
WARNING:  COPY check_ign_err, line 6, column n: ""
1 1 1
2 2 2 2
3 3
a 4 4
5 b b

7 7 7
\.
SELECT * FROM check_ign_err;
WARNING:  COPY check_ign_err, line 2: "2 2 2 2"
WARNING:  COPY check_ign_err, line 3: "3 3"
WARNING:  COPY check_ign_err, line 4, column n: "a"
WARNING:  COPY check_ign_err, line 5, column m: "b"
WARNING:  COPY check_ign_err, line 6, column n: ""
 n | m | k
---+---+---
 1 | 1 | 1
 7 | 7 | 7
(2 rows)

##


-- CIM_SINGLE case
-- BEFORE row trigger
CREATE TABLE trig_test(n int, m int);
CREATE FUNCTION fn_trig_before () RETURNS TRIGGER AS '
  BEGIN
INSERT INTO trig_test VALUES(NEW.n, NEW.m);
RETURN NEW;
  END;
' LANGUAGE plpgsql;
CREATE TRIGGER trig_before BEFORE INSERT ON check_ign_err
FOR EACH ROW EXECUTE PROCEDURE fn_trig_before();
COPY check_ign_err FROM STDIN WITH IGNORE_ERRORS;
WARNING:  COPY check_ign_err, line 2: "2 2 2 2"
WARNING:  COPY check_ign_err, line 3: "3 3"
WARNING:  COPY check_ign_err, line 4, column n: "a"
WARNING:  COPY check_ign_err, line 5, column m: "b"
WARNING:  COPY check_ign_err, line 6, column n: ""
1 1 1
2 2 2 2
3 3
a 4 4
5 b b

7 7 7
\.
SELECT * FROM check_ign_err;
 n | m | k
---+---+---
 1 | 1 | 1
 7 | 7 | 7
(2 rows)

##


-- INSTEAD OF row trigger
CREATE VIEW check_ign_err_view AS SELECT * FROM check_ign_err;
CREATE FUNCTION fn_trig_instead_of () RETURNS TRIGGER AS '
  BEGIN
INSERT INTO check_ign_err VALUES(NEW.n, NEW.m, NEW.k);
RETURN NEW;
  END;
' LANGUAGE plpgsql;
CREATE TRIGGER trig_instead_of INSTEAD OF INSERT ON check_ign_err_view
FOR EACH ROW EXECUTE PROCEDURE fn_trig_instead_of();
COPY check_ign_err_view FROM STDIN WITH IGNORE_ERRORS;
WARNING:  COPY check_ign_err, line 2: "2 2 2 2"
WARNING:  COPY check_ign_err, line 3: "3 3"
WARNING:  COPY check_ign_err, line 4, column n: "a"
WARNING:  COPY check_ign_err, line 5, column m: "b"
WARNING:  COPY check_ign_err, line 6, column n: ""
SELECT * FROM check_ign_err;
1 1 1
2 2 2 2
3 3
a 4 4
5 b b

7 7 7
\.
SELECT * FROM check_ign_err_view;
 n | m | k
---+---+---
 1 | 1 | 1
 7 | 7 | 7
(2 rows)

##

-- foreign table case in postgres_fdw extension

##

-- volatile function in WHERE clause
COPY check_ign_err FROM STDIN WITH IGNORE_ERRORS
  WHERE n = floor(random()*(1-1+1))+1; /* find values equal to 1 */
WARNING:  COPY check_ign_err, line 2: "2 2 2 2"
WARNING:  COPY check_ign_err, line 3: "3 3"
WARNING:  COPY check_ign_err, line 4, column n: "a"
WARNING:  COPY check_ign_err, line 5, column m: "b"
WARNING:  COPY check_ign_err, line 6, column n: ""
SELECT * FROM check_ign_err;
1 1 1
2 2 2 2
3 3
a 4 4
5 b b

7 7 7
\.
SELECT * FROM check_ign_err;
 n | m | k
---+---+---
 1 | 1 | 1
(1 row)

##

-- CIM_MULTI_CONDITIONAL case
-- INSERT triggers for partition tables
CREATE TABLE check_ign_err (n int, m int, k int) PARTITION BY RANGE (n);
CREATE TABLE check_ign_err_part1 PARTITION OF check_ign_err
  FOR VALUES FROM (1) TO (4);
CREATE TABLE check_ign_err_part2 PARTITION OF check_ign_err
  FOR VALUES FROM (4) TO (8);
CREATE FUNCTION fn_trig_before_part () RETURNS TRIGGER AS '
  BEGIN
INSERT INTO trig_test VALUES(NEW.n, NEW.m);
RETURN NEW;
  END;
' LANGUAGE plpgsql;
CREATE TRIGGER trig_before_part BEFORE INSERT ON check_ign_err
FOR EACH ROW EXECUTE PROCEDURE fn_trig_before_part();
COPY check_ign_err FROM STDIN WITH IGNORE_ERRORS;
WARNING:  COPY check_ign_err, line 2: "2 2 2 2"
WARNING:  COPY check_ign_err, line 3: "3 3"
WARNING:  COPY check_ign_err, line 4, column n: "a"
WARNING:  COPY check_ign_err, line 5, column m: "b"
WARNING:  COPY check_ign_err, line 6, column n: ""
SELECT * FROM check_ign_err;
1 1 1
2 2 2 2
3 3
a 4 4
5 b b

7 7 7
\.
 n | m | k
---+---+---
 1 | 1 | 1
 7 | 7 | 7
(2 rows)

Thanks for the feedback.
Regards, Damir
From 

Re: System column support for partitioned tables using heap

2022-07-19 Thread Robert Haas
On Tue, Jul 19, 2022 at 4:44 AM Morris de Oryx  wrote:
> My reason for xmax() in the result is to break down the affected rows count 
> into an insert count, and a modified estimate. Not super critical, but 
> helpful. I've built out some simple custom logging tables in our system for
> this kind of detail, and folks have been wanting to break down rows 
> submitted, rows inserted, and rows updated a bit better. Rows submitted is 
> easy and rows inserted is too...update is an estimate as I'm not using 
> anything fancy with xmax() to sort out what exactly happened.

I wonder whether you could just have the CTEs bubble up 1 or 0 and
then sum them at some stage, instead of relying on xmax. Presumably
your UPSERT simulation knows which thing it did in each case.

For MERGE itself, I wonder if some information about this should be
included in the command tag. It looks like MERGE already includes some
sort of row count in the command tag, but I guess perhaps it doesn't
distinguish between inserts and updates. I don't know why we couldn't
expose multiple values this way, though.

-- 
Robert Haas
EDB: http://www.enterprisedb.com




Re: Memory leak fix in psql

2022-07-19 Thread Michael Paquier
On Tue, Jul 19, 2022 at 06:41:13PM +0800, Japin Li wrote:
> After looking around, I found psql/describe.c also has some memory leaks,
> attached a patch to fix these leaks.

Indeed.  There are quite a bit of them, so let's fix all that.  You
have missed a couple of code paths in objectDescription().
--
Michael
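
The leak pattern being fixed is the same throughout describe.c: a
PQExpBuffer is initialized at the top of the function, and the early
error returns forget to release it. Schematically (build_query() here is
a stand-in for the failing validateSQLNamePattern() calls):

static bool
describeSomething(const char *pattern)
{
	PQExpBufferData buf;

	initPQExpBuffer(&buf);		/* allocates buf.data */

	if (!build_query(&buf, pattern))
	{
		termPQExpBuffer(&buf);	/* without this, buf.data leaks */
		return false;
	}

	/* ... run the query and print the result ... */

	termPQExpBuffer(&buf);
	return true;
}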




Re: errdetail/errhint style

2022-07-19 Thread Alvaro Herrera
On 2022-Jul-19, Justin Pryzby wrote:

> https://www.postgresql.org/docs/current/error-style-guide.html#id-1.10.6.4.7
> 
> git grep 'errdetail("[[:lower:]]'
> git grep 'errdetail(".*".*;' |grep -v '\."'

Hmm, +1, though a few of these are still missing ending periods after
your patch.

-- 
Álvaro HerreraBreisgau, Deutschland  —  https://www.EnterpriseDB.com/
"The problem with the facetime model is not just that it's demoralizing, but
that the people pretending to work interrupt the ones actually working."
   (Paul Graham)




Re: [BUG] Logical replication failure "ERROR: could not map filenode "base/13237/442428" to relation OID" with catalog modifying txns

2022-07-19 Thread Amit Kapila
On Tue, Jul 19, 2022 at 1:10 PM Masahiko Sawada  wrote:
>
> On Mon, Jul 18, 2022 at 8:49 PM Amit Kapila  wrote:
> >
> > On Sun, Jul 17, 2022 at 6:29 PM Masahiko Sawada  
> > wrote:
> > >
> > > On Fri, Jul 15, 2022 at 3:32 PM shiy.f...@fujitsu.com
> > >  wrote:
> > > >
> > >
> > > I've attached patches for all supported branches including the master.
> > >
> >
> > For back branch patches,
> > * Wouldn't it be better to move purge logic into the function
> > SnapBuildPurge* function for the sake of consistency?
>
> Agreed.
>
> > * Do we really need ReorderBufferInitialXactsSetCatalogChanges()?
> > Can't we instead have a function similar to
> > SnapBuildXidHasCatalogChanges() as we have for the master branch? That
> > will avoid calling it when the snapshot
> > state is SNAPBUILD_START or SNAPBUILD_BUILDING_SNAPSHOT
>
> Seems a good idea. We would need to pass the information about
> (parsed->xinfo & XACT_XINFO_HAS_INVALS) to the function but probably
> we can change ReorderBufferXidHasCatalogChanges() so that it checks
> the RBTXN_HAS_CATALOG_CHANGES flag and then the initial running xacts
> array.
>

Let's try to keep this as much similar to the master branch patch as possible.

> BTW on backbranches, I think that the reason why we add
> initial_running_xacts stuff to ReorderBuffer is that we cannot modify
> SnapBuild that could be serialized. Can we add a (private) array for
> the initial running xacts in snapbuild.c instead of adding new
> variables to ReorderBuffer?
>

While thinking about this, I wonder if the current patch for back
branches can lead to an ABI break as it changes the exposed structure?
If so, it may be another reason to change it to some other way
probably as you are suggesting.

-- 
With Regards,
Amit Kapila.
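
A rough shape of what that check could look like on the back branches,
along the lines of the master branch's SnapBuildXidHasCatalogChanges()
(the name, signature, and where the initial running-xacts array lives
are all illustrative here, not settled):

static bool
XidHasCatalogChanges(SnapBuild *builder, TransactionId xid, bool has_invals,
					 TransactionId *initial_running_xacts, int nxacts)
{
	/* Fast path: the txn was already marked while decoding its WAL. */
	if (ReorderBufferXidHasCatalogChanges(builder->reorder, xid))
		return true;

	/*
	 * Otherwise, if the commit record carried invalidations, check whether
	 * the xid was running when decoding started; keeping the array sorted
	 * lets bsearch with xidComparator do the lookup.
	 */
	if (has_invals &&
		bsearch(&xid, initial_running_xacts, nxacts,
				sizeof(TransactionId), xidComparator) != NULL)
		return true;

	return false;
}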




errdetail/errhint style

2022-07-19 Thread Justin Pryzby
Most of these are new in v15.
In any case, I'm not sure if the others ought to be backpatched.
There may be additional fixes to make with the same grepping.
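
The style rule at issue, per the error message style guide: errmsg()
text starts lowercase and carries no trailing period, while errdetail()
and errhint() are complete sentences, capitalized and ending with a
period. For example, after this patch:

ereport(ERROR,
		(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
		 errmsg("password_required=false is superuser-only"),
		 errhint("User mappings with the password_required option set to "
				 "false may only be created or modified by the superuser.")));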
From a33bd79c2f36046463489fbd37c76d7f0c3f7a8e Mon Sep 17 00:00:00 2001
From: Justin Pryzby 
Date: Mon, 18 Jul 2022 13:54:38 -0500
Subject: [PATCH] errdetail/hint messages should be capitalized and end with a
 dot

https://www.postgresql.org/docs/current/error-style-guide.html#id-1.10.6.4.7

git grep 'errdetail("[[:lower:]]'
git grep 'errdetail(".*".*;' |grep -v '\."'

See also:
501ed02cf6f4f60c3357775eb07578aebc912d3a
730422afcdb6784bbe20efc65de72156d470b0c4
---
 contrib/basic_archive/basic_archive.c |  4 +--
 .../postgres_fdw/expected/postgres_fdw.out|  6 ++--
 contrib/postgres_fdw/option.c |  4 +--
 src/backend/commands/publicationcmds.c|  2 +-
 src/backend/commands/tablecmds.c  |  2 +-
 src/backend/parser/parse_expr.c   |  4 +--
 src/backend/parser/parse_jsontable.c  | 12 
 src/backend/utils/adt/jsonpath_exec.c |  2 +-
 src/backend/utils/adt/jsonpath_gram.y |  2 +-
 src/backend/utils/misc/guc.c  |  2 +-
 src/test/regress/expected/jsonb_sqljson.out   | 28 +--
 src/test/regress/expected/jsonpath.out|  2 +-
 src/test/regress/expected/publication.out |  2 +-
 src/test/regress/expected/sqljson.out | 10 +++
 src/test/regress/expected/triggers.out|  2 +-
 15 files changed, 42 insertions(+), 42 deletions(-)

diff --git a/contrib/basic_archive/basic_archive.c b/contrib/basic_archive/basic_archive.c
index bba767c8f36..776a386e352 100644
--- a/contrib/basic_archive/basic_archive.c
+++ b/contrib/basic_archive/basic_archive.c
@@ -111,7 +111,7 @@ check_archive_directory(char **newval, void **extra, GucSource source)
 	 */
 	if (strlen(*newval) + 64 + 2 >= MAXPGPATH)
 	{
-		GUC_check_errdetail("archive directory too long");
+		GUC_check_errdetail("Archive directory too long.");
 		return false;
 	}
 
@@ -122,7 +122,7 @@ check_archive_directory(char **newval, void **extra, GucSource source)
 	 */
 	if (stat(*newval, &st) != 0 || !S_ISDIR(st.st_mode))
 	{
-		GUC_check_errdetail("specified archive directory does not exist");
+		GUC_check_errdetail("Specified archive directory does not exist.");
 		return false;
 	}
 
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index ebf9ea35988..ade797159dc 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -9590,7 +9590,7 @@ HINT:  Target server's authentication method must be changed or password_require
 -- Unpriv user cannot make the mapping passwordless
 ALTER USER MAPPING FOR CURRENT_USER SERVER loopback_nopw OPTIONS (ADD password_required 'false');
 ERROR:  password_required=false is superuser-only
-HINT:  User mappings with the password_required option set to false may only be created or modified by the superuser
+HINT:  User mappings with the password_required option set to false may only be created or modified by the superuser.
 SELECT 1 FROM ft1_nopw LIMIT 1;
 ERROR:  password is required
 DETAIL:  Non-superuser cannot connect if the server does not request a password.
@@ -9611,10 +9611,10 @@ SELECT 1 FROM ft1_nopw LIMIT 1;
 ALTER USER MAPPING FOR CURRENT_USER SERVER loopback_nopw OPTIONS (SET password_required 'true');
 ALTER USER MAPPING FOR CURRENT_USER SERVER loopback_nopw OPTIONS (ADD sslcert 'foo.crt');
 ERROR:  sslcert and sslkey are superuser-only
-HINT:  User mappings with the sslcert or sslkey options set may only be created or modified by the superuser
+HINT:  User mappings with the sslcert or sslkey options set may only be created or modified by the superuser.
 ALTER USER MAPPING FOR CURRENT_USER SERVER loopback_nopw OPTIONS (ADD sslkey 'foo.key');
 ERROR:  sslcert and sslkey are superuser-only
-HINT:  User mappings with the sslcert or sslkey options set may only be created or modified by the superuser
+HINT:  User mappings with the sslcert or sslkey options set may only be created or modified by the superuser.
 -- We're done with the role named after a specific user and need to check the
 -- changes to the public mapping.
 DROP USER MAPPING FOR CURRENT_USER SERVER loopback_nopw;
diff --git a/contrib/postgres_fdw/option.c b/contrib/postgres_fdw/option.c
index cd2ef234d66..95dde056eba 100644
--- a/contrib/postgres_fdw/option.c
+++ b/contrib/postgres_fdw/option.c
@@ -193,7 +193,7 @@ postgres_fdw_validator(PG_FUNCTION_ARGS)
 ereport(ERROR,
 		(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
 		 errmsg("password_required=false is superuser-only"),
-		 errhint("User mappings with the password_required option set to false may only be created or modified by the superuser")));
+		 errhint("User mappings with the password_required option set to false may only be created or modified by the superuser.")));
 		}
 		else if 

Re: add checkpoint stats of snapshot and mapping files of pg_logical dir

2022-07-19 Thread Bharath Rupireddy
On Tue, Apr 26, 2022 at 6:31 AM Michael Paquier  wrote:
>
> On Mon, Apr 25, 2022 at 01:34:38PM -0700, Nathan Bossart wrote:
> > I took another look at the example output, and I think I agree that logging
> > the total time for logical decoding operations is probably the best path
> > forward.  This information would be enough to clue an administrator into
> > the possible causes of lengthy checkpoints, but it also wouldn't disrupt
> > the readability of the log statement too much.
>
> +   /* translator: the placeholders after first %s show 
> restartpoint/checkpoint options */
> +   (errmsg("%s starting:%s%s%s%s%s%s%s%s",
> +   restartpoint ?
> _("restartpoint") : _("checkpoint"),
>
> 0001 breaks translation, as "checkpoint/restartpoint" and "starting"
> would be treated as separate terms to translate.  That would not matter
> for English, but it does in French where we'd say "début du
> checkpoint".  You could fix that by adding "starting" to each
> refactored term or by building a string.  0002 does the latter, so my take
> is that you should begin using a StringInfo in 0001.

Thanks for reviewing. I've addressed the review comments, PSA v10
patch. Note that we can't use StringInfo as the checkpointer memory
context doesn't allow pallocs in the critical section and the
checkpoint can sometimes be run in the critical section.
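
The measurement itself follows the pattern already used for the other
checkpoint phases: capture start and end timestamps around the logical
decoding file processing and report the difference. Roughly (the exact
placement around CheckPointSnapBuild()/CheckPointLogicalRewriteHeap() is
a sketch of the patch, not committed code):

/* in CheckPointGuts(), roughly: */
CheckpointStats.l_dec_ops_start_t = GetCurrentTimestamp();
CheckPointSnapBuild();			/* remove stale snapshot files */
CheckPointLogicalRewriteHeap(); /* flush/remove rewrite mapping files */
CheckpointStats.l_dec_ops_end_t = GetCurrentTimestamp();

/* later, in LogCheckpointEnd(): */
l_dec_ops_msecs = TimestampDifferenceMilliseconds(CheckpointStats.l_dec_ops_start_t,
												  CheckpointStats.l_dec_ops_end_t);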

I've also added the total number of WAL files a checkpoint has
processed (by scanning the pg_wal directory) while removing old WAL
files. This helps estimate the pg_wal disk space in use at the time of
a particular checkpoint, which is especially useful for debugging issues.

[1] sample output:
2022-07-19 10:33:45.378 UTC [3027866] LOG:  checkpoint starting: wal
2022-07-19 10:33:51.434 UTC [3027866] LOG:  checkpoint complete: wrote
50 buffers (0.3%); 0 WAL file(s) added, 12 removed, 35 recycled, 76
processed; write=3.651 s, sync=0.011 s, total=6.136 s; sync files=11,
longest=0.004 s, average=0.001 s; distance=770045 kB, estimate=770045
kB; lsn=0/95000260, redo lsn=0/7968; logical decoding file(s)
processing=0.007 s

Regards,
Bharath Rupireddy.
From b113dfb9e876a84c2121b64ab94a9d10c3c670b4 Mon Sep 17 00:00:00 2001
From: Bharath Rupireddy 
Date: Tue, 19 Jul 2022 10:35:29 +
Subject: [PATCH v10] Add time taken for processing logical decoding files to
 checkpoint log

At times, there can be many snapshot and mapping files under
pg_logical dir that the checkpoint might have to delete/fsync
based on the cutoff LSN which can increase the checkpoint time.
Add stats related to these files to better understand the delays
or time spent by the checkpointer processing them.

Also, add total number of WAL files processed during checkpoint to
the log message. This will be useful for debugging issues like
total time taken by checkpoint (if there are many WAL files) and
estimate the disk space occupied by pg_wal at the time of checkpoint.
---
 src/backend/access/transam/xlog.c | 30 +++---
 src/include/access/xlog.h |  5 +
 2 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index b809a2152c..20c1689ed2 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -3603,6 +3603,8 @@ RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr lastredoptr, XLogRecPtr endptr,
 			   insertTLI);
 			}
 		}
+
+		CheckpointStats.ckpt_segs_processed++;
 	}
 
 	FreeDir(xldir);
@@ -6092,7 +6094,8 @@ LogCheckpointEnd(bool restartpoint)
 sync_msecs,
 total_msecs,
 longest_msecs,
-average_msecs;
+average_msecs,
+l_dec_ops_msecs;
 	uint64		average_sync_time;
 
 	CheckpointStats.ckpt_end_t = GetCurrentTimestamp();
@@ -6129,6 +6132,9 @@ LogCheckpointEnd(bool restartpoint)
 			CheckpointStats.ckpt_sync_rels;
 	average_msecs = (long) ((average_sync_time + 999) / 1000);
 
+	l_dec_ops_msecs = TimestampDifferenceMilliseconds(CheckpointStats.l_dec_ops_start_t,
+	  CheckpointStats.l_dec_ops_end_t);
+
 	/*
 	 * ControlFileLock is not required to see ControlFile->checkPoint and
 	 * ->checkPointCopy here as we are the only updator of those variables at
@@ -6137,16 +6143,18 @@ LogCheckpointEnd(bool restartpoint)
 	if (restartpoint)
 		ereport(LOG,
 (errmsg("restartpoint complete: wrote %d buffers (%.1f%%); "
-		"%d WAL file(s) added, %d removed, %d recycled; "
+		"%d WAL file(s) added, %d removed, %d recycled, %d processed; "
 		"write=%ld.%03d s, sync=%ld.%03d s, total=%ld.%03d s; "
 		"sync files=%d, longest=%ld.%03d s, average=%ld.%03d s; "
 		"distance=%d kB, estimate=%d kB; "
-		"lsn=%X/%X, redo lsn=%X/%X",
+		"lsn=%X/%X, redo lsn=%X/%X; "
+		"logical decoding file(s) processing=%ld.%03d s",
 		CheckpointStats.ckpt_bufs_written,
 		(double) CheckpointStats.ckpt_bufs_written * 100 / NBuffers,
 		CheckpointStats.ckpt_segs_added,
 		

Re: [BUG] Logical replication failure "ERROR: could not map filenode "base/13237/442428" to relation OID" with catalog modifying txns

2022-07-19 Thread Amit Kapila
On Tue, Jul 19, 2022 at 1:43 PM Kyotaro Horiguchi
 wrote:
>
> At Tue, 19 Jul 2022 16:57:14 +0900 (JST), Kyotaro Horiguchi 
>  wrote in
> > At Tue, 19 Jul 2022 16:02:26 +0900, Masahiko Sawada  
> > wrote in
> > > On Tue, Jul 19, 2022 at 1:47 PM Amit Kapila  
> > > wrote:
> > > > Good work. I wonder without comments this may create a problem in the
> > > > future. OTOH, I don't see adding a check "catchange.xcnt > 0" before
> > > > freeing the memory any less robust. Also, for consistency, we can use
> > > > a similar check based on xcnt in the SnapBuildRestore to free the
> > > > memory in the below code:
> > > > + /* set catalog modifying transactions */
> > > > + if (builder->catchange.xip)
> > > > + pfree(builder->catchange.xip);
> > >
> > > I would hesitate to add comments about preventing the particular
> > > optimization. I think we do null-pointer-check-then-pfree many place.
> > > It seems to me that checking the array length before memcpy is more
> > > natural than checking both the array length and the array existence
> > > before pfree.
> >
> > Anyway, according to the commit message of 46ab07ffda, POSIX forbids
> > memcpy(NULL, NULL, 0). It seems to me that that is the cause of the
> > false (or over-) optimization.  So if we add some comment, it would be
> > for memcpy, not pfree.
>
> For clarity, I meant that I don't think we need that comment.
>

Fair enough. I think commit 46ab07ffda clearly explains why it is a
good idea to add a check as Sawada-san did in his latest version. I
also agree that we don't need any comment for this change.

-- 
With Regards,
Amit Kapila.
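
The point about commit 46ab07ffda, concretely: memcpy() with a NULL
pointer is undefined behavior even when the length is zero, and
compilers may use that to assume the pointer is non-NULL and drop later
NULL checks. Hence a guard of roughly this shape (sketch):

/* Skip the copy, and the undefined behavior, when there is nothing to copy. */
if (xcnt > 0)
	memcpy(builder->catchange.xip, catchange_xip,
		   sizeof(TransactionId) * xcnt);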




Re: PG 15 (and to a smaller degree 14) regression due to ExprEvalStep size

2022-07-19 Thread Amit Langote
Hi,

On Tue, Jul 19, 2022 at 4:09 AM Andrew Dunstan  wrote:
> On 2022-07-15 Fr 17:07, Andres Freund wrote:
> > Perhaps you could post your current state? I might be able to help resolving
> > some of the problems.
>
> Ok. Here is the state of things. This has proved to be rather more
> intractable than I expected. Almost all the legwork here has been done
> by Amit Langote, for which he deserves both my thanks and considerable
> credit, but I take responsibility for it.
>
> I just discovered today that this scheme is failing under
> "force_parallel_mode = regress". I have as yet no idea if that can be
> fixed simply or not.

The errors Andrew mentions here had to do with a bug in the new
coercion evaluation logic.  The old code in ExecEvalJsonExpr() would
skip coercion evaluation, and thus also the sub-transaction associated
with it, for some JsonExprs where the new code would not, and that
didn't sit well with the invariant that a parallel worker shouldn't
try to start a sub-transaction.

That bug has been fixed in the attached updated version.

> Apart from that I think the main outstanding issue
> is to fill in the gaps in llvm_compile_expr().

About that, I was wondering if the blocks in llvm_compile_expr() need
to be hand-coded to match what's added in ExecInterpExpr() or if I've
missed some tool that can be used instead?

-- 
Thanks, Amit Langote
EDB: http://www.enterprisedb.com


v2-0002-Evaluate-various-JsonExpr-sub-expressions-using-p.patch
Description: Binary data


v2-0003-Use-one-ExprState-to-implement-JsonItemCoercions.patch
Description: Binary data


v2-0001-in-JsonExprState-just-store-a-pointer-to-the-inpu.patch
Description: Binary data


Re: Handle infinite recursion in logical replication setup

2022-07-19 Thread Amit Kapila
On Mon, Jul 18, 2022 at 9:46 PM vignesh C  wrote:
>
> I have updated the patch to handle the origin value case
> insensitively. The attached patch has the changes for the same.
>

Thanks, the patch looks mostly good to me. I have made a few changes
in the 0001 patch, which are as follows: (a) made the comparison of origin
names in maybe_reread_subscription similar to slot names, as in the future
we may support origin names other than 'any' and 'none', (b) made
comment changes in a few places and a minor change in one of the error
messages, (c) ran pgindent and slightly changed the commit message.

I am planning to push this day after tomorrow unless there are any
comments/suggestions.

-- 
With Regards,
Amit Kapila.


v35-0001-Allow-uses-to-skip-logical-replication-of-data-h.patch
Description: Binary data
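
For (a), the case-insensitive handling of the reserved origin values
presumably reduces to pg_strcasecmp() at the validation site; a sketch
(the exact constants and error wording belong to the patch series, not
committed code):

/* Accept the reserved origin values in any case. */
if (pg_strcasecmp(origin, "any") != 0 &&
	pg_strcasecmp(origin, "none") != 0)
	ereport(ERROR,
			(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
			 errmsg("unrecognized origin value: \"%s\"", origin)));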


Re: Proposal: allow database-specific role memberships

2022-07-19 Thread Antonin Houska
Kenaniah Cerny  wrote:

> Rebased yet again...
> 
> On Mon, Jul 4, 2022 at 1:17 PM Kenaniah Cerny  wrote:

>  The "unsafe_tests" directory is where the pre-existing role tests were
>  located. According to the readme of the "unsafe_tests" directory, the tests
>  contained within are not run during "make installcheck" because they could
>  have side-effects that seem undesirable for a production installation. This
>  seemed like a reasonable location as the new tests that this patch
>  introduces also modifies the "state" of the database cluster by adding,
>  modifying, and removing roles & databases (including template1).

ok, I missed the purpose of "unsafe_tests" so far, thanks.

>  Regarding roles_is_member_of(), the nuance is that role "A" in your example
>  would only be considered a member of role "B" (and by extension role "C")
>  when connected to the database in which "A" was granted database-specific
>  membership to "B".

>  Conversely, when connected to any other database, "A" would not be 
> considered to be a member of "B".  
> 
>  This patch is designed to solve the scenarios in which one may want to
>  grant constrained access to a broader set of privileges. For example,
>  membership in "pg_read_all_data" effectively grants SELECT and USAGE rights
>  on everything (implicitly cluster-wide in today's implementation). By
>  granting a role membership to "pg_read_all_data" within the context of a
>  specific database, the grantee's read-everything privilege is effectively
>  constrained to just that specific database (as membership within
>  "pg_read_all_data" would not otherwise be held).

ok, I tried to view the problem from a more general perspective. However, the
permissions like "pg_read_all_data" are unusual in that they are rather strong
and at the same time they are usually located at the top of the groups
hierarchy. I've got no better idea how to solve the problem.

A few more comments on the patch:

* It's not clear from the explanation of the GRANT ... IN DATABASE ... / GRANT
  ... IN CURRENT DATABASE ... that, even if "membership in ... will be
  effective only when the recipient is connected to the database ...", the
  ADMIN option might not be "fully effective". I refer to the part of the
  regression tests starting with

-- Ensure database-specific admin option can only grant within that database

  For example, "role_read_34" does have the ADMIN option for the
  "pg_read_all_data" role and for the "db_4" database:

GRANT pg_read_all_data TO role_read_34 IN DATABASE db_4 WITH ADMIN OPTION;

  (in other words, "role_read_34" does have the database-specific membership
  in "pg_read_all_data"), but it cannot use the option (in other words, cannot
  use some ability resulting from that membership) unless the session to that
  database is active:

\connect db_3
SET SESSION AUTHORIZATION role_read_34;
...
GRANT pg_read_all_data TO role_granted IN CURRENT DATABASE; -- success
GRANT pg_read_all_data TO role_granted IN DATABASE db_3; -- notice
NOTICE:  role "role_granted" is already a member of role "pg_read_all_data" 
in database "db_3"
GRANT pg_read_all_data TO role_granted IN DATABASE db_4; -- error
ERROR:  must have admin option on role "pg_read_all_data"


Specifically on the regression tests:

* The function check_memberships() has no parameters - is there a reason 
not to use a view?

* I'm not sure if the pg_auth_members catalog can contain InvalidOid in
  other columns than dbid. Thus I think that the query in
  check_memberships() only needs an outer JOIN for the pg_database table,
  while the other joins can be inner.

* In this part

SET SESSION AUTHORIZATION role_read_12_noinherit;
SELECT * FROM data; -- error
SET ROLE role_read_12; -- error
SELECT * FROM data; -- error

I think you don't need to query the table again if the SET ROLE statement
failed and the same query had been executed before the SET ROLE.


-- 
Antonin Houska
Web: https://www.cybertec-postgresql.com




Re: [PATCH] Introduce array_shuffle() and array_sample()

2022-07-19 Thread Martin Kalcher

On 18.07.22 at 23:48, Martin Kalcher wrote:


If we go with (1) array_shuffle() and array_sample() should shuffle each 
element individually and always return a one-dimensional array.


   select array_shuffle('{{1,2},{3,4},{5,6}}');
   ---
    {1,4,3,5,6,2}

   select array_sample('{{1,2},{3,4},{5,6}}', 3);
   --
    {1,4,3}

If we go with (2) both functions should only operate on the first 
dimension and shuffle whole subarrays and keep the dimensions intact.


   select array_shuffle('{{1,2},{3,4},{5,6}}');
   -
    {{3,4},{1,2},{5,6}}

   select array_sample('{{1,2},{3,4},{5,6}}', 2);
   ---
    {{3,4},{1,2}}



Having thought about it, I would go with (2). It gives the user the
ability to decide whether or not array-of-arrays behavior is desired. If
he wants the behavior of (1) he can flatten the array before applying
array_shuffle(). Unfortunately there is no array_flatten() function (at
the moment) and the user would have to work around it with unnest() and
array_agg().





Re: Memory leak fix in psql

2022-07-19 Thread Japin Li

On Tue, 19 Jul 2022 at 17:02, tanghy.f...@fujitsu.com  
wrote:
> Hi
>
> I think there is a newly introduced memory leak in your patch d2d3547.
> I tried to fix it in the attached patch.
> Kindly have a check.
>

Yeah, it leaks, and the patch can fix it.

After looking around, I found psql/describe.c also has some memory leaks,
attached a patch to fix these leaks.

-- 
Regards,
Japin Li.
ChengDu WenWu Information Technology Co.,Ltd.

From 3036a0986f8ff490c133930524e2d5f5104249ff Mon Sep 17 00:00:00 2001
From: Japin Li 
Date: Tue, 19 Jul 2022 18:27:25 +0800
Subject: [PATCH v1 1/1] Fix the memory leak in psql describe

---
 src/bin/psql/describe.c | 150 
 1 file changed, 150 insertions(+)

diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c
index 0ce38e4b4c..11a441f52f 100644
--- a/src/bin/psql/describe.c
+++ b/src/bin/psql/describe.c
@@ -112,7 +112,10 @@ describeAggregates(const char *pattern, bool verbose, bool showSystem)
 								"n.nspname", "p.proname", NULL,
 								"pg_catalog.pg_function_is_visible(p.oid)",
 								NULL, 3))
+	{
+		termPQExpBuffer(&buf);
 		return false;
+	}
 
 	appendPQExpBufferStr(&buf, "ORDER BY 1, 2, 4;");
 
@@ -182,7 +185,10 @@ describeAccessMethods(const char *pattern, bool verbose)
 								NULL, "amname", NULL,
 								NULL,
 								NULL, 1))
+	{
+		termPQExpBuffer(&buf);
 		return false;
+	}
 
 	appendPQExpBufferStr(&buf, "ORDER BY 1;");
 
@@ -244,7 +250,10 @@ describeTablespaces(const char *pattern, bool verbose)
 								NULL, "spcname", NULL,
 								NULL,
 								NULL, 1))
+	{
+		termPQExpBuffer(&buf);
 		return false;
+	}
 
 	appendPQExpBufferStr(&buf, "ORDER BY 1;");
 
@@ -534,7 +543,10 @@ describeFunctions(const char *functypes, const char *func_pattern,
 								"n.nspname", "p.proname", NULL,
 								"pg_catalog.pg_function_is_visible(p.oid)",
 								NULL, 3))
+	{
+		termPQExpBuffer(&buf);
 		return false;
+	}
 
 	for (int i = 0; i < num_arg_patterns; i++)
 	{
@@ -561,7 +573,10 @@ describeFunctions(const char *functypes, const char *func_pattern,
 										true, false,
 										nspname, typname, ft, tiv,
 										NULL, 3))
+			{
+				termPQExpBuffer(&buf);
 				return false;
+			}
 		}
 		else
 		{
@@ -682,7 +697,10 @@ describeTypes(const char *pattern, bool verbose, bool showSystem)
 								"pg_catalog.format_type(t.oid, NULL)",
 								"pg_catalog.pg_type_is_visible(t.oid)",
 								NULL, 3))
+	{
+		termPQExpBuffer(&buf);
 		return false;
+	}
 
 	appendPQExpBufferStr(&buf, "ORDER BY 1, 2;");
 
@@ -836,7 +854,10 @@ describeOperators(const char *oper_pattern,
 								"n.nspname", "o.oprname", NULL,
 								"pg_catalog.pg_operator_is_visible(o.oid)",
 								NULL, 3))
+	{
+		termPQExpBuffer(&buf);
 		return false;
+	}
 
 	if (num_arg_patterns == 1)
 		appendPQExpBufferStr(&buf, "  AND o.oprleft = 0\n");
@@ -866,7 +887,10 @@ describeOperators(const char *oper_pattern,
 										true, false,
 										nspname, typname, ft, tiv,
 										NULL, 3))
+			{
+				termPQExpBuffer(&buf);
 				return false;
+			}
 		}
 		else
 		{
@@ -953,7 +977,10 @@ listAllDbs(const char *pattern, bool verbose)
 		if (!validateSQLNamePattern(&buf, pattern, false, false,
 									NULL, "d.datname", NULL, NULL,
 									NULL, 1))
+		{
+			termPQExpBuffer(&buf);
 			return false;
+		}
 
 	appendPQExpBufferStr(&buf, "ORDER BY 1;");
 	res = PSQLexec(buf.data);
@@ -1106,7 +1133,10 @@ permissionsList(const char *pattern)
 								"n.nspname", "c.relname", NULL,
 								"n.nspname !~ '^pg_' AND pg_catalog.pg_table_is_visible(c.oid)",
 								NULL, 3))
+	{
+		termPQExpBuffer(&buf);
 		return false;
+	}
 
 	appendPQExpBufferStr(&buf, "ORDER BY 1, 2;");
 
@@ -1177,7 +1207,10 @@ listDefaultACLs(const char *pattern)
 								"pg_catalog.pg_get_userbyid(d.defaclrole)",
 								NULL,
 								NULL, 3))
+	{
+		termPQExpBuffer(&buf);
 		return false;
+	}
 
 	appendPQExpBufferStr(&buf, "ORDER BY 1, 2, 3;");
 
@@ -1428,7 +1461,10 @@ describeTableDetails(const char *pattern, bool verbose, bool showSystem)
 								"n.nspname", "c.relname", NULL,
 								"pg_catalog.pg_table_is_visible(c.oid)",
 								NULL, 3))
+	{
+		termPQExpBuffer(&buf);
 		return false;
+	}
 
 	appendPQExpBufferStr(&buf, "ORDER BY 2, 3;");
 
@@ -3614,7 +3650,10 @@ describeRoles(const char *pattern, bool verbose, bool showSystem)
 	if (!validateSQLNamePattern(&buf, pattern, false, false,
 								NULL, "r.rolname", NULL, NULL,
 								NULL, 1))
+	{
+		termPQExpBuffer(&buf);
 		return false;
+	}
 
 	appendPQExpBufferStr(&buf, "ORDER BY 1;");
 
@@ -3739,11 +3778,17 @@ listDbRoleSettings(const char *pattern, const char *pattern2)
 					  gettext_noop("Settings"));
 	if (!validateSQLNamePattern(&buf, pattern, false, false,
 								NULL, "r.rolname", NULL, NULL, &havewhere, 1))
+	{
+		termPQExpBuffer(&buf);
 		return false;
+	}
 	if (!validateSQLNamePattern(&buf, pattern2, havewhere, false,
 								NULL, "d.datname", NULL, NULL,
 								NULL, 1))
+	{
+		termPQExpBuffer(&buf);
 		return false;
+	}
 	appendPQExpBufferStr(&buf, "ORDER BY 1, 2;");
 
 	res = PSQLexec(buf.data);
@@ -3940,7 
