Re: glibc qsort() vulnerability

2024-02-09 Thread Mats Kindahl
On Fri, Feb 9, 2024 at 9:08 PM Nathan Bossart 
wrote:

> On Fri, Feb 09, 2024 at 08:43:21PM +0100, Mats Kindahl wrote:
> > QQ: right now it looks like this:
> >
> > static inline int
> > pg_cmp_u16(uint16 a, uint16 b)
> > {
> >
> > return (int32)a - (int32)b;
> >
> > }
> >
> >
> > and
> >
> > static inline int
> > pg_cmp_u32(uint32 a, uint32 b)
> > {
> >
> > return (a > b) - (a < b);
> >
> > }
> >
> >
> > I think that is clear enough, but do you want more casts added for the
> > return value as well?
>
> I think that is reasonably clear.  The latter does require you to know that
> < and > return (int) 0 or (int) 1, which might be worth a short comment.
> But that's just nitpicking...
>
>
Hi all,

Split the code into two patches: one that just adds the functions
(including the new pg_cmp_size()) to common/int.h and one that starts using
them. I picked the name "pg_cmp_size" rather than "pg_cmp_size_t" since
"_t" is usually used as a suffix for types.

I added a comment to the (a > b) - (a < b) return and have also added casts
to (int32) for the int16 and uint16 functions (we need a signed int for
uint16 since we need to be able to get a negative number).

Changed the type of two instances that had an implicit cast from size_t to
int and used the new pg_cmp_size() function.

Also fixed the missed replacements in the "contrib" directory.

Best wishes,
Mats Kindahl


> --
> Nathan Bossart
> Amazon Web Services: https://aws.amazon.com
>
From 84ed91e96f950cdb92952c8248b2c754d3c4a034 Mon Sep 17 00:00:00 2001
From: Mats Kindahl 
Date: Fri, 9 Feb 2024 21:48:27 +0100
Subject: Use integer comparison functions

Use overflow-safe integer functions when implementing
qsort() comparison functions.
---
 contrib/hstore/hstore_gist.c|  3 ++-
 contrib/intarray/_intbig_gist.c |  3 ++-
 contrib/pg_stat_statements/pg_stat_statements.c |  8 ++--
 contrib/pg_trgm/trgm_op.c   |  8 ++--
 src/backend/access/nbtree/nbtinsert.c   |  8 ++--
 src/backend/access/nbtree/nbtpage.c | 10 +++---
 src/backend/access/nbtree/nbtsplitloc.c |  8 ++--
 src/backend/access/spgist/spgdoinsert.c |  5 ++---
 src/backend/access/spgist/spgtextproc.c |  3 ++-
 src/backend/backup/basebackup_incremental.c |  8 ++--
 src/backend/backup/walsummary.c |  7 ++-
 src/backend/catalog/heap.c  |  8 ++--
 src/backend/nodes/list.c| 13 +++--
 src/backend/nodes/tidbitmap.c   |  7 ++-
 src/backend/parser/parse_agg.c  |  7 ---
 src/backend/postmaster/autovacuum.c |  6 ++
 src/backend/replication/logical/reorderbuffer.c |  7 ++-
 src/backend/replication/syncrep.c   |  8 ++--
 src/backend/utils/adt/oid.c |  7 ++-
 src/backend/utils/adt/tsgistidx.c   | 10 +++---
 src/backend/utils/adt/tsquery_gist.c|  6 ++
 src/backend/utils/adt/tsvector.c|  5 ++---
 src/backend/utils/adt/tsvector_op.c |  5 ++---
 src/backend/utils/adt/xid.c |  7 ++-
 src/backend/utils/cache/relcache.c  |  3 ++-
 src/backend/utils/cache/syscache.c  |  5 ++---
 src/backend/utils/cache/typcache.c  |  8 ++--
 src/backend/utils/resowner/resowner.c   | 10 ++
 src/bin/pg_dump/pg_dump_sort.c  |  7 ++-
 src/bin/pg_upgrade/function.c   | 13 +++--
 src/bin/pg_walsummary/pg_walsummary.c   |  8 ++--
 src/bin/psql/crosstabview.c |  3 ++-
 src/include/access/gin_private.h|  8 ++--
 33 files changed, 76 insertions(+), 156 deletions(-)

diff --git a/contrib/hstore/hstore_gist.c b/contrib/hstore/hstore_gist.c
index fe343739eb..1079f25bf7 100644
--- a/contrib/hstore/hstore_gist.c
+++ b/contrib/hstore/hstore_gist.c
@@ -7,6 +7,7 @@
 #include "access/reloptions.h"
 #include "access/stratnum.h"
 #include "catalog/pg_type.h"
+#include "common/int.h"
 #include "hstore.h"
 #include "utils/pg_crc.h"
 
@@ -356,7 +357,7 @@ typedef struct
 static int
 comparecost(const void *a, const void *b)
 {
-	return ((const SPLITCOST *) a)->cost - ((const SPLITCOST *) b)->cost;
+	return pg_cmp_s32(((const SPLITCOST *) a)->cost, ((const SPLITCOST *) b)->cost);
 }
 
 
diff --git a/contrib/intarray/_intbig_gist.c b/contrib/intarray/_intbig_gist.c
index 8c6c4b5d05..aef9e13dcc 100644
--- a/contrib/intarray/_intbig_gist.c
+++ b/contrib/intarray/_intbig_gist.c
@@ -9,6 +9,7 @@
 #include "access/gist.h"
 #include "access/reloptions.h"
 #include "access/stratnum.h"
+#include "common/int.h"
 #include "port/pg_bitutils.h"
 
 #define GETENTRY(vec,pos) ((GISTTYPE *) DatumGetPointer((vec)->vector[(pos)].key))
@@ -312,7 +313,7 @@ typedef struct
 static int
 comparecost(const void *a, const void *b)
 {
-	return ((const SPLITCOST *) 

Re: POC, WIP: OR-clause support for indexes

2024-02-09 Thread jian he
On Thu, Feb 8, 2024 at 1:34 PM Andrei Lepikhov
 wrote:
>
> On 3/2/2024 02:06, Alena Rybakina wrote:
> > On 01.02.2024 08:00, jian he wrote:
> > I added your code to the patch.
> Thanks Alena and Jian for the detailed scrutiny!
>
> A couple of questions:
> 1. As I see, transformAExprIn uses the same logic as we invented but
> allows composite and domain types. Could you add a comment explaining
> why we forbid row types in general, in contrast to the transformAExprIn
> routine?
> 2. Could you provide the tests to check issues covered by the recent (in
> v.15) changes?
>
> Patch 0001-* in the attachment incorporates changes induced by Jian's
> notes from [1].
> Patch 0002-* contains a transformation of the SAOP clause, which allows
> the optimizer to utilize partial indexes if they cover all values in
> this array. Also, it is an answer to Alexander's note [2] on performance
> degradation. This first version may be a bit raw, but I need your
> opinion: Does it resolve the issue?
yes. It resolved the partial index performance degradation issue.
The v16, 0002 extra code overhead is limited.

Here is how I test it.
drop table if exists test;
create table test as (select (random()*100)::int x, (random()*1000) y
from generate_series(1,100) i);
create index test_x_1_y on test (y) where x = 1;
create index test_x_2_y on test (y) where x = 2;
create index test_x_3_y on test (y) where x = 3;
create index test_x_4_y on test (y) where x = 4;
create index test_x_5_y on test (y) where x = 5;
create index test_x_6_y on test (y) where x = 6;
create index test_x_7_y on test (y) where x = 7;
create index test_x_8_y on test (y) where x = 8;
create index test_x_9_y on test (y) where x = 9;
create index test_x_10_y on test (y) where x = 10;

set enable_or_transformation to on;
explain(analyze, costs off)
select * from test
where (x = 1 or x = 2 or x = 3 or x = 4 or x = 5 or x = 6 or x = 7 or
x = 8 or x = 9 or x = 10);

set enable_or_transformation to off;
explain(analyze, costs off)
select * from test
where (x = 1 or x = 2 or x = 3 or x = 4 or x = 5 or x = 6 or x = 7 or
x = 8 or x = 9 or x = 10);



FAILED: src/backend/postgres_lib.a.p/optimizer_path_indxpath.c.o
ccache cc -Isrc/backend/postgres_lib.a.p -Isrc/include
-I../../Desktop/pg_src/src8/postgres/src/include
-I/usr/include/libxml2 -fdiagnostics-color=always --coverage
-D_FILE_OFFSET_BITS=64 -Wall -Winvalid-pch -Werror -O0 -g
-fno-strict-aliasing -fwrapv -fexcess-precision=standard -D_GNU_SOURCE
-Wmissing-prototypes -Wpointer-arith -Werror=vla -Wendif-labels
-Wmissing-format-attribute -Wimplicit-fallthrough=3
-Wcast-function-type -Wshadow=compatible-local -Wformat-security
-Wdeclaration-after-statement -Wno-format-truncation
-Wno-stringop-truncation -Wunused-variable -Wuninitialized
-Werror=maybe-uninitialized -Wreturn-type
-DWRITE_READ_PARSE_PLAN_TREES -DCOPY_PARSE_PLAN_TREES
-DREALLOCATE_BITMAPSETS -DRAW_EXPRESSION_COVERAGE_TEST
-fno-omit-frame-pointer -fPIC -pthread -DBUILDING_DLL -MD -MQ
src/backend/postgres_lib.a.p/optimizer_path_indxpath.c.o -MF
src/backend/postgres_lib.a.p/optimizer_path_indxpath.c.o.d -o
src/backend/postgres_lib.a.p/optimizer_path_indxpath.c.o -c
../../Desktop/pg_src/src8/postgres/src/backend/optimizer/path/indxpath.c
../../Desktop/pg_src/src8/postgres/src/backend/optimizer/path/indxpath.c:
In function ‘build_paths_for_SAOP’:
../../Desktop/pg_src/src8/postgres/src/backend/optimizer/path/indxpath.c:1267:33:
error: declaration of ‘pd’ shadows a previous local
[-Werror=shadow=compatible-local]
 1267 | PredicatesData *pd = (PredicatesData *) lfirst(lc);
  | ^~
../../Desktop/pg_src/src8/postgres/src/backend/optimizer/path/indxpath.c:1235:29:
note: shadowed declaration is here
 1235 | PredicatesData *pd;
  | ^~
cc1: all warnings being treated as errors
[32/126] Compiling C object src/backend/postgres_lib.a.p/utils_adt_ruleutils.c.o
ninja: build stopped: subcommand failed.

+ if (!predicate_implied_by(index->indpred, list_make1(rinfo1), true))
+ elog(ERROR, "Logical mistake in OR <-> ANY transformation code");
the error message seems not clear?
What is a "Logical mistake"?

static List *
build_paths_for_SAOP(PlannerInfo *root, RelOptInfo *rel, RestrictInfo *rinfo,
List *other_clauses)
I am not sure what's `other_clauses`, and `rinfo` refers to? adding
some comments would be great.

struct PredicatesData needs some comments, I think.

+bool
+saop_covered_by_predicates(ScalarArrayOpExpr *saop, List *predicate_lists)
+{
+ ListCell   *lc;
+ PredIterInfoData clause_info;
+ bool result = false;
+ bool isConstArray;
+
+ Assert(IsA(saop, ScalarArrayOpExpr));
is this Assert necessary?

For the function build_paths_for_SAOP, I think I understand the first
part of the code.
But I am not 100% sure of the second part of the `foreach(lc,
predicate_lists)` code.
more comments in `foreach(lc, predicate_lists)` would be helpful.

do you need to add `PredicatesData` to 

Re: Popcount optimization using AVX512

2024-02-09 Thread Andres Freund
Hi,

On 2024-02-09 15:27:57 -0800, Noah Misch wrote:
> On Fri, Feb 09, 2024 at 10:24:32AM -0800, Andres Freund wrote:
> > On 2024-01-26 07:42:33 +0100, Alvaro Herrera wrote:
> > > This suggests that finding a way to make the ifunc stuff work (with good
> > > performance) is critical to this work.
> > 
> > Ifuncs are effectively implemented as a function call via a pointer, they're
> > not magic, unfortunately. The sole trick they provide is that you don't
> > manually have to use the function pointer.
> 
> The IFUNC creators introduced it so glibc could use arch-specific memcpy with
> the instruction sequence of a non-pointer, extern function call, not the
> instruction sequence of a function pointer call.

My understanding is that the ifunc mechanism just avoid the need for repeated
indirect calls/jumps to implement a single function call, not the use of
indirect function calls at all. Calls into shared libraries, like libc, are
indirected via the GOT / PLT, i.e. an indirect function call/jump.  Without
ifuncs, the target of the function call would then have to dispatch to the
resolved function. Ifuncs allow to avoid this repeated dispatch by moving the
dispatch to the dynamic linker stage, modifying the contents of the GOT/PLT to
point to the right function. Thus ifuncs are an optimization when calling a
function in a shared library that's then dispatched depending on the cpu
capabilities.

However, in our case, where the code is in the same binary, function calls
implemented in the main binary directly (possibly via a static library) don't
go through GOT/PLT. In such a case, use of ifuncs turns a normal direct
function call into one going through the GOT/PLT, i.e. makes it indirect. The
same is true for calls within a shared library if either explicit symbol
visibility is used, or -symbolic, -Wl,-Bsymbolic or such is used. Therefore
there's no efficiency gain of ifuncs over a call via function pointer.


This isn't because ifunc is implemented badly or something - the reason for
this is that dynamic relocations aren't typically implemented by patching all
callsites (".text relocations"), which is what you would need to avoid the
need for an indirect call to something that fundamentally cannot be a constant
address at link time. The reason text relocations are disfavored is that
they can make program startup quite slow, that they require allowing
modifications to executable pages which are disliked due to the security
implications, and that they make the code non-shareable, as the in-memory
executable code has to differ from the on-disk code.


I actually think ifuncs within the same binary are a tad *slower* than plain
function pointer calls, unless -fno-plt is used. Without -fno-plt, an ifunc is
called by 1) a direct call into the PLT, 2) loading the target address from
the GOT, 3) making an indirect jump to that address.  Whereas a "plain
indirect function call" is just 1) load target address from variable 2) making
an indirect jump to that address. With -fno-plt the callsites themselves load
the address from the GOT.



Greetings,

Andres Freund




RE: Synchronizing slots from primary to standby

2024-02-09 Thread Zhijie Hou (Fujitsu)
On Friday, February 9, 2024 4:13 PM Peter Smith  wrote:
> 
> FYI -- I checked patch v81-0001 to find which of the #includes are strictly 
> needed.

Thanks!

> 
> 1.
> ...
> 
> Many of these #includes seem unnecessary. e.g. I was able to remove
> all those that are commented-out below, and the file still compiles OK
> with no warnings:

Removed.

> 
> 
> ==
> src/backend/replication/slot.c
> 
> 
> 
> 2.
>  #include "pgstat.h"
> +#include "replication/slotsync.h"
>  #include "replication/slot.h"
> +#include "replication/walsender.h"
>  #include "storage/fd.h"
> 
> The #include "replication/walsender.h" seems to be unnecessary.

Removed.

> 
> ==
> src/backend/replication/walsender.c
> 
> 3.
>  #include "replication/logical.h"
> +#include "replication/slotsync.h"
>  #include "replication/slot.h"
> 
> The #include "replication/slotsync.h" is needed, but only for Assert code:
> Assert(am_cascading_walsender || IsSyncingReplicationSlots());
> 
> So you could #ifdef around that #include if you wish to.

I am not sure if it's necessary and didn't find similar coding, so
didn't change.

Best Regards,
Hou zj


RE: Synchronizing slots from primary to standby

2024-02-09 Thread Zhijie Hou (Fujitsu)
On Friday, February 9, 2024 12:27 PM Peter Smith  wrote:
> 
> Here are some review comments for patch v81-0001.

Thanks for the comments.

> . GENERAL - ReplicationSlotInvalidationCause enum.
> 
> I was thinking that the ReplicationSlotInvalidationCause should
> explicitly set RS_INVAL_NONE = 0 (it's zero anyway, but making it
> explicit with a comment / Must be zero. /. will stop it from being
> changed in the future).

I think the current code is better, so didn't change this.

> 5. synchronize_slots
> 
> + /*
> + * The primary_slot_name is not set yet or WALs not received yet.
> + * Synchronization is not possible if the walreceiver is not started.
> + */
> + latestWalEnd = GetWalRcvLatestWalEnd();
> + SpinLockAcquire(&WalRcv->mutex);
> + if ((WalRcv->slotname[0] == '\0') ||
> + XLogRecPtrIsInvalid(latestWalEnd))
> + {
> + SpinLockRelease(&WalRcv->mutex);
> + return;
> + }
> + SpinLockRelease(&WalRcv->mutex);
> 
> The comment talks about the GUC "primary_slot_name", but the code is
> checking the WalRcv's slotname. It may be the same, but the difference
> is confusing.

This part has been removed.

> 7.
> + /*
> + * Use shared lock to prevent a conflict with
> + * ReplicationSlotsDropDBSlots(), trying to drop the same slot during
> + * a drop-database operation.
> + */
> + LockSharedObject(DatabaseRelationId, remote_dbid, 0, AccessShareLock);
> +
> + synchronize_one_slot(remote_slot, remote_dbid);
> +
> + UnlockSharedObject(DatabaseRelationId, remote_dbid, 0,
> + AccessShareLock);
> 
> IMO remove the blank lines (e.g., you don't use this kind of formatting for 
> spin
> locks)

I am not sure if it will look better, so didn't change this.

Other comments look good.

Best Regards,
Hou zj


RE: Synchronizing slots from primary to standby

2024-02-09 Thread Zhijie Hou (Fujitsu)
On Friday, February 9, 2024 6:45 PM Amit Kapila  wrote:
> 
> On Fri, Feb 9, 2024 at 10:00 AM Zhijie Hou (Fujitsu) 
> wrote:
> >
> 
> Few comments on 0001
> ===
> 1. Shouldn't pg_sync_replication_slots() check whether the user has 
> replication
> privilege?

Yes, added.

> 
> 2. The function declarations in slotsync.h don't seem to be in the same order 
> as
> they are defined in slotsync.c. For example, see ValidateSlotSyncParams(). The
> same is true for new functions exposed via walreceiver.h and walsender.h. 
> Please
> check the patch for other such inconsistencies.

I reordered the function declarations.

> 
> 3.
> +# Wait for the standby to finish sync
> +$standby1->wait_for_log(
> + qr/LOG: ( [A-Z0-9]+:)? newly created slot \"lsub1_slot\" is sync-ready
> +now/,  $offset);
> +
> +$standby1->wait_for_log(
> + qr/LOG: ( [A-Z0-9]+:)? newly created slot \"lsub2_slot\" is sync-ready
> +now/,  $offset);
> +
> +# Confirm that the logical failover slots are created on the standby
> +and are # flagged as 'synced'
> +is($standby1->safe_psql('postgres',
> + q{SELECT count(*) = 2 FROM pg_replication_slots WHERE slot_name IN
> ('lsub1_slot', 'lsub2_slot') AND synced;}),
> + "t",
> + 'logical slots have synced as true on standby');
> 
> Isn't the last test that queried pg_replication_slots sufficient? I think
> wait_for_log() would be required for slotsync worker or am I missing 
> something?

I think it's not needed in 0001, so removed.

> Apart from the above, I have modified a few comments in the attached.

Thanks, it looks good to me, so applied.

Attach the V83 patch which addressed Peter[1][2], Amit and Sawada-san's[3]
comments. Only 0001 is sent in this version, we will send other patches after
rebasing.

[1] 
https://www.postgresql.org/message-id/CAHut%2BPvW8s6AYD2UD0xadM%2B3VqBkXP2LjD30LEGRkHUa-Szm%2BQ%40mail.gmail.com
[2] 
https://www.postgresql.org/message-id/CAHut%2BPv88vp9mNxX37c_Bc5FDBsTS%2BdhV02Vgip9Wqwh7GBYSg%40mail.gmail.com
[3] 
https://www.postgresql.org/message-id/CAD21AoDvyLu%3D2-mqfGn_T_3jUamR34w%2BsxKvYnVzKqTCpyq_FQ%40mail.gmail.com

Best Regards,
Hou zj


v83-0001-Add-a-slot-synchronization-function.patch
Description: v83-0001-Add-a-slot-synchronization-function.patch


Re: failure in 019_replslot_limit

2024-02-09 Thread Alexander Lakhin

09.02.2024 21:59, Andres Freund wrote:



https://buildfarm.postgresql.org/cgi-bin/show_log.pl?nm=kestrel&dt=2024-02-04%2001%3A53%3A44
) and saw that it's not checkpointer, but walsender is hanging:

How did you reproduce this?


As kestrel didn't produce this failure until recently, I supposed that the
cause is the same as with subscription/031_column_list — longer test
duration, so I ran this test in parallel (with 20-30 jobs) in a slowed
down VM, so that one successful test duration increased to 100-120 seconds.
And I was lucky enough to catch it within 100 iterations. But now, that we
know what's happening there, I think I could reproduce it much easily,
with some sleep(s) added, if it would be of any interest.


So it's the issue that we wait effectively forever to send a FATAL. I've
previously proposed that we should not block sending out fatal errors, given
that allows clients to do prevent graceful restarts and a lot of other things.



Yes, I had demonstrated one of those unpleasant things previously too:
https://www.postgresql.org/message-id/91c8860a-a866-71a7-a060-3f07af531295%40gmail.com

Best regards,
Alexander




Re: "ERROR: latch already owned" on gharial

2024-02-09 Thread Soumyadeep Chakraborty
Hey,

Deeply appreciate both your input!

On Thu, Feb 8, 2024 at 4:57 AM Heikki Linnakangas  wrote:
> Hmm, there is a pair of SpinLockAcquire() and SpinLockRelease() in
> ProcKill(), before step 3 can happen. Comment in spin.h about
> SpinLockAcquire/Release:
>
> >  *Load and store operations in calling code are guaranteed not to be
> >  *reordered with respect to these operations, because they include a
> >  *compiler barrier.  (Before PostgreSQL 9.5, callers needed to use a
> >  *volatile qualifier to access data protected by spinlocks.)
>
> That talks about a compiler barrier, though, not a memory barrier. But
> looking at the implementations in s_lock.h, I believe they do act as
> memory barrier, too.
>
> So you might indeed have that problem on 9.4, but AFAICS not on later
> versions.

Yes 9.4 does not have 0709b7ee72e, which I'm assuming you are referring to?

Reading src/backend/storage/lmgr/README.barrier: For x86, to avoid reordering
between a load and a store, we need something that prevents both CPU and
compiler reordering. pg_memory_barrier() fits the bill.

Here we can have pid X's read of latch->owner_pid=Y reordered to precede
pid Y's store of latch->owner_pid = 0. The compiler barrier in S_UNLOCK() will
prevent compiler reordering but not CPU reordering of the above.

#define S_UNLOCK(lock) \
do { __asm__ __volatile__("" : : : "memory");  *(lock) = 0; } while (0)
which is equivalent to a:
#define pg_compiler_barrier_impl() __asm__ __volatile__("" ::: "memory")

But maybe both CPU and memory reordering will be prevented by the tas() in
S_LOCK() which does a lock and xchgb?

Is the above acting as BOTH a compiler and CPU barrier, like the lock; addl
stuff in pg_memory_barrier_impl()?

If yes, then the picture would look like this:

Pid Y in DisownLatch(), Pid X in OwnLatch()

Y: LOAD latch->ownerPid
...
Y: STORE latch->ownerPid = 0
...
// returning PGPROC to freeList
Y:S_LOCK(ProcStructLock) <--- tas() prevents X: LOAD latch->ownerPid
from preceding this
...
... < X: LOAD latch->ownerPid can't get here anyway as spinlock is held
...
Y:S_UNLOCK(ProcStructLock)
...
X: S_LOCK(ProcStructLock) // to retrieve PGPROC from freeList
...
X: S_UNLOCK(ProcStructLock)
...
X: LOAD latch->ownerPid

And this issue is not caused due to 9.4 missing 0709b7ee72e, which
changed S_UNLOCK
exclusively.

If no, then we would need the patch that does an explicit pg_memory_barrier()
at the end of DisownLatch() for PG HEAD.

On Thu, Feb 8, 2024 at 1:41 PM Andres Freund  wrote:

> Right.  I wonder if the issue istead could be something similar to what was
> fixed in 8fb13dd6ab5b and more generally in 97550c0711972a. If two procs go
> through proc_exit() for the same process, you can get all kinds of weird
> mixed up resource ownership.  The bug fixed in 8fb13dd6ab5b wouldn't apply,
> but it's pretty easy to introduce similar bugs in other places, so it seems
> quite plausible that greenplum might have done so.  We also did have more
> proc_exit()s in signal handlers in older branches, so it might just be an
> issue that also was present before.

Hmm, the pids X and Y in the example provided upthread don't spawn off any
children (like by calling system()) - they are just regular backends. So its
not possible for them to receive TERM and try to proc_exit() w/ the same
PGPROC. So that is not the issue, I guess?

Regards,
Soumyadeep (VMware)




Re: Small fix on query_id_enabled

2024-02-09 Thread Michael Paquier
On Fri, Feb 09, 2024 at 04:37:23PM +0800, Julien Rouhaud wrote:
> On Fri, Feb 09, 2024 at 03:38:23PM +0900, Yugo NAGATA wrote:
>> Also, I think the name is a bit confusing for the same reason, that is,
>> query_id_enabled may be false even when query id is computed in fact.
>>
>> Actually, this does not matter because we use IsQueryIdEnabled to check
>> if query id is enabled,  instead of referring to a global variable
>> (query_id_enabled or compute_query_id). But, just for making a code a bit
>> more readable, how about renaming this to query_id_required which seems to
>> stand for the meaning more correctly?
> 
> -1 for renaming to avoid breaking extensions that might access it.  We should
> simply document for compute_query_id and query_id_enabled declaration that one
> should instead use IsQueryIdEnabled() if they're interested in whether the 
> core
> queryid are computed or not.

Agreed.  A renaming would involve more pain than gain.  Improving the
comments around how to all that would be good enough, my 2c.
--
Michael


signature.asc
Description: PGP signature


Re: Make COPY format extendable: Extract COPY TO format implementations

2024-02-09 Thread Michael Paquier
On Fri, Feb 09, 2024 at 11:27:05AM -0800, Andres Freund wrote:
> On 2024-02-09 13:21:34 +0900, Michael Paquier wrote:
>> +static void
>> +CopyFromTextInFunc(CopyFromState cstate, Oid atttypid,
>> +   FmgrInfo *finfo, Oid *typioparam)
>> +{
>> +Oid func_oid;
>> +
>> +getTypeInputInfo(atttypid, &func_oid, typioparam);
>> +fmgr_info(func_oid, finfo);
>> +}
> 
> FWIW, we should really change the copy code to initialize FunctionCallInfoData
> instead of re-initializing that on every call, realy makes a difference
> performance wise.

You mean to initialize once its memory and let the internal routines
call InitFunctionCallInfoData for each attribute.  Sounds like a good
idea, doing that for HEAD before the main patch.  More impact with
more attributes.

>> +/*
>> + * CopyFromTextStart
>> + *
>> + * Start of COPY FROM for text/CSV format.
>> + */
>> +static void
>> +CopyFromTextStart(CopyFromState cstate, TupleDesc tupDesc)
>> +{
>> +AttrNumber  attr_count;
>> +
>> +/*
>> + * If encoding conversion is needed, we need another buffer to hold the
>> + * converted input data.  Otherwise, we can just point input_buf to the
>> + * same buffer as raw_buf.
>> + */
>> +if (cstate->need_transcoding)
>> +{
>> +cstate->input_buf = (char *) palloc(INPUT_BUF_SIZE + 1);
>> +cstate->input_buf_index = cstate->input_buf_len = 0;
>> +}
>> +else
>> +cstate->input_buf = cstate->raw_buf;
>> +cstate->input_reached_eof = false;
>> +
>> +initStringInfo(&cstate->line_buf);
> 
> Seems kinda odd that we have a supposedly extensible API that then stores all
> this stuff in the non-extensible CopyFromState.

That relates to the introduction of the opaque pointer mentioned 
upthread to point to a per-format structure, where we'd store data
specific to each format.

>> +/* create workspace for CopyReadAttributes results */
>> +attr_count = list_length(cstate->attnumlist);
>> +cstate->max_fields = attr_count;
> 
> Why is this here? This seems like generic code, not text format specific.

We don't care about that for binary.

>> +/*
>> + * CopyFromTextOneRow
>> + *
>> + * Copy one row to a set of `values` and `nulls` for the text and CSV
>> + * formats.
>> + */
> 
> I'm very doubtful it's a good idea to combine text and CSV here. They have
> basically no shared parsing code, so what's the point in sending them through
> one input routine?

The code shared between text and csv involves a path called once per
attribute.  TBH, I am not sure how much of the NULL handling should be
put outside the per-row routine as these options are embedded in the
core options.  So I don't have a better idea on this one than what's
proposed here if we cannot dispatch the routine calls once per
attribute.

>> +/* read raw fields in the next line */
>> +if (!NextCopyFromRawFields(cstate, &field_strings, &fldct))
>> +return false;
>> +
>> +/* check for overflowing fields */
>> +if (attr_count > 0 && fldct > attr_count)
>> +ereport(ERROR,
>> +(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
>> + errmsg("extra data after last expected 
>> column")));
> 
> It bothers me that we look to be ending up with different error handling
> across the various output formats, particularly if they're ending up in
> extensions. That'll make it harder to evolve this code in the future.

But different formats may have different requirements, including the
number of attributes detected vs expected.  That was not really
bothering me.

>> +if (cstate->opts.csv_mode)
>> +{
> 
> More unfortunate intermingling of multiple formats in a single
> routine.

Similar answer as a few paragraphs above.  Sutou-san was suggesting to
use an internal routine with fixed arguments instead, which would be
enough at the end with some inline instructions?

>> +
>> +if (cstate->defaults[m])
>> +{
>> +/*
>> + * The caller must supply econtext and have switched 
>> into the
>> + * per-tuple memory context in it.
>> + */
>> +Assert(econtext != NULL);
>> +Assert(CurrentMemoryContext == 
>> econtext->ecxt_per_tuple_memory);
>> +
>> +values[m] = ExecEvalExpr(defexprs[m], econtext, 
>> &nulls[m]);
>> +}
> 
> I don't think it's good that we end up with this code in different copy
> implementations.

Yeah, still we don't care about that for binary.
--
Michael


signature.asc
Description: PGP signature


Re: Simplify documentation related to Windows builds

2024-02-09 Thread Michael Paquier
On Fri, Feb 09, 2024 at 11:59:21AM -0800, Andres Freund wrote:
> On 2024-02-09 16:22:35 +0900, Michael Paquier wrote:
>> +  Diff is required to run the regression
>> +  tests. On Windows, it may not be available by default.
>> + 
>> +
>> 
>>
> 
> It's not installed by default on a bunch of platforms, including in common
> linux distros. The same is true for bison, flex etc, so I'm not sure it's
> really worth calling this out here.

Removing the second sentence would be OK, I assume.

>> -  
>> -   MIT Kerberos
>> -   
>> -Required for GSSAPI authentication support. MIT Kerberos can be
>> -downloaded from
>> -https://web.mit.edu/Kerberos/dist/index.html;>.
>> -   
>> -  
> 
> Btw, this is the only dependency that I hadn't been able to test on windows
> when I was hacking on the CI stuff last.  I'm not sure it relly still works.

Do you think that [1] would help in that?

[1]: https://community.chocolatey.org/packages/mitkerberos
--
Michael


signature.asc
Description: PGP signature


Re: Simplify documentation related to Windows builds

2024-02-09 Thread Michael Paquier
On Fri, Feb 09, 2024 at 11:56:08AM -0800, Andres Freund wrote:
> One thing I forgot: I found chocolatey to be painfully slow to install. And
> even at runtime, the wrappers it installs cause build time slowdowns too. And
> unnecessary rebuilds with visual studio, because default install paths trigger
> some odd msbuild heuristics.

For a standalone development machine, I've found that OK and found
that rather straight-forward when testing specific patches.  Now
that's just one experience.
--
Michael


signature.asc
Description: PGP signature


Re: Printing backtrace of postgres processes

2024-02-09 Thread Michael Paquier
On Fri, Feb 09, 2024 at 02:27:26PM +0530, Ashutosh Bapat wrote:
> On Fri, Feb 9, 2024 at 2:18 PM Alvaro Herrera  wrote:
>> Hmm, but the backtrace() manpage says
>>
>>•  backtrace() and backtrace_symbols_fd() don't call malloc()  explic‐
>>   itly,  but  they  are part of libgcc, which gets loaded dynamically
>>   when first used.  Dynamic loading usually triggers a call  to  mal‐
>>   loc(3).   If  you  need certain calls to these two functions to not
>>   allocate memory (in signal handlers, for example), you need to make
>>   sure libgcc is loaded beforehand.
>>
>> and the patch ensures that libgcc is loaded by calling a dummy
>> backtrace() at the start of the process.

FWIW, anything I am reading about the matter freaks me out, including
the dlopen() part in all the backends:
https://www.gnu.org/software/libc/manual/html_node/Backtraces.html

So I really question whether it is a good idea to assume if this will
always be safe depending on the version of libgcc dealt with,
increasing the impact area.  Perhaps that's worrying too much, but it
looks like one of these things where we'd better be really careful.

> We defer actual action triggered by a signal till CHECK_FOR_INTERRUPTS
> is called. I understand that we can't do that here since we want to
> capture the backtrace at that moment and can't wait till next CFI. But
> printing the backend can surely wait till next CFI right?

Delaying the call of backtrace() to happen during a CFI() would be
safe, yes, and writing data to stderr would not really be an issue as
at least the data would be sent somewhere.  That's less useful, but
we do that for memory contexts.
--
Michael


signature.asc
Description: PGP signature


Re: Function and Procedure with same signature?

2024-02-09 Thread Tom Lane
"David G. Johnston"  writes:
> On Fri, Feb 9, 2024, 12:05 Deepak M  wrote:
>> Folks, When tried to create a function with the same signature as
>> procedure it fails.

> That seems like a good hint you cannot do it.  Specifically because they
> get defined in the same internal catalog within which names must be unique.

Worth noting perhaps that this is actually required by the SQL
standard: per spec, functions and procedures are both "routines"
and share the same namespace, which is necessary so that commands
like DROP ROUTINE are well-defined.

regards, tom lane




Re: Popcount optimization using AVX512

2024-02-09 Thread Noah Misch
On Fri, Feb 09, 2024 at 10:24:32AM -0800, Andres Freund wrote:
> On 2024-01-26 07:42:33 +0100, Alvaro Herrera wrote:
> > This suggests that finding a way to make the ifunc stuff work (with good
> > performance) is critical to this work.
> 
> Ifuncs are effectively implemented as a function call via a pointer, they're
> not magic, unfortunately. The sole trick they provide is that you don't
> manually have to use the function pointer.

The IFUNC creators introduced it so glibc could use arch-specific memcpy with
the instruction sequence of a non-pointer, extern function call, not the
instruction sequence of a function pointer call.  I don't know why the
upthread ifunc_test.patch benchmark found ifunc performing worse than function
pointers.  However, it would be odd if toolchains have replaced the original
IFUNC with something equivalent to or slower than function pointers.




Re: Function and Procedure with same signature?

2024-02-09 Thread David G. Johnston
On Fri, Feb 9, 2024, 12:05 Deepak M  wrote:

> Hello Hackers,
>

Wrong list, this is for discussions regarding patches.



> Folks, When tried to create a function with the same signature as
> procedure it fails.
>

That seems like a good hint you cannot do it.  Specifically because they
get defined in the same internal catalog within which names must be unique.

David J.


Re: Encoding protection for pgcrypto

2024-02-09 Thread cary huang
The following review has been posted through the commitfest application:
make installcheck-world:  tested, failed
Implements feature:   not tested
Spec compliant:   not tested
Documentation:not tested

Hello

I had a look at your patch, which applies fine to PostgreSQL master. I noticed 
that the new regression tests you have added to test utf-8 encoding fails on my 
setup (make check) with the following diffs:

---
@ -13,6 +13,7 @@
 =ivrD
 -END PGP MESSAGE-
 '), '0123456789abcdefghij'), 'sha1');
+ERROR:  invalid byte sequence for encoding "UTF8": 0x91
 -- Database encoding protection.  Ciphertext source:
 -- printf '\xe0\xe0\xbff' | gpg --batch --passphrase mykey --textmode --armor 
--symmetric
 select pgp_sym_decrypt(dearmor('
@@ -23,5 +24,5 @@
 =QKy4
 -END PGP MESSAGE-
 '), 'mykey', 'debug=1');
+ERROR:  invalid byte sequence for encoding "UTF8": 0xe0 0xe0 0xbf
 \quit
-\endif
---

I am not sure but it seems that you intentionally provide a text input that 
would produce a non-utf-8 compliant decrypted output, which triggers the error 
from within "pg_verifymbstr()" call that you have added in pgp-pgsql.c? Are the 
errors expected in your new test case? If so, then the tests shall pass instead 
because it has caught an invalid encoding in decrypted output.

Generally, I am ok with the extra encoding check after text decryption but do 
not think if it is a good idea to just error out and abort the transaction when 
it detects invalid encoding character. text decryption routines may be used 
quite frequently and users generally do not expect them to abort transaction. 
It may be ok to just give them a warning about invalid character encoding. 

thanks

Cary Huang
Highgo Software - Canada
www.highgo.ca

Re: [PATCH] Add native windows on arm64 support

2024-02-09 Thread Dave Cramer
On Fri, 9 Feb 2024 at 14:36, Andres Freund  wrote:

> Hi,
>
> On 2024-02-09 14:23:46 -0500, Dave Cramer wrote:
> > > interestingly meson test does not produce any error
> > > The buildfarm produces the following error for me:
> > >
> > > -SELECT relname, attname, coltypes, get_columns_length(coltypes)
> > > - FROM check_columns
> > > - WHERE get_columns_length(coltypes) % 8 != 0 OR
> > > -   'name'::regtype::oid = ANY(coltypes);
> > > - relname | attname | coltypes | get_columns_length
> > > --+-+--+
> > > -(0 rows)
> > > -
> > > +server closed the connection unexpectedly
> > > + This probably means the server terminated abnormally
> > > + before or while processing the request.
> > > +connection to server was lost
> > >
> >
> > Actually digging some more, here is the actual error
> >
> > 2024-02-09 13:31:11.008 -05 postmaster[10672] LOG:  server process (PID
> > > 11204) was terminated by exception 0xC0000005
> > 2024-02-09 13:31:11.008 -05 postmaster[10672] DETAIL:  Failed process was
> > running: VACUUM;
> > 2024-02-09 13:31:11.008 -05 postmaster[10672] HINT:  See C include file
> > "ntstatus.h" for a description of the hexadecimal value.
>
> That's something like a segfault.
>
> One suspicion I have is that src/port/pg_crc32c_armv8_choose.c possibly
> doesn't properly support msvc.  It seems to assume that SIGILL can be
> trapped,
> but that IIRC doesn't work on windows.
>
> I'd check if the problem persists if you change
> cdata.set('USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK', 1)
> to
> cdata.set('USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK', 0)
>

This results in

FAILED: src/bin/pg_checksums/pg_checksums.exe
src/bin/pg_checksums/pg_checksums.pdb
"link"  /MACHINE:ARM64 /OUT:src/bin/pg_checksums/pg_checksums.exe
src/bin/pg_checksums/pg_checksums.exe.p/win32ver.res
src/bin/pg_checksums/pg_checksums.exe.p/pg_checksums.c.obj "/release"
"/nologo" "/DEBUG" "/PDB:src\bin\pg_checksums\pg_checksums.pdb"
"/INCREMENTAL:NO" "/STACK:4194304" "/NOEXP" "src/fe_utils/libpgfeutils.a"
"src/common/libpgcommon.a" "src/port/libpgport.a" "ws2_32.lib" "ws2_32.lib"
"ws2_32.lib" "ws2_32.lib" "/SUBSYSTEM:CONSOLE" "kernel32.lib" "user32.lib"
"gdi32.lib" "winspool.lib" "shell32.lib" "ole32.lib" "oleaut32.lib"
"uuid.lib" "comdlg32.lib" "advapi32.lib"
libpgcommon.a(controldata_utils.c.obj) : error LNK2001: unresolved external
symbol pg_comp_crc32c


Dave


>
>
> Also, yikes, that's an ugly way of doing hardware detection. Jumping out
> of a
> signal handler into normal code. Brrr.
>
> Greetings,
>
> Andres Freund
>


Re: Add semi-join pushdown to postgres_fdw

2024-02-09 Thread Alexander Korotkov
On Fri, Feb 9, 2024 at 10:08 PM Pavel Luzanov  wrote:
> While playing with this feature I found the following.
>
> Two foreign tables:
> postgres@demo_postgres_fdw(17.0)=# \det aircrafts|seats
>   List of foreign tables
>  Schema |   Table   |   Server
> +---+-
>  public | aircrafts | demo_server
>  public | seats | demo_server
> (2 rows)
>
>
> This query uses optimization:
>
> postgres@demo_postgres_fdw(17.0)=# EXPLAIN (costs off, verbose) SELECT *
> FROM aircrafts a
> WHERE a.aircraft_code = '320' AND EXISTS (
>   SELECT * FROM seats s WHERE s.aircraft_code = a.aircraft_code
> );
>   
>QUERY PLAN 
>   >
> -->
>  Foreign Scan
>Output: a.aircraft_code, a.model, a.range
>Relations: (public.aircrafts a) SEMI JOIN (public.seats s)
>Remote SQL: SELECT r1.aircraft_code, r1.model, r1.range FROM 
> bookings.aircrafts r1 WHERE ((r1.aircraft_code = '320')) AND EXISTS (SELECT 
> NULL FROM bookings.seats r2 WHERE ((r2.aircraft_code =>
> (4 rows)
>
>
> But optimization not used for NOT EXISTS:
>
> postgres@demo_postgres_fdw(17.0)=# EXPLAIN (costs off, verbose) SELECT *
> FROM aircrafts a
> WHERE a.aircraft_code = '320' AND NOT EXISTS (
>   SELECT * FROM seats s WHERE s.aircraft_code = a.aircraft_code
> );
>QUERY PLAN
> 
>  Nested Loop Anti Join
>Output: a.aircraft_code, a.model, a.range
>->  Foreign Scan on public.aircrafts a
>  Output: a.aircraft_code, a.model, a.range
>  Remote SQL: SELECT aircraft_code, model, range FROM 
> bookings.aircrafts WHERE ((aircraft_code = '320'))
>->  Materialize
>  Output: s.aircraft_code
>  ->  Foreign Scan on public.seats s
>Output: s.aircraft_code
>Remote SQL: SELECT aircraft_code FROM bookings.seats WHERE 
> ((aircraft_code = '320'))
> (10 rows)
>
> Also, optimization not used after deleting first condition (a.aircraft_code = 
> '320'):
>
> postgres@demo_postgres_fdw(17.0)=# EXPLAIN (costs off, verbose) SELECT *
> FROM aircrafts a
> WHERE EXISTS (
>   SELECT * FROM seats s WHERE s.aircraft_code = a.aircraft_code
> );
>QUERY PLAN
> 
>  Hash Join
>Output: a.aircraft_code, a.model, a.range
>Inner Unique: true
>Hash Cond: (a.aircraft_code = s.aircraft_code)
>->  Foreign Scan on public.aircrafts a
>  Output: a.aircraft_code, a.model, a.range
>  Remote SQL: SELECT aircraft_code, model, range FROM 
> bookings.aircrafts
>->  Hash
>  Output: s.aircraft_code
>  ->  HashAggregate
>Output: s.aircraft_code
>Group Key: s.aircraft_code
>->  Foreign Scan on public.seats s
>  Output: s.aircraft_code
>  Remote SQL: SELECT aircraft_code FROM bookings.seats
> (15 rows)
>
>
> But the worst thing is that replacing AND with OR causes breaking session and 
> server restart:
>
> postgres@demo_postgres_fdw(17.0)=# EXPLAIN (costs off, verbose) SELECT *
> FROM aircrafts a
> WHERE a.aircraft_code = '320' OR EXISTS (
>   SELECT * FROM seats s WHERE s.aircraft_code = a.aircraft_code
> );
> server closed the connection unexpectedly
> This probably means the server terminated abnormally
> before or while processing the request.
> The connection to the server was lost. Attempting reset: Failed.
> The connection to the server was lost. Attempting reset: Failed.

Thank you, Pavel. I'm looking into this.

--
Regards,
Alexander Korotkov




Re: glibc qsort() vulnerability

2024-02-09 Thread Nathan Bossart
On Fri, Feb 09, 2024 at 08:43:21PM +0100, Mats Kindahl wrote:
> QQ: right now it looks like this:
> 
> static inline int
> pg_cmp_u16(uint16 a, uint16 b)
> {
> 
> return (int32)a - (int32)b;
> 
> }
> 
> 
> and
> 
> static inline int
> pg_cmp_u32(uint32 a, uint32 b)
> {
> 
> return (a > b) - (a < b);
> 
> }
> 
> 
> I think that is clear enough, but do you want more casts added for the
> return value as well?

I think that is reasonably clear.  The latter does require you to know that
< and > return (int) 0 or (int) 1, which might be worth a short comment.
But that's just nitpicking...

-- 
Nathan Bossart
Amazon Web Services: https://aws.amazon.com




Re: Add semi-join pushdown to postgres_fdw

2024-02-09 Thread Pavel Luzanov

Hello,

While playing with this feature I found the following.

Two foreign tables:
postgres@demo_postgres_fdw(17.0)=# \det aircrafts|seats
  List of foreign tables
 Schema |   Table   |   Server
+---+-
 public | aircrafts | demo_server
 public | seats | demo_server
(2 rows)


This query uses optimization:

postgres@demo_postgres_fdw(17.0)=# EXPLAIN (costs off, verbose) SELECT *
FROM aircrafts a
WHERE a.aircraft_code = '320' AND EXISTS (
  SELECT * FROM seats s WHERE s.aircraft_code = a.aircraft_code
);
   
  QUERY PLAN   
>
-->
 Foreign Scan
   Output: a.aircraft_code, a.model, a.range
   Relations: (public.aircrafts a) SEMI JOIN (public.seats s)
   Remote SQL: SELECT r1.aircraft_code, r1.model, r1.range FROM bookings.aircrafts 
r1 WHERE ((r1.aircraft_code = '320')) AND EXISTS (SELECT NULL FROM bookings.seats 
r2 WHERE ((r2.aircraft_code =>
(4 rows)


But optimization not used for NOT EXISTS:

postgres@demo_postgres_fdw(17.0)=# EXPLAIN (costs off, verbose) SELECT *
FROM aircrafts a
WHERE a.aircraft_code = '320' AND NOT EXISTS (
  SELECT * FROM seats s WHERE s.aircraft_code = a.aircraft_code
);
   QUERY PLAN

 Nested Loop Anti Join
   Output: a.aircraft_code, a.model, a.range
   ->  Foreign Scan on public.aircrafts a
 Output: a.aircraft_code, a.model, a.range
 Remote SQL: SELECT aircraft_code, model, range FROM bookings.aircrafts 
WHERE ((aircraft_code = '320'))
   ->  Materialize
 Output: s.aircraft_code
 ->  Foreign Scan on public.seats s
   Output: s.aircraft_code
   Remote SQL: SELECT aircraft_code FROM bookings.seats WHERE 
((aircraft_code = '320'))
(10 rows)

Also, optimization not used after deleting first condition (a.aircraft_code = 
'320'):

postgres@demo_postgres_fdw(17.0)=# EXPLAIN (costs off, verbose) SELECT *
FROM aircrafts a
WHERE EXISTS (
  SELECT * FROM seats s WHERE s.aircraft_code = a.aircraft_code
);
   QUERY PLAN

 Hash Join
   Output: a.aircraft_code, a.model, a.range
   Inner Unique: true
   Hash Cond: (a.aircraft_code = s.aircraft_code)
   ->  Foreign Scan on public.aircrafts a
 Output: a.aircraft_code, a.model, a.range
 Remote SQL: SELECT aircraft_code, model, range FROM bookings.aircrafts
   ->  Hash
 Output: s.aircraft_code
 ->  HashAggregate
   Output: s.aircraft_code
   Group Key: s.aircraft_code
   ->  Foreign Scan on public.seats s
 Output: s.aircraft_code
 Remote SQL: SELECT aircraft_code FROM bookings.seats
(15 rows)


But the worst thing is that replacing AND with OR causes breaking session and 
server restart:

postgres@demo_postgres_fdw(17.0)=# EXPLAIN (costs off, verbose) SELECT *
FROM aircrafts a
WHERE a.aircraft_code = '320' OR EXISTS (
  SELECT * FROM seats s WHERE s.aircraft_code = a.aircraft_code
);
server closed the connection unexpectedly
This probably means the server terminated abnormally
before or while processing the request.
The connection to the server was lost. Attempting reset: Failed.
The connection to the server was lost. Attempting reset: Failed.

--
Pavel Luzanov
Postgres Professional:https://postgrespro.com


Re: glibc qsort() vulnerability

2024-02-09 Thread Andres Freund
On 2024-02-09 14:04:29 -0600, Nathan Bossart wrote:
> On Fri, Feb 09, 2024 at 08:40:47PM +0100, Mats Kindahl wrote:
> > On Fri, Feb 9, 2024 at 5:27 PM Tom Lane  wrote:
> >> We do pretty much assume that "int" is "int32".  But I agree that
> >> assuming anything about the width of size_t is bad.  I think we need
> >> a separate pg_cmp_size() or pg_cmp_size_t().
> > 
> > Do we want to have something similar for "int" as well? It seems to be
> > quite common and even though it usually is an int32, it does not have to be.
> 
> I don't think we need separate functions for int and int32.  As Tom noted,
> we assume they are the same.

+1




Function and Procedure with same signature?

2024-02-09 Thread Deepak M
Hello Hackers,

Folks, When tried to create a function with the same signature as procedure
it fails.

postgres=#  create or replace procedure obj1(char) language plpgsql as $$
 begin select $1; end; $$;
CREATE PROCEDURE
postgres=# create or replace function obj1(char) returns void language sql
as $$ select $1 $$;
ERROR:  cannot change routine kind
DETAIL:  "obj1" is a procedure.

any reason for failures?
Can procedure or function can be defined with the same signature i.e. name
and IN arguments ?
as callable for both is different.?


Re: glibc qsort() vulnerability

2024-02-09 Thread Nathan Bossart
On Fri, Feb 09, 2024 at 08:40:47PM +0100, Mats Kindahl wrote:
> On Fri, Feb 9, 2024 at 5:27 PM Tom Lane  wrote:
>> We do pretty much assume that "int" is "int32".  But I agree that
>> assuming anything about the width of size_t is bad.  I think we need
>> a separate pg_cmp_size() or pg_cmp_size_t().
> 
> Do we want to have something similar for "int" as well? It seems to be
> quite common and even though it usually is an int32, it does not have to be.

I don't think we need separate functions for int and int32.  As Tom noted,
we assume they are the same.

-- 
Nathan Bossart
Amazon Web Services: https://aws.amazon.com




Re: Simplify documentation related to Windows builds

2024-02-09 Thread Andres Freund
Hi,

On 2024-02-09 16:22:35 +0900, Michael Paquier wrote:
> diff --git a/doc/src/sgml/installation.sgml b/doc/src/sgml/installation.sgml
> index ed5b285a5e..af77352e6e 100644
> --- a/doc/src/sgml/installation.sgml
> +++ b/doc/src/sgml/installation.sgml
> @@ -193,6 +193,17 @@
>example, ICU_CFLAGS=' '.)
>   
>  
> +
> +
> + 
> +  
> +   Diff
> +  
> +
> +  Diff is required to run the regression
> +  tests. On Windows, it may not be available by default.
> + 
> +
> 
>

It's not installed by default on a bunch of platforms, including in common
linux distros. The same is true for bison, flex etc, so I'm not sure it's
really worth calling this out here.


> -  
> -   MIT Kerberos
> -   
> -Required for GSSAPI authentication support. MIT Kerberos can be
> -downloaded from
> -<ulink url="https://web.mit.edu/Kerberos/dist/index.html"></ulink>.
> -   
> -  

Btw, this is the only dependency that I hadn't been able to test on windows
when I was hacking on the CI stuff last.  I'm not sure it relly still works.




Greetings,

Andres Freund




Re: Simplify documentation related to Windows builds

2024-02-09 Thread Andres Freund
Hi,

On 2024-02-09 11:53:35 -0800, Andres Freund wrote:
> > Between MSYS2, mingw-w64 and Chocolatey, there are a lot of options
> > available to users.  So shouldn't we try to recommend only one of them,
> > then align the buildfarm and the CI to use one of them?
> 
> I think regardless which of these we use, we should provide a commandline
> invocation to actually install the necessary stuff, and occasionally test
> that, perhaps automatedly.
> 
> One issue with most / all of the above is that that they tend to install only
> optimized libraries, which can cause issues when building a debug environment.
> 
> I've in the past experimented with vcpkg and conan. Both unfortunately, at the
> time at least, didn't install necessary binaries for the compression tools,
> which makes it harder to test.  I had started to work on adding an option for
> the relevant vcpkg packages to install the binaries, but got stuck on some CLA
> stuff (cleared up since), after finding that that required some bugfixes in
> zstd.

One thing I forgot: I found chocolatey to be painfully slow to install. And
even at runtime, the wrappers it installs cause build time slowdowns too. And
unnecessary rebuilds with visual studio, because default install paths trigger
some odd msbuild heuristics.

Greetings,

Andres Freund




Re: Simplify documentation related to Windows builds

2024-02-09 Thread Andres Freund
Hi,

On 2023-12-31 15:13:03 +0900, Michael Paquier wrote:
> Some of my notes:
> - How much does command line editing work on Windows?  When it came to
> VS, I always got the impression that this never worked.  Andres in [2]
> mentioned otherwise because meson makes that easier?

Yea, I made it work. I think the only issue was that the build process is a
bit awkward.


> - ActiveState perl should not be recommended IMO, as being able to use
> a perl binary requires one to *register* into their service for tokens
> that can be used to kick perl sessions, last time I checked.

Indeed, it's far gone.


> Two alternatives:
> -- MinGW perl binary.
> -- strawberry perl (?) and Chocolatey.

My experience with strawberry perl were, um, not encouraging.


> Between MSYS2, mingw-w64 and Chocolatey, there are a lot of options
available to users.  So shouldn't we try to recommend only one of them,
> then align the buildfarm and the CI to use one of them?

I think regardless which of these we use, we should provide a commandline
invocation to actually install the necessary stuff, and occasionally test
that, perhaps automatedly.

One issue with most / all of the above is that that they tend to install only
optimized libraries, which can cause issues when building a debug environment.

I've in the past experimented with vcpkg and conan. Both unfortunately, at the
time at least, didn't install necessary binaries for the compression tools,
which makes it harder to test.  I had started to work on adding an option for
the relevant vcpkg packages to install the binaries, but got stuck on some CLA
stuff (cleared up since), after finding that that required some bugfixes in
zstd.

Greetings,

Andres Freund




Re: glibc qsort() vulnerability

2024-02-09 Thread Mats Kindahl
On Fri, Feb 9, 2024 at 8:26 PM Mats Kindahl  wrote:

> On Fri, Feb 9, 2024 at 5:24 PM Nathan Bossart 
> wrote:
>
>> On Fri, Feb 09, 2024 at 08:52:26AM +0100, Mats Kindahl wrote:
>> > Here is a new version introducing pg_cmp_s32 and friends and use them
>> > instead of the INT_CMP macro introduced before. It also moves the
>> > definitions to common/int.h and adds that as an include to all locations
>> > using these functions.
>>
>> Thanks for the new version of the patch.
>>
>> > Note that for integers with sizes less than sizeof(int), C standard
>> > conversions will convert the values to "int" before doing the
>> arithmetic,
>> > so no casting is *necessary*. I did not force the 16-bit functions to
>> > return -1 or 1 and have updated the comment accordingly.
>>
>> It might not be necessary, but this is one of those places where I would
>> add casting anyway to make it abundantly clear what we are expecting to
>> happen and why it is safe.
>>
>
> I'll add it.
>
>
>> > The types "int" and "size_t" are treated as s32 and u32 respectively
>> since
>> > that seems to be the case for most of the code, even if strictly not
>> > correct (size_t can be an unsigned long int for some architecture).
>>
>> Why is it safe to do this?
>>
>> > - return ((const SPLITCOST *) a)->cost - ((const SPLITCOST *)
>> b)->cost;
>> > + return INT_CMP(((const SPLITCOST *) a)->cost, ((const SPLITCOST
>> *) b)->cost);
>>
>> The patch still contains several calls to INT_CMP.
>>
>
> I'll fix it.
>
>
>>
>> >
>> +/*
>> > + * Comparison routines for integers
>> > +
>> *
>> > + */
>>
>> I'd suggest separating this part out to a 0001 patch to make it easier to
>> review.  The 0002 patch could take care of converting the existing qsort
>> comparators.
>>
>
> Ok. Will split it out into two patches.
>
>
>>
>> > +static inline int
>> > +pg_cmp_s16(int16 a, int16 b)
>> > +{
>> > + return a - b;
>> > +}
>> > +
>> > +static inline int
>> > +pg_cmp_u16(uint16 a, uint16 b)
>> > +{
>> > + return a - b;
>> > +}
>> > +
>> > +static inline int
>> > +pg_cmp_s32(int32 a, int32 b)
>> > +{
>> > + return (a > b) - (a < b);
>> > +}
>> > +
>> > +static inline int
>> > +pg_cmp_u32(uint32 a, uint32 b)
>> > +{
>> > + return (a > b) - (a < b);
>> > +}
>> > +
>> > +static inline int
>> > +pg_cmp_s64(int64 a, int64 b)
>> > +{
>> > + return (a > b) - (a < b);
>> > +}
>> > +
>> > +static inline int
>> > +pg_cmp_u64(uint64 a, uint64 b)
>> > +{
>> > + return (a > b) - (a < b);
>> > +}
>>
>> As suggested above, IMHO we should be rather liberal with the casting to
>> ensure it is abundantly clear what is happening here.
>>
>
> Ok.
>

QQ: right now it looks like this:

static inline int
pg_cmp_u16(uint16 a, uint16 b)
{

return (int32)a - (int32)b;

}


and

static inline int
pg_cmp_u32(uint32 a, uint32 b)
{

return (a > b) - (a < b);

}


I think that is clear enough, but do you want more casts added for the
return value as well?

Best wishes,
Mats Kindahl


>
>
>>
>> --
>> Nathan Bossart
>> Amazon Web Services: https://aws.amazon.com
>>
>


Re: glibc qsort() vulnerability

2024-02-09 Thread Mats Kindahl
On Fri, Feb 9, 2024 at 5:27 PM Tom Lane  wrote:

> Nathan Bossart  writes:
> > On Fri, Feb 09, 2024 at 08:52:26AM +0100, Mats Kindahl wrote:
> >> The types "int" and "size_t" are treated as s32 and u32 respectively
> since
> >> that seems to be the case for most of the code, even if strictly not
> >> correct (size_t can be an unsigned long int for some architecture).
>
> > Why is it safe to do this?
>
> We do pretty much assume that "int" is "int32".  But I agree that
> assuming anything about the width of size_t is bad.  I think we need
> a separate pg_cmp_size() or pg_cmp_size_t().
>

Do we want to have something similar for "int" as well? It seems to be
quite common and even though it usually is an int32, it does not have to be.

Best wishes,
Mats Kindahl

>
> regards, tom lane
>


Re: [PATCH] Add native windows on arm64 support

2024-02-09 Thread Andres Freund
Hi,

On 2024-02-09 14:23:46 -0500, Dave Cramer wrote:
> > interestingly meson test does not produce any error
> > The buildfarm produces the following error for me:
> >
> > -SELECT relname, attname, coltypes, get_columns_length(coltypes)
> > - FROM check_columns
> > - WHERE get_columns_length(coltypes) % 8 != 0 OR
> > -   'name'::regtype::oid = ANY(coltypes);
> > - relname | attname | coltypes | get_columns_length
> > --+-+--+
> > -(0 rows)
> > -
> > +server closed the connection unexpectedly
> > + This probably means the server terminated abnormally
> > + before or while processing the request.
> > +connection to server was lost
> >
> 
> Actually digging some more, here is the actual error
> 
> 2024-02-09 13:31:11.008 -05 postmaster[10672] LOG:  server process (PID
> > 11204) was terminated by exception 0xC0000005
> 2024-02-09 13:31:11.008 -05 postmaster[10672] DETAIL:  Failed process was
> running: VACUUM;
> 2024-02-09 13:31:11.008 -05 postmaster[10672] HINT:  See C include file
> "ntstatus.h" for a description of the hexadecimal value.

That's something like a segfault.

One suspicion I have is that src/port/pg_crc32c_armv8_choose.c possibly
doesn't properly support msvc.  It seems to assume that SIGILL can be trapped,
but that IIRC doesn't work on windows.

I'd check if the problem persists if you change
cdata.set('USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK', 1)
to
cdata.set('USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK', 0)


Also, yikes, that's an ugly way of doing hardware detection. Jumping out of a
signal handler into normal code. Brrr.

Greetings,

Andres Freund




Re: glibc qsort() vulnerability

2024-02-09 Thread Mats Kindahl
On Fri, Feb 9, 2024 at 5:27 PM Tom Lane  wrote:

> Nathan Bossart  writes:
> > On Fri, Feb 09, 2024 at 08:52:26AM +0100, Mats Kindahl wrote:
> >> The types "int" and "size_t" are treated as s32 and u32 respectively
> since
> >> that seems to be the case for most of the code, even if strictly not
> >> correct (size_t can be an unsigned long int for some architecture).
>
> > Why is it safe to do this?
>
> We do pretty much assume that "int" is "int32".  But I agree that
> assuming anything about the width of size_t is bad.  I think we need
> a separate pg_cmp_size() or pg_cmp_size_t().
>

I added precisely one first, but removed it when I saw that all uses
assumed that it was an int. :)

I'll add it back.

Best wishes,
Mats Kindahl

>
> regards, tom lane
>


Re: Make COPY format extendable: Extract COPY TO format implementations

2024-02-09 Thread Andres Freund
Hi,

On 2024-02-09 13:21:34 +0900, Michael Paquier wrote:
> +static void
> +CopyFromTextInFunc(CopyFromState cstate, Oid atttypid,
> +FmgrInfo *finfo, Oid *typioparam)
> +{
> + Oid func_oid;
> +
> + getTypeInputInfo(atttypid, &func_oid, typioparam);
> + fmgr_info(func_oid, finfo);
> +}

FWIW, we should really change the copy code to initialize FunctionCallInfoData
instead of re-initializing that on every call, really makes a difference
performance wise.


> +/*
> + * CopyFromTextStart
> + *
> + * Start of COPY FROM for text/CSV format.
> + */
> +static void
> +CopyFromTextStart(CopyFromState cstate, TupleDesc tupDesc)
> +{
> + AttrNumber  attr_count;
> +
> + /*
> +  * If encoding conversion is needed, we need another buffer to hold the
> +  * converted input data.  Otherwise, we can just point input_buf to the
> +  * same buffer as raw_buf.
> +  */
> + if (cstate->need_transcoding)
> + {
> + cstate->input_buf = (char *) palloc(INPUT_BUF_SIZE + 1);
> + cstate->input_buf_index = cstate->input_buf_len = 0;
> + }
> + else
> + cstate->input_buf = cstate->raw_buf;
> + cstate->input_reached_eof = false;
> +
> + initStringInfo(&cstate->line_buf);

Seems kinda odd that we have a supposedly extensible API that then stores all
this stuff in the non-extensible CopyFromState.


> + /* create workspace for CopyReadAttributes results */
> + attr_count = list_length(cstate->attnumlist);
> + cstate->max_fields = attr_count;

Why is this here? This seems like generic code, not text format specific.


> + cstate->raw_fields = (char **) palloc(attr_count * sizeof(char *));
> + /* Set read attribute callback */
> + if (cstate->opts.csv_mode)
> + cstate->copy_read_attributes = CopyReadAttributesCSV;
> + else
> + cstate->copy_read_attributes = CopyReadAttributesText;
> +}

Isn't this precisely repeating the mistake of 2889fd23be56?

And, why is this done here? Shouldn't this decision have been made prior to
even calling CopyFromTextStart()?

> +/*
> + * CopyFromTextOneRow
> + *
> + * Copy one row to a set of `values` and `nulls` for the text and CSV
> + * formats.
> + */

I'm very doubtful it's a good idea to combine text and CSV here. They have
basically no shared parsing code, so what's the point in sending them through
one input routine?


> +bool
> +CopyFromTextOneRow(CopyFromState cstate,
> +ExprContext *econtext,
> +Datum *values,
> +bool *nulls)
> +{
> + TupleDesc   tupDesc;
> + AttrNumber  attr_count;
> + FmgrInfo   *in_functions = cstate->in_functions;
> + Oid*typioparams = cstate->typioparams;
> + ExprState **defexprs = cstate->defexprs;
> + char  **field_strings;
> + ListCell   *cur;
> + int fldct;
> + int fieldno;
> + char   *string;
> +
> + tupDesc = RelationGetDescr(cstate->rel);
> + attr_count = list_length(cstate->attnumlist);
> +
> + /* read raw fields in the next line */
> + if (!NextCopyFromRawFields(cstate, &field_strings, &fldct))
> + return false;
> +
> + /* check for overflowing fields */
> + if (attr_count > 0 && fldct > attr_count)
> + ereport(ERROR,
> + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
> +  errmsg("extra data after last expected 
> column")));

It bothers me that we look to be ending up with different error handling
across the various output formats, particularly if they're ending up in
extensions. That'll make it harder to evolve this code in the future.


> + fieldno = 0;
> +
> + /* Loop to read the user attributes on the line. */
> + foreach(cur, cstate->attnumlist)
> + {
> + int attnum = lfirst_int(cur);
> + int m = attnum - 1;
> + Form_pg_attribute att = TupleDescAttr(tupDesc, m);
> +
> + if (fieldno >= fldct)
> + ereport(ERROR,
> + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
> +  errmsg("missing data for column 
> \"%s\"",
> + 
> NameStr(att->attname))));
> + string = field_strings[fieldno++];
> +
> + if (cstate->convert_select_flags &&
> + !cstate->convert_select_flags[m])
> + {
> + /* ignore input field, leaving column as NULL */
> + continue;
> + }
> +
> + cstate->cur_attname = NameStr(att->attname);
> + cstate->cur_attval = string;
> +
> + if (cstate->opts.csv_mode)
> + {

More unfortunate intermingling of 

Re: glibc qsort() vulnerability

2024-02-09 Thread Mats Kindahl
On Fri, Feb 9, 2024 at 5:24 PM Nathan Bossart 
wrote:

> On Fri, Feb 09, 2024 at 08:52:26AM +0100, Mats Kindahl wrote:
> > Here is a new version introducing pg_cmp_s32 and friends and use them
> > instead of the INT_CMP macro introduced before. It also moves the
> > definitions to common/int.h and adds that as an include to all locations
> > using these functions.
>
> Thanks for the new version of the patch.
>
> > Note that for integers with sizes less than sizeof(int), C standard
> > conversions will convert the values to "int" before doing the arithmetic,
> > so no casting is *necessary*. I did not force the 16-bit functions to
> > return -1 or 1 and have updated the comment accordingly.
>
> It might not be necessary, but this is one of those places where I would
> add casting anyway to make it abundantly clear what we are expecting to
> happen and why it is safe.
>

I'll add it.


> > The types "int" and "size_t" are treated as s32 and u32 respectively
> since
> > that seems to be the case for most of the code, even if strictly not
> > correct (size_t can be an unsigned long int for some architecture).
>
> Why is it safe to do this?
>
> > - return ((const SPLITCOST *) a)->cost - ((const SPLITCOST *)
> b)->cost;
> > + return INT_CMP(((const SPLITCOST *) a)->cost, ((const SPLITCOST *)
> b)->cost);
>
> The patch still contains several calls to INT_CMP.
>

I'll fix it.


>
> >
> +/*
> > + * Comparison routines for integers
> > +
> *
> > + */
>
> I'd suggest separating this part out to a 0001 patch to make it easier to
> review.  The 0002 patch could take care of converting the existing qsort
> comparators.
>

Ok. Will split it out into two patches.


>
> > +static inline int
> > +pg_cmp_s16(int16 a, int16 b)
> > +{
> > + return a - b;
> > +}
> > +
> > +static inline int
> > +pg_cmp_u16(uint16 a, uint16 b)
> > +{
> > + return a - b;
> > +}
> > +
> > +static inline int
> > +pg_cmp_s32(int32 a, int32 b)
> > +{
> > + return (a > b) - (a < b);
> > +}
> > +
> > +static inline int
> > +pg_cmp_u32(uint32 a, uint32 b)
> > +{
> > + return (a > b) - (a < b);
> > +}
> > +
> > +static inline int
> > +pg_cmp_s64(int64 a, int64 b)
> > +{
> > + return (a > b) - (a < b);
> > +}
> > +
> > +static inline int
> > +pg_cmp_u64(uint64 a, uint64 b)
> > +{
> > + return (a > b) - (a < b);
> > +}
>
> As suggested above, IMHO we should be rather liberal with the casting to
> ensure it is abundantly clear what is happening here.
>

Ok.


>
> --
> Nathan Bossart
> Amazon Web Services: https://aws.amazon.com
>


Re: glibc qsort() vulnerability

2024-02-09 Thread Andres Freund
Hi,

On 2024-02-10 00:02:08 +0500, Andrey Borodin wrote:
> > Not really in this case. The call is perfectly predictable - a single 
> > qsort()
> > will use the same callback for every comparison, whereas the if is perfectly
> > *unpredictable*.  A branch mispredict is far more expensive than a correctly
> > predicted function call.
> 
> Oh, make sense... I did not understand that. But does cpu predicts what
> instruction to fetch even after a call instruction?

Yes, it does predict that. Both for branches and calls (which are just special
kinds of branches in the end). If you want to find more about this, the term
to search for is "branch target buffer".  There's also predictions about where
a function return will jump to, since that obviously can differ.

Modern predictors aren't just taking the instruction pointer into account, to
predict where a jump/call will go to. They take the history of recent branches
into account, i.e. the same instruction will be predicted to jump to different
locations, depending on where the current function was called from. This is
important as a function obviously can behave very differently depending on the
input.


> These cpus are really neat things...

Indeed.

Greetings,

Andres Freund




Re: [PATCH] Add native windows on arm64 support

2024-02-09 Thread Dave Cramer
Dave Cramer
www.postgres.rocks


On Fri, 9 Feb 2024 at 07:18, Dave Cramer  wrote:

>
>
>
>
> On Fri, 9 Feb 2024 at 00:26, Michael Paquier  wrote:
>
>> On Tue, Feb 06, 2024 at 07:01:49AM -0500, Dave Cramer wrote:
>> > Thanks, this patch works and
>> > testing with meson passes.
>>
>> Only with the version posted at [1]?  Interesting, that's the same
>> contents as v8 posted upthread, minus src/tools/ because we don't need
>> to care about them anymore.
>>
>> Andrew, what's happening on the test side?  It does not seem you've
>> mentioned any details about what is going wrong, or I have just missed
>> them.
>>
>> > I'll try the buildfarm next.
>>
>> [1]:
>> https://www.postgresql.org/message-id/ea42654a-3dc4-98b0-335b-56b7ec5e5...@dunslane.net
>
>
> interestingly meson test does not produce any error
> The buildfarm produces the following error for me:
>
> -SELECT relname, attname, coltypes, get_columns_length(coltypes)
> - FROM check_columns
> - WHERE get_columns_length(coltypes) % 8 != 0 OR
> -   'name'::regtype::oid = ANY(coltypes);
> - relname | attname | coltypes | get_columns_length
> --+-+--+
> -(0 rows)
> -
> +server closed the connection unexpectedly
> + This probably means the server terminated abnormally
> + before or while processing the request.
> +connection to server was lost
>

Actually digging some more, here is the actual error

2024-02-09 13:31:11.008 -05 postmaster[10672] LOG:  server process (PID
11204) was terminated by exception 0xC0000005
2024-02-09 13:31:11.008 -05 postmaster[10672] DETAIL:  Failed process was
running: VACUUM;
2024-02-09 13:31:11.008 -05 postmaster[10672] HINT:  See C include file
"ntstatus.h" for a description of the hexadecimal value.
2024-02-09 13:31:11.008 -05 postmaster[10672] LOG:  terminating any other
active server processes
2024-02-09 13:31:11.013 -05 postmaster[10672] LOG:  all server processes
terminated; reinitializing
2024-02-09 13:31:11.034 -05 startup[6152] LOG:  database system was
interrupted; last known up at 2024-02-09 13:31:01 -05


Dave

>
> Dave
>


Re: GUC-ify walsender MAX_SEND_SIZE constant

2024-02-09 Thread Andres Freund
Hi,

On 2024-01-19 23:04:50 +0330, Majid Garoosi wrote:
> Following is the description which is also written in the commit message:
> MAX_SEND_SIZE parameter was used in WALSender to limit maximum size of
> a WAL data packet sent to a WALReceiver during replication. Although
> its default value (128kB) was a reasonable value, it was written in
> 2010. Since the hardwares have changed a lot since then, a PostgreSQL
> user might need to customize this value.
> For example, if a database's disk has a high bandwidth and a high
> latency at the same time, it makes more sense to read bigger chunks of
> data from disk in each iteration. One example of such disks is a remote
> disk. (e.g. an RBD volume)

The way we read the WAL data is perfectly prefetchable by the OS, so I
wouldn't really expect gains here.  Have you actually been able to see a
performance benefit by increasing MAX_SEND_SIZE?

I don't think we should add configuration parameters without at least some
demonstration of practical gains, otherwise we'll end up with hundreds of
never-useful config options.

Greetings,

Andres Freund




Re: glibc qsort() vulnerability

2024-02-09 Thread Andrey Borodin



> On 9 Feb 2024, at 21:32, Nathan Bossart  wrote:
>  a lot
> of current use-cases require inspecting specific fields of structs
Yes, I'm proposing to pass to sorting routine not a comparator, but value 
extractor. And then rely on operators <,>,==.
In a pseudocode: instead of sort(array, (a,b)->a.field-b.field) use sort(array, 
x->x.field). And rely on "field" being comparable.

> If that can be made simple and elegant and
> demonstrates substantial improvements
I'll try to produce a PoC and measure it with Andres' intarray test.

> On 9 Feb 2024, at 23:40, Andres Freund  wrote:
> 
> We have some infrastructure for that actually, see sort_template.h.  But
> perhaps we should define a static inline of the generic pg_qsort() even. OTOH,
> plenty places that'll just end up to a pointless amount of code emitted to
> sort ~5 elements on average.
I think there might be another benefit. It's easier to think about values order 
than function comparator that returns -1,0,+1...

>> I bet “call" is more expensive than “if".
> 
> Not really in this case. The call is perfectly predictable - a single qsort()
> will use the same callback for every comparison, whereas the if is perfectly
> *unpredictable*.  A branch mispredict is far more expensive than a correctly
> predicted function call.

Oh, makes sense... I did not understand that. But does the cpu predict what 
instruction to fetch even after a call instruction? These cpus are really neat 
things... so, probably, yes, it does.


Best regards, Andrey Borodin.



Re: failure in 019_replslot_limit

2024-02-09 Thread Andres Freund
Hi,

On 2024-02-09 18:00:01 +0300, Alexander Lakhin wrote:
> I've managed to reproduce this issue (which still persists:
https://buildfarm.postgresql.org/cgi-bin/show_log.pl?nm=kestrel&dt=2024-02-04%2001%3A53%3A44
> ) and saw that it's not checkpointer, but walsender is hanging:

How did you reproduce this?



> And I see the walsender process still running (I've increased the timeout
> to keep the test running and to connect to the process in question), with
> the following stack trace:
> #0  0x7fe4feac3d16 in epoll_wait (epfd=5, events=0x55b279b70f38,
> maxevents=1, timeout=timeout@entry=-1) at
> ../sysdeps/unix/sysv/linux/epoll_wait.c:30
> #1  0x55b278b9ab32 in WaitEventSetWaitBlock
> (set=set@entry=0x55b279b70eb8, cur_timeout=cur_timeout@entry=-1,
> occurred_events=occurred_events@entry=0x7ffda5ffac90,
> nevents=nevents@entry=1) at latch.c:1571
> #2  0x55b278b9b6b6 in WaitEventSetWait (set=0x55b279b70eb8,
> timeout=timeout@entry=-1,
> occurred_events=occurred_events@entry=0x7ffda5ffac90,
> nevents=nevents@entry=1, wait_event_info=wait_event_info@entry=100663297) at
> latch.c:1517
> #3  0x55b278a3f11f in secure_write (port=0x55b279b65aa0,
> ptr=ptr@entry=0x55b279bfbd08, len=len@entry=21470) at be-secure.c:296
> #4  0x55b278a460dc in internal_flush () at pqcomm.c:1356
> #5  0x55b278a461d4 in internal_putbytes (s=s@entry=0x7ffda5ffad3c 
> "E\177", len=len@entry=1) at pqcomm.c:1302

So it's the issue that we wait effectively forever to send a FATAL. I've
previously proposed that we should not block sending out fatal errors, given
that allows clients to prevent graceful restarts and a lot of other things.

Greetings,

Andres Freund




Re: POC: Extension for adding distributed tracing - pg_tracing

2024-02-09 Thread Andres Freund
Hi,

On 2024-02-09 19:46:58 +0300, Nikita Malakhov wrote:
> I agree with Heikki on most topics and especially the one he recommended
> to publish your extension on GitHub, this tool is very promising for highly
> loaded
> systems, you will get a lot of feedback very soon.
> 
> I'm curious about SpinLock - it is recommended for very short operations,
> taking up to several instructions, and docs say that for longer ones it
> will be
> too expensive, and recommends using LWLock. Why have you chosen SpinLock?
> Does it have some benefits here?

Indeed - e.g. end_tracing() looks to hold the spinlock for far too long for
spinlocks to be appropriate. Use an lwlock.

Random stuff I noticed while skimming:
- pg_tracing.c should include postgres.h as the first thing

- afaict none of the use of volatile is required, spinlocks have been barriers
  for a long time now

- you acquire the spinlock for single increments of n_writers, perhaps that
  could become an atomic, to reduce contention?

- I don't think it's a good idea to do memory allocations in the middle of a
  PG_CATCH. If the error was due to out-of-memory, you'll throw another error.


Greetings,

Andres Freund




Re: glibc qsort() vulnerability

2024-02-09 Thread Andres Freund
Hi,

On 2024-02-09 13:19:49 +0500, Andrey M. Borodin wrote:
> > On 8 Feb 2024, at 06:52, Nathan Bossart  wrote:
> > For the same compASC() test, I see an ~8.4% improvement with your int64
> > code and a ~3.4% improvement with this:
>
> If we care about branch prediction in comparison function, maybe we could
> produce sorting that inlines comparator, thus eliminating function call to
> comparator? We convert comparison logic to int, to extract comparison back
> then.

We have some infrastructure for that actually, see sort_template.h.  But
perhaps we should define a static inline of the generic pg_qsort() even. OTOH,
plenty places that'll just end up to a pointless amount of code emitted to
sort ~5 elements on average.


> I bet “call" is more expensive than “if".

Not really in this case. The call is perfectly predictable - a single qsort()
will use the same callback for every comparison, whereas the if is perfectly
*unpredictable*.  A branch mispredict is far more expensive than a correctly
predicted function call.

Greetings,

Andres




Re: Popcount optimization using AVX512

2024-02-09 Thread Andres Freund
Hi,

On 2024-02-09 17:39:46 +, Amonson, Paul D wrote:

> diff --git a/meson.build b/meson.build
> index 8ed51b6aae..1e7a4dc942 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -1773,6 +1773,45 @@ elif cc.links('''
>  endif
>  
>  
> +# XXX: The configure.ac check for __cpuidex() is broken, we don't copy that
> +# here. To prevent problems due to two detection methods working, stop
> +# checking after one.

This seems like a bogus copy-paste.


> +if cc.links('''
> +#include 
> +int main(int arg, char **argv)
> +{
> +unsigned int exx[4] = {0, 0, 0, 0};
> > +__get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
> +}
> +''', name: '__get_cpuid_count',
> +args: test_c_args)
> +  cdata.set('HAVE__GET_CPUID_COUNT', 1)
> +elif cc.links('''
> +#include 
> +int main(int arg, char **argv)
> +{
> +unsigned int exx[4] = {0, 0, 0, 0};
> +__cpuidex(exx, 7, 0);
> +}
> +''', name: '__cpuidex',
> +args: test_c_args)
> +  cdata.set('HAVE__CPUIDEX', 1)
> +endif
> +
> +
> +# Check for header immintrin.h
> +if cc.links('''
> +#include 
> +int main(int arg, char **argv)
> +{
> +  return 1701;
> +}
> +''', name: '__immintrin',
> +args: test_c_args)
> +  cdata.set('HAVE__IMMINTRIN', 1)
> +endif

Do these all actually have to link?  Invoking the linker is slow.

I think you might be able to just use cc.has_header_symbol().



> +###
> +# AVX 512 POPCNT Intrinsic check
> +###
> +have_avx512_popcnt = false
> +cflags_avx512_popcnt = []
> +if host_cpu == 'x86_64'
> +  prog = '''
> +  #include 
> +  #include 
> +  void main(void)
> +  {
> +__m512i tmp __attribute__((aligned(64)));
> +__m512i input = _mm512_setzero_si512();
> +__m512i output = _mm512_popcnt_epi64(input);
> +uint64_t cnt = 999;
> > +_mm512_store_si512(&tmp, output);
> +cnt = _mm512_reduce_add_epi64(tmp);
> +/* return computed value, to prevent the above being optimized away 
> */
> +return cnt == 0;
> +  }'''

Does this work with msvc?


> +if cc.links(prog, name: '_mm512_setzero_si512, _mm512_popcnt_epi64, 
> _mm512_store_si512, and _mm512_reduce_add_epi64 with -mavx512vpopcntdq 
> -mavx512f',

That's a very long line in the output, how about using the avx feature name or
something?



> diff --git a/src/port/Makefile b/src/port/Makefile
> index dcc8737e68..6a01a7d89a 100644
> --- a/src/port/Makefile
> +++ b/src/port/Makefile
> @@ -87,6 +87,11 @@ pg_crc32c_sse42.o: CFLAGS+=$(CFLAGS_CRC)
>  pg_crc32c_sse42_shlib.o: CFLAGS+=$(CFLAGS_CRC)
>  pg_crc32c_sse42_srv.o: CFLAGS+=$(CFLAGS_CRC)
>  
> +# Newer Intel processors can use AVX-512 POPCNT Capabilities (01/30/2024)
> +pg_bitutils.o: CFLAGS+=$(CFLAGS_AVX512_POPCNT)
> +pg_bitutils_shlib.o: CFLAGS+=$(CFLAGS_AVX512_POPCNT)
> +pg_bitutils_srv.o:CFLAGS+=$(CFLAGS_AVX512_POPCNT)
> +
>  # all versions of pg_crc32c_armv8.o need CFLAGS_CRC
>  pg_crc32c_armv8.o: CFLAGS+=$(CFLAGS_CRC)
>  pg_crc32c_armv8_shlib.o: CFLAGS+=$(CFLAGS_CRC)
> diff --git a/src/port/meson.build b/src/port/meson.build
> index 69b30ab21b..1c48a3b07e 100644
> --- a/src/port/meson.build
> +++ b/src/port/meson.build
> @@ -184,6 +184,7 @@ foreach name, opts : pgport_variants
>link_with: cflag_libs,
>c_pch: pch_c_h,
>kwargs: opts + {
> +'c_args': opts.get('c_args', []) + cflags_avx512_popcnt,
>  'dependencies': opts['dependencies'] + [ssl],
>}
>  )

This will build all of pgport with the avx flags, which wouldn't be correct, I
think? The compiler might inject automatic uses of avx512 in places, which
would cause problems, no?

While you don't do the same for make, isn't even just using the avx512 for all
of pg_bitutils.c broken for exactly that reason? That's why the existing code
builds the files for various crc variants as their own file.


Greetings,

Andres Freund




Re: Popcount optimization using AVX512

2024-02-09 Thread Andres Freund
Hi,

On 2024-01-26 07:42:33 +0100, Alvaro Herrera wrote:
> This suggests that finding a way to make the ifunc stuff work (with good
> performance) is critical to this work.

Ifuncs are effectively implemented as a function call via a pointer, they're
not magic, unfortunately. The sole trick they provide is that you don't
manually have to use the function pointer.

Greetings,

Andres




RE: Popcount optimization using AVX512

2024-02-09 Thread Amonson, Paul D
Álvaro,

All feedback is now completed. I added the additional checks for the new APIs 
and a separate check for the header to autoconf.

About the double check for AVX 512 I added a large comment explaining why both 
are needed. There are cases where the CPU ZMM# registers are not exposed by the 
OS or hypervisor even if the CPU supports AVX512.

The big change is adding all old and new build support to meson. I am new to 
meson/ninja so please review carefully.

Thanks,
Paul

-Original Message-
From: Alvaro Herrera  
Sent: Wednesday, February 7, 2024 2:13 AM
To: Amonson, Paul D 
Cc: Shankaran, Akash ; Nathan Bossart 
; Noah Misch ; Tom Lane 
; Matthias van de Meent ; 
pgsql-hackers@lists.postgresql.org
Subject: Re: Popcount optimization using AVX512

Hello,

This looks quite reasonable.  On my machine, I get the compiler test to pass so 
I get a "yes" in configure; but of course my CPU doesn't support the 
instructions so I get the slow variant.  So here's the patch again with some 
minor artifacts fixed.

I have the following review notes:

1. we use __get_cpuid_count and __cpuidex by relying on macros HAVE__GET_CPUID 
and HAVE__CPUID respectively; but those macros are (in the current Postgres 
source) only used and tested for __get_cpuid and __cpuid respectively.  So 
unless there's some reason to be certain that __get_cpuid_count is always 
present when __get_cpuid is present, and that __cpuidex is present when __cpuid 
is present, I think we need to add new configure tests and new HAVE_ macros for 
these.

2. we rely on <immintrin.h> being present with no AC_CHECK_HEADER() test.  We 
currently don't use this header anywhere, so I suppose we need a test for this 
one as well.  (Also, I suppose if we don't have immintrin.h we can skip the 
rest of it?)

3. We do the __get_cpuid_count/__cpuidex test and we also do a xgetbv test.  
The comment there claims that this is to check the results for consistency.  
But ... how would we know that the results are ever inconsistent?  As far as I 
understand, if they were, we would silently become slower.  Is this really what 
we want?  I'm confused about this coding.  Maybe we do need both tests to 
succeed?  In that case, just reword the comment.

I think if both tests are each considered reliable on its own, then we could 
either choose one of them and stick with it, ignoring the other; or we could 
use one as primary and then in a USE_ASSERT_CHECKING block verify that the 
other matches and throw a WARNING if not (but what would that tell us?).  Or 
something like that ... not sure.

4. It needs meson support, which I suppose consists of copying the
c-compiler.m4 test into meson.build, mimicking what the tests for CRC 
instructions do.


I started a CI run with this patch applied,
https://cirrus-ci.com/build/4912499619790848
but because Meson support is missing, the compile failed
immediately:

[10:08:48.825] ccache cc -Isrc/port/libpgport_srv.a.p -Isrc/include 
-I../src/include -Isrc/include/utils -fdiagnostics-color=always -pipe 
-D_FILE_OFFSET_BITS=64 -Wall -Winvalid-pch -g -fno-strict-aliasing -fwrapv 
-fexcess-precision=standard -D_GNU_SOURCE -Wmissing-prototypes -Wpointer-arith 
-Werror=vla -Wendif-labels -Wmissing-format-attribute -Wimplicit-fallthrough=3 
-Wcast-function-type -Wshadow=compatible-local -Wformat-security 
-Wdeclaration-after-statement -Wno-format-truncation -Wno-stringop-truncation 
-fPIC -pthread -DBUILDING_DLL -MD -MQ 
src/port/libpgport_srv.a.p/pg_bitutils.c.o -MF 
src/port/libpgport_srv.a.p/pg_bitutils.c.o.d -o 
src/port/libpgport_srv.a.p/pg_bitutils.c.o -c ../src/port/pg_bitutils.c 
[10:08:48.825] ../src/port/pg_bitutils.c: In function ‘pg_popcount512_fast’:
[10:08:48.825] ../src/port/pg_bitutils.c:270:11: warning: AVX512F vector return 
without AVX512F enabled changes the ABI [-Wpsabi]
[10:08:48.825]   270 |  __m512i  accumulator = _mm512_setzero_si512();
[10:08:48.825]   |   ^~~
[10:08:48.825] In file included from 
/usr/lib/gcc/x86_64-linux-gnu/10/include/immintrin.h:55,
[10:08:48.825]  from ../src/port/pg_bitutils.c:22:
[10:08:48.825] /usr/lib/gcc/x86_64-linux-gnu/10/include/avx512fintrin.h:339:1: 
error: inlining failed in call to ‘always_inline’ ‘_mm512_setzero_si512’: 
target specific option mismatch
[10:08:48.825]   339 | _mm512_setzero_si512 (void)
[10:08:48.825]   | ^~~~
[10:08:48.825] ../src/port/pg_bitutils.c:270:25: note: called from here
[10:08:48.825]   270 |  __m512i  accumulator = _mm512_setzero_si512();
[10:08:48.825]   | ^~


Thanks

-- 
Álvaro HerreraBreisgau, Deutschland  —  https://www.EnterpriseDB.com/
"Siempre hay que alimentar a los dioses, aunque la tierra esté seca" (Orual)


v4-0001-Add-support-for-AVX512-implemented-POPCNT.patch
Description: v4-0001-Add-support-for-AVX512-implemented-POPCNT.patch


Re: POC: Extension for adding distributed tracing - pg_tracing

2024-02-09 Thread Nikita Malakhov
Hi!

I agree with Heikki on most topics and especially the one he recommended
to publish your extension on GitHub, this tool is very promising for highly
loaded
systems, you will get a lot of feedback very soon.

I'm curious about SpinLock - it is recommended for very short operations,
taking up to several instructions, and docs say that for longer ones it
will be
too expensive, and recommends using LWLock. Why have you chosen SpinLock?
Does it have some benefits here?

Thank you!

PS: process_query_desc function has trace_context argument that is neither
used nor passed anywhere.

--
Regards,
Nikita Malakhov
Postgres Professional
The Russian Postgres Company
https://postgrespro.ru/


Re: [PATCH] allow pg_current_logfile() execution under pg_monitor role

2024-02-09 Thread Nathan Bossart
On Fri, Feb 09, 2024 at 04:01:58PM +0100, Pavlo Golub wrote:
> The patch attached fixes an oversight/inconsistency of disallowing the
> pg_monitor system role to execute pg_current_logfile([text]).

I think this is reasonable.  We allow pg_monitor to execute functions like
pg_ls_logdir(), so it doesn't seem like much of a stretch to expect it to
have privileges for pg_current_logfile(), too.  Are there any other
functions that pg_monitor ought to have privileges for?

-- 
Nathan Bossart
Amazon Web Services: https://aws.amazon.com




Re: glibc qsort() vulnerability

2024-02-09 Thread Nathan Bossart
On Fri, Feb 09, 2024 at 01:19:49PM +0500, Andrey M. Borodin wrote:
> If we care about branch prediction in comparison function, maybe we could
> produce sorting that inlines comparator, thus eliminating function call
> to comparator? We convert comparison logic to int, to extract comparison
> back then.
> 
> I bet “call" is more expensive than “if".

It might make sense to have a couple of built-in qsort implementations for
pointers to integers, pointers to unsigned integers, etc.  However, a lot
of current use-cases require inspecting specific fields of structs, so
(assuming I understand your proposal correctly), we'd end up with many
qsort implementations.  If that can be made simple and elegant and
demonstrates substantial improvements, then it might be worth considering,
but I'm somewhat skeptical that the current uses are performance-sensitive
enough to be worth the effort.

-- 
Nathan Bossart
Amazon Web Services: https://aws.amazon.com




Re: glibc qsort() vulnerability

2024-02-09 Thread Tom Lane
Nathan Bossart  writes:
> On Fri, Feb 09, 2024 at 08:52:26AM +0100, Mats Kindahl wrote:
>> The types "int" and "size_t" are treated as s32 and u32 respectively since
>> that seems to be the case for most of the code, even if strictly not
>> correct (size_t can be an unsigned long int for some architecture).

> Why is it safe to do this?

We do pretty much assume that "int" is "int32".  But I agree that
assuming anything about the width of size_t is bad.  I think we need
a separate pg_cmp_size() or pg_cmp_size_t().

regards, tom lane




Re: glibc qsort() vulnerability

2024-02-09 Thread Nathan Bossart
On Fri, Feb 09, 2024 at 08:52:26AM +0100, Mats Kindahl wrote:
> Here is a new version introducing pg_cmp_s32 and friends and use them
> instead of the INT_CMP macro introduced before. It also moves the
> definitions to common/int.h and adds that as an include to all locations
> using these functions.

Thanks for the new version of the patch.

> Note that for integers with sizes less than sizeof(int), C standard
> conversions will convert the values to "int" before doing the arithmetic,
> so no casting is *necessary*. I did not force the 16-bit functions to
> return -1 or 1 and have updated the comment accordingly.

It might not be necessary, but this is one of those places where I would
add casting anyway to make it abundantly clear what we are expecting to
happen and why it is safe.

> The types "int" and "size_t" are treated as s32 and u32 respectively since
> that seems to be the case for most of the code, even if strictly not
> correct (size_t can be an unsigned long int for some architecture).

Why is it safe to do this?

> - return ((const SPLITCOST *) a)->cost - ((const SPLITCOST *) b)->cost;
> + return INT_CMP(((const SPLITCOST *) a)->cost, ((const SPLITCOST *) 
> b)->cost);

The patch still contains several calls to INT_CMP.

> +/*
> + * Comparison routines for integers
> + *
> + */

I'd suggest separating this part out to a 0001 patch to make it easier to
review.  The 0002 patch could take care of converting the existing qsort
comparators.

> +static inline int
> +pg_cmp_s16(int16 a, int16 b)
> +{
> + return a - b;
> +}
> +
> +static inline int
> +pg_cmp_u16(uint16 a, uint16 b)
> +{
> + return a - b;
> +}
> +
> +static inline int
> +pg_cmp_s32(int32 a, int32 b)
> +{
> + return (a > b) - (a < b);
> +}
> +
> +static inline int
> +pg_cmp_u32(uint32 a, uint32 b)
> +{
> + return (a > b) - (a < b);
> +}
> +
> +static inline int
> +pg_cmp_s64(int64 a, int64 b)
> +{
> + return (a > b) - (a < b);
> +}
> +
> +static inline int
> +pg_cmp_u64(uint64 a, uint64 b)
> +{
> + return (a > b) - (a < b);
> +}

As suggested above, IMHO we should be rather liberal with the casting to
ensure it is abundantly clear what is happening here.

-- 
Nathan Bossart
Amazon Web Services: https://aws.amazon.com




Re: Documentation to upgrade logical replication cluster

2024-02-09 Thread vignesh C
On Thu, 1 Feb 2024 at 14:50, vignesh C  wrote:
>
> On Wed, 31 Jan 2024 at 11:42, Hayato Kuroda (Fujitsu)
>  wrote:
> >
> > Dear Vignesh,
> >
> > Thanks for updating the patch! Here are my comments for v6.
> >
> > 01.
> > ```
> > +   Logical replication cluster
> > +   
> > +
> > + A set of publisher and subscriber instance with publisher instance
> > + replicating changes to the subscriber instance.
> > +
> > +   
> > ```
> >
> > Should we say 1:N relationship is allowed?
>
> I felt this need not be mentioned here, just wanted to give an
> indication wherever this terminology is used, it means a set of
> publisher and subscriber instances. Detail information should be added
> in the logical replication related pages
>
> > 02.
> > ```
> > @@ -70,6 +70,7 @@ PostgreSQL documentation
> > pg_upgrade supports upgrades from 9.2.X and later to the current
> > major release of PostgreSQL, including 
> > snapshot and beta releases.
> >
> > +
> >   
> > ```
> >
> > Unnecessary blank.
>
> Removed it.
>
> > 03.
> > ```
> >
> > -   These are the steps to perform an upgrade
> > -   with pg_upgrade:
> > +   Below are the steps to perform an upgrade
> > +   with pg_upgrade.
> >
> > ```
> >
> > I'm not sure it should be included in this patch.
>
> This is not required in this patch, removed it.
>
> > 04.
> > ```
> > +   If the old primary is prior to version 17.0, then no slots on the 
> > primary
> > +   are copied to the new standby, so all the slots on the old standby 
> > must
> > +   be recreated manually.
> > ```
> >
> > I think that "all the slots on the old standby" must be created manually in 
> > any
> > cases. Therefore, the preposition ", so" seems not correct.
>
> I felt that this change is not related to this patch. I'm removing
> these changes from the patch. Let's handle rephrasing of the base code
> change in a separate thread.
>
> > 05.
> > ```
> > If the old primary is version 17.0 or later, then
> > +   only logical slots on the primary are copied to the new standby, but
> > +   other slots on the old standby are not copied, so must be recreated
> > +   manually.
> > ```
> >
> > How about replacing this paragraph to below?
> >
> > ```
> > All the slots on the old standby must be recreated manually. If the old 
> > primary
> > is version 17.0 or later, then only logical slots on the primary are copied 
> > to the
> > new standby.
> > ```
>
> I felt that this change is not related to this patch. I'm removing
> these changes from the patch. Let's handle rephrasing of the base code
> change in a separate thread.

A new thread is started for the same and a patch is attached at [1].
Let's discuss this change at the new thread.
[1] - 
https://www.postgresql.org/message-id/CAHv8RjJHCw0jpUo9PZxjcguzGt3j2W1_NH%3DQuREoN0nYiVdVeA%40mail.gmail.com

Regards,
Vignesh




Improve documentation of upgrade for streaming replication section.

2024-02-09 Thread Shubham Khanna
Hi,

Currently the documentation of upgrade for streaming replication
section says that logical replication slots will be copied
irrespective of the version, but the logical replication slots are
copied only if the old primary is version 17.0 or later. The changes
for the same are in the attached patch.

Thanks and Regards,
Shubham Khanna.


v1-0001-Improve-documentation-of-upgrade-for-streaming-re (2).patch
Description: Binary data


RE: Psql meta-command conninfo+

2024-02-09 Thread Maiquel Grassi
>Hmm, I noticed that this would call printSSLInfo() and printGSSInfo()
>after listConnectionInformation.  It would be much better to show these
>in tabular format as well and remove the calls to printSSL/GSSInfo.
>
>I propose additional columns to the same \conninfo+ table; when SSL,
>like this:
>
>Database   | postgres
>[...]
>Host   | 127.0.0.1
>Encryption | SSL
>Protocol   | PQsslAttribute(protocol)
>Cipher | PQsslAttribute(cipher)
>Compression| PQsslAttribute(compression)
>
>When GSS, like this
>
>Database   | postgres
>[...]
>Host   | 127.0.0.1
>Encryption | GSS
>
>(why don't we print anything else in printGSSInfo()?  That's weird.)

--//--

Hi Álvaro,


Thank you for the suggestion. I will remove printSSLInfo() and printGSSInfo() 
and
incorporate the suggested fields into the tabular result of "\conninfo+".

If it's necessary to adjust printGSSInfo(), we can work on that as well. At 
first
glance, I leave it open for others to analyze and give their opinion.

I'd also like to ask for help with a difficulty. Currently, I'm working on

resolving this situation (highlighted by Pavel Luzanov). How can we
make \conninfo return the same message as \conninfo+ after closing
the current session in another session with pg_terminate_backend(pid)?

[postgres@localhost bin]$ ./psql

psql (17devel)
Type "help" for help.

postgres=# \conninfo
You are connected to database "postgres" as user "postgres" via socket in 
"/tmp" at port "5432".
postgres=# \conninfo+
 
Current Connection Information
 Database | Authenticated User | System User | Current User | Session User | 
Backend PID | Server Address | Server Port | Client Address | Client Port | 
Socket Directory | Host
--++-+--+--+-++-++-+--+--
 postgres | postgres   | | postgres | postgres |
   17281 || 5432|| | /tmp   
  |
(1 row)

postgres=# 2024-02-09 09:15:24.152 -03 [17281] FATAL:  terminating connection 
due to administrator command

postgres=# \conninfo
You are connected to database "postgres" as user "postgres" via socket in 
"/tmp" at port "5432".
postgres=# \conninfo+
FATAL:  terminating connection due to administrator command
server closed the connection unexpectedly
This probably means the server terminated abnormally
before or while processing the request.
The connection to the server was lost. Attempting reset: Succeeded.
postgres=#

Tks a lot!
Maiquel Grassi.


Re: Improve eviction algorithm in ReorderBuffer

2024-02-09 Thread Masahiko Sawada
On Fri, Feb 9, 2024 at 7:35 PM Ajin Cherian  wrote:
>
>
>
> On Tue, Feb 6, 2024 at 5:06 PM Masahiko Sawada  wrote:
>>
>>
>> I've attached the new version patch set.
>>
>> Regards,
>>
>>
>> --
>> Masahiko Sawada
>> Amazon Web Services: https://aws.amazon.com
>
>
> Thanks for the patch. I reviewed that patch and did minimal testing and it 
> seems to show the speed up as claimed. Some minor comments:

Thank you for the comments!

> patch 0001:
>
> +static void
> +bh_enlarge_node_array(binaryheap *heap)
> +{
> + if (heap->bh_size < heap->bh_space)
> + return;
>
> why not check "if (heap->bh_size >= heap->bh_space)" outside this function to 
> avoid calling this function when not necessary? This check was there in code 
> before the patch.

Agreed.

>
> patch 0003:
>
> +/*
> + * The threshold of the number of transactions in the max-heap (rb->txn_heap)
> + * to switch the state.
> + */
> +#define REORDE_BUFFER_MEM_TRACK_THRESHOLD 1024
>
> Typo: I think you meant REORDER_ and not REORDE_

Fixed.

These comments are addressed in the v4 patch set I just shared[1].

Regards,

[1] 
https://www.postgresql.org/message-id/CAD21AoDhuybyryVkmVkgPY8uVrjGLYchL8EY8-rBm1hbZJpwLw%40mail.gmail.com

-- 
Masahiko Sawada
Amazon Web Services: https://aws.amazon.com




Re: Improve eviction algorithm in ReorderBuffer

2024-02-09 Thread Masahiko Sawada
On Thu, Feb 8, 2024 at 6:33 PM Hayato Kuroda (Fujitsu)
 wrote:
>
> Dear Sawada-san,
>
> Thanks for making v3 patchset. I have also benchmarked the case [1].
> Below results are the average of 5th, there are almost the same result
> even when median is used for the comparison. On my env, the regression
> cannot be seen.
>
> HEAD (1e285a5)  HEAD + v3 patches   difference
> 10910.722 ms10714.540 msaround 1.8%
>

Thank you for doing the performance test!

> Also, here are minor comments for v3 set.
>
> 01.
> bh_nodeidx_entry and ReorderBufferMemTrackState is missing in typedefs.list.

Will add them.

>
> 02. ReorderBufferTXNSizeCompare
> Should we assert {ta, tb} are not NULL?

Not sure we really need it as other binaryheap users don't have such checks.

On Tue, Feb 6, 2024 at 2:45 PM Hayato Kuroda (Fujitsu)
 wrote:
>
> > I've run a benchmark test that I shared before[1]. Here are results of
> > decoding a transaction that has 1M subtransaction each of which has 1
> > INSERT:
> >
> > HEAD:
> > 1810.192 ms
> >
> > HEAD w/ patch:
> > 2001.094 ms
> >
> > I set a large enough value to logical_decoding_work_mem not to evict
> > any transactions. I can see about about 10% performance regression in
> > this case.
>
> Thanks for running. I think this workload is the worst and an extreme case 
> which
> would not be occurred on the real system (Such a system should be fixed), so 
> we
> can say that the regression is up to -10%. I felt it could be negligible but 
> how
> do others think?

I think this performance regression is not acceptable. In this
workload, one transaction has 10k subtransactions and the logical
decoding becomes quite slow if logical_decoding_work_mem is not big
enough. Therefore, it's a legitimate and common approach to increase
logical_decoding_work_mem to speed up the decoding. However, with this
patch, the decoding becomes slower than today. It's a bad idea in
general to optimize an extreme case while sacrificing the normal (or
more common) cases.

Therefore, I've improved the algorithm so that we don't touch the
max-heap at all if the number of transactions is small enough. I've
run benchmark test with two workloads:

workload-1, decode single transaction with 800k tuples (normal.sql):

* without spill
HEAD: 13235.136 ms
v3 patch: 14320.082 ms
v4 patch: 13300.665 ms

* with spill
HEAD: 22970.204 ms
v3 patch: 23625.649 ms
v4 patch: 23304.366

workload-2, decode one transaction with 100k subtransaction (many-subtxn.sql):

* without spill
HEAD: 345.718 ms
v3 patch: 409.686 ms
v4 patch: 353.026 ms

* with spill
HEAD: 136718.313 ms
v3 patch: 2675.539 ms
v4 patch: 2734.981 ms

Regards,

-- 
Masahiko Sawada
Amazon Web Services: https://aws.amazon.com


v4-0001-Make-binaryheap-enlareable.patch
Description: Binary data


v4-0002-Add-functions-to-binaryheap-to-efficiently-remove.patch
Description: Binary data


v4-0003-Use-max-heap-to-evict-largest-transactions-in-Reo.patch
Description: Binary data


normal.sql
Description: Binary data


many-subtxn.sql
Description: Binary data


[PATCH] allow pg_current_logfile() execution under pg_monitor role

2024-02-09 Thread Pavlo Golub
Hello,

The patch attached fixes an oversight/inconsistency of disallowing the
pg_monitor system role to execute pg_current_logfile([text]).

pgwatch3=# create user joe;
CREATE ROLE
pgwatch3=# set role joe;
SET
pgwatch3=> select pg_current_logfile();
ERROR:  permission denied for function pg_current_logfile
pgwatch3=> reset role;
RESET
pgwatch3=# grant pg_monitor to joe;
GRANT ROLE
pgwatch3=# set role joe;
SET
pgwatch3=> select pg_current_logfile();
ERROR:  permission denied for function pg_current_logfile
pgwatch3=> select * FROM pg_ls_logdir();
   name   |   size   |  modification
--+--+
 postgresql-2024-02-08_130906.log |  652 | 2024-02-08 13:10:04+01
(5 rows)

Best regards,
Pavlo Golub


0001-allow-pg_current_logfile-execution-under-pg_monitor-.patch
Description: Binary data


Re: failure in 019_replslot_limit

2024-02-09 Thread Alexander Lakhin

Hello Andres,

05.04.2023 21:48, Andres Freund wrote:

I just saw the following failure:
https://buildfarm.postgresql.org/cgi-bin/show_log.pl?nm=mylodon=2023-04-05%2017%3A47%3A03
after a commit of mine. The symptoms look unrelated though.

[17:54:42.188](258.345s) # poll_query_until timed out executing this query:
# SELECT wal_status FROM pg_replication_slots WHERE slot_name = 'rep3'
# expecting this output:
# lost
# last actual query output:
# unreserved
# with stderr:
timed out waiting for slot to be lost at 
/home/bf/bf-build/mylodon/HEAD/pgsql/src/test/recovery/t/019_replslot_limit.pl 
line 400.

We're expecting "lost" but are getting "unreserved".

...

ISTM that this indicates that checkpointer got stuck after signalling
344783.

Do you see any other explanation?



I've managed to reproduce this issue (which still persists:
https://buildfarm.postgresql.org/cgi-bin/show_log.pl?nm=kestrel=2024-02-04%2001%3A53%3A44
) and saw that it's not checkpointer, but walsender is hanging:
[12:15:03.753](34.771s) ok 17 - have walsender pid 317885
[12:15:03.875](0.122s) ok 18 - have walreceiver pid 317884
[12:15:04.808](0.933s) ok 19 - walsender termination logged
...

Last essential messages in _primary3.log are:
2024-02-09 12:15:04.823 UTC [318036][not initialized][:0] LOG: connection 
received: host=[local]
2024-02-09 12:15:04.823 UTC [317885][walsender][3/0:0] FATAL: terminating 
connection due to administrator command
2024-02-09 12:15:04.823 UTC [317885][walsender][3/0:0] STATEMENT: START_REPLICATION SLOT 
"rep3" 0/70 TIMELINE 1
(then the test just queries the slot state, there are no other messages
related to walsender)

And I see the walsender process still running (I've increased the timeout
to keep the test running and to connect to the process in question), with
the following stack trace:
#0  0x7fe4feac3d16 in epoll_wait (epfd=5, events=0x55b279b70f38, maxevents=1, timeout=timeout@entry=-1) at 
../sysdeps/unix/sysv/linux/epoll_wait.c:30
#1  0x55b278b9ab32 in WaitEventSetWaitBlock (set=set@entry=0x55b279b70eb8, cur_timeout=cur_timeout@entry=-1, 
occurred_events=occurred_events@entry=0x7ffda5ffac90, nevents=nevents@entry=1) at latch.c:1571
#2  0x55b278b9b6b6 in WaitEventSetWait (set=0x55b279b70eb8, timeout=timeout@entry=-1, 
occurred_events=occurred_events@entry=0x7ffda5ffac90, nevents=nevents@entry=1, 
wait_event_info=wait_event_info@entry=100663297) at latch.c:1517
#3  0x55b278a3f11f in secure_write (port=0x55b279b65aa0, ptr=ptr@entry=0x55b279bfbd08, len=len@entry=21470) at 
be-secure.c:296

#4  0x55b278a460dc in internal_flush () at pqcomm.c:1356
#5  0x55b278a461d4 in internal_putbytes (s=s@entry=0x7ffda5ffad3c "E\177", 
len=len@entry=1) at pqcomm.c:1302
#6  0x55b278a46299 in socket_putmessage (msgtype=, s=0x55b279b363c0 
"SFATAL", len=112) at pqcomm.c:1483
#7  0x55b278a48670 in pq_endmessage (buf=buf@entry=0x7ffda5ffada0) at 
pqformat.c:302
#8  0x55b278d0c82a in send_message_to_frontend (edata=edata@entry=0x55b27908e500 
) at elog.c:3590
#9  0x55b278d0cfe2 in EmitErrorReport () at elog.c:1716
#10 0x55b278d0d17d in errfinish (filename=filename@entry=0x55b278eaa480 "postgres.c", lineno=lineno@entry=3295, 
funcname=funcname@entry=0x55b278eaaef0 <__func__.16> "ProcessInterrupts") at elog.c:551

#11 0x55b278bc41c9 in ProcessInterrupts () at postgres.c:3295
#12 0x55b278b6c9af in WalSndLoop (send_data=send_data@entry=0x55b278b6c346 
) at walsender.c:2680
#13 0x55b278b6cef1 in StartReplication (cmd=cmd@entry=0x55b279b733e0) at 
walsender.c:987
#14 0x55b278b6d865 in exec_replication_command (cmd_string=cmd_string@entry=0x55b279b39c60 "START_REPLICATION SLOT 
\"rep3\" 0/70 TIMELINE 1") at walsender.c:2039

#15 0x55b278bc7d71 in PostgresMain (dbname=, 
username=) at postgres.c:4649
#16 0x55b278b2329d in BackendRun (port=port@entry=0x55b279b65aa0) at 
postmaster.c:4464
#17 0x55b278b263ae in BackendStartup (port=port@entry=0x55b279b65aa0) at 
postmaster.c:4140
#18 0x55b278b26539 in ServerLoop () at postmaster.c:1776
#19 0x55b278b27ac5 in PostmasterMain (argc=argc@entry=4, 
argv=argv@entry=0x55b279b34180) at postmaster.c:1475
#20 0x55b278a49ab0 in main (argc=4, argv=0x55b279b34180) at main.c:198

(gdb) frame 9
(gdb) print edata->message
$3 = 0x55b279b367d0 "terminating connection due to administrator command"

So it looks like walsender tries to send the message to walreceiver, which
is paused, and thus walsender gets stuck on it.

Best regards,
Alexander




Re: [PATCH] Add CANONICAL option to xmlserialize

2024-02-09 Thread Jim Jones

On 05.10.23 09:38, Jim Jones wrote:
>
> v8 attached changes the default behaviour to WITH COMMENTS.
v9 attached with rebase due to changes done to primnodes.h in 615f5f6

-- 
Jim
From fe51a1826b75b778c21f559236b23d340a10d703 Mon Sep 17 00:00:00 2001
From: Jim Jones 
Date: Fri, 9 Feb 2024 13:51:44 +0100
Subject: [PATCH v9] Add CANONICAL output format to xmlserialize

This patch introduces the CANONICAL option to xmlserialize, which
serializes xml documents in their canonical form - as described in
the W3C Canonical XML Version 1.1 specification. This option can
be used with the additional parameter WITH [NO] COMMENTS to keep
or remove xml comments from the canonical xml output. In case no
parameter is provided, WITH COMMENTS will be used as default. This
feature is based on the function xmlC14NDocDumpMemory from the C14N
module of libxml2.

This patch also includes regression tests and documentation.
---
 doc/src/sgml/datatype.sgml|  41 +++-
 src/backend/executor/execExprInterp.c |   2 +-
 src/backend/parser/gram.y |  21 +-
 src/backend/parser/parse_expr.c   |   2 +-
 src/backend/utils/adt/xml.c   | 272 --
 src/include/nodes/parsenodes.h|   1 +
 src/include/nodes/primnodes.h |  11 ++
 src/include/parser/kwlist.h   |   1 +
 src/include/utils/xml.h   |   2 +-
 src/test/regress/expected/xml.out | 114 +++
 src/test/regress/expected/xml_1.out   | 108 ++
 src/test/regress/expected/xml_2.out   | 114 +++
 src/test/regress/sql/xml.sql  |  63 ++
 src/tools/pgindent/typedefs.list  |   1 +
 14 files changed, 643 insertions(+), 110 deletions(-)

diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml
index 66510ee031..ec2f1137c8 100644
--- a/doc/src/sgml/datatype.sgml
+++ b/doc/src/sgml/datatype.sgml
@@ -4472,7 +4472,7 @@ xml 'bar'
 xml, uses the function
 xmlserialize:xmlserialize
 
-XMLSERIALIZE ( { DOCUMENT | CONTENT } value AS type [ [ NO ] INDENT ] )
+XMLSERIALIZE ( { DOCUMENT | CONTENT } value AS type [ { [ NO ] INDENT ] | CANONICAL [ WITH [NO] COMMENTS ]})
 
 type can be
 character, character varying, or
@@ -4489,6 +4489,45 @@ XMLSERIALIZE ( { DOCUMENT | CONTENT } value AS 
 
+   
+The option CANONICAL converts a given
+XML document to its https://www.w3.org/TR/xml-c14n11/#Terminology;>canonical form
+based on the https://www.w3.org/TR/xml-c14n11/;>W3C Canonical XML 1.1 Specification.
+It is basically designed to provide applications the ability to compare xml documents or test if they
+have been changed. The optional parameters WITH COMMENTS (which is the default) or
+WITH NO COMMENTS, respectively, keep or remove XML comments from the given document.
+
+
+ 
+ Example:
+
+
+   

 When a character string value is cast to or from type
 xml without going through XMLPARSE or
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index 3f20f1dd31..315ee53274 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -4019,7 +4019,7 @@ ExecEvalXmlExpr(ExprState *state, ExprEvalStep *op)
 *op->resvalue =
 	PointerGetDatum(xmltotext_with_options(DatumGetXmlP(value),
 		   xexpr->xmloption,
-		   xexpr->indent));
+		   xexpr->format));
 *op->resnull = false;
 			}
 			break;
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 130f7fc7c3..f54858dfe9 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -616,12 +616,13 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 %type 	xml_root_version opt_xml_root_standalone
 %type 	xmlexists_argument
 %type 	document_or_content
-%type 	xml_indent_option xml_whitespace_option
+%type 	xml_whitespace_option
 %type 	xmltable_column_list xmltable_column_option_list
 %type 	xmltable_column_el
 %type 	xmltable_column_option_el
 %type 	xml_namespace_list
 %type 	xml_namespace_el
+%type  	opt_xml_serialize_format
 
 %type 	func_application func_expr_common_subexpr
 %type 	func_expr func_expr_windowless
@@ -693,7 +694,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 	BACKWARD BEFORE BEGIN_P BETWEEN BIGINT BINARY BIT
 	BOOLEAN_P BOTH BREADTH BY
 
-	CACHE CALL CALLED CASCADE CASCADED CASE CAST CATALOG_P CHAIN CHAR_P
+	CACHE CALL CALLED CANONICAL CASCADE CASCADED CASE CAST CATALOG_P CHAIN CHAR_P
 	CHARACTER CHARACTERISTICS CHECK CHECKPOINT CLASS CLOSE
 	CLUSTER COALESCE COLLATE COLLATION COLUMN COLUMNS COMMENT COMMENTS COMMIT
 	COMMITTED COMPRESSION CONCURRENTLY CONFIGURATION CONFLICT
@@ -15677,14 +15678,14 @@ func_expr_common_subexpr:
 	$$ = makeXmlExpr(IS_XMLROOT, NULL, NIL,
 	 list_make3($3, $5, $6), @1);
 }
-			| XMLSERIALIZE '(' document_or_content a_expr AS SimpleTypename xml_indent_option ')'
+			| XMLSERIALIZE '(' 

Re: [PATCH] Add native windows on arm64 support

2024-02-09 Thread Dave Cramer
On Fri, 9 Feb 2024 at 00:26, Michael Paquier  wrote:

> On Tue, Feb 06, 2024 at 07:01:49AM -0500, Dave Cramer wrote:
> > Thanks, this patch works and
> > testing with meson passes.
>
> Only with the version posted at [1]?  Interesting, that's the same
> contents as v8 posted upthread, minus src/tools/ because we don't need
> to care about them anymore.
>
> Andrew, what's happening on the test side?  It does not seem you've
> mentioned any details about what is going wrong, or I have just missed
> them.
>
> > I'll try the buildfarm next.
>
> [1]:
> https://www.postgresql.org/message-id/ea42654a-3dc4-98b0-335b-56b7ec5e5...@dunslane.net


interestingly meson test does not produce any error
The buildfarm produces the following error for me:

-SELECT relname, attname, coltypes, get_columns_length(coltypes)
- FROM check_columns
- WHERE get_columns_length(coltypes) % 8 != 0 OR
-   'name'::regtype::oid = ANY(coltypes);
- relname | attname | coltypes | get_columns_length
--+-+--+
-(0 rows)
-
+server closed the connection unexpectedly
+ This probably means the server terminated abnormally
+ before or while processing the request.
+connection to server was lost

Dave


RE: speed up a logical replica setup

2024-02-09 Thread Hayato Kuroda (Fujitsu)
Dear Euler,

Further comments for v17.

01.
This program assumes that the target server has the same major version as this program.
Because the target server would be restarted by same version's pg_ctl command.
I felt it should be ensured by reading the PG_VERSION.

02.
pg_upgrade checked the version of using executables, like pg_ctl, postgres, and
pg_resetwal. I felt it should be as well.

03. get_bin_directory
```
if (find_my_exec(path, full_path) < 0)
{
pg_log_error("The program \"%s\" is needed by %s but was not 
found in the\n"
 "same directory as \"%s\".\n",
 "pg_ctl", progname, full_path);
```

s/"pg_ctl"/progname

04.
Missing canonicalize_path()?

05.
Assuming that the target server is a cascade standby, i.e., it has a role as
another primary. In this case, I thought the child node would not work. Because
pg_createsubscriber runs pg_resetwal and all WAL files would be discarded at that
time. I have not tested, but should the program detect it and exit earlier?

06.
wait_for_end_recovery() waits forever even if the standby has been disconnected
from the primary, right? should we check the status of the replication via
pg_stat_wal_receiver?

07.
The cleanup function has couple of bugs.

* If subscriptions have been created on the database, the function also tries to
  drop a publication. But it leads an ERROR because it has been already dropped.
  See setup_subscriber().
* If the subscription has been created, drop_replication_slot() leads an ERROR.
  Because the subscriber tried to drop the subscription while executing DROP 
SUBSCRIPTION.

08.
I found that all messages (ERROR, WARNING, INFO, etc.) would be output to stderr,
but I felt it should be on stdout. Is there a reason? pg_dump outputs messages 
to
stderr, but the motivation might be to avoid confusion with dumps.

09.
I'm not sure the cleanup for subscriber is really needed. Assuming that there
are two databases, e.g., pg1 pg2 , and we fail to create a subscription on pg2.
This can happen when the subscription which has the same name has been already
created on the primary server.
In this case a subscription on pg1 would be removed. But what is the next step?
Since a timelineID on the standby server is larger than the primary (note that
the standby has been promoted once), we cannot resume the physical replication
as-is. IIUC the easiest method to retry is removing a cluster once and 
restarting
from pg_basebackup. If so, no need to cleanup the standby because it is 
corrupted.
We just say "Please remove the cluster and recreate again".

Here is a reproducer.

1. apply the txt patch atop 0001 patch.
2. run test_corruption.sh.
3. when you find a below output [1], connect to a testdb from another terminal 
and
   run CREATE SUBSCRITPION for the same subscription on the primary
4. Finally, pg_createsubscriber would fail the creation.

I also attached server logs of both nodes and the output.
Note again that this is a real issue. I used a tricky way to reliably 
produce an overlapping name,
but this can happen randomly.

10.
While investigating #09, I found that we cannot properly report the reason why the
subscription cannot be created. The output said:

```
pg_createsubscriber: error: could not create subscription 
"pg_createsubscriber_16389_3884" on database "testdb": out of memory
```

But the standby serverlog said:

```
ERROR:  subscription "pg_createsubscriber_16389_3884" already exists
STATEMENT:  CREATE SUBSCRIPTION pg_createsubscriber_16389_3884 CONNECTION 
'user=postgres port=5431 dbname=testdb' PUBLICATION pg_createsubscriber_16389 
WITH (create_slot = false, copy_data = false, enabled = false)
```

[1]
```
pg_createsubscriber: creating the replication slot 
"pg_createsubscriber_16389_3884" on database "testdb"
pg_createsubscriber: XXX: sleep 20s
```

Best Regards,
Hayato Kuroda
FUJITSU LIMITED
https://www.fujitsu.com/global/ 



N1.log
Description: N1.log


result.dat
Description: result.dat


server_start_20240209T115112.963.log
Description: server_start_20240209T115112.963.log


test_corruption.sh
Description: test_corruption.sh
diff --git a/src/bin/pg_basebackup/pg_createsubscriber.c 
b/src/bin/pg_basebackup/pg_createsubscriber.c
index 9628f32a3e..fdb3e92aa3 100644
--- a/src/bin/pg_basebackup/pg_createsubscriber.c
+++ b/src/bin/pg_basebackup/pg_createsubscriber.c
@@ -919,6 +919,9 @@ create_logical_replication_slot(PGconn *conn, 
LogicalRepInfo *dbinfo,
 
pg_log_info("creating the replication slot \"%s\" on database \"%s\"", 
slot_name, dbinfo->dbname);
 
+   pg_log_info("XXX: sleep 20s");
+   sleep(20);
+
appendPQExpBuffer(str, "SELECT lsn FROM 
pg_create_logical_replication_slot('%s', '%s', %s, false, false)",
  slot_name, "pgoutput", temporary ? 
"true" : "false");
 


Re: speed up a logical replica setup

2024-02-09 Thread Shubham Khanna
On Wed, Feb 7, 2024 at 10:24 AM Euler Taveira  wrote:
>
> On Fri, Feb 2, 2024, at 6:41 AM, Hayato Kuroda (Fujitsu) wrote:
>
> Thanks for updating the patch!
>
>
> Thanks for taking a look.
>
> >
> I'm still working on the data structures to group options. I don't like the 
> way
> it was grouped in v13-0005. There is too many levels to reach database name.
> The setup_subscriber() function requires the 3 data structures.
> >
>
> Right, your refactoring looks fewer stack. So I pause to revise my refactoring
> patch.
>
>
> I didn't complete this task yet so I didn't include it in this patch.
>
> >
> The documentation update is almost there. I will include the modifications in
> the next patch.
> >
>
> OK. I think it should be modified before native speakers will attend to the
> thread.
>
>
> Same for this one.
>
> >
> Regarding v13-0004, it seems a good UI that's why I wrote a comment about it.
> However, it comes with a restriction that requires a similar HBA rule for both
> regular and replication connections. Is it an acceptable restriction? We might
> paint ourselves into the corner. A reasonable proposal is not to remove this
> option. Instead, it should be optional. If it is not provided, 
> primary_conninfo
> is used.
> >
>
> I didn't have such a point of view. However, it is not related whether -P 
> exists
> or not. Even v14-0001 requires primary to accept both normal/replication 
> connections.
> If we want to avoid it, the connection from pg_createsubscriber can be 
> restored
> to replication-connection.
> (I felt we do not have to use replication protocol even if we change the 
> connection mode)
>
>
> That's correct. We made a decision to use non physical replication connections
> (besides the one used to connect primary <-> standby). Hence, my concern about
> a HBA rule falls apart. I'm not convinced that using a modified
> primary_conninfo is the only/right answer to fill the subscription connection
> string. Physical vs logical replication has different requirements when we 
> talk
> about users. The physical replication requires only the REPLICATION privilege.
> On the other hand, to create a subscription you must have the privileges of
> pg_create_subscription role and also CREATE privilege on the specified
> database. Unless, you are always recommending the superuser for this tool, I'm
> afraid there will be cases that reusing primary_conninfo will be an issue. The
> more I think about this UI, the more I think that, if it is not hundreds of
> lines of code, it uses the primary_conninfo the -P is not specified.
>
> The motivation why -P is not needed is to ensure the consistency of nodes.
> pg_createsubscriber assumes that the -P option can connect to the upstream 
> node,
> but no one checks it. Parsing two connection strings may be a solution but be
> confusing. E.g., what if some options are different?
> I think using a same parameter is a simplest solution.
>
>
> Ugh. An error will occur the first time (get_primary_sysid) it tries to 
> connect
> to primary.
>
> I found that no one refers the name of temporary slot. Can we remove the 
> variable?
>
>
> It is gone. I did a refactor in the create_logical_replication_slot function.
> Slot name is assigned internally (no need for slot_name or temp_replslot) and
> temporary parameter is included.
>
> Initialization by `CreateSubscriberOptions opt = {0};` seems enough.
> All values are set to 0x0.
>
>
> It is. However, I keep the assignments for 3 reasons: (a) there might be
> parameters whose default value is not zero, (b) the standard does not say that
> a null pointer must be represented by zero and (c) there is no harm in being
> paranoid during initial assignment.
>
> You said "target server must be a standby" in [1], but I cannot find checks 
> for it.
> IIUC, there are two approaches:
> a) check the existence "standby.signal" in the data directory
> b) call an SQL function "pg_is_in_recovery"
>
>
> I applied v16-0004 that implements option (b).
>
> I still think they can be combined as "bindir".
>
>
> I applied a patch that has a single variable for BINDIR.
>
> WriteRecoveryConfig() writes GUC parameters to postgresql.auto.conf, but not
> sure it is good. These settings would remain on new subscriber even after the
> pg_createsubscriber. Can we avoid it? I come up with passing these parameters
> via pg_ctl -o option, but it requires parsing output from 
> GenerateRecoveryConfig()
> (all GUCs must be allign like "-c XXX -c XXX -c XXX...").
>
>
> I applied a modified version of v16-0006.
>
> Functions arguments should not be struct because they are passing by value.
> They should be a pointer. Or, for modify_subscriber_sysid and 
> wait_for_end_recovery,
> we can pass a value which would be really used.
>
>
> Done.
>
> 07.
> ```
> static char *get_base_conninfo(char *conninfo, char *dbname,
>const char *noderole);
> ```
>
> Not sure noderole should be passed here. It is used only for the logging.
> Can we output string before 

Re: Synchronizing slots from primary to standby

2024-02-09 Thread Amit Kapila
On Fri, Feb 9, 2024 at 10:00 AM Zhijie Hou (Fujitsu)
 wrote:
>

Few comments on 0001
===
1. Shouldn't pg_sync_replication_slots() check whether the user has
replication privilege?

2. The function declarations in slotsync.h don't seem to be in the
same order as they are defined in slotsync.c. For example, see
ValidateSlotSyncParams(). The same is true for new functions exposed
via walreceiver.h and walsender.h. Please check the patch for other
such inconsistencies.

3.
+# Wait for the standby to finish sync
+$standby1->wait_for_log(
+ qr/LOG: ( [A-Z0-9]+:)? newly created slot \"lsub1_slot\" is sync-ready now/,
+ $offset);
+
+$standby1->wait_for_log(
+ qr/LOG: ( [A-Z0-9]+:)? newly created slot \"lsub2_slot\" is sync-ready now/,
+ $offset);
+
+# Confirm that the logical failover slots are created on the standby and are
+# flagged as 'synced'
+is($standby1->safe_psql('postgres',
+ q{SELECT count(*) = 2 FROM pg_replication_slots WHERE slot_name IN
('lsub1_slot', 'lsub2_slot') AND synced;}),
+ "t",
+ 'logical slots have synced as true on standby');

Isn't the last test that queried pg_replication_slots sufficient? I
think wait_for_log() would be required for slotsync worker or am I
missing something?

Apart from the above, I have modified a few comments in the attached.

-- 
With Regards,
Amit Kapila.
diff --git a/src/backend/replication/logical/slotsync.c 
b/src/backend/replication/logical/slotsync.c
index b8b5bd61b4..0d5ca29c18 100644
--- a/src/backend/replication/logical/slotsync.c
+++ b/src/backend/replication/logical/slotsync.c
@@ -13,14 +13,15 @@
  * the slots on the standby and synchronize them. This is done by a call to SQL
  * function pg_sync_replication_slots.
  *
- * If on the physical standby, the restart_lsn and/or local catalog_xmin is
- * ahead of those on the remote then we cannot create the local standby slot
- * in sync with the primary server because that would mean moving the local
- * slot backwards and the standby might not have WALs retained for old LSN.
- * In this case, the slot will be marked as RS_TEMPORARY. Once the primary
- * server catches up, the slot will be marked as RS_PERSISTENT (which
- * means sync-ready) after which we can call pg_sync_replication_slots()
- * periodically to perform syncs.
+ * If on physical standby, the WAL corresponding to the remote's restart_lsn
+ * is not available or the remote's catalog_xmin precedes the oldest xid for 
which
+ * it is guaranteed that rows wouldn't have been removed then we cannot create
+ * the local standby slot because that would mean moving the local slot
+ * backward and decoding won't be possible via such a slot. In this case, the
+ * slot will be marked as RS_TEMPORARY. Once the primary server catches up,
+ * the slot will be marked as RS_PERSISTENT (which means sync-ready) after
+ * which we can call pg_sync_replication_slots() periodically to perform
+ * syncs.
  *
  * Any standby synchronized slots will be dropped if they no longer need
  * to be synchronized. See comment atop drop_local_obsolete_slots() for more
@@ -60,15 +61,10 @@
 #include "utils/timeout.h"
 #include "utils/varlena.h"
 
-/*
- * Struct for sharing information to control slot synchronization.
- *
- * The 'syncing' flag should be set to true in any process that is syncing
- * slots to prevent concurrent slot sync, which could lead to errors and slot
- * info overwrite.
- */
+/* Struct for sharing information to control slot synchronization. */
 typedef struct SlotSyncCtxStruct
 {
+   /* prevents concurrent slot syncs to avoid slot overwrites */
boolsyncing;
slock_t mutex;
 } SlotSyncCtxStruct;
@@ -933,8 +929,8 @@ SlotSyncInitialize(void)
 }
 
 /*
- * Synchronize the replication slots with failover enabled using the
- * specified primary server connection.
+ * Synchronize the failover enabled replication slots using the specified
+ * primary server connection.
  */
 void
 SyncReplicationSlots(WalReceiverConn *wrconn)
diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index c443e949b2..94447f6228 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -266,10 +266,10 @@ ReplicationSlotCreate(const char *name, bool db_specific,
ReplicationSlotValidateName(name, ERROR);
 
/*
-* Do not allow users to create the slots with failover enabled on the
-* standby as we do not support sync to the cascading standby.
+* Do not allow users to create the failover enabled slots on the 
standby
+* as we do not support sync to the cascading standby.
 *
-* However, slots with failover enabled can be created during slot
+* However, failover enabled slots can be created during slot
 * synchronization because we need to retain the same values as the 
remote
 * slot.
 */
@@ -736,8 +736,8 @@ ReplicationSlotAlter(const char *name, bool failover)
  

Re: pipe_read_line for reading arbitrary strings

2024-02-09 Thread Daniel Gustafsson
The attached v5 is a rebase with no new changes just to get a fresh run in the
CFBot before pushing.  All review comments have been addressed and the patch
has been Ready for Committer for some time, just didn't have time to get to it
in the last CF.

--
Daniel Gustafsson



v5-0001-Refactor-pipe_read_line-to-return-the-full-line.patch
Description: Binary data


Re: Improve eviction algorithm in ReorderBuffer

2024-02-09 Thread Ajin Cherian
On Tue, Feb 6, 2024 at 5:06 PM Masahiko Sawada 
wrote:

>
> I've attached the new version patch set.
>
> Regards,
>
>
> --
> Masahiko Sawada
> Amazon Web Services: https://aws.amazon.com


Thanks for the patch. I reviewed that patch and did minimal testing and it
seems to show the speed up as claimed. Some minor comments:
patch 0001:

+static void
+bh_enlarge_node_array(binaryheap *heap)
+{
+ if (heap->bh_size < heap->bh_space)
+ return;

why not check "if (heap->bh_size >= heap->bh_space)" outside this function
to avoid calling this function when not necessary? This check was there in
code before the patch.

patch 0003:

+/*
+ * The threshold of the number of transactions in the max-heap
(rb->txn_heap)
+ * to switch the state.
+ */
+#define REORDE_BUFFER_MEM_TRACK_THRESHOLD 1024

Typo: I think you meant REORDER_ and not REORDE_

regards,
Ajin Cherian
Fujitsu Australia


Re: GUC-ify walsender MAX_SEND_SIZE constant

2024-02-09 Thread Majid Garoosi
On Fri, 9 Feb 2024 at 08:50, Michael Paquier  wrote:

> Something to be aware of, but the community lists use bottom-posting
> for replies because it is easier to follow the logic of a thread this
> way.  See here:
> https://en.wikipedia.org/wiki/Posting_style#Bottom-posting
>
Oh, sorry for not using the convention here. I just noticed that after
pressing the send button. =)

Thanks for the patch, that looks rather fine.  I have spent some time
> polishing the docs, adding a mention that increasing the value can
> show benefits depending on what you do.  How does the attached look to
> you?
>
I took a look at it and it seems good to me.
Maybe I should work on my writing skills more. :D

Best
Majid


Re: Psql meta-command conninfo+

2024-02-09 Thread Alvaro Herrera
Hmm, I noticed that this would call printSSLInfo() and printGSSInfo()
after listConnectionInformation.  It would be much better to show these
in tabular format as well and remove the calls to printSSL/GSSInfo.

I propose additional columns to the same \conninfo+ table; when SSL,
like this:

Database   | postgres
[...]
Host   | 127.0.0.1
Encryption | SSL
Protocol   | PQsslAttribute(protocol)
Cipher | PQsslAttribute(cipher)
Compression| PQsslAttribute(compression)

When GSS, like this

Database   | postgres
[...]
Host   | 127.0.0.1
Encryption | GSS

(why don't we print anything else in printGSSInfo()?  That's weird.)

-- 
Álvaro Herrera PostgreSQL Developer  —  https://www.EnterpriseDB.com/
"Pido que me den el Nobel por razones humanitarias" (Nicanor Parra)




Re: Extract numeric filed in JSONB more effectively

2024-02-09 Thread Andy Fan

Hi,

Here is the update of this patch.

1. What is it for?

commit f7b93acc24b4a152984048fefc6d71db606e3204 (HEAD -> jsonb_numeric)
Author: yizhi.fzh 
Date:   Fri Feb 9 16:54:06 2024 +0800

Improve the performance of Jsonb numeric/bool extraction.

JSONB object uses a binary compatible numeric format with the numeric
data type in SQL. However in the past, extracting a numeric value from a
JSONB field still needs to find the corresponding JsonbValue first,
then convert the JsonbValue to Jsonb, and finally use the cast system to
convert the Jsonb to a Numeric data type. This approach was very
inefficient in terms of performance.

In the current patch, the JsonbValue is converted to the numeric data
type directly.  This is done by the planner support
function, which detects the above case and simplifies the expression.
Because the boolean type and numeric type share certain similarities in
their attributes, we have implemented the same optimization approach for
both.  In the ideal test case, the performance can be 2x than before.

The optimized functions and operators includes:
1. jsonb_object_field / ->
2. jsonb_array_element / ->
3. jsonb_extract_path / #>
4. jsonb_path_query
5. jsonb_path_query_first

example:
create table tb(a jsonb);
insert into tb select '{"a": 1, "b": "a"}'::jsonb;


master:
explain (costs off, verbose) select * from tb where (a->'a')::numeric > 
3::numeric;
QUERY PLAN 
---
 Seq Scan on public.tb
   Output: a
   Filter: (((tb.a -> 'a'::text))::numeric > '3'::numeric)
(3 rows)

patched:

postgres=# explain (costs off, verbose) select * from tb where 
(a->'a')::numeric > 3::numeric;
 QUERY PLAN 
 
-
 Seq Scan on public.tb
   Output: a
   Filter: (jsonb_finish_numeric(jsonb_object_field_start((tb.a)::internal, 
'a'::text), '1700'::oid) > '3'::numeric)
(3 rows)

The final expression generated by planner support function includes:

1).
jsonb_object_field_start((tb.a)::internal, 'a'::text) first, this
function returns the internal datum which is JsonbValue in fact.
2).
jsonb_finish_numeric(internal (jsonbvalue), '1700::oid) converts the
jsonbvalue to numeric directly without the jsonb as an intermediate result.

the reason why "1700::oid" is used will be explained later; that's the key
issue right now.

The reason why we need the 2 steps rather than 1 step is that the
code can be better abstracted; the idea comes from Chap, and the detailed
explanation is at [1]. You can search "Now, it would make me happy to
further reduce some of the code duplication" and read the following
paragraph.


2. Where is the current feature blocked for the past few months?

It's error message compatible issue! Continue with above setup:

master:

select * from tb where (a->'b')::numeric > 3::numeric;
ERROR:  cannot cast jsonb string to type numeric

select * from tb where (a->'b')::int4 > 3::numeric;
ERROR:  cannot cast jsonb string to type integer

You can see the error message is different (numeric vs integer). 


Patched:

We still can get the same error message as master BUT the code
looks odd.

select * from tb where (a->'b')::int4 > 3;
QUERY PLAN  
   
---
 Seq Scan on public.tb
   Output: a
   Filter: ((jsonb_finish_numeric(jsonb_object_field_start((tb.a)::internal, 
'b'::text), '23'::oid))::integer > 3)
(3 rows)

You can see "jsonb_finish_numeric(..,  '23::oid)" the '23::oid' is just
for the *"integer"* output in error message:

"cannot cast jsonb string to type *integer*"

Now the situation is that either we use the odd argument (23::oid) in
jsonb_finish_numeric, or we use an error message incompatible with the
previous version. I'm not sure which way is better, but this is the
place where the current feature is blocked.

3. what do I want now?

Since this feature uses the planner support function which needs some
catalog changes, so it is better that we can merge this feature in PG17,
or else, we have to target it in PG18. So if some senior developers can
chime in, for the current blocking issue at least, will be pretty
helpful.

[1]
https://www.postgresql.org/message-id/5138c6b5fd239e7ce4e1a4e63826ac27%40anastigmatix.net
 

-- 
Best Regards
Andy Fan

>From f7b93acc24b4a152984048fefc6d71db606e3204 Mon Sep 17 00:00:00 2001
From: "yizhi.fzh" 
Date: Fri, 9 Feb 2024 16:54:06 +0800
Subject: [PATCH v16 1/1] Improve the performance of Jsonb numeric/bool
 extraction.

JSONB object uses a binary compatible 

RE: Psql meta-command conninfo+

2024-02-09 Thread Maiquel Grassi
>Hi Nathan
>
>On 09.02.24 03:41, Nathan Bossart wrote:
>
>> Separately, does
>> the server version really belong here?  I'm not sure I would consider that
>> to be connection information.
>>
>I tend to agree with you. The info there wouldn't hurt, but perhaps the
>client version would be a better fit.

--//--

Hi!

I believe that if we include the server version, it would also be
necessary to include the version of psql. This would make it
clearer for the user. I agree that this won't make much difference
in practice, especially because we're interested in creating a set of
information directly related to the connection. I prefer to keep it
suppressed for now. In the future, if necessary, we can add this
information without any problem. In v12, "Server Version" is
already removed.

Tks!
Maiquel Grassi.


Re: Reducing connection overhead in pg_upgrade compat check phase

2024-02-09 Thread Daniel Gustafsson
> On 9 Feb 2024, at 00:04, Daniel Gustafsson  wrote:
> 
>> On 8 Feb 2024, at 15:16, Daniel Gustafsson  wrote:
> 
>> One option could perhaps be to include a version number for <= comparison, 
>> and
>> if set to zero a function pointer to a version check function must be 
>> provided?
>> That would handle the simple cases in a single place without messy logic, and
>> leave the more convoluted checks with a special case function.
> 
> The attached is a draft version of this approach, each check can define to run
> for all versions, set a threshold version for which it runs or define a
> callback which implements a more complicated check.

And again pgindented and with documentation on the struct members to make it
easy to add new checks.  A repetitive part of the report text was also moved to
a single place.

--
Daniel Gustafsson



v14-0001-pg_upgrade-run-all-data-type-checks-per-connecti.patch
Description: Binary data


RE: Psql meta-command conninfo+

2024-02-09 Thread Maiquel Grassi
Hi Pavel!

>The patch v10 build ends with a warning:
>$ make -j --silent
>describe.c:911:1: warning: no previous prototype for 
>‘listConnectionInformation’ [-Wmissing-prototypes]
>  911 | listConnectionInformation()
  | ^

>About terms.

I resolved this in v11. I had forgotten to put
the 'void' inside the parentheses (describe.h and describe.c).


>postgres@postgres(17.0)=# \x \conninfo+
>Expanded display is on.
>Current Connection Information
>-[ RECORD 1 ]--+-
>Database   | postgres
>Authenticated User | postgres
>System User|
>Current User   | postgres
>Session User   | postgres
>Session PID| 951112
>Server Version | 17devel
>Server Address |
>Server Port| 5401
>Client Address |
>Client Port|
>Socket Directory   | /tmp
>Host   |

>It looks like "Session PID" is a new term for the server process identifier.
>How about changing the name to "Backend PID" (from pg_backend_pid) or even PID 
>(from pg_stat_activity)?


You're right, it's better to stick with the proper terms already in use
in PostgreSQL. This ensures that the user doesn't get confused. I've
changed it to "Backend PID".


>On 08.02.2024 17:58, Maiquel Grassi wrote:

> 1.
> + if (db == NULL)
> + printf(_("You are currently not connected to a 
> database.\n"));
>
> This check is performed for \conninfo, but not for \conninfo+.

1. The connection check for the case of \conninfo+ is handled by "describe.c" 
itself since it deals with queries. I might be mistaken, but I believe that by 
using "printQuery()" via "describe.c", this is already ensured, and there is no 
need to evaluate the connection status.

>I found that \conninfo and \conninfo+ act differently when the connection is 
>broken.
>I used pg_terminate_backend function from another session to terminate an open 
>psql session.
>After that, \conninfo does not see the connection break (surprisingly!), and 
>\conninfo+ returns an error:

>postgres@postgres(17.0)=# \conninfo+
>FATAL:  terminating connection due to administrator command
>server closed the connection unexpectedly
>This probably means the server terminated abnormally
>before or while processing the request.
>The connection to the server was lost. Attempting reset: Succeeded.


For this case, I believe it's already resolved, because if you get a
return indicating that the connection was terminated, and indeed it was,
then "describe.c" is handling it correctly. At least that's what
it seems like.

>postgres@postgres(17.0)=# \conninfo
>You are connected to database "postgres" as user "postgres" via socket in 
>"/tmp" at port "5401".


Here it seems like we have an issue to be studied and subsequently resolved.

>Another surprise is that this check: if (db == NULL) did not work in both 
>cases.

I will investigate further and, if necessary, remove it.

Here's v12, in the next version, I'll try to address the above situation.

Thanks a lot!
Maiquel Grassi.


v12-0001-psql-meta-command-conninfo-plus.patch
Description: v12-0001-psql-meta-command-conninfo-plus.patch


Fix parallel vacuum buffer usage reporting

2024-02-09 Thread Anthonin Bonnefoy
Hi,

With a db setup with pgbench, we add an additional index:
CREATE INDEX ON pgbench_accounts(abalance)

And trigger several updates and vacuum to reach a stable amount of
dirtied pages:
UPDATE pgbench_accounts set abalance = abalance + 1 WHERE aid=1; CHECKPOINT;
VACUUM (VERBOSE, INDEX_CLEANUP ON) pgbench_accounts

The vacuum will report the following:
INFO:  vacuuming "postgres.public.pgbench_accounts"
INFO:  launched 1 parallel vacuum worker for index vacuuming (planned: 1)
INFO:  finished vacuuming "postgres.public.pgbench_accounts": index scans: 1
...
buffer usage: 122 hits, 165 misses, 4 dirtied

4 pages were reported dirtied. However, we have 5 dirtied blocks at
the end of the vacuum when looking at pg_buffercache:

SELECT c.relname, b.relfilenode
 FROM
pg_buffercache b LEFT JOIN pg_class c
 ON b.relfilenode =
pg_relation_filenode(c.oid)
   WHERE isdirty=true;
relname| relfilenode
---+-
 pg_class  |1259
 pgbench_accounts  |   16400
 pgbench_accounts  |   16400
 pgbench_accounts_pkey |   16408
 pgbench_accounts_abalance_idx |   16480

The missing dirty block comes from the parallel worker vacuuming the
abalance index. Running vacuum with parallel disabled will give the
correct result.

Vacuum uses dedicated VacuumPage{Hit,Miss,Dirty} globals to track
buffer usage. However, those values are not collected at the end of
parallel vacuum workers, leading to incorrect buffer count.

Those vacuum specific globals are redundant with the existing
pgBufferUsage and only used in the verbose output. This patch removes
them and replaces them by pgBufferUsage which is already correctly
collected at the end of parallel workers, fixing the buffer count.

Regards,
Anthonin


v01-0001-Fix-parallel-vacuum-buffer-usage-reporting.patch
Description: Binary data


Re: Printing backtrace of postgres processes

2024-02-09 Thread Ashutosh Bapat
On Fri, Feb 9, 2024 at 2:18 PM Alvaro Herrera  wrote:
>
> On 2024-Feb-09, Michael Paquier wrote:
>
> > Anyway, I've been digging around the signal-safety of backtrace(3)
> > (even looking a bit at some GCC code, brrr), and I am under the
> > impression that backtrace() is just by nature not safe and also
> > dangerous in signal handlers.  One example of issue I've found:
> > https://github.com/gperftools/gperftools/issues/838
> >
> > This looks like enough ground to me to reject the patch.
>
> Hmm, but the backtrace() manpage says
>
>•  backtrace() and backtrace_symbols_fd() don't call malloc()  explic‐
>   itly,  but  they  are part of libgcc, which gets loaded dynamically
>   when first used.  Dynamic loading usually triggers a call  to  mal‐
>   loc(3).   If  you  need certain calls to these two functions to not
>   allocate memory (in signal handlers, for example), you need to make
>   sure libgcc is loaded beforehand.
>
> and the patch ensures that libgcc is loaded by calling a dummy
> backtrace() at the start of the process.
>

We defer actual action triggered by a signal till CHECK_FOR_INTERRUPTS
is called. I understand that we can't do that here since we want to
capture the backtrace at that moment and can't wait till next CFI. But
printing the backend can surely wait till next CFI right?


-- 
Best Wishes,
Ashutosh Bapat




Re: Printing backtrace of postgres processes

2024-02-09 Thread Alvaro Herrera
On 2024-Feb-09, Michael Paquier wrote:

> Anyway, I've been digging around the signal-safety of backtrace(3)
> (even looking a bit at some GCC code, brrr), and I am under the
> impression that backtrace() is just by nature not safe and also
> dangerous in signal handlers.  One example of issue I've found:
> https://github.com/gperftools/gperftools/issues/838
> 
> This looks like enough ground to me to reject the patch.

Hmm, but the backtrace() manpage says

   •  backtrace() and backtrace_symbols_fd() don't call malloc()  explic‐
  itly,  but  they  are part of libgcc, which gets loaded dynamically
  when first used.  Dynamic loading usually triggers a call  to  mal‐
  loc(3).   If  you  need certain calls to these two functions to not
  allocate memory (in signal handlers, for example), you need to make
  sure libgcc is loaded beforehand.

and the patch ensures that libgcc is loaded by calling a dummy
backtrace() at the start of the process.

-- 
Álvaro Herrera   48°01'N 7°57'E  —  https://www.EnterpriseDB.com/
"All rings of power are equal,
But some rings of power are more equal than others."
 (George Orwell's The Lord of the Rings)




Re: Small fix on query_id_enabled

2024-02-09 Thread Julien Rouhaud
Hi,

On Fri, Feb 09, 2024 at 03:38:23PM +0900, Yugo NAGATA wrote:
>
> I found the comment on query_id_enabled looks inaccurate because this is
> never set to true when compute_query_id is ON.
>
>  /* True when compute_query_id is ON, or AUTO and a module requests them */
>  bool   query_id_enabled = false;
>
> Should we fix this as following (just fixing the place of a comma) ?
>
> /* True when compute_query_id is ON or AUTO, and a module requests them */

Agreed.

> Also, I think the name is a bit confusing for the same reason, that is,
> query_id_enabled may be false even when query id is computed in fact.
>
> Actually, this does not matter because we use IsQueryIdEnabled to check
> if query id is enabled,  instead of referring to a global variable
> (query_id_enabled or compute_query_id). But, just for making a code a bit
> more readable, how about renaming this to query_id_required which seems to
> stand for the meaning more correctly?

-1 for renaming to avoid breaking extensions that might access it.  We should
simply document for compute_query_id and query_id_enabled declaration that one
should instead use IsQueryIdEnabled() if they're interested in whether the core
queryid are computed or not.




RE: Psql meta-command conninfo+

2024-02-09 Thread Maiquel Grassi
>Sorry if this has been brought up, but I noticed that some of the

>information is listed below the result set:
>
  >  postgres=# \conninfo+
  >Current Connection Information
  >  -[ RECORD 1 ]--+-
  > Database   | postgres
  >  Authenticated User | nathan
  >  System User|
  >  Current User   | nathan
  >  Session User   | nathan
  > Session PID| 659410
  >  Server Version | 17devel
  >  Server Address | ::1
  >  Server Port| 5432
  >  Client Address | ::1
  >  Client Port| 59886
  >  Socket Directory   |
  >  Host   | ::1

  >  SSL connection (protocol: TLSv1.3, cipher: TLS_AES_256_GCM_SHA384, 
compression: off)

>Shouldn't we move this information into the result set?  Separately, does
>the server version really belong here?  I'm not sure I would consider that
>to be connection information.

--//--


Hi Nathan,

The "Server Version" information is closely related to the connection. However,
it does seem to be an element that does not belong to this set. I removed this
column and left only what is truly connection info.


Regarding the functions "printSSLInfo()" and "printGSSInfo()", I agree that we
should include them in the returned dataset (as two additional columns). A good
argument is that this will make more sense when using \x (Expanded display).
However, they are declared and defined within the "command.c" file. To make
calls to "printSSLInfo()" and "printGSSInfo()" within "describe.c", we would 
need
to move their declarations to a new header and create a new C file. I believe
something like "ssl_gss_info.h" and "ssl_gss_info.c". I'm not sure, but at 
first glance,
this is what occurs to me. Do you have any better or more concise suggestions
for resolving this?

Regards,
Maiquel Grassi.


v11-0001-psql-meta-command-conninfo-plus.patch
Description: v11-0001-psql-meta-command-conninfo-plus.patch


v11-0001-psql-meta-command-conninfo-plus.patch
Description: v11-0001-psql-meta-command-conninfo-plus.patch


Re: Make COPY format extendable: Extract COPY TO format implementations

2024-02-09 Thread Michael Paquier
On Fri, Feb 09, 2024 at 04:32:05PM +0900, Sutou Kouhei wrote:
> In 
>   "Re: Make COPY format extendable: Extract COPY TO format implementations" 
> on Fri, 9 Feb 2024 13:21:34 +0900,
>   Michael Paquier  wrote:
>> A next step I think we could take is to split the binary-only and the
>> text/csv-only data in each cstate into their own structure to make the
>> structure, with an opaque pointer that custom formats could use, but a
>> lot of fields are shared as well.
> 
> It'll make COPY code base cleaner but it may decrease
> performance.

Perhaps, but I'm not sure, TBH.  But perhaps others can comment on
this point.  This surely needs to be studied closely.

> My suggestion:
> 1. Introduce Copy{To,From}Routine
>(We can do it based on the v14 patch.)
> 2. Add an opaque pointer to Copy{To,From}Routine
>(This must not have performance impact.)
> 3.a. Split format specific data to the opaque space
> 3.b. Add support for registering custom format handler by
>  creating a function
> 4. ...

4. is going to need 3.  At this point 3.b sounds like the main thing
to tackle first if we want to get something usable for the end-user
into this release, at least.  Still 2 is important for pluggability
as we pass the cstates across all the routines and custom formats want
to save their own data, so this split sounds OK.  I am not sure how
much of 3.a we really need to do for the in-core formats.

> I think that we should not use this approach for
> performance. We need to use "static inline" and constant
> argument instead something like the attached
> remove-copy-read-attributes.diff.

FWIW, using inlining did not show any performance change here.
Perhaps that's only because this is called in the COPY FROM path once
per row (even for the case of using 1 attribute with blackhole_am).
--
Michael


signature.asc
Description: PGP signature


Re: glibc qsort() vulnerability

2024-02-09 Thread Andrey M. Borodin



> On 8 Feb 2024, at 06:52, Nathan Bossart  wrote:
> 
> For the same compASC() test, I see an ~8.4% improvement with your int64
> code and a ~3.4% improvement with this:

If we care about branch prediction in comparison function, maybe we could 
produce sorting that inlines comparator, thus eliminating function call to 
comparator? We convert comparison logic to int, to extract comparison back then.

I bet “call" is more expensive than “if".


Best regards, Andrey Borodin.



Re: Printing backtrace of postgres processes

2024-02-09 Thread Michael Paquier
On Thu, Feb 08, 2024 at 12:25:18PM +0900, Michael Paquier wrote:
> In HandleLogBacktraceInterrupt(), we don't use backtrace_symbols() and
> rely on backtrace_symbols_fd() to avoid doing malloc() in the signal
> handler as mentioned in [1] back in 2022.  Perhaps the part about the
> fact that we don't use backtrace_symbols() should be mentioned
> explicitely in a comment rather than silently implied?  That's
> a very important point.

This has been itching me, so I have spent more time reading about
that, and while browsing signal(7) and signal-safety(7), I've first
noticed that this is not safe in the patch: 
+   write_stderr("logging current backtrace of process with PID %d:\n",
+MyProcPid);

Note that there's a write_stderr_signal_safe().

Anyway, I've been digging around the signal-safety of backtrace(3)
(even looking a bit at some GCC code, brrr), and I am under the
impression that backtrace() is just by nature not safe and also
dangerous in signal handlers.  One example of issue I've found:
https://github.com/gperftools/gperftools/issues/838

This looks like enough ground to me to reject the patch.
--
Michael


signature.asc
Description: PGP signature


Re: Synchronizing slots from primary to standby

2024-02-09 Thread Peter Smith
FYI -- I checked patch v81-0001 to find which of the #includes are
strictly needed.

==
src/backend/replication/logical/slotsync.c

1.
+#include "postgres.h"
+
+#include 
+
+#include "access/genam.h"
+#include "access/table.h"
+#include "access/xlog_internal.h"
+#include "access/xlogrecovery.h"
+#include "catalog/pg_database.h"
+#include "commands/dbcommands.h"
+#include "libpq/pqsignal.h"
+#include "pgstat.h"
+#include "postmaster/bgworker.h"
+#include "postmaster/fork_process.h"
+#include "postmaster/interrupt.h"
+#include "postmaster/postmaster.h"
+#include "replication/logical.h"
+#include "replication/logicallauncher.h"
+#include "replication/walreceiver.h"
+#include "replication/slotsync.h"
+#include "storage/ipc.h"
+#include "storage/lmgr.h"
+#include "storage/procarray.h"
+#include "tcop/tcopprot.h"
+#include "utils/builtins.h"
+#include "utils/fmgroids.h"
+#include "utils/guc_hooks.h"
+#include "utils/pg_lsn.h"
+#include "utils/ps_status.h"
+#include "utils/timeout.h"
+#include "utils/varlena.h"

Many of these #includes seem unnecessary. e.g. I was able to remove
all those that are commented-out below, and the file still compiles OK
with no warnings:

#include "postgres.h"

//#include 

//#include "access/genam.h"
//#include "access/table.h"
#include "access/xlog_internal.h"
#include "access/xlogrecovery.h"
#include "catalog/pg_database.h"
#include "commands/dbcommands.h"
//#include "libpq/pqsignal.h"
//#include "pgstat.h"
//#include "postmaster/bgworker.h"
//#include "postmaster/fork_process.h"
//#include "postmaster/interrupt.h"
//#include "postmaster/postmaster.h"
#include "replication/logical.h"
//#include "replication/logicallauncher.h"
//#include "replication/walreceiver.h"
#include "replication/slotsync.h"
#include "storage/ipc.h"
#include "storage/lmgr.h"
#include "storage/procarray.h"
//#include "tcop/tcopprot.h"
#include "utils/builtins.h"
//#include "utils/fmgroids.h"
//#include "utils/guc_hooks.h"
#include "utils/pg_lsn.h"
//#include "utils/ps_status.h"
//#include "utils/timeout.h"
//#include "utils/varlena.h"

==
src/backend/replication/slot.c

2.
 #include "pgstat.h"
+#include "replication/slotsync.h"
 #include "replication/slot.h"
+#include "replication/walsender.h"
 #include "storage/fd.h"

The #include "replication/walsender.h" seems to be unnecessary.

==
src/backend/replication/walsender.c

3.
 #include "replication/logical.h"
+#include "replication/slotsync.h"
 #include "replication/slot.h"

The #include "replication/slotsync.h" is needed, but only for Assert code:
Assert(am_cascading_walsender || IsSyncingReplicationSlots());

So you could #ifdef around that #include if you wish to.

==
Kind Regards,
Peter Smith.
Fujitsu Australia