Re: ResourceOwner refactoring

2024-06-06 Thread Heikki Linnakangas

On 06/06/2024 18:27, Robert Haas wrote:

On Thu, Jun 6, 2024 at 7:32 AM Heikki Linnakangas  wrote:

Barring objections, I'll commit this later today or tomorrow. Thanks for
the report!


Committed.


I think you may have forgotten to push.


Huh, you're right. I could swear I did...

Pushed now thanks!

--
Heikki Linnakangas
Neon (https://neon.tech)





Re: ResourceOwner refactoring

2024-06-06 Thread Robert Haas
On Thu, Jun 6, 2024 at 7:32 AM Heikki Linnakangas  wrote:
> > Barring objections, I'll commit this later today or tomorrow. Thanks for
> > the report!
>
> Committed.

I think you may have forgotten to push.

-- 
Robert Haas
EDB: http://www.enterprisedb.com




Re: ResourceOwner refactoring

2024-06-06 Thread Heikki Linnakangas

On 05/06/2024 16:58, Heikki Linnakangas wrote:

On 04/06/2024 01:49, Heikki Linnakangas wrote:

A straightforward fix is to modify RelationFlushRelation() so that if
!IsTransactionState(), it just marks the entry as invalid instead of
calling RelationClearRelation(). That's what RelationClearRelation()
would do anyway, if it didn't hit the assertion first.


Here's a patch with that straightforward fix. Your test case hit the
"rd_createSubid != InvalidSubTransactionId" case, I expanded it to also
cover the "rd_firstRelfilelocatorSubid != InvalidSubTransactionId" case.


For the record, I got the above backwards: your test case covered the 
rd_firstRelfilelocatorSubid case and I expanded it to also cover the 
rd_createSubid case.



Barring objections, I'll commit this later today or tomorrow. Thanks for
the report!


Committed.

--
Heikki Linnakangas
Neon (https://neon.tech)





Re: ResourceOwner refactoring

2024-06-05 Thread Heikki Linnakangas

On 04/06/2024 01:49, Heikki Linnakangas wrote:

A straightforward fix is to modify RelationFlushRelation() so that if
!IsTransactionState(), it just marks the entry as invalid instead of
calling RelationClearRelation(). That's what RelationClearRelation()
would do anyway, if it didn't hit the assertion first.


Here's a patch with that straightforward fix. Your test case hit the 
"rd_createSubid != InvalidSubTransactionId" case, I expanded it to also 
cover the "rd_firstRelfilelocatorSubid != InvalidSubTransactionId" case. 
Barring objections, I'll commit this later today or tomorrow. Thanks for 
the report!


I started a new thread with the bigger refactorings I had in mind: 
https://www.postgresql.org/message-id/9c9e8908-7b3e-4ce7-85a8-00c0e165a3d6%40iki.fi


--
Heikki Linnakangas
Neon (https://neon.tech)
From 227f173d1066955a2ab6aba12c508aaa1f416c27 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas 
Date: Wed, 5 Jun 2024 13:29:51 +0300
Subject: [PATCH 1/3] Make RelationFlushRelation() work without ResourceOwner
 during abort

ReorderBufferImmediateInvalidation() executes invalidation messages in
an aborted transaction. However, RelationFlushRelation sometimes
required a valid resource owner, to temporarily increment the refcount
of the relache entry. Commit b8bff07daa worked around that in the main
subtransaction abort function, AbortSubTransaction(), but missed this
similar case in ReorderBufferImmediateInvalidation().

To fix, introduce a separate function to invalidate a relcache
entry. It does the same thing as RelationClearRelation(rebuild==true)
does, when outside a transaction, but can be called without
incrementing the refcount.

Add regression test. Before this fix, it failed with:

ERROR: ResourceOwnerEnlarge called after release started

Reported-by: Alexander Lakhin 
Disussion: https://www.postgresql.org/message-id/e56be7d9-14b1-664d-0bfc-00ce97727...@gmail.com
---
 .../expected/decoding_in_xact.out | 48 +
 .../test_decoding/sql/decoding_in_xact.sql| 27 ++
 src/backend/access/transam/xact.c | 13 -
 src/backend/utils/cache/relcache.c| 54 ---
 4 files changed, 122 insertions(+), 20 deletions(-)

diff --git a/contrib/test_decoding/expected/decoding_in_xact.out b/contrib/test_decoding/expected/decoding_in_xact.out
index b65253f4630..8555b3d9436 100644
--- a/contrib/test_decoding/expected/decoding_in_xact.out
+++ b/contrib/test_decoding/expected/decoding_in_xact.out
@@ -79,6 +79,54 @@ SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'inc
  COMMIT
 (6 rows)
 
+-- Decoding works in transaction that issues DDL
+--
+-- We had issues handling relcache invalidations with these, see
+-- https://www.postgresql.org/message-id/e56be7d9-14b1-664d-0bfc-00ce97727...@gmail.com
+CREATE TABLE tbl_created_outside_xact(id SERIAL PRIMARY KEY);
+BEGIN;
+  -- TRUNCATE changes the relfilenode and sends relcache invalidation
+  TRUNCATE tbl_created_outside_xact;
+  INSERT INTO tbl_created_outside_xact(id) VALUES('1');
+  -- don't show yet, haven't committed
+  SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1');
+ data 
+--
+(0 rows)
+
+COMMIT;
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1');
+ data 
+--
+ BEGIN
+ table public.tbl_created_outside_xact: TRUNCATE: (no-flags)
+ table public.tbl_created_outside_xact: INSERT: id[integer]:1
+ COMMIT
+(4 rows)
+
+set debug_logical_replication_streaming=immediate;
+BEGIN;
+  CREATE TABLE tbl_created_in_xact(id SERIAL PRIMARY KEY);
+  INSERT INTO tbl_created_in_xact VALUES (1);
+  CHECKPOINT; -- Force WAL flush, so that the above changes will be streamed
+  SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1', 'stream-changes', '1');
+   data   
+--
+ opening a streamed block for transaction
+ streaming change for transaction
+ closing a streamed block for transaction
+(3 rows)
+
+COMMIT;
+reset debug_logical_replication_streaming;
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1');
+  data   
+-
+ BEGIN
+ table public.tbl_created_in_xact: INSERT: id[integer]:1
+ COMMIT
+(3 rows)
+
 SELECT 'stop' FROM pg_drop_replication_slot('regression_slot');
  ?column? 
 --
diff --git a/contrib/test_decoding/sql/decoding_in_xact.sql b/contrib/test_decoding/sql/decoding_in_xact.sql
index 108782dc2e9..841a92fa780 100644
--- a/contrib/test_decoding/sql/decoding_in_xact.sql
+++ 

Re: ResourceOwner refactoring

2024-06-03 Thread Heikki Linnakangas

Thanks for the testing!

On 01/06/2024 11:00, Alexander Lakhin wrote:

Hello Heikki,

I've stumbled upon yet another anomaly introduced with b8bff07da.

Please try the following script:
SELECT 'init' FROM pg_create_logical_replication_slot('slot',
    'test_decoding');
CREATE TABLE tbl(t text);
BEGIN;
TRUNCATE table tbl;

SELECT data FROM pg_logical_slot_get_changes('slot', NULL, NULL,
   'include-xids', '0', 'skip-empty-xacts', '1', 'stream-changes', '1');

On b8bff07da (and the current HEAD), it ends up with:
ERROR:  ResourceOwnerEnlarge called after release started

But on the previous commit, I get no error:
   data
--
(0 rows)


This is another case of the issue I tried to fix in commit b8bff07da 
with this:



--- b/src/backend/access/transam/xact.c
+++ a/src/backend/access/transam/xact.c
@@ -5279,7 +5279,20 @@
 
 		AtEOSubXact_RelationCshould be ache(false, s->subTransactionId,

  
s->parent->subTransactionId);
+
+
+   /*
+* AtEOSubXact_Inval sometimes needs to temporarily bump the 
refcount
+* on the relcache entries that it processes.  We cannot use the
+* subtransaction's resource owner anymore, because we've 
already
+* started releasing it.  But we can use the parent resource 
owner.
+*/
+   CurrentResourceOwner = s->parent->curTransactionOwner;
+
AtEOSubXact_Inval(false);
+
+   CurrentResourceOwner = s->curTransactionOwner;
+
ResourceOwnerRelease(s->curTransactionOwner,
 RESOURCE_RELEASE_LOCKS,
 false, false);


AtEOSubXact_Inval calls LocalExecuteInvalidationMessage(), which 
ultimately calls RelationFlushRelation(). Your test case reaches 
ReorderBufferExecuteInvalidations(), which also calls 
LocalExecuteInvalidationMessage(), in an already aborted subtransaction.


Looking closer at relcache.c, I think we need to make 
RelationFlushRelation() safe to call without a valid resource owner. 
There are already IsTransactionState() checks in 
RelationClearRelation(), and a comment noting that it does not touch 
CurrentResourceOwner. So we should make sure RelationFlushRelation() 
doesn't touch it either, and revert the above change to 
AbortTransaction(). I didn't feel very good about it in the first place, 
and now I see what the proper fix is.


A straightforward fix is to modify RelationFlushRelation() so that if 
!IsTransactionState(), it just marks the entry as invalid instead of 
calling RelationClearRelation(). That's what RelationClearRelation() 
would do anyway, if it didn't hit the assertion first.


RelationClearRelation() is complicated. Depending on the circumstances, 
it removes the relcache entry, rebuilds it, marks it as invalid, or 
performs a partial reload of a nailed relation. So before going for the 
straightforward fix, I'm going to see if I can refactor 
RelationClearRelation() to be less complicated.


--
Heikki Linnakangas
Neon (https://neon.tech)





Re: ResourceOwner refactoring

2024-06-01 Thread Alexander Lakhin

Hello Heikki,

I've stumbled upon yet another anomaly introduced with b8bff07da.

Please try the following script:
SELECT 'init' FROM pg_create_logical_replication_slot('slot',
  'test_decoding');
CREATE TABLE tbl(t text);
BEGIN;
TRUNCATE table tbl;

SELECT data FROM pg_logical_slot_get_changes('slot', NULL, NULL,
 'include-xids', '0', 'skip-empty-xacts', '1', 'stream-changes', '1');

On b8bff07da (and the current HEAD), it ends up with:
ERROR:  ResourceOwnerEnlarge called after release started

But on the previous commit, I get no error:
 data
--
(0 rows)

Best regards,
Alexander




Re: ResourceOwner refactoring

2024-02-02 Thread Heikki Linnakangas

On 02/02/2024 11:00, Alexander Lakhin wrote:

Please try the following script:
mkdir /tmp/50m
sudo mount -t tmpfs -o size=50M tmpfs /tmp/50m
export PGDATA=/tmp/50m/tmpdb

initdb
pg_ctl -l server.log start

cat << 'EOF' | psql
CREATE TEMP TABLE t (a name, b name, c name, d name);
INSERT INTO t SELECT 'a', 'b', 'c', 'd' FROM generate_series(1, 1000) g;

COPY t TO '/tmp/t.data';
SELECT 'COPY t FROM ''/tmp/t.data''' FROM generate_series(1, 100)
\gexec
EOF

which produces an unexpected error, a warning, and an assertion failure,
starting from b8bff07da:


Fixed, thanks for the report!

Comparing ExtendBufferedRelLocal() and ExtendBufferedRelShared(), it's 
easy to see that ExtendBufferedRelLocal() was missing a 
ResourceOwnerEnlarge() call in the loop. But it's actually a bit more 
subtle: it was correct without the ResourceOwnerEnlarge() call until 
commit b8bff07da, because ExtendBufferedRelLocal() unpins the old buffer 
pinning the new one, while ExtendBufferedRelShared() does it the other 
way 'round. The implicit assumption was that unpinning the old buffer 
ensures that you can pin a new one. That no longer holds with commit 
b8bff07da. Remembering a new resource expects there to be a free slot in 
the fixed-size array, but if the forgotten resource was in the hash, 
rather than the array, forgetting it doesn't make space in the array.


We also make that assumption here in BufferAlloc:


/*
 * Got a collision. Someone has already done what we were about 
to do.
 * We'll just handle this as if it were found in the buffer 
pool in
 * the first place.  First, give up the buffer we were planning 
to
 * use.
 *
 * We could do this after releasing the partition lock, but 
then we'd
 * have to call ResourceOwnerEnlarge() & 
ReservePrivateRefCountEntry()
 * before acquiring the lock, for the rare case of such a 
collision.
 */
UnpinBuffer(victim_buf_hdr);


It turns out to be OK in that case, because it unpins the buffer that 
was the last one pinned. That does ensure that you have one free slot in 
the array, but forgetting anything other than the most recently 
remembered resource does not.


I've added a note to that in ResourceOwnerForget. I read through the 
other callers of ResourceOwnerRemember and PinBuffer, but didn't find 
any other unsafe uses. I'm not too happy with this subtlety, but at 
least it's documented now.


--
Heikki Linnakangas
Neon (https://neon.tech)





Re: ResourceOwner refactoring

2024-02-02 Thread Alexander Lakhin

Hello Heikki,

08.11.2023 14:37, Heikki Linnakangas wrote:


Fixed these, and pushed. Thanks everyone for reviewing!



Please try the following script:
mkdir /tmp/50m
sudo mount -t tmpfs -o size=50M tmpfs /tmp/50m
export PGDATA=/tmp/50m/tmpdb

initdb
pg_ctl -l server.log start

cat << 'EOF' | psql
CREATE TEMP TABLE t (a name, b name, c name, d name);
INSERT INTO t SELECT 'a', 'b', 'c', 'd' FROM generate_series(1, 1000) g;

COPY t TO '/tmp/t.data';
SELECT 'COPY t FROM ''/tmp/t.data''' FROM generate_series(1, 100)
\gexec
EOF

which produces an unexpected error, a warning, and an assertion failure,
starting from b8bff07da:
..
COPY 1000
ERROR:  could not extend file "base/5/t2_16386" with FileFallocate(): No space 
left on device
HINT:  Check free disk space.
CONTEXT:  COPY t, line 1000
ERROR:  ResourceOwnerRemember called but array was full
CONTEXT:  COPY t, line 1000
WARNING:  local buffer refcount leak: [-499] (rel=base/5/t2_16386, 
blockNum=1483, flags=0x200, refcount=0 1)
server closed the connection unexpectedly
    This probably means the server terminated abnormally
    before or while processing the request.
connection to server was lost
2024-02-02 08:29:24.434 UTC [2052831] LOG:  server process (PID 2052839) was 
terminated by signal 6: Aborted

server.log contains:
TRAP: failed Assert("RefCountErrors == 0"), File: "localbuf.c", Line: 804, PID: 
2052839

Best regards,
Alexander




Re: ResourceOwner refactoring

2024-01-16 Thread Anton A. Melnikov



On 16.01.2024 14:54, Heikki Linnakangas wrote:

Fixed, thanks. mark_pgdllimport.pl also highlighted two new variables in 
walsummarizer.h, fixed those too.



Thanks!

Have a nice day!

--
Anton A. Melnikov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company




Re: ResourceOwner refactoring

2024-01-16 Thread Heikki Linnakangas

On 16/01/2024 13:23, Anton A. Melnikov wrote:

Hello!

Found possibly missing PGDLLIMPORT in the definition of the

extern const ResourceOwnerDesc buffer_io_resowner_desc;
and
extern const ResourceOwnerDesc buffer_pin_resowner_desc;

callbacks in the src/include/storage/buf_internals.h

Please take a look on it.


Fixed, thanks. mark_pgdllimport.pl also highlighted two new variables in 
walsummarizer.h, fixed those too.


--
Heikki Linnakangas
Neon (https://neon.tech)





Re: ResourceOwner refactoring

2024-01-16 Thread Anton A. Melnikov

Hello!

Found possibly missing PGDLLIMPORT in the definition of the

extern const ResourceOwnerDesc buffer_io_resowner_desc;
and
extern const ResourceOwnerDesc buffer_pin_resowner_desc;

callbacks in the src/include/storage/buf_internals.h

Please take a look on it.

With the best regards,

--
Anton A. Melnikov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company




Re: ResourceOwner refactoring

2023-11-17 Thread Andres Freund
Hi,

On 2023-11-17 12:44:41 -0800, Andres Freund wrote:
> On 2023-11-07 13:28:28 +0200, Heikki Linnakangas wrote:
> > I feel pretty good about this overall. Barring objections or new cfbot
> > failures, I will commit this in the next few days.
> 
> I am working on rebasing the AIO patch over this. I think I found a crash
> that's unrelated to AIO.
> 
> #4  0x00ea7631 in ExceptionalCondition (conditionName=0x4ba6f1 
> "owner->narr == 0",
> fileName=0x4ba628 
> "../../../../../home/andres/src/postgresql/src/backend/utils/resowner/resowner.c",
>  lineNumber=354)
> at 
> ../../../../../home/andres/src/postgresql/src/backend/utils/error/assert.c:66
> #5  0x00ef13b5 in ResourceOwnerReleaseAll (owner=0x3367d48, 
> phase=RESOURCE_RELEASE_BEFORE_LOCKS, printLeakWarnings=false)
> at 
> ../../../../../home/andres/src/postgresql/src/backend/utils/resowner/resowner.c:354
> #6  0x00ef1d9c in ResourceOwnerReleaseInternal (owner=0x3367d48, 
> phase=RESOURCE_RELEASE_BEFORE_LOCKS, isCommit=false, isTopLevel=true)
> at 
> ../../../../../home/andres/src/postgresql/src/backend/utils/resowner/resowner.c:717
> #7  0x00ef1c89 in ResourceOwnerRelease (owner=0x3367d48, 
> phase=RESOURCE_RELEASE_BEFORE_LOCKS, isCommit=false, isTopLevel=true)
> at 
> ../../../../../home/andres/src/postgresql/src/backend/utils/resowner/resowner.c:644
> #8  0x008c1f87 in AbortTransaction () at 
> ../../../../../home/andres/src/postgresql/src/backend/access/transam/xact.c:2851
> #9  0x008c4ae0 in AbortOutOfAnyTransaction () at 
> ../../../../../home/andres/src/postgresql/src/backend/access/transam/xact.c:4761
> #10 0x00ec1502 in ShutdownPostgres (code=1, arg=0) at 
> ../../../../../home/andres/src/postgresql/src/backend/utils/init/postinit.c:1357
> #11 0x00c942e5 in shmem_exit (code=1) at 
> ../../../../../home/andres/src/postgresql/src/backend/storage/ipc/ipc.c:243
> 
> I think the problem is that ResourceOwnerSort() looks at owner->nhash == 0,
> whereas ResourceOwnerReleaseAll() looks at !owner->hash. Therefore it's
> possible to reach ResourceOwnerReleaseAll() with owner->hash != NULL while
> also having owner->narr > 0.
> 
> I think both checks for !owner->hash in ResourceOwnerReleaseAll() need to
> instead check owner->nhash == 0.
> 
> I'm somewhat surprised that this is only hit with the AIO branch. I guess one
> needs to be somewhat unlucky to end up with a hashtable but 0 elements in it
> at the time of resowner release. But still somewhat surprising.

Oops - I hadn't rebased far enough at this point... That was already fixed in
8f4a1ab471e.

Seems like it'd be good to cover that path in one of the tests?

- Andres




Re: ResourceOwner refactoring

2023-11-17 Thread Andres Freund
Hi,

On 2023-11-07 13:28:28 +0200, Heikki Linnakangas wrote:
> I feel pretty good about this overall. Barring objections or new cfbot
> failures, I will commit this in the next few days.

I am working on rebasing the AIO patch over this. I think I found a crash
that's unrelated to AIO.

#4  0x00ea7631 in ExceptionalCondition (conditionName=0x4ba6f1 
"owner->narr == 0",
fileName=0x4ba628 
"../../../../../home/andres/src/postgresql/src/backend/utils/resowner/resowner.c",
 lineNumber=354)
at 
../../../../../home/andres/src/postgresql/src/backend/utils/error/assert.c:66
#5  0x00ef13b5 in ResourceOwnerReleaseAll (owner=0x3367d48, 
phase=RESOURCE_RELEASE_BEFORE_LOCKS, printLeakWarnings=false)
at 
../../../../../home/andres/src/postgresql/src/backend/utils/resowner/resowner.c:354
#6  0x00ef1d9c in ResourceOwnerReleaseInternal (owner=0x3367d48, 
phase=RESOURCE_RELEASE_BEFORE_LOCKS, isCommit=false, isTopLevel=true)
at 
../../../../../home/andres/src/postgresql/src/backend/utils/resowner/resowner.c:717
#7  0x00ef1c89 in ResourceOwnerRelease (owner=0x3367d48, 
phase=RESOURCE_RELEASE_BEFORE_LOCKS, isCommit=false, isTopLevel=true)
at 
../../../../../home/andres/src/postgresql/src/backend/utils/resowner/resowner.c:644
#8  0x008c1f87 in AbortTransaction () at 
../../../../../home/andres/src/postgresql/src/backend/access/transam/xact.c:2851
#9  0x008c4ae0 in AbortOutOfAnyTransaction () at 
../../../../../home/andres/src/postgresql/src/backend/access/transam/xact.c:4761
#10 0x00ec1502 in ShutdownPostgres (code=1, arg=0) at 
../../../../../home/andres/src/postgresql/src/backend/utils/init/postinit.c:1357
#11 0x00c942e5 in shmem_exit (code=1) at 
../../../../../home/andres/src/postgresql/src/backend/storage/ipc/ipc.c:243

I think the problem is that ResourceOwnerSort() looks at owner->nhash == 0,
whereas ResourceOwnerReleaseAll() looks at !owner->hash. Therefore it's
possible to reach ResourceOwnerReleaseAll() with owner->hash != NULL while
also having owner->narr > 0.

I think both checks for !owner->hash in ResourceOwnerReleaseAll() need to
instead check owner->nhash == 0.


I'm somewhat surprised that this is only hit with the AIO branch. I guess one
needs to be somewhat unlucky to end up with a hashtable but 0 elements in it
at the time of resowner release. But still somewhat surprising.


Seperately, I see that resowner_private.h still exists in the repo, I think
that should be deleted, as many of the functions don't exist anymore?


Lastly, I think it'd be good to assert that we're not releasing the resowner
in ResourceOwnerRememberLock(). It's currently not strictly required, but that
seems like it's just leaking an implementation detail out?

Greetings,

Andres Freund




Re: ResourceOwner refactoring

2023-11-15 Thread Heikki Linnakangas

On 13/11/2023 01:08, Thomas Munro wrote:

On Mon, Nov 13, 2023 at 11:16 AM Heikki Linnakangas  wrote:

On 11/11/2023 14:00, Alexander Lakhin wrote:

10.11.2023 17:26, Heikki Linnakangas wrote:

I think that is surprising behavior from the DSA facility. When you make 
allocations with dsa_allocate() or just call
dsa_get_address() on an existing dsa_pointer, you wouldn't expect the current 
resource owner to matter. I think
dsa_create/attach() should store the current resource owner in the dsa_area, 
for use in subsequent operations on the
DSA, per attached patch (0002-Fix-dsa.c-with-different-resource-owners.patch).


Yeah, I agree that it is surprising and dangerous that the DSM
segments can have different owners if you're not careful, and it's not
clear that you have to be.  Interesting that no one ever reported
breakage in parallel query due to this thinko, presumably because the
current resource owner always turns out to be either the same one or
something with longer life.


Yep. I ran check-world with an extra assertion that 
"CurrentResourceOwner == area->resowner || area->resowner == NULL" to 
verify that. No failures.



With the patch 0002 applied, I'm observing another anomaly:


Ok, so the ownership of a dsa was still muddled :-(. Attached is a new
version that goes a little further. It replaces the whole
'mapping_pinned' flag in dsa_area with the 'resowner'. When a mapping is
pinned, it means that 'resowner == NULL'. This is now similar to how the
resowner field and pinning works in dsm.c.


This patch makes sense to me.

It might be tempting to delete the dsa_pin_mapping() interface
completely and say that if CurrentResourceOwner == NULL, then it's
effectively (what we used to call) pinned, but I think it's useful for
exception-safe construction of multiple objects to be able to start
out with a resource owner and then later 'commit' by clearing it.  As
seen in GetSessionDsmHandle().


Yeah that's useful. I don't like the "pinned mapping" term here. It's 
confusing because we also have dsa_pin(), which means something 
different: the area will continue to exist even after all backends have 
detached from it. I think we should rename 'dsa_pin_mapping()' to 
'dsa_forget_resowner()' or something like that.


I pushed these fixes, without that renaming. Let's do that as a separate 
patch if we like that.


I didn't backpatch this for now, because we don't seem to have a live 
bug in back branches. You could argue for backpatching because this 
could bite us in the future if we backpatch something else that uses 
dsa's, or if there are extensions using it. We can do that later after 
we've had this in 'master' for a while.


--
Heikki Linnakangas
Neon (https://neon.tech)





Re: ResourceOwner refactoring

2023-11-12 Thread Thomas Munro
On Mon, Nov 13, 2023 at 11:16 AM Heikki Linnakangas  wrote:
> On 11/11/2023 14:00, Alexander Lakhin wrote:
> > 10.11.2023 17:26, Heikki Linnakangas wrote:
> >> I think that is surprising behavior from the DSA facility. When you make 
> >> allocations with dsa_allocate() or just call
> >> dsa_get_address() on an existing dsa_pointer, you wouldn't expect the 
> >> current resource owner to matter. I think
> >> dsa_create/attach() should store the current resource owner in the 
> >> dsa_area, for use in subsequent operations on the
> >> DSA, per attached patch 
> >> (0002-Fix-dsa.c-with-different-resource-owners.patch).

Yeah, I agree that it is surprising and dangerous that the DSM
segments can have different owners if you're not careful, and it's not
clear that you have to be.  Interesting that no one ever reported
breakage in parallel query due to this thinko, presumably because the
current resource owner always turns out to be either the same one or
something with longer life.

> > With the patch 0002 applied, I'm observing another anomaly:
>
> Ok, so the ownership of a dsa was still muddled :-(. Attached is a new
> version that goes a little further. It replaces the whole
> 'mapping_pinned' flag in dsa_area with the 'resowner'. When a mapping is
> pinned, it means that 'resowner == NULL'. This is now similar to how the
> resowner field and pinning works in dsm.c.

This patch makes sense to me.

It might be tempting to delete the dsa_pin_mapping() interface
completely and say that if CurrentResourceOwner == NULL, then it's
effectively (what we used to call) pinned, but I think it's useful for
exception-safe construction of multiple objects to be able to start
out with a resource owner and then later 'commit' by clearing it.  As
seen in GetSessionDsmHandle().

I don't love the way this stuff is not very explicit, and if we're
going to keep doing this 'dynamic scope' stuff then I wish we could
find a scoping notation that looks more like the stuff one sees in
other languages that say something like
"with-resource-owner(area->resowner) { block of code }".




Re: ResourceOwner refactoring

2023-11-12 Thread Heikki Linnakangas

On 11/11/2023 14:00, Alexander Lakhin wrote:

10.11.2023 17:26, Heikki Linnakangas wrote:


I think that is surprising behavior from the DSA facility. When you make 
allocations with dsa_allocate() or just call
dsa_get_address() on an existing dsa_pointer, you wouldn't expect the current 
resource owner to matter. I think
dsa_create/attach() should store the current resource owner in the dsa_area, 
for use in subsequent operations on the
DSA, per attached patch (0002-Fix-dsa.c-with-different-resource-owners.patch).



With the patch 0002 applied, I'm observing another anomaly:


Ok, so the ownership of a dsa was still muddled :-(. Attached is a new 
version that goes a little further. It replaces the whole 
'mapping_pinned' flag in dsa_area with the 'resowner'. When a mapping is 
pinned, it means that 'resowner == NULL'. This is now similar to how the 
resowner field and pinning works in dsm.c.


--
Heikki Linnakangas
Neon (https://neon.tech)
From 90a0f1c8204f01c423b60be032ea521e4afd7473 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas 
Date: Fri, 10 Nov 2023 13:54:11 +0200
Subject: [PATCH 1/3] Add test_dsa module.

This covers basic calls within a single backend process, and a test
that uses a different resource owner. The resource owner test
demonstrates that calling dsa_allocate() or dsa_get_address() while in
a different resource owner than in the dsa_create/attach call causes a
segfault. I think that's a bug.

This resource owner issue was originally found by Alexander Lakhin,
exposed by my large ResourceOwner rewrite commit.

Reported-by: Alexander Lakhin
Discussion: https://www.postgresql.org/message-id/11b70743-c5f3-3910-8e5b-dd6c115ff829%40gmail.com
---
 src/test/modules/Makefile |   1 +
 src/test/modules/meson.build  |   1 +
 src/test/modules/test_dsa/Makefile|  23 
 .../modules/test_dsa/expected/test_dsa.out|  13 ++
 src/test/modules/test_dsa/meson.build |  33 +
 src/test/modules/test_dsa/sql/test_dsa.sql|   4 +
 src/test/modules/test_dsa/test_dsa--1.0.sql   |  12 ++
 src/test/modules/test_dsa/test_dsa.c  | 115 ++
 src/test/modules/test_dsa/test_dsa.control|   4 +
 9 files changed, 206 insertions(+)
 create mode 100644 src/test/modules/test_dsa/Makefile
 create mode 100644 src/test/modules/test_dsa/expected/test_dsa.out
 create mode 100644 src/test/modules/test_dsa/meson.build
 create mode 100644 src/test/modules/test_dsa/sql/test_dsa.sql
 create mode 100644 src/test/modules/test_dsa/test_dsa--1.0.sql
 create mode 100644 src/test/modules/test_dsa/test_dsa.c
 create mode 100644 src/test/modules/test_dsa/test_dsa.control

diff --git a/src/test/modules/Makefile b/src/test/modules/Makefile
index 738b715e792..a18e4d28a04 100644
--- a/src/test/modules/Makefile
+++ b/src/test/modules/Makefile
@@ -17,6 +17,7 @@ SUBDIRS = \
 		  test_copy_callbacks \
 		  test_custom_rmgrs \
 		  test_ddl_deparse \
+		  test_dsa \
 		  test_extensions \
 		  test_ginpostinglist \
 		  test_integerset \
diff --git a/src/test/modules/meson.build b/src/test/modules/meson.build
index d4828dc44d5..4e83c0f8d74 100644
--- a/src/test/modules/meson.build
+++ b/src/test/modules/meson.build
@@ -14,6 +14,7 @@ subdir('test_bloomfilter')
 subdir('test_copy_callbacks')
 subdir('test_custom_rmgrs')
 subdir('test_ddl_deparse')
+subdir('test_dsa')
 subdir('test_extensions')
 subdir('test_ginpostinglist')
 subdir('test_integerset')
diff --git a/src/test/modules/test_dsa/Makefile b/src/test/modules/test_dsa/Makefile
new file mode 100644
index 000..77583464dca
--- /dev/null
+++ b/src/test/modules/test_dsa/Makefile
@@ -0,0 +1,23 @@
+# src/test/modules/test_dsa/Makefile
+
+MODULE_big = test_dsa
+OBJS = \
+	$(WIN32RES) \
+	test_dsa.o
+PGFILEDESC = "test_dsa - test code for dynamic shared memory areas"
+
+EXTENSION = test_dsa
+DATA = test_dsa--1.0.sql
+
+REGRESS = test_dsa
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = src/test/modules/test_dsa
+top_builddir = ../../../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/src/test/modules/test_dsa/expected/test_dsa.out b/src/test/modules/test_dsa/expected/test_dsa.out
new file mode 100644
index 000..266010e77fe
--- /dev/null
+++ b/src/test/modules/test_dsa/expected/test_dsa.out
@@ -0,0 +1,13 @@
+CREATE EXTENSION test_dsa;
+SELECT test_dsa_basic();
+ test_dsa_basic 
+
+ 
+(1 row)
+
+SELECT test_dsa_resowners();
+ test_dsa_resowners 
+
+ 
+(1 row)
+
diff --git a/src/test/modules/test_dsa/meson.build b/src/test/modules/test_dsa/meson.build
new file mode 100644
index 000..21738290ad5
--- /dev/null
+++ b/src/test/modules/test_dsa/meson.build
@@ -0,0 +1,33 @@
+# Copyright (c) 2022-2023, PostgreSQL Global Development Group
+
+test_dsa_sources = files(
+  'test_dsa.c',
+)
+
+if host_system == 'windows'
+  

Re: ResourceOwner refactoring

2023-11-11 Thread Alexander Lakhin

Hello Heikki,

10.11.2023 17:26, Heikki Linnakangas wrote:


I think that is surprising behavior from the DSA facility. When you make allocations with dsa_allocate() or just call 
dsa_get_address() on an existing dsa_pointer, you wouldn't expect the current resource owner to matter. I think 
dsa_create/attach() should store the current resource owner in the dsa_area, for use in subsequent operations on the 
DSA, per attached patch (0002-Fix-dsa.c-with-different-resource-owners.patch).




With the patch 0002 applied, I'm observing another anomaly:
CREATE TABLE t(t1 text, t2 text);
INSERT INTO t SELECT md5(g::text), '12345678901234567890' FROM 
generate_series(1, 10) g;
CREATE INDEX tidx ON t(t1);

CREATE FUNCTION f() RETURNS TABLE (t1 text, t2 text) AS 'BEGIN END' LANGUAGE 
plpgsql;

SELECT * FROM f();

gives me under Valgrind:
2023-11-11 11:54:18.964 UTC|law|regression|654f5d2e.3c7a92|LOG: statement: 
SELECT * FROM f();
==00:00:00:49.589 3963538== Invalid read of size 1
==00:00:00:49.589 3963538==    at 0x9C8785: ResourceOwnerEnlarge 
(resowner.c:454)
==00:00:00:49.589 3963538==    by 0x7507C4: dsm_create_descriptor (dsm.c:1207)
==00:00:00:49.589 3963538==    by 0x74EF71: dsm_create (dsm.c:538)
==00:00:00:49.589 3963538==    by 0x9B7BEA: make_new_segment (dsa.c:2171)
==00:00:00:49.589 3963538==    by 0x9B6E28: ensure_active_superblock 
(dsa.c:1696)
==00:00:00:49.589 3963538==    by 0x9B67DD: alloc_object (dsa.c:1487)
==00:00:00:49.589 3963538==    by 0x9B5064: dsa_allocate_extended (dsa.c:816)
==00:00:00:49.589 3963538==    by 0x978A4C: share_tupledesc (typcache.c:2742)
==00:00:00:49.589 3963538==    by 0x978C07: 
find_or_make_matching_shared_tupledesc (typcache.c:2796)
==00:00:00:49.589 3963538==    by 0x977652: assign_record_type_typmod 
(typcache.c:1995)
==00:00:00:49.589 3963538==    by 0x9885A2: internal_get_result_type 
(funcapi.c:462)
==00:00:00:49.589 3963538==    by 0x98800D: get_expr_result_type (funcapi.c:299)
==00:00:00:49.589 3963538==  Address 0x72f9bf0 is 2,096 bytes inside a recently 
re-allocated block of size 16,384 alloc'd
==00:00:00:49.589 3963538==    at 0x4848899: malloc (vg_replace_malloc.c:381)
==00:00:00:49.589 3963538==    by 0x9B1F94: AllocSetAlloc (aset.c:928)
==00:00:00:49.589 3963538==    by 0x9C1162: MemoryContextAllocZero (mcxt.c:1076)
==00:00:00:49.589 3963538==    by 0x9C872C: ResourceOwnerCreate (resowner.c:423)
==00:00:00:49.589 3963538==    by 0x2CC522: AtStart_ResourceOwner (xact.c:1211)
==00:00:00:49.589 3963538==    by 0x2CD52A: StartTransaction (xact.c:2084)
==00:00:00:49.589 3963538==    by 0x2CE442: StartTransactionCommand 
(xact.c:2948)
==00:00:00:49.589 3963538==    by 0x992D69: InitPostgres (postinit.c:860)
==00:00:00:49.589 3963538==    by 0x791E6E: PostgresMain (postgres.c:4209)
==00:00:00:49.589 3963538==    by 0x6B13C4: BackendRun (postmaster.c:4423)
==00:00:00:49.589 3963538==    by 0x6B09EB: BackendStartup (postmaster.c:4108)
==00:00:00:49.589 3963538==    by 0x6AD0A6: ServerLoop (postmaster.c:1767)

without Valgrind I get:
2023-11-11 11:08:38.511 UTC|law|regression|654f60b6.3ca7eb|ERROR: 
ResourceOwnerEnlarge called after release started
2023-11-11 11:08:38.511 UTC|law|regression|654f60b6.3ca7eb|STATEMENT:  SELECT * 
FROM f();

Best regards,
Alexander




Re: ResourceOwner refactoring

2023-11-10 Thread Andres Freund
Hi,

On 2023-11-10 16:26:51 +0200, Heikki Linnakangas wrote:
> The quick, straightforward fix is to move the "CurrentResourceOwner = NULL"
> line earlier in CommitTransaction, per attached
> 0003-Clear-CurrentResourceOwner-earlier-in-CommitTransact.patch. You're not
> allowed to use the resource owner after you start to release it; it was a
> bit iffy even before the ResourceOwner rewrite but now it's explicitly
> forbidden. By clearing CurrentResourceOwner as soon as we start releasing
> it, we can prevent any accidental use.
> 
> When CurrentResourceOwner == NULL, dsm_attach() returns a handle that's not
> associated with any ResourceOwner. That's appropriate for the pgstat case.
> The DSA is "pinned", so the handle is forgotten from the ResourceOwner right
> after calling dsm_attach() anyway.

Yea - I wonder if we should have a version of dsm_attach() that pins at the
same time. It's pretty silly to first attach (remembering the dsm) and then
immediately pin (forgetting the dsm).


> Looking closer at dsa.c, I think this is a wider problem though. The
> comments don't make it very clear how it's supposed to interact with
> ResourceOwners. There's just this brief comment in dsa_pin_mapping():
> 
> >  * By default, areas are owned by the current resource owner, which means 
> > they
> >  * are detached automatically when that scope ends.
> 
> The dsa_area struct isn't directly owned by any ResourceOwner though. The
> DSM segments created by dsa_create() or dsa_attach() are.

But there's a relationship from the dsa too, via on_dsm_detach().


> But the functions dsa_allocate() and dsa_get_address() can create or attach
> more DSM segments to the area, and they will be owned by the by the current
> resource owner *at the time of the call*.

Yea, that doesn't seem great. It shouldn't affect the stats could though, due
the dsa being pinned.

> I think that is surprising behavior from the DSA facility. When you make
> allocations with dsa_allocate() or just call dsa_get_address() on an
> existing dsa_pointer, you wouldn't expect the current resource owner to
> matter. I think dsa_create/attach() should store the current resource owner
> in the dsa_area, for use in subsequent operations on the DSA, per attached
> patch (0002-Fix-dsa.c-with-different-resource-owners.patch).

Yea, that seems the right direction from here.

Greetings,

Andres Freund




Re: ResourceOwner refactoring

2023-11-10 Thread Heikki Linnakangas

Thanks for the testing again!

On 10/11/2023 11:00, Alexander Lakhin wrote:

I could see two failure modes:
2023-11-10 08:42:28.870 UTC [1163274] ERROR:  ResourceOwnerEnlarge called after 
release started
2023-11-10 08:42:28.870 UTC [1163274] STATEMENT:  drop table t;
2023-11-10 08:42:28.870 UTC [1163274] WARNING:  AbortTransaction while in 
COMMIT state
2023-11-10 08:42:28.870 UTC [1163274] PANIC:  cannot abort transaction 906, it 
was already committed

2023-11-10 08:43:27.897 UTC [1164148] ERROR:  ResourceOwnerEnlarge called after 
release started
2023-11-10 08:43:27.897 UTC [1164148] STATEMENT:  DROP DATABASE db69;
2023-11-10 08:43:27.897 UTC [1164148] WARNING:  AbortTransaction while in 
COMMIT state
2023-11-10 08:43:27.897 UTC [1164148] PANIC:  cannot abort transaction 1043, it 
was already committed

The stack trace for the second ERROR (ResourceOwnerEnlarge called ...) is:
...
#6  0x558af5b2f35c in ResourceOwnerEnlarge (owner=0x558af716f3c8) at 
resowner.c:455
#7  0x558af5888f18 in dsm_create_descriptor () at dsm.c:1207
#8  0x558af5889205 in dsm_attach (h=3172038420) at dsm.c:697
#9  0x558af5b1ebed in get_segment_by_index (area=0x558af711da18, index=2) 
at dsa.c:1764
#10 0x558af5b1ea4b in dsa_get_address (area=0x558af711da18, 
dp=2199023329568) at dsa.c:970
#11 0x558af5669366 in dshash_seq_next (status=0x7ffdd5912fd0) at 
dshash.c:687
#12 0x558af5901998 in pgstat_drop_database_and_contents (dboid=16444) at 
pgstat_shmem.c:830
#13 0x558af59016f0 in pgstat_drop_entry (kind=PGSTAT_KIND_DATABASE, 
dboid=16444, objoid=0) at pgstat_shmem.c:888
#14 0x558af59044eb in AtEOXact_PgStat_DroppedStats 
(xact_state=0x558af7111ee0, isCommit=true) at pgstat_xact.c:88
#15 0x558af59043c7 in AtEOXact_PgStat (isCommit=true, parallel=false) at 
pgstat_xact.c:55
#16 0x558af53c782e in CommitTransaction () at xact.c:2371
#17 0x558af53c709e in CommitTransactionCommand () at xact.c:306
...


The quick, straightforward fix is to move the "CurrentResourceOwner = 
NULL" line earlier in CommitTransaction, per attached 
0003-Clear-CurrentResourceOwner-earlier-in-CommitTransact.patch. You're 
not allowed to use the resource owner after you start to release it; it 
was a bit iffy even before the ResourceOwner rewrite but now it's 
explicitly forbidden. By clearing CurrentResourceOwner as soon as we 
start releasing it, we can prevent any accidental use.


When CurrentResourceOwner == NULL, dsm_attach() returns a handle that's 
not associated with any ResourceOwner. That's appropriate for the pgstat 
case. The DSA is "pinned", so the handle is forgotten from the 
ResourceOwner right after calling dsm_attach() anyway.


Looking closer at dsa.c, I think this is a wider problem though. The 
comments don't make it very clear how it's supposed to interact with 
ResourceOwners. There's just this brief comment in dsa_pin_mapping():



 * By default, areas are owned by the current resource owner, which means they
 * are detached automatically when that scope ends.


The dsa_area struct isn't directly owned by any ResourceOwner though. 
The DSM segments created by dsa_create() or dsa_attach() are.


But the functions dsa_allocate() and dsa_get_address() can create or 
attach more DSM segments to the area, and they will be owned by the by 
the current resource owner *at the time of the call*. So if you call 
dsa_get_address() while in a different resource owner, things get very 
confusing. The attached new test module demonstrates that 
(0001-Add-test_dsa-module.patch), here's a shortened version:


a = dsa_create(tranche_id);

/* Switch to new resource owner */
oldowner = CurrentResourceOwner;
childowner = ResourceOwnerCreate(oldowner, "temp owner");
CurrentResourceOwner = childowner;

/* make a bunch of allocations */
for (int i = 0; i < 1; i++)
p[i] = dsa_allocate(a, 1000);

/* Release the child resource owner */
CurrentResourceOwner = oldowner;
ResourceOwnerRelease(childowner,
 RESOURCE_RELEASE_BEFORE_LOCKS,
 true, false);
ResourceOwnerRelease(childowner,
 RESOURCE_RELEASE_LOCKS,
 true, false);
ResourceOwnerRelease(childowner,
 RESOURCE_RELEASE_AFTER_LOCKS,
 true, false);
ResourceOwnerDelete(childowner);


dsa_detach(a);

This first prints warnings on The ResourceOwnerRelease() calls:

2023-11-10 13:57:21.475 EET [745346] WARNING:  resource was not closed: 
dynamic shared memory segment 2395813396
2023-11-10 13:57:21.475 EET [745346] WARNING:  resource was not closed: 
dynamic shared memory segment 3922992700
2023-11-10 13:57:21.475 EET [745346] WARNING:  resource was not 

Re: ResourceOwner refactoring

2023-11-10 Thread Alexander Lakhin

Hello Heikki,

09.11.2023 02:48, Heikki Linnakangas wrote:


Thanks for the testing! Fixed. ...


Thank you for the fix!

Please look at one more failure caused be the new implementation of
ResourceOwners:
numdbs=80
for ((i=1;i<=10;i++)); do
echo "ITERATION $i"

for ((d=1;d<=$numdbs;d++)); do createdb db$d; done

for ((d=1;d<=$numdbs;d++)); do
echo "
create table t(t1 text);
drop table t;
" | psql -d db$d >psql-$d.log 2>&1 &
done
wait
grep 'PANIC' server.log  && break;

for ((d=1;d<=$numdbs;d++)); do dropdb db$d; done
grep 'PANIC' server.log  && break;
done

I could see two failure modes:
2023-11-10 08:42:28.870 UTC [1163274] ERROR:  ResourceOwnerEnlarge called after 
release started
2023-11-10 08:42:28.870 UTC [1163274] STATEMENT:  drop table t;
2023-11-10 08:42:28.870 UTC [1163274] WARNING:  AbortTransaction while in 
COMMIT state
2023-11-10 08:42:28.870 UTC [1163274] PANIC:  cannot abort transaction 906, it 
was already committed

2023-11-10 08:43:27.897 UTC [1164148] ERROR:  ResourceOwnerEnlarge called after 
release started
2023-11-10 08:43:27.897 UTC [1164148] STATEMENT:  DROP DATABASE db69;
2023-11-10 08:43:27.897 UTC [1164148] WARNING:  AbortTransaction while in 
COMMIT state
2023-11-10 08:43:27.897 UTC [1164148] PANIC:  cannot abort transaction 1043, it 
was already committed

The stack trace for the second ERROR (ResourceOwnerEnlarge called ...) is:
...
#6  0x558af5b2f35c in ResourceOwnerEnlarge (owner=0x558af716f3c8) at 
resowner.c:455
#7  0x558af5888f18 in dsm_create_descriptor () at dsm.c:1207
#8  0x558af5889205 in dsm_attach (h=3172038420) at dsm.c:697
#9  0x558af5b1ebed in get_segment_by_index (area=0x558af711da18, index=2) 
at dsa.c:1764
#10 0x558af5b1ea4b in dsa_get_address (area=0x558af711da18, 
dp=2199023329568) at dsa.c:970
#11 0x558af5669366 in dshash_seq_next (status=0x7ffdd5912fd0) at 
dshash.c:687
#12 0x558af5901998 in pgstat_drop_database_and_contents (dboid=16444) at 
pgstat_shmem.c:830
#13 0x558af59016f0 in pgstat_drop_entry (kind=PGSTAT_KIND_DATABASE, 
dboid=16444, objoid=0) at pgstat_shmem.c:888
#14 0x558af59044eb in AtEOXact_PgStat_DroppedStats 
(xact_state=0x558af7111ee0, isCommit=true) at pgstat_xact.c:88
#15 0x558af59043c7 in AtEOXact_PgStat (isCommit=true, parallel=false) at 
pgstat_xact.c:55
#16 0x558af53c782e in CommitTransaction () at xact.c:2371
#17 0x558af53c709e in CommitTransactionCommand () at xact.c:306
...

Best regards,
Alexander




Re: ResourceOwner refactoring

2023-11-08 Thread Heikki Linnakangas

On 08/11/2023 20:00, Alexander Lakhin wrote:

Please look at a new assertion failure, I've managed to trigger with this 
script:
CREATE TABLE t(
i01 int, i02 int, i03 int, i04 int, i05 int, i06 int, i07 int, i08 int, i09 
int, i10 int,
i11 int, i12 int, i13 int, i14 int, i15 int, i16 int, i17 int, i18 int, i19 
int, i20 int,
i21 int, i22 int, i23 int, i24 int, i25 int, i26 int
);
CREATE TABLE tp PARTITION OF t FOR VALUES IN (1);

(gdb) bt
...
#5  0x560dd4e42f77 in ExceptionalCondition (conditionName=0x560dd5059fbc 
"owner->narr == 0", fileName=0x560dd5059e31
"resowner.c", lineNumber=362) at assert.c:66
#6  0x560dd4e93cbd in ResourceOwnerReleaseAll (owner=0x560dd69cebd0, 
phase=RESOURCE_RELEASE_BEFORE_LOCKS,
printLeakWarnings=false) at resowner.c:362
#7  0x560dd4e92e22 in ResourceOwnerReleaseInternal (owner=0x560dd69cebd0, 
phase=RESOURCE_RELEASE_BEFORE_LOCKS,
isCommit=false, isTopLevel=true) at resowner.c:725
#8  0x560dd4e92d42 in ResourceOwnerReleaseInternal (owner=0x560dd69ce3f8, 
phase=RESOURCE_RELEASE_BEFORE_LOCKS,
isCommit=false, isTopLevel=true) at resowner.c:678
#9  0x560dd4e92cdb in ResourceOwnerRelease (owner=0x560dd69ce3f8, 
phase=RESOURCE_RELEASE_BEFORE_LOCKS,
isCommit=false, isTopLevel=true) at resowner.c:652
#10 0x560dd47316ef in AbortTransaction () at xact.c:2848
#11 0x560dd47329ac in AbortCurrentTransaction () at xact.c:3339
#12 0x560dd4c37284 in PostgresMain (dbname=0x560dd69779f0 "regression", 
username=0x560dd69779d8 "law") at
postgres.c:4370
...


Thanks for the testing! Fixed. I introduced that bug in one of the last 
revisions of the patch: I changed ResourceOwnerSort() to not bother 
moving all the elements from the array to the hash table if the hash 
table is allocated but empty. However, ResourceOwnerReleas() didn't get 
the memo, and still checked "owner->hash != NULL" rather than 
"owner->nhash == 0".


--
Heikki Linnakangas
Neon (https://neon.tech)





Re: ResourceOwner refactoring

2023-11-08 Thread Alexander Lakhin

Hello Heikki,

08.11.2023 14:37, Heikki Linnakangas wrote:


Fixed these, and pushed. Thanks everyone for reviewing!



Please look at a new assertion failure, I've managed to trigger with this 
script:
CREATE TABLE t(
i01 int, i02 int, i03 int, i04 int, i05 int, i06 int, i07 int, i08 int, i09 
int, i10 int,
i11 int, i12 int, i13 int, i14 int, i15 int, i16 int, i17 int, i18 int, i19 
int, i20 int,
i21 int, i22 int, i23 int, i24 int, i25 int, i26 int
);
CREATE TABLE tp PARTITION OF t FOR VALUES IN (1);

(gdb) bt
...
#5  0x560dd4e42f77 in ExceptionalCondition (conditionName=0x560dd5059fbc "owner->narr == 0", fileName=0x560dd5059e31 
"resowner.c", lineNumber=362) at assert.c:66
#6  0x560dd4e93cbd in ResourceOwnerReleaseAll (owner=0x560dd69cebd0, phase=RESOURCE_RELEASE_BEFORE_LOCKS, 
printLeakWarnings=false) at resowner.c:362
#7  0x560dd4e92e22 in ResourceOwnerReleaseInternal (owner=0x560dd69cebd0, phase=RESOURCE_RELEASE_BEFORE_LOCKS, 
isCommit=false, isTopLevel=true) at resowner.c:725
#8  0x560dd4e92d42 in ResourceOwnerReleaseInternal (owner=0x560dd69ce3f8, phase=RESOURCE_RELEASE_BEFORE_LOCKS, 
isCommit=false, isTopLevel=true) at resowner.c:678
#9  0x560dd4e92cdb in ResourceOwnerRelease (owner=0x560dd69ce3f8, phase=RESOURCE_RELEASE_BEFORE_LOCKS, 
isCommit=false, isTopLevel=true) at resowner.c:652

#10 0x560dd47316ef in AbortTransaction () at xact.c:2848
#11 0x560dd47329ac in AbortCurrentTransaction () at xact.c:3339
#12 0x560dd4c37284 in PostgresMain (dbname=0x560dd69779f0 "regression", username=0x560dd69779d8 "law") at 
postgres.c:4370

...

Best regards,
Alexander




Re: ResourceOwner refactoring

2023-11-08 Thread Heikki Linnakangas

On 07/11/2023 16:00, Alexander Lakhin wrote:

07.11.2023 14:28, Heikki Linnakangas wrote:


I feel pretty good about this overall. Barring objections or new cfbot 
failures, I will commit this in the next few days.


A script, that I published in [1], detects several typos in the patches:
betwen
ther
ReourceOwner
ResouceOwnerSort
ReleaseOwnerRelease

Maybe it's worth to fix them now, or just accumulate and clean all such
defects once a year...


Fixed these, and pushed. Thanks everyone for reviewing!

--
Heikki Linnakangas
Neon (https://neon.tech)





Re: ResourceOwner refactoring

2023-11-07 Thread Alexander Lakhin

Hello Heikki,

07.11.2023 14:28, Heikki Linnakangas wrote:


I feel pretty good about this overall. Barring objections or new cfbot 
failures, I will commit this in the next few days.



A script, that I published in [1], detects several typos in the patches:
betwen
ther
ReourceOwner
ResouceOwnerSort
ReleaseOwnerRelease

Maybe it's worth to fix them now, or just accumulate and clean all such
defects once a year...

[1] 
https://www.postgresql.org/message-id/27a7998b-d67f-e32a-a28d-e659a2e390c6%40gmail.com

Best regards,
Alexander




Re: ResourceOwner refactoring

2023-11-07 Thread Heikki Linnakangas

On 25/10/2023 21:07, Andres Freund wrote:

It's not too awful to have it be in this order:

struct ResourceOwnerData {
 ResourceOwner  parent;   /* 0 8 */
 ResourceOwner  firstchild;   /* 8 8 */
 ResourceOwner  nextchild;/*16 8 */
 const char  *  name; /*24 8 */
 _Bool  releasing;/*32 1 */
 _Bool  sorted;   /*33 1 */
 uint8  narr; /*34 1 */
 uint8  nlocks;   /*35 1 */
 uint32 nhash;/*36 4 */
 uint32 capacity; /*40 4 */
 uint32 grow_at;  /*44 4 */
 ResourceElem   arr[32];  /*48   512 */
 /* --- cacheline 8 boundary (512 bytes) was 48 bytes ago --- */
 ResourceElem * hash; /*   560 8 */
 LOCALLOCK *locks[15];/*   568   120 */

 /* size: 688, cachelines: 11, members: 14 */
 /* last cacheline: 48 bytes */
};

Requires rephrasing a few comments to document that the lenghts are separate
from the array / hashtable / locks, but otherwise...


Hmm, let's move 'capacity' and 'grow_at' after 'arr', too. They are only 
needed together with 'hash'.



This reliably shows a decent speed improvement in my stress test [1], on the
order of 5%.

[1] pgbench running
DO $$ BEGIN FOR i IN 1..5000 LOOP PERFORM abalance FROM pgbench_accounts 
WHERE aid = 17;END LOOP;END;$$;


I'm seeing similar results, although there's enough noise in the test 
that I'm sure how real the would be across different tests.



At that point, the first array entry fits into the first cacheline. If we were
to move parent, firstchild, nextchild, name further down the struct, three
array entries would be on the first line. Just using the array presumably is a
very common case, so that might be worth it?

I got less clear performance results with this one, and it's also quite
possible it could hurt, if resowners aren't actually "used", just
released. Therefore it's probably not worth it for now.


You're assuming that the ResourceOwner struct begins at a cacheline 
boundary. That's not usually true, we don't try to cacheline-align it. 
So while it's helpful to avoid padding and to keep frequently-accessed 
fields close to each other, there's no benefit in keeping them at the 
beginning of the struct.


--
Heikki Linnakangas
Neon (https://neon.tech)





Re: ResourceOwner refactoring

2023-11-06 Thread Peter Eisentraut
It looks like this patch set needs a bit of surgery to adapt to the LLVM 
changes in 9dce22033d.  The cfbot is reporting compiler warnings about 
this, and also some crashes, which might also be caused by this.


I do like the updated APIs.  (Maybe the repeated ".DebugPrint = NULL, 
 /* default message is fine */" lines could be omitted?)


I like that one can now easily change the elog(WARNING) in 
ResourceOwnerReleaseAll() to a PANIC or something to get automatic 
verification during testing.  I wonder if we should make this the 
default if assertions are on?  This would need some adjustments to 
src/test/modules/test_resowner because it would then fail.






Re: ResourceOwner refactoring

2023-10-25 Thread Andres Freund
Hi,

On 2023-10-25 15:43:36 +0300, Heikki Linnakangas wrote:
> On 10/07/2023 22:14, Andres Freund wrote:
> > >   /*
> > > - * Initially allocated size of a ResourceArray.  Must be power of two 
> > > since
> > > - * we'll use (arraysize - 1) as mask for hashing.
> > > + * Size of the fixed-size array to hold most-recently remembered 
> > > resources.
> > >*/
> > > -#define RESARRAY_INIT_SIZE 16
> > > +#define RESOWNER_ARRAY_SIZE 32
> >
> > That's 512 bytes, pretty large to be searched sequentially. It's somewhat 
> > sad
> > that the array needs to include 8 byte of ResourceOwnerFuncs...
>
> Yeah. Early on I considered having a RegisterResourceKind() function that
> you need to call first, and then each resource kind could be assigned a
> smaller ID. If we wanted to keep the old mechanism of separate arrays for
> each different resource kind, that'd be the way to go. But overall this
> seems simpler, and the performance is decent despite that linear scan.

Realistically, on a 64bit platform, the compiler / we want 64bit alignment for
ResourceElem->item, so making "kind" smaller isn't going to do much...


> > Due to 'narr' being before the large 'arr', 'nhash' is always on a separate
> > cacheline. I.e. the number of cachelines accessed in happy paths is higher
> > than necessary, also relevant because there's more dependencies on narr, 
> > nhash
> > than on the contents of those.
> >
> > I'd probably order it so that both of the large elements (arr, locks) are at
> > the end.
> >
> > It's hard to know with changes this small, but it does seem to yield a small
> > and repeatable performance benefit (pipelined pgbench -S).
>
> Swapped the 'hash' and 'arr' parts of the struct, so that 'arr' and 'locks'
> are at the end.

I don't remember what the layout was then, but now there are 10 bytes of holes
on x86-64 (and I think most other 64bit architectures).


struct ResourceOwnerData {
ResourceOwner  parent;   /* 0 8 */
ResourceOwner  firstchild;   /* 8 8 */
ResourceOwner  nextchild;/*16 8 */
const char  *  name; /*24 8 */
_Bool  releasing;/*32 1 */
_Bool  sorted;   /*33 1 */

/* XXX 6 bytes hole, try to pack */

ResourceElem * hash; /*40 8 */
uint32 nhash;/*48 4 */
uint32 capacity; /*52 4 */
uint32 grow_at;  /*56 4 */
uint32 narr; /*60 4 */
/* --- cacheline 1 boundary (64 bytes) --- */
ResourceElem   arr[32];  /*64   512 */
/* --- cacheline 9 boundary (576 bytes) --- */
uint32 nlocks;   /*   576 4 */

/* XXX 4 bytes hole, try to pack */

LOCALLOCK *locks[15];/*   584   120 */

/* size: 704, cachelines: 11, members: 14 */
/* sum members: 694, holes: 2, sum holes: 10 */
};

While somewhat annoying from a human pov, it seems like it would make sense to
rearrange a bit? At least moving nlocks into the first cacheline would be good
(not that we guarantee cacheline alignment rn, though it sometimes works out
to be aligned).

We also can make narr, nlocks uint8s.

With that narrowing and reordering, we end up with 688 bytes. Not worth for
most structs, but here perhaps worth doing?

Moving the lock length to the start of the struct would make sense to keep
things that are likely to be used in a "stalling" manner at the start of the
struct / in one cacheline.

It's not too awful to have it be in this order:

struct ResourceOwnerData {
ResourceOwner  parent;   /* 0 8 */
ResourceOwner  firstchild;   /* 8 8 */
ResourceOwner  nextchild;/*16 8 */
const char  *  name; /*24 8 */
_Bool  releasing;/*32 1 */
_Bool  sorted;   /*33 1 */
uint8  narr; /*34 1 */
uint8  nlocks;   /*35 1 */
uint32 nhash;/*36 4 */
uint32 capacity; /*40 4 */
uint32 grow_at;  /*44 4 */
ResourceElem   arr[32];  /*48   512 */
/* --- cacheline 8 boundary (512 bytes) was 48 bytes ago --- */
ResourceElem * hash; /*   560 8 */
 

Re: ResourceOwner refactoring

2023-10-25 Thread Heikki Linnakangas

On 10/07/2023 15:37, Peter Eisentraut wrote:

A few suggestions on the API:

  > +static ResourceOwnerFuncs tupdesc_resowner_funcs =

These aren't all "functions", so maybe another word like "info" or
"description" would be more appropriate?


  > +   .release_phase = RESOURCE_RELEASE_AFTER_LOCKS,
  > +   .release_priority = RELEASE_PRIO_TUPDESC_REFS,

I suggest assigning explicit non-zero numbers to the RESOURCE_RELEASE_*
constants, as well as changing RELEASE_PRIO_FIRST to 1 and adding an
assertion that the priority is not zero.  Otherwise, someone might
forget to assign these fields and would implicitly get phase 0 and
priority 0, which would probably work in most cases, but wouldn't be the
explicit intention.


Done.


Then again, you are using RELEASE_PRIO_FIRST for the pgcrypto patch.  Is
it the recommendation that if there are no other requirements, external
users should use that?  If we are going to open this up to external
users, we should probably have some documented guidance around this.


I added a brief comment about that in the ResourceReleasePhase typedef.

I also added a section to the README on how to define your own resource 
kind. (The README doesn't go into any detail on how to choose the 
release priority though).



  > +   .PrintLeakWarning = ResOwnerPrintTupleDescLeakWarning

I think this can be refactored further.  We really just need a function
to describe a resource, like maybe

static char *
ResOwnerDescribeTupleDesc(Datum res)
{
  TupleDesc   tupdesc = (TupleDesc) DatumGetPointer(res);

  return psprintf("TupleDesc %p (%u,%d)",
  tupdesc, tupdesc->tdtypeid, tupdesc->tdtypmod);
}

and then the common code can generate the warnings from that like

  elog(WARNING,
   "%s leak: %s still referenced",
   kind->name, kind->describe(value));

That way, we could more easily develop further options, such as turning
this warning into an error (useful for TAP tests).

Also, maybe, you could supply a default description that just prints
"%p", which appears to be applicable in about half the places.


Refactored it that way.


Possible bonus project: I'm not sure these descriptions are so useful
anyway.  It doesn't help me much in debugging if "File 55 was not
released" or some such.  If would be cool if ResourceOwnerRemember() in
some debug mode could remember the source code location, and then print
that together with the leak warning.


Yeah, that would be useful. I remember I've hacked something like that 
as a one-off thing in the past, when debugging a leak.



  > +#define ResourceOwnerRememberTupleDesc(owner, tupdesc) \
  > +   ResourceOwnerRemember(owner, PointerGetDatum(tupdesc),
_resowner_funcs)
  > +#define ResourceOwnerForgetTupleDesc(owner, tupdesc) \
  > +   ResourceOwnerForget(owner, PointerGetDatum(tupdesc),
_resowner_funcs)

I would prefer that these kinds of things be made inline functions, so
that we maintain the type safety of the previous interface.  And it's
also easier when reading code to see type decorations.

(But I suspect the above bonus project would require these to be macros?)


  > +   if (context->resowner)
  > +   ResourceOwnerForgetJIT(context->resowner, context);

It seems most ResourceOwnerForget*() calls have this kind of check
before them.  Could this be moved inside the function?


Many do, but I don't know if it's the majority. We could make 
ResurceOwnerForget(NULL) do nothing, but I think it's better to have the 
check in the callers. You wouldn't want to silently do nothing when the 
resource owner is not expected to be NULL.


(I'm attaching new patch version in my reply to Andres shortly)

--
Heikki Linnakangas
Neon (https://neon.tech)




Re: ResourceOwner refactoring

2023-07-10 Thread Andres Freund
Hi,

On 2023-07-10 12:14:46 -0700, Andres Freund wrote:
> >  /*
> > - * Initially allocated size of a ResourceArray.  Must be power of two since
> > - * we'll use (arraysize - 1) as mask for hashing.
> > + * Size of the fixed-size array to hold most-recently remembered resources.
> >   */
> > -#define RESARRAY_INIT_SIZE 16
> > +#define RESOWNER_ARRAY_SIZE 32
>
> That's 512 bytes, pretty large to be searched sequentially. It's somewhat sad
> that the array needs to include 8 byte of ResourceOwnerFuncs...

On that note:

It's perhaps worth noting that this change actually *increases* the size of
ResourceOwnerData. Previously:

/* size: 576, cachelines: 9, members: 19 */
/* sum members: 572, holes: 1, sum holes: 4 */

now:
/* size: 696, cachelines: 11, members: 13 */
/* sum members: 693, holes: 1, sum holes: 3 */

It won't make a difference memory-usage wise, given aset.c's rounding
behaviour. But it does mean that more memory needs to be zeroed, as
ResourceOwnerCreate() uses MemoryContextAllocZero.

As there's really no need to initialize the long ->arr, ->locks, it seems
worth to just initialize explicitly instead of using MemoryContextAllocZero().


With the new representation, is there any point in having ->locks? We still
need ->nlocks to be able to switch to the lossy behaviour, but there doesn't
seem to be much point in having the separate array.

Greetings,

Andres Freund




Re: ResourceOwner refactoring

2023-07-10 Thread Andres Freund
Hi,

On 2023-05-25 20:14:23 +0300, Heikki Linnakangas wrote:
> @@ -2491,9 +2495,6 @@ BufferSync(int flags)
>   int mask = BM_DIRTY;
>   WritebackContext wb_context;
>
> - /* Make sure we can handle the pin inside SyncOneBuffer */
> - ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
> -
>   /*
>* Unless this is a shutdown checkpoint or we have been explicitly told,
>* we write only permanent, dirty buffers.  But at shutdown or end of
> @@ -2970,9 +2971,6 @@ BgBufferSync(WritebackContext *wb_context)
>* requirements, or hit the bgwriter_lru_maxpages limit.
>*/
>
> - /* Make sure we can handle the pin inside SyncOneBuffer */
> - ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
> -
>   num_to_scan = bufs_to_lap;
>   num_written = 0;
>   reusable_buffers = reusable_buffers_est;
> @@ -3054,8 +3052,6 @@ BgBufferSync(WritebackContext *wb_context)
>   *
>   * (BUF_WRITTEN could be set in error if FlushBuffer finds the buffer clean
>   * after locking it, but we don't care all that much.)
> - *
> - * Note: caller must have done ResourceOwnerEnlargeBuffers.
>   */
>  static int
>  SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext 
> *wb_context)

> @@ -3065,7 +3061,9 @@ SyncOneBuffer(int buf_id, bool skip_recently_used, 
> WritebackContext *wb_context)
>   uint32  buf_state;
>   BufferTag   tag;
>
> + /* Make sure we can handle the pin */
>   ReservePrivateRefCountEntry();
> + ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
>
>   /*
>* Check whether buffer needs writing.
> @@ -4110,9 +4108,6 @@ FlushRelationBuffers(Relation rel)
>   return;
>   }
>
> - /* Make sure we can handle the pin inside the loop */
> - ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
> -
>   for (i = 0; i < NBuffers; i++)
>   {
>   uint32  buf_state;
> @@ -4126,7 +4121,9 @@ FlushRelationBuffers(Relation rel)
>   if (!BufTagMatchesRelFileLocator(>tag, 
> >rd_locator))
>   continue;
>
> + /* Make sure we can handle the pin */
>   ReservePrivateRefCountEntry();
> + ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
>
>   buf_state = LockBufHdr(bufHdr);
>   if (BufTagMatchesRelFileLocator(>tag, >rd_locator) 
> &&
> @@ -4183,9 +4180,6 @@ FlushRelationsAllBuffers(SMgrRelation *smgrs, int nrels)
>   if (use_bsearch)
>   pg_qsort(srels, nrels, sizeof(SMgrSortArray), 
> rlocator_comparator);
>
> - /* Make sure we can handle the pin inside the loop */
> - ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
> -
>   for (i = 0; i < NBuffers; i++)
>   {
>   SMgrSortArray *srelent = NULL;

Hm, a tad worried that increasing the number of ResourceOwnerEnlargeBuffers()
that drastically could have a visible overhead.



> +static ResourceOwnerFuncs tupdesc_resowner_funcs =
> +{
> + .name = "tupdesc reference",
> + .release_phase = RESOURCE_RELEASE_AFTER_LOCKS,
> + .release_priority = RELEASE_PRIO_TUPDESC_REFS,
> + .ReleaseResource = ResOwnerReleaseTupleDesc,
> + .PrintLeakWarning = ResOwnerPrintTupleDescLeakWarning
> +};

I think these should all be marked const, there's no need to have them be in a
mutable section of the binary. Of course that will require adjusting a bunch
of the signatures too, but that seems fine.

> --- a/src/backend/access/transam/xact.c
> +++ b/src/backend/access/transam/xact.c
> @@ -5171,9 +5171,24 @@ AbortSubTransaction(void)
>   ResourceOwnerRelease(s->curTransactionOwner,
>
> RESOURCE_RELEASE_BEFORE_LOCKS,
>false, false);
> +
>   AtEOSubXact_RelationCache(false, s->subTransactionId,
> 
> s->parent->subTransactionId);
> +
> +
> + /*
> +  * AtEOSubXact_Inval sometimes needs to temporarily
> +  * bump the refcount on the relcache entries that it processes.
> +  * We cannot use the subtransaction's resource owner anymore,
> +  * because we've already started releasing it.  But we can use
> +  * the parent resource owner.
> +  */
> + CurrentResourceOwner = s->parent->curTransactionOwner;
> +
>   AtEOSubXact_Inval(false);
> +
> + CurrentResourceOwner = s->curTransactionOwner;
> +
>   ResourceOwnerRelease(s->curTransactionOwner,
>RESOURCE_RELEASE_LOCKS,
>false, false);

I might be a bit slow this morning, but why did this need to change as part of
this?


> @@ -5182,8 +5221,8 @@ TerminateBufferIO(BufferDesc *buf, bool clear_dirty, 
> uint32 

Re: ResourceOwner refactoring

2023-07-10 Thread Peter Eisentraut

A few suggestions on the API:

> +static ResourceOwnerFuncs tupdesc_resowner_funcs =

These aren't all "functions", so maybe another word like "info" or 
"description" would be more appropriate?



> +   .release_phase = RESOURCE_RELEASE_AFTER_LOCKS,
> +   .release_priority = RELEASE_PRIO_TUPDESC_REFS,

I suggest assigning explicit non-zero numbers to the RESOURCE_RELEASE_* 
constants, as well as changing RELEASE_PRIO_FIRST to 1 and adding an 
assertion that the priority is not zero.  Otherwise, someone might 
forget to assign these fields and would implicitly get phase 0 and 
priority 0, which would probably work in most cases, but wouldn't be the 
explicit intention.


Then again, you are using RELEASE_PRIO_FIRST for the pgcrypto patch.  Is 
it the recommendation that if there are no other requirements, external 
users should use that?  If we are going to open this up to external 
users, we should probably have some documented guidance around this.



> +   .PrintLeakWarning = ResOwnerPrintTupleDescLeakWarning

I think this can be refactored further.  We really just need a function 
to describe a resource, like maybe


static char *
ResOwnerDescribeTupleDesc(Datum res)
{
TupleDesc   tupdesc = (TupleDesc) DatumGetPointer(res);

return psprintf("TupleDesc %p (%u,%d)",
tupdesc, tupdesc->tdtypeid, tupdesc->tdtypmod);
}

and then the common code can generate the warnings from that like

elog(WARNING,
 "%s leak: %s still referenced",
 kind->name, kind->describe(value));

That way, we could more easily develop further options, such as turning 
this warning into an error (useful for TAP tests).


Also, maybe, you could supply a default description that just prints 
"%p", which appears to be applicable in about half the places.


Possible bonus project: I'm not sure these descriptions are so useful 
anyway.  It doesn't help me much in debugging if "File 55 was not 
released" or some such.  If would be cool if ResourceOwnerRemember() in 
some debug mode could remember the source code location, and then print 
that together with the leak warning.


> +#define ResourceOwnerRememberTupleDesc(owner, tupdesc) \
> +   ResourceOwnerRemember(owner, PointerGetDatum(tupdesc), 
_resowner_funcs)

> +#define ResourceOwnerForgetTupleDesc(owner, tupdesc) \
> +   ResourceOwnerForget(owner, PointerGetDatum(tupdesc), 
_resowner_funcs)


I would prefer that these kinds of things be made inline functions, so 
that we maintain the type safety of the previous interface.  And it's 
also easier when reading code to see type decorations.


(But I suspect the above bonus project would require these to be macros?)


> +   if (context->resowner)
> +   ResourceOwnerForgetJIT(context->resowner, context);

It seems most ResourceOwnerForget*() calls have this kind of check 
before them.  Could this be moved inside the function?






Re: ResourceOwner refactoring

2023-07-04 Thread Aleksander Alekseev
Hi,

> Thanks, here's a fixed version. (ResourceOwner resource release
> callbacks mustn't call ResourceOwnerForget anymore, but AbortBufferIO
> was still doing that)

I believe v15 is ready to be merged. I suggest we merge it early in
the PG17 release cycle.

-- 
Best regards,
Aleksander Alekseev




Re: ResourceOwner refactoring

2023-05-23 Thread Aleksander Alekseev
Hi,

> [...]
> A-ha, it's because I didn't add the new test_resowner directory to the
> src/test/modules/meson.build file. Fixed.
>
> Thanks for the review!

You probably already noticed, but for the record: cfbot seems to be
extremely unhappy with the patch.

-- 
Best regards,
Aleksander Alekseev




Re: ResourceOwner refactoring

2023-03-30 Thread Aleksander Alekseev
Hi,

> This patch, although moderately complicated, was moved between several
> commitfests. I think it would be great if it made it to PG16. I'm
> inclined to change the status of the patchset to RfC in a bit, unless
> anyone has a second opinion.

> I added a test module in src/test/modules/test_resowner to test those cases.

Hmm... it looks like a file is missing in the patchset. When building
with Autotools:

```
== running regression test queries==
test test_resowner... /bin/sh: 1: cannot open
/home/eax/projects/postgresql/src/test/modules/test_resowner/sql/test_resowner.sql:
No such file
```

I wonder why the tests pass when using Meson.

-- 
Best regards,
Aleksander Alekseev




Re: ResourceOwner refactoring

2023-03-22 Thread Aleksander Alekseev
Hi,

I noticed that the patchset didn't make much progress since February
and decided to give it another round of code review.

> [...]. But in general, end-of-transaction activities should be kept
> simple, especially between the release phases, so I feel that having to
> remember extra resources there is a bad code smell and we shouldn't
> encourage that.

+1

> I added a test module in src/test/modules/test_resowner to test those cases.

This is much appreciated, as well as extensive changes made to READMEs
and the comments.

> [...] New patchset attached.
> I added it as a separate patch on top of the previous patches, as
> v13-0003-Release-resources-in-
> priority-order.patch, to make it easier to
> see what changed after the previous patchset version. But it should be
> squashed with patch 0002 before committing.

My examination, which besides reading the code included running it
under sanitizer and checking the code coverage, didn't reveal any
major problems with the patchset.

Certain "should never happen in practice" scenarios seem not to be
test-covered in resowner.c, particularly:

```
elog(ERROR, "ResourceOwnerEnlarge called after release started");
elog(ERROR, "ResourceOwnerRemember called but array was full");
elog(ERROR, "ResourceOwnerForget called for %s after release started",
kind->name);
elog(ERROR, "%s %p is not owned by resource owner %s",
elog(ERROR, "ResourceOwnerForget called for %s after release started",
kind->name);
elog(ERROR, "lock reference %p is not owned by resource owner %s"
```

I didn't check whether these or similar code paths were tested before
the patch and I don't have a strong opinion on whether we should test
these scenarios. Personally I'm OK with the fact that these few lines
are not covered with tests.

The following procedures are never executed:

* RegisterResourceReleaseCallback()
* UnregisterResourceReleaseCallback()

And are never actually called anymore due to changes in 0005.
Previously they were used by contrib/pgcrypto. I suggest dropping this
part of the API since it seems to be redundant now. This will simplify
the implementation even further.

This patch, although moderately complicated, was moved between several
commitfests. I think it would be great if it made it to PG16. I'm
inclined to change the status of the patchset to RfC in a bit, unless
anyone has a second opinion.

--
Best regards,
Aleksander Alekseev




Re: ResourceOwner refactoring

2022-11-01 Thread Andres Freund
Hi,

On 2022-11-01 12:39:39 +0200, Heikki Linnakangas wrote:
> > 1) As far as I can tell, the way ResourceOwnerReleaseAll() now works seems 
> > to
> > assume that within a phase the reset order does not matter. I don't 
> > think
> > that's a good assumption. I e.g. have a patch to replace InProgressBuf 
> > with
> > resowner handling, and in-progress IO errors should be processed before
> > before pins are released.
> 
> Hmm. Currently, you're not supposed to hold any resources at commit. You get
> warnings about resource leaks if a resource owner is not empty on
> ResourceOwnerReleaseAll(). On abort, does the order matter? I'm not familiar
> with your InProgressBuf patch, but I guess you could handle the in-progress
> IO errors in ReleaseBuffer().

I was thinking about doing that as well, but it's not really trivial to know
about the in-progress IO at that time, without additional tracking (which
isn't free).


> If we do need to worry about release order, perhaps add a "priority" or
> "phase" to each resource kind, and release them in priority order. We
> already have before- and after-locks as phases, but we could generalize
> that.
> 
> However, I feel that trying to enforce a particular order moves the
> goalposts. If we need that, let's add it as a separate patch later.

Like Robert, I think that the patch is moving the goalpost...


> > 2) There's quite a few resource types where we actually don't need an entry 
> > in
> > an array, because we can instead add a dlist_node to the resource -
> > avoiding memory overhead and making removal cheap. I have a few pending
> > patches that use that approach, and this doesn't really provide a path 
> > for
> > that anymore.
> 
> Is that materially better than using the array?

It's safe in critical sections. I have a, not really baked but promising,
patch to make WAL writes use AIO. There's no way to know the number of
"asynchronous IOs" needed before entering the critical section.


> The fast path with an array is very fast. If it is better, perhaps we should
> bite the bullet and require a dlist node and use that mechanism for all
> resource types?

I don't think it's suitable for all - you need an exclusively owned region of
memory to embed a list in. That works nicely for some things, but not others
(e.g. buffer pins).

Greetings,

Andres Freund




Re: ResourceOwner refactoring

2022-11-01 Thread Robert Haas
On Tue, Nov 1, 2022 at 6:39 AM Heikki Linnakangas  wrote:
> However, I feel that trying to enforce a particular order moves the
> goalposts. If we need that, let's add it as a separate patch later.

I don't really see it that way, because with the current
implementation, we do all resources of a particular type together,
before moving on to the next type. That seems like a valuable property
to preserve, and I think we should.

-- 
Robert Haas
EDB: http://www.enterprisedb.com




Re: ResourceOwner refactoring

2022-11-01 Thread Heikki Linnakangas

On 01/11/2022 02:15, Andres Freund wrote:

On 2022-10-31 10:51:36 +0100, Heikki Linnakangas wrote:

These are functions where quite a lot of things happen between the
ResourceOwnerEnlarge and ResourceOwnerRemember calls. It's important that
there are no unrelated ResourceOwnerRemember() calls in the code in
between, otherwise the entry reserved by the ResourceOwnerEnlarge() call
might be used up by the intervening ResourceOwnerRemember() and not be
available at the intended ResourceOwnerRemember() call anymore. The longer
the code path between them is, the harder it is to verify that.


This seems to work towards a future where only one kind of resource can be
reserved ahead of time. That doesn't strike me as great.


True. It is enough for all the current callers AFAICS, though. I don't 
remember wanting to reserve multiple resources at the same time. 
Usually, the distance between the Enlarge and Remember calls is very 
short. If it's not, it's error-prone anyway, so we should try to keep 
the distance short.


While we're at it, who says that it's enough to reserve space for only 
one resource of a kind either? For example, what if you want to pin two 
buffers?


If we really need to support that, I propose something like this:

/*
 * Reserve a slot in the resource owner.
 *
 * This is separate from actually inserting an entry because if we run out
 * of memory, it's critical to do so *before* acquiring the resource.
 */
ResOwnerReservation *
ResourceOwnerReserve(ResourceOwner owner)

/*
 * Remember that an object is owner by a ResourceOwner
 *
 * 'reservation' is a slot in the resource owner that was pre-reserved
 * by ResOwnerReservation.
 */
void
ResOwnerRemember(ResOwnerReservaton *reservation, Datum value, 
ResourceOwnerFuncs *kind)




Instead of having a separate array/hash for each resource kind, use a
single array and hash to hold all kinds of resources. This makes it
possible to introduce new resource "kinds" without having to modify the
ResourceOwnerData struct. In particular, this makes it possible for
extensions to register custom resource kinds.


As a goal I like this.

However, I'm not quite sold on the implementation. Two main worries:

1) As far as I can tell, the way ResourceOwnerReleaseAll() now works seems to
assume that within a phase the reset order does not matter. I don't think
that's a good assumption. I e.g. have a patch to replace InProgressBuf with
resowner handling, and in-progress IO errors should be processed before
before pins are released.


Hmm. Currently, you're not supposed to hold any resources at commit. You 
get warnings about resource leaks if a resource owner is not empty on 
ResourceOwnerReleaseAll(). On abort, does the order matter? I'm not 
familiar with your InProgressBuf patch, but I guess you could handle the 
in-progress IO errors in ReleaseBuffer().


If we do need to worry about release order, perhaps add a "priority" or 
"phase" to each resource kind, and release them in priority order. We 
already have before- and after-locks as phases, but we could generalize 
that.


However, I feel that trying to enforce a particular order moves the 
goalposts. If we need that, let's add it as a separate patch later.



2) There's quite a few resource types where we actually don't need an entry in
an array, because we can instead add a dlist_node to the resource -
avoiding memory overhead and making removal cheap. I have a few pending
patches that use that approach, and this doesn't really provide a path for
that anymore.


Is that materially better than using the array? The fast path with an 
array is very fast. If it is better, perhaps we should bite the bullet 
and require a dlist node and use that mechanism for all resource types?



I did try out the benchmark from
https://postgr.es/m/20221029200025.w7bvlgvamjfo6z44%40awork3.anarazel.de and
the patches performed well, slightly better than my approach of allocating
some initial memory for each resarray.


Thank you, glad to hear that!

- Heikki





Re: ResourceOwner refactoring

2022-10-31 Thread Andres Freund
Hi,

On 2022-10-31 10:51:36 +0100, Heikki Linnakangas wrote:
> These are functions where quite a lot of things happen between the
> ResourceOwnerEnlarge and ResourceOwnerRemember calls. It's important that
> there are no unrelated ResourceOwnerRemember() calls in the code in
> between, otherwise the entry reserved by the ResourceOwnerEnlarge() call
> might be used up by the intervening ResourceOwnerRemember() and not be
> available at the intended ResourceOwnerRemember() call anymore. The longer
> the code path between them is, the harder it is to verify that.

This seems to work towards a future where only one kind of resource can be
reserved ahead of time. That doesn't strike me as great.


> Instead of having a separate array/hash for each resource kind, use a
> single array and hash to hold all kinds of resources. This makes it
> possible to introduce new resource "kinds" without having to modify the
> ResourceOwnerData struct. In particular, this makes it possible for
> extensions to register custom resource kinds.

As a goal I like this.

However, I'm not quite sold on the implementation. Two main worries:

1) As far as I can tell, the way ResourceOwnerReleaseAll() now works seems to
   assume that within a phase the reset order does not matter. I don't think
   that's a good assumption. I e.g. have a patch to replace InProgressBuf with
   resowner handling, and in-progress IO errors should be processed before
   before pins are released.

2) There's quite a few resource types where we actually don't need an entry in
   an array, because we can instead add a dlist_node to the resource -
   avoiding memory overhead and making removal cheap. I have a few pending
   patches that use that approach, and this doesn't really provide a path for
   that anymore.


I did try out the benchmark from
https://postgr.es/m/20221029200025.w7bvlgvamjfo6z44%40awork3.anarazel.de and
the patches performed well, slightly better than my approach of allocating
some initial memory for each resarray.

Greetings,

Andres Freund




Re: ResourceOwner refactoring

2022-10-31 Thread Aleksander Alekseev
Hi hackers,

> > Rebased version attached. Given that Aleksander marked this as Ready for
> > Committer earlier, I'll add this to the next commitfest in that state,
> > and will commit in the next few days, barring any new objections.
>
> Thanks for resurrecting this patch.

Additionally I decided to run `make installcheck-world` on Raspberry
Pi 3. It just terminated successfully.

-- 
Best regards,
Aleksander Alekseev




Re: ResourceOwner refactoring

2022-10-31 Thread Aleksander Alekseev
Hi Heikki,

> Rebased version attached. Given that Aleksander marked this as Ready for
> Committer earlier, I'll add this to the next commitfest in that state,
> and will commit in the next few days, barring any new objections.

Thanks for resurrecting this patch.

While taking a fresh look at the code I noticed a few things.

In 0002 we have:

```
+.name = "buffer"
...
+.name = "File",
```

Not sure why "File" starts with an uppercase letter while "buffer"
starts with a lowercase one. This is naturally not a big deal but
could be worth changing for consistency.

In 0003:

```
+#if SIZEOF_DATUM == 8
+return hash_combine64(murmurhash64((uint64) value), (uint64) kind);
+#else
+return hash_combine(murmurhash32((uint32) value), (uint32) kind);
+#endif
```

Maybe it's worth using PointerGetDatum() + DatumGetInt32() /
DatumGetInt64() inline functions instead of casting Datums and
pointers directly.

These are arguably nitpicks though and shouldn't stop you from merging
the patches as is.

-- 
Best regards,
Aleksander Alekseev




Re: ResourceOwner refactoring

2022-10-31 Thread Heikki Linnakangas

On 12/01/2022 07:57, Julien Rouhaud wrote:

On Fri, Nov 26, 2021 at 8:41 PM Aleksander Alekseev
 wrote:


The patchset is in a good shape. I'm changing the status to "Ready for
Committer".


The 2nd patch doesn't apply anymore due to a conflict on
resowner_private.h: http://cfbot.cputube.org/patch_36_3364.log.


Rebased version attached. Given that Aleksander marked this as Ready for 
Committer earlier, I'll add this to the next commitfest in that state, 
and will commit in the next few days, barring any new objections.


- Heikki
From 14d05ab81a3615a0772bf34297fafce406e160ce Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas 
Date: Mon, 31 Oct 2022 10:33:36 +0100
Subject: [PATCH v12 1/3] Move a few ResourceOwnerEnlarge() calls for safety
 and clarity.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

These are functions where quite a lot of things happen between the
ResourceOwnerEnlarge and ResourceOwnerRemember calls. It's important that
there are no unrelated ResourceOwnerRemember() calls in the code in
between, otherwise the entry reserved by the ResourceOwnerEnlarge() call
might be used up by the intervening ResourceOwnerRemember() and not be
available at the intended ResourceOwnerRemember() call anymore. The longer
the code path between them is, the harder it is to verify that.

In bufmgr.c, there is a function similar to ResourceOwnerEnlarge(), to
ensure that the private refcount array has enough space. The
ReservePrivateRefCountEntry() calls, analogous to ResourceOwnerEnlarge(),
were made at different places than the ResourceOwnerEnlarge() calls. Move
the ResourceOwnerEnlarge() calls together with the
ReservePrivateRefCountEntry() calls for consistency.

Reviewed-by: Aleksander Alekseev, Michael Paquier, Julien Rouhaud, Kyotaro Horiguchi, Hayato Kuroda, Álvaro Herrera, Zhihong Yu
Discussion: https://www.postgresql.org/message-id/cbfabeb0-cd3c-e951-a572-19b365ed314d%40iki.fi
---
 src/backend/storage/buffer/bufmgr.c   | 39 +++
 src/backend/storage/buffer/localbuf.c |  3 +++
 src/backend/utils/cache/catcache.c| 13 ++---
 3 files changed, 28 insertions(+), 27 deletions(-)

diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 73d30bf6191..d12e81097c5 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -828,9 +828,6 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 
 	*hit = false;
 
-	/* Make sure we will have room to remember the buffer pin */
-	ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
-
 	isExtend = (blockNum == P_NEW);
 
 	TRACE_POSTGRESQL_BUFFER_READ_START(forkNum, blockNum,
@@ -1192,9 +1189,11 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 	{
 		/*
 		 * Ensure, while the spinlock's not yet held, that there's a free
-		 * refcount entry.
+		 * refcount entry and that the resource owner has room to remember the
+		 * pin.
 		 */
 		ReservePrivateRefCountEntry();
+		ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
 
 		/*
 		 * Select a victim buffer.  The buffer is returned with its header
@@ -1695,8 +1694,6 @@ ReleaseAndReadBuffer(Buffer buffer,
  * taking the buffer header lock; instead update the state variable in loop of
  * CAS operations. Hopefully it's just a single CAS.
  *
- * Note that ResourceOwnerEnlargeBuffers must have been done already.
- *
  * Returns true if buffer is BM_VALID, else false.  This provision allows
  * some callers to avoid an extra spinlock cycle.
  */
@@ -1707,6 +1704,8 @@ PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy)
 	bool		result;
 	PrivateRefCountEntry *ref;
 
+	ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
+
 	ref = GetPrivateRefCountEntry(b, true);
 
 	if (ref == NULL)
@@ -1787,7 +1786,8 @@ PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy)
  * The spinlock is released before return.
  *
  * As this function is called with the spinlock held, the caller has to
- * previously call ReservePrivateRefCountEntry().
+ * previously call ReservePrivateRefCountEntry() and
+ * ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
  *
  * Currently, no callers of this function want to modify the buffer's
  * usage_count at all, so there's no need for a strategy parameter.
@@ -1960,9 +1960,6 @@ BufferSync(int flags)
 	int			mask = BM_DIRTY;
 	WritebackContext wb_context;
 
-	/* Make sure we can handle the pin inside SyncOneBuffer */
-	ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
-
 	/*
 	 * Unless this is a shutdown checkpoint or we have been explicitly told,
 	 * we write only permanent, dirty buffers.  But at shutdown or end of
@@ -2436,9 +2433,6 @@ BgBufferSync(WritebackContext *wb_context)
 	 * requirements, or hit the bgwriter_lru_maxpages limit.
 	 */
 
-	/* Make sure we can handle the pin inside SyncOneBuffer */
-	ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
-
 	num_to_scan = bufs_to_lap;
 	num_written = 0;
 	

Re: ResourceOwner refactoring

2022-01-11 Thread Julien Rouhaud
Hi,

On Fri, Nov 26, 2021 at 8:41 PM Aleksander Alekseev
 wrote:
>
> > I will submit the actual code review in the follow-up email
>
> The patchset is in a good shape. I'm changing the status to "Ready for
> Committer".

The 2nd patch doesn't apply anymore due to a conflict on
resowner_private.h: http://cfbot.cputube.org/patch_36_3364.log.

I'm switching the entry to Waiting on Author.




Re: ResourceOwner refactoring

2021-11-26 Thread Aleksander Alekseev
Hi Heikki,

> I will submit the actual code review in the follow-up email

The patchset is in a good shape. I'm changing the status to "Ready for
Committer".

-- 
Best regards,
Aleksander Alekseev




Re: ResourceOwner refactoring

2021-11-23 Thread Aleksander Alekseev
Hi Heikki,

> Attached is a newly rebased version. It includes your tweaks, and a few
> more comment and indentation tweaks.

v10-0002 rotted a little so I rebased it. The new patch set passed `make
installcheck-world` locally, but let's see if cfbot has a second opinion.

I'm a bit busy with various things at the moment, so (hopefully) I will
submit the actual code review in the follow-up email.

-- 
Best regards,
Aleksander Alekseev


v11-0001-Move-a-few-ResourceOwnerEnlarge-calls-for-safety.patch
Description: Binary data


v11-0002-Make-resowners-more-easily-extensible.patch
Description: Binary data


v11-0003-Optimize-hash-function.patch
Description: Binary data


Re: ResourceOwner refactoring

2021-10-18 Thread Heikki Linnakangas

On 08/09/2021 13:19, Aleksander Alekseev wrote:

Hi Heikki,


Yeah, needed some manual fixing, but here you go.


Thanks for working on this!

v8-0002 didn't apply to the current master, so I rebased it. See
attached v9-* patches. I also included v9-0004 with some minor tweaks
from me. I have several notes regarding the code.


Thanks!

Attached is a newly rebased version. It includes your tweaks, and a few 
more comment and indentation tweaks.



1. Not sure if I understand this code of ResourceOwnerReleaseAll():
```
 /* First handle all the entries in the array. */
 do
 {
 found = false;
 for (int i = 0; i < owner->narr; i++)
 {
 if (owner->arr[i].kind->phase == phase)
 {
 Datumvalue = owner->arr[i].item;
 ResourceOwnerFuncs *kind = owner->arr[i].kind;

 if (printLeakWarnings)
 kind->PrintLeakWarning(value);
 kind->ReleaseResource(value);
 found = true;
 }
 }

 /*
  * If any resources were released, check again because some of the
  * elements might have been moved by the callbacks. We don't want to
  * miss them.
  */
 } while (found && owner->narr > 0);
```

Shouldn't we mark the resource as released and/or decrease narr and/or
save the last processed i? Why this will not call ReleaseResource()
endlessly on the same resource (array item)? Same question for the
following code that iterates over the hash table.


ReleaseResource() must call ResourceOwnerForget(), which removes the 
item from the or hash table. This works the same as the code in 'master':



/*
 * Release buffer pins.  Note that ReleaseBuffer will remove the
 * buffer entry from our array, so we just have to iterate till 
there
 * are none.
 *
 * During a commit, there shouldn't be any remaining pins --- 
that
 * would indicate failure to clean up the executor correctly 
--- so
 * issue warnings.  In the abort case, just clean up quietly.
 */
while (ResourceArrayGetAny(&(owner->bufferarr), ))
{
Buffer  res = DatumGetBuffer(foundres);

if (isCommit)
PrintBufferLeakWarning(res);
ReleaseBuffer(res);
}


That comment explains how it works. I added a comment like that in this 
patch, too.



2. Just an idea/observation. It's possible that the performance of
ResourceOwnerEnlarge() can be slightly improved. Since the size of the
hash table is always a power of 2 and the code always doubles the size
of the hash table, (idx & mask) value will get one extra bit, which
can be 0 or 1. If it's 0, the value is already in its place,
otherwise, it should be moved on the known distance. In other words,
it's possible to copy `oldhash` to `newhash` and then move only half
of the items. I don't claim that this code necessarily should be
faster, or that this should be checked in the scope of this work.


Hmm, the hash table uses open addressing, I think we want to also 
rearrange any existing entries that might not be at their right place 
because of collisions. We don't actually do that currently when an 
element is removed (even before this patch), but enlarging the array is 
a good opportunity for it. In any case, I haven't seen 
ResourceOwnerEnlarge() show up while profiling, so I'm going to leave it 
as it is.


- Heikki
>From 98758f3774484f54f010d4bec663ac60b764170f Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas 
Date: Wed, 13 Jan 2021 12:21:28 +0200
Subject: [PATCH v10 1/3] Move a few ResourceOwnerEnlarge() calls for safety
 and clarity.

These are functions where quite a lot of things happen between the
ResourceOwnerEnlarge and ResourceOwnerRemember calls. It's important that
there are no unrelated ResourceOwnerRemember() calls in the code
inbetween, otherwise the entry reserved by the ResourceOwnerEnlarge() call
might be used up by the intervening ResourceOwnerRemember() and not be
available at the intended ResourceOwnerRemember() call anymore. The longer
the code path between them is, the harder it is to verify that.

In bufmgr.c, there is a function similar to ResourceOwnerEnlarge(),
to ensure that the private refcount array has enough space. The
ReservePrivateRefCountEntry() calls, analogous to ResourceOwnerEnlarge(),
were made at different places than the ResourceOwnerEnlarge() calls.
Move the ResourceOwnerEnlarge() calls together with the
ReservePrivateRefCountEntry() calls for consistency.
---
 src/backend/storage/buffer/bufmgr.c   | 39 +++
 src/backend/storage/buffer/localbuf.c |  3 +++
 src/backend/utils/cache/catcache.c| 13 ++---
 3 files changed, 28 insertions(+), 27 

Re: ResourceOwner refactoring

2021-10-01 Thread Michael Paquier
On Wed, Sep 08, 2021 at 01:19:19PM +0300, Aleksander Alekseev wrote:
> Thanks for working on this!

The latest patch does not apply anymore, and has been waiting on
author for a couple of weeks now, so switched as RwF for now.
--
Michael


signature.asc
Description: PGP signature


Re: ResourceOwner refactoring

2021-09-08 Thread Aleksander Alekseev
Hi Heikki,

> Yeah, needed some manual fixing, but here you go.

Thanks for working on this!

v8-0002 didn't apply to the current master, so I rebased it. See
attached v9-* patches. I also included v9-0004 with some minor tweaks
from me. I have several notes regarding the code.

1. Not sure if I understand this code of ResourceOwnerReleaseAll():
```
/* First handle all the entries in the array. */
do
{
found = false;
for (int i = 0; i < owner->narr; i++)
{
if (owner->arr[i].kind->phase == phase)
{
Datumvalue = owner->arr[i].item;
ResourceOwnerFuncs *kind = owner->arr[i].kind;

if (printLeakWarnings)
kind->PrintLeakWarning(value);
kind->ReleaseResource(value);
found = true;
}
}

/*
 * If any resources were released, check again because some of the
 * elements might have been moved by the callbacks. We don't want to
 * miss them.
 */
} while (found && owner->narr > 0);
```

Shouldn't we mark the resource as released and/or decrease narr and/or
save the last processed i? Why this will not call ReleaseResource()
endlessly on the same resource (array item)? Same question for the
following code that iterates over the hash table.

2. Just an idea/observation. It's possible that the performance of
ResourceOwnerEnlarge() can be slightly improved. Since the size of the
hash table is always a power of 2 and the code always doubles the size
of the hash table, (idx & mask) value will get one extra bit, which
can be 0 or 1. If it's 0, the value is already in its place,
otherwise, it should be moved on the known distance. In other words,
it's possible to copy `oldhash` to `newhash` and then move only half
of the items. I don't claim that this code necessarily should be
faster, or that this should be checked in the scope of this work.

-- 
Best regards,
Aleksander Alekseev


v9-0003-Optimize-hash-function.patch
Description: Binary data


v9-0001-Move-a-few-ResourceOwnerEnlarge-calls-for-safety.patch
Description: Binary data


v9-0002-Make-resowners-more-easily-extensible.patch
Description: Binary data


v9-0004-Minor-tweaks.patch
Description: Binary data


Re: ResourceOwner refactoring

2021-07-14 Thread Zhihong Yu
On Wed, Jul 14, 2021 at 8:26 AM Heikki Linnakangas  wrote:

> Thanks for having a look!
>
> On 14/07/2021 18:18, Zhihong Yu wrote:
> > For the loop over the hash:
> >
> > +   for (int idx = 0; idx < capacity; idx++)
> >  {
> > -   if (olditemsarr[i] != resarr->invalidval)
> > -   ResourceArrayAdd(resarr, olditemsarr[i]);
> > +   while (owner->hash[idx].kind != NULL &&
> > +  owner->hash[idx].kind->phase == phase)
> > ...
> > +   } while (capacity != owner->capacity);
> >
> > Since the phase variable doesn't seem to change for the while loop, I
> > wonder what benefit the while loop has (since the release is governed by
> > phase).
>
> Hmm, the phase variable doesn't change, but could the element at
> 'owner->hash[idx]' change? I'm not sure about that. The loop is supposed
> to handle the case that the hash table grows; could that replace the
> element at 'owner->hash[idx]' with something else, with different phase?
> The check is very cheap, so I'm inclined to keep it to be sure.
>
> - Heikki
>
Hi,
Agreed that ```owner->hash[idx].kind->phase == phase``` can be kept.

I just wonder if we should put limit on the number of iterations for the
while loop.
Maybe add a bool variable indicating that kind->ReleaseResource(value) is
called in the inner loop.
If there is no releasing when the inner loop finishes, we can come out of
the while loop.

Cheers


Re: ResourceOwner refactoring

2021-07-14 Thread Heikki Linnakangas

Thanks for having a look!

On 14/07/2021 18:18, Zhihong Yu wrote:

For the loop over the hash:

+       for (int idx = 0; idx < capacity; idx++)
         {
-           if (olditemsarr[i] != resarr->invalidval)
-               ResourceArrayAdd(resarr, olditemsarr[i]);
+           while (owner->hash[idx].kind != NULL &&
+                  owner->hash[idx].kind->phase == phase)
...
+   } while (capacity != owner->capacity);

Since the phase variable doesn't seem to change for the while loop, I 
wonder what benefit the while loop has (since the release is governed by 
phase).


Hmm, the phase variable doesn't change, but could the element at 
'owner->hash[idx]' change? I'm not sure about that. The loop is supposed 
to handle the case that the hash table grows; could that replace the 
element at 'owner->hash[idx]' with something else, with different phase? 
The check is very cheap, so I'm inclined to keep it to be sure.


- Heikki




Re: ResourceOwner refactoring

2021-07-14 Thread Zhihong Yu
On Wed, Jul 14, 2021 at 7:40 AM Heikki Linnakangas  wrote:

> On 14/07/2021 17:07, Alvaro Herrera wrote:
> > On 2021-Jul-14, vignesh C wrote:
> >
> >> On Tue, Mar 9, 2021 at 6:10 PM Heikki Linnakangas 
> wrote:
> >
> >>> Here you go.
> >>
> >> The patch does not apply on Head anymore, could you rebase and post a
> >> patch. I'm changing the status to "Waiting for Author".
> >
> > Support for hmac was added by e6bdfd9700eb so the rebase is not trivial.
>
> Yeah, needed some manual fixing, but here you go.
>
> - Heikki
>
Hi,
For the loop over the hash:

+   for (int idx = 0; idx < capacity; idx++)
{
-   if (olditemsarr[i] != resarr->invalidval)
-   ResourceArrayAdd(resarr, olditemsarr[i]);
+   while (owner->hash[idx].kind != NULL &&
+  owner->hash[idx].kind->phase == phase)
...
+   } while (capacity != owner->capacity);

Since the phase variable doesn't seem to change for the while loop, I
wonder what benefit the while loop has (since the release is governed by
phase).

Cheers


Re: ResourceOwner refactoring

2021-07-14 Thread Heikki Linnakangas

On 14/07/2021 17:07, Alvaro Herrera wrote:

On 2021-Jul-14, vignesh C wrote:


On Tue, Mar 9, 2021 at 6:10 PM Heikki Linnakangas  wrote:



Here you go.


The patch does not apply on Head anymore, could you rebase and post a
patch. I'm changing the status to "Waiting for Author".


Support for hmac was added by e6bdfd9700eb so the rebase is not trivial.


Yeah, needed some manual fixing, but here you go.

- Heikki
>From 19fb4369e3b7bfb29afc6cfe597af5a89698dd3f Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas 
Date: Wed, 13 Jan 2021 12:21:28 +0200
Subject: [PATCH v8 1/3] Move a few ResourceOwnerEnlarge() calls for safety and
 clarity.

These are functions where quite a lot of things happen between the
ResourceOwnerEnlarge and ResourceOwnerRemember calls. It's important that
there are no unrelated ResourceOwnerRemember() calls in the code
inbetween, otherwise the entry reserved by the ResourceOwnerEnlarge() call
might be used up by the intervening ResourceOwnerRemember() and not be
available at the intended ResourceOwnerRemember() call anymore. The longer
the code path between them is, the harder it is to verify that.

In bufmgr.c, there is a function similar to ResourceOwnerEnlarge(),
to ensure that the private refcount array has enough space. The
ReservePrivateRefCountEntry() calls, analogous to ResourceOwnerEnlarge(),
were made at different places than the ResourceOwnerEnlarge() calls.
Move the ResourceOwnerEnlarge() calls together with the
ReservePrivateRefCountEntry() calls for consistency.
---
 src/backend/storage/buffer/bufmgr.c   | 39 +++
 src/backend/storage/buffer/localbuf.c |  3 +++
 src/backend/utils/cache/catcache.c| 13 ++---
 3 files changed, 28 insertions(+), 27 deletions(-)

diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 86ef607ff38..2383e4cac08 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -810,9 +810,6 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 
 	*hit = false;
 
-	/* Make sure we will have room to remember the buffer pin */
-	ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
-
 	isExtend = (blockNum == P_NEW);
 
 	TRACE_POSTGRESQL_BUFFER_READ_START(forkNum, blockNum,
@@ -1165,9 +1162,11 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 	{
 		/*
 		 * Ensure, while the spinlock's not yet held, that there's a free
-		 * refcount entry.
+		 * refcount entry and that the resoure owner has room to remember the
+		 * pin.
 		 */
 		ReservePrivateRefCountEntry();
+		ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
 
 		/*
 		 * Select a victim buffer.  The buffer is returned with its header
@@ -1668,8 +1667,6 @@ ReleaseAndReadBuffer(Buffer buffer,
  * taking the buffer header lock; instead update the state variable in loop of
  * CAS operations. Hopefully it's just a single CAS.
  *
- * Note that ResourceOwnerEnlargeBuffers must have been done already.
- *
  * Returns true if buffer is BM_VALID, else false.  This provision allows
  * some callers to avoid an extra spinlock cycle.
  */
@@ -1680,6 +1677,8 @@ PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy)
 	bool		result;
 	PrivateRefCountEntry *ref;
 
+	ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
+
 	ref = GetPrivateRefCountEntry(b, true);
 
 	if (ref == NULL)
@@ -1760,7 +1759,8 @@ PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy)
  * The spinlock is released before return.
  *
  * As this function is called with the spinlock held, the caller has to
- * previously call ReservePrivateRefCountEntry().
+ * previously call ReservePrivateRefCountEntry() and
+ * ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
  *
  * Currently, no callers of this function want to modify the buffer's
  * usage_count at all, so there's no need for a strategy parameter.
@@ -1936,9 +1936,6 @@ BufferSync(int flags)
 	int			mask = BM_DIRTY;
 	WritebackContext wb_context;
 
-	/* Make sure we can handle the pin inside SyncOneBuffer */
-	ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
-
 	/*
 	 * Unless this is a shutdown checkpoint or we have been explicitly told,
 	 * we write only permanent, dirty buffers.  But at shutdown or end of
@@ -2412,9 +2409,6 @@ BgBufferSync(WritebackContext *wb_context)
 	 * requirements, or hit the bgwriter_lru_maxpages limit.
 	 */
 
-	/* Make sure we can handle the pin inside SyncOneBuffer */
-	ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
-
 	num_to_scan = bufs_to_lap;
 	num_written = 0;
 	reusable_buffers = reusable_buffers_est;
@@ -2496,8 +2490,6 @@ BgBufferSync(WritebackContext *wb_context)
  *
  * (BUF_WRITTEN could be set in error if FlushBuffer finds the buffer clean
  * after locking it, but we don't care all that much.)
- *
- * Note: caller must have done ResourceOwnerEnlargeBuffers.
  */
 static int
 SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *wb_context)
@@ -2507,7 +2499,9 @@ 

Re: ResourceOwner refactoring

2021-07-14 Thread Alvaro Herrera
On 2021-Jul-14, vignesh C wrote:

> On Tue, Mar 9, 2021 at 6:10 PM Heikki Linnakangas  wrote:

> > Here you go.
> 
> The patch does not apply on Head anymore, could you rebase and post a
> patch. I'm changing the status to "Waiting for Author".

Support for hmac was added by e6bdfd9700eb so the rebase is not trivial.

-- 
Álvaro Herrera  Valdivia, Chile  —  https://www.EnterpriseDB.com/
"Ellos andaban todos desnudos como su madre los parió, y también las mujeres,
aunque no vi más que una, harto moza, y todos los que yo vi eran todos
mancebos, que ninguno vi de edad de más de XXX años" (Cristóbal Colón)




Re: ResourceOwner refactoring

2021-07-14 Thread vignesh C
On Tue, Mar 9, 2021 at 6:10 PM Heikki Linnakangas  wrote:
>
> On 08/03/2021 18:47, Ibrar Ahmed wrote:
> > The patchset does not apply successfully, there are some hunk failures.
> >
> > http://cfbot.cputube.org/patch_32_2834.log
> > 
> >
> > v6-0002-Make-resowners-more-easily-extensible.patch
> >
> > 1 out of 6 hunks FAILED -- saving rejects to file
> > src/backend/utils/cache/plancache.c.rej
> > 2 out of 15 hunks FAILED -- saving rejects to file
> > src/backend/utils/resowner/resowner.c.rej
> >
> >
> > Can we get a rebase?
>
> Here you go.

The patch does not apply on Head anymore, could you rebase and post a
patch. I'm changing the status to "Waiting for Author".

Regards,
Vignesh




Re: ResourceOwner refactoring

2021-03-09 Thread Heikki Linnakangas

On 08/03/2021 18:47, Ibrar Ahmed wrote:

The patchset does not apply successfully, there are some hunk failures.

http://cfbot.cputube.org/patch_32_2834.log 



v6-0002-Make-resowners-more-easily-extensible.patch

1 out of 6 hunks FAILED -- saving rejects to file 
src/backend/utils/cache/plancache.c.rej
2 out of 15 hunks FAILED -- saving rejects to file 
src/backend/utils/resowner/resowner.c.rej



Can we get a rebase?


Here you go.

- Heikki
>From 8b852723212c51d3329267fcbf8e7a28f49dbbdd Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas 
Date: Wed, 13 Jan 2021 12:21:28 +0200
Subject: [PATCH v7 1/3] Move a few ResourceOwnerEnlarge() calls for safety and
 clarity.

These are functions where quite a lot of things happen between the
ResourceOwnerEnlarge and ResourceOwnerRemember calls. It's important that
there are no unrelated ResourceOwnerRemember() calls in the code
inbetween, otherwise the entry reserved by the ResourceOwnerEnlarge() call
might be used up by the intervening ResourceOwnerRemember() and not be
available at the intended ResourceOwnerRemember() call anymore. The longer
the code path between them is, the harder it is to verify that.

In bufmgr.c, there is a function similar to ResourceOwnerEnlarge(),
to ensure that the private refcount array has enough space. The
ReservePrivateRefCountEntry() calls, analogous to ResourceOwnerEnlarge(),
were made at different places than the ResourceOwnerEnlarge() calls.
Move the ResourceOwnerEnlarge() calls together with the
ReservePrivateRefCountEntry() calls for consistency.
---
 src/backend/storage/buffer/bufmgr.c   | 39 +++
 src/backend/storage/buffer/localbuf.c |  3 +++
 src/backend/utils/cache/catcache.c| 13 ++---
 3 files changed, 28 insertions(+), 27 deletions(-)

diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 561c212092f..cde869e7d64 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -738,9 +738,6 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 
 	*hit = false;
 
-	/* Make sure we will have room to remember the buffer pin */
-	ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
-
 	isExtend = (blockNum == P_NEW);
 
 	TRACE_POSTGRESQL_BUFFER_READ_START(forkNum, blockNum,
@@ -1093,9 +1090,11 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 	{
 		/*
 		 * Ensure, while the spinlock's not yet held, that there's a free
-		 * refcount entry.
+		 * refcount entry and that the resoure owner has room to remember the
+		 * pin.
 		 */
 		ReservePrivateRefCountEntry();
+		ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
 
 		/*
 		 * Select a victim buffer.  The buffer is returned with its header
@@ -1595,8 +1594,6 @@ ReleaseAndReadBuffer(Buffer buffer,
  * taking the buffer header lock; instead update the state variable in loop of
  * CAS operations. Hopefully it's just a single CAS.
  *
- * Note that ResourceOwnerEnlargeBuffers must have been done already.
- *
  * Returns true if buffer is BM_VALID, else false.  This provision allows
  * some callers to avoid an extra spinlock cycle.
  */
@@ -1607,6 +1604,8 @@ PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy)
 	bool		result;
 	PrivateRefCountEntry *ref;
 
+	ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
+
 	ref = GetPrivateRefCountEntry(b, true);
 
 	if (ref == NULL)
@@ -1687,7 +1686,8 @@ PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy)
  * The spinlock is released before return.
  *
  * As this function is called with the spinlock held, the caller has to
- * previously call ReservePrivateRefCountEntry().
+ * previously call ReservePrivateRefCountEntry() and
+ * ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
  *
  * Currently, no callers of this function want to modify the buffer's
  * usage_count at all, so there's no need for a strategy parameter.
@@ -1857,9 +1857,6 @@ BufferSync(int flags)
 	int			mask = BM_DIRTY;
 	WritebackContext wb_context;
 
-	/* Make sure we can handle the pin inside SyncOneBuffer */
-	ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
-
 	/*
 	 * Unless this is a shutdown checkpoint or we have been explicitly told,
 	 * we write only permanent, dirty buffers.  But at shutdown or end of
@@ -2334,9 +2331,6 @@ BgBufferSync(WritebackContext *wb_context)
 	 * requirements, or hit the bgwriter_lru_maxpages limit.
 	 */
 
-	/* Make sure we can handle the pin inside SyncOneBuffer */
-	ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
-
 	num_to_scan = bufs_to_lap;
 	num_written = 0;
 	reusable_buffers = reusable_buffers_est;
@@ -2418,8 +2412,6 @@ BgBufferSync(WritebackContext *wb_context)
  *
  * (BUF_WRITTEN could be set in error if FlushBuffer finds the buffer clean
  * after locking it, but we don't care all that much.)
- *
- * Note: caller must have done ResourceOwnerEnlargeBuffers.
  */
 static int
 SyncOneBuffer(int buf_id, bool 

Re: ResourceOwner refactoring

2021-03-08 Thread Ibrar Ahmed
On Mon, Jan 25, 2021 at 10:15 PM Robert Haas  wrote:

> On Thu, Jan 21, 2021 at 5:14 AM Heikki Linnakangas 
> wrote:
> > Here you can see that as numsnaps increases, the test becomes slower,
> > but then it becomes faster again at 64-66, when it switches to the hash
> > table. So 64 seems too much. 32 seems to be the sweet spot here, that's
> > where scanning the hash and scanning the array are about the same speed.
>
> That sounds good. I mean, it could be that not all hardware behaves
> the same here. But trying to get it into the right ballpark makes
> sense.
>
> I also like the fact that this now has some cases where it wins by a
> significant margin. That's pretty cool; thanks for working on it!
>
> --
> Robert Haas
> EDB: http://www.enterprisedb.com
>
>
>
The patchset does not apply successfully, there are some hunk failures.

http://cfbot.cputube.org/patch_32_2834.log

v6-0002-Make-resowners-more-easily-extensible.patch

1 out of 6 hunks FAILED -- saving rejects to file
src/backend/utils/cache/plancache.c.rej
2 out of 15 hunks FAILED -- saving rejects to file
src/backend/utils/resowner/resowner.c.rej


Can we get a rebase?

I am marking the patch "Waiting on Author"

-- 
Ibrar Ahmed


Re: ResourceOwner refactoring

2021-01-25 Thread Robert Haas
On Thu, Jan 21, 2021 at 5:14 AM Heikki Linnakangas  wrote:
> Here you can see that as numsnaps increases, the test becomes slower,
> but then it becomes faster again at 64-66, when it switches to the hash
> table. So 64 seems too much. 32 seems to be the sweet spot here, that's
> where scanning the hash and scanning the array are about the same speed.

That sounds good. I mean, it could be that not all hardware behaves
the same here. But trying to get it into the right ballpark makes
sense.

I also like the fact that this now has some cases where it wins by a
significant margin. That's pretty cool; thanks for working on it!

-- 
Robert Haas
EDB: http://www.enterprisedb.com




Re: ResourceOwner refactoring

2021-01-21 Thread Heikki Linnakangas

On 21/01/2021 06:14, Michael Paquier wrote:

On Thu, Jan 21, 2021 at 12:11:37AM +0200, Heikki Linnakangas wrote:

Summary: In the the worst scenario, the patched version is still 24% slower
than unpatched. But many other scenarios are now faster with the patch.


Is there a reason explaining the sudden drop for numsnaps within the
[10,60] range?  The gap looks deeper with a low numkeep.


It's the switch from array to hash table. With the patch, the array 
holds 8 entries. Without the patch, it's 64 entries. So you see a drop 
around those points. I added more data points in that range to get a 
better picture:


LIFO tests:
 numkeep | numsnaps | unpatched | patched | ratio
-+--+---+-+---
   0 |1 |  32.8 |32.9 |  1.00
   0 |2 |  31.6 |32.8 |  1.04
   0 |4 |  32.7 |32.0 |  0.98
   0 |6 |  34.1 |33.9 |  0.99
   0 |8 |  35.1 |32.4 |  0.92
   0 |   10 |  34.0 |37.1 |  1.09
   0 |   15 |  33.1 |35.9 |  1.08
   0 |   20 |  33.0 |38.8 |  1.18
   0 |   25 |  32.9 |42.3 |  1.29
   0 |   30 |  32.9 |40.5 |  1.23
   0 |   35 |  33.1 |39.9 |  1.21
   0 |   40 |  33.0 |39.0 |  1.18
   0 |   45 |  35.3 |41.1 |  1.16
   0 |   50 |  33.0 |40.8 |  1.24
   0 |   55 |  32.8 |40.6 |  1.24
   0 |   58 |  33.0 |41.5 |  1.26
   0 |   60 |  33.1 |41.6 |  1.26
   0 |   62 |  32.8 |41.7 |  1.27
   0 |   64 |  46.8 |40.9 |  0.87
   0 |   66 |  47.0 |42.5 |  0.90
   0 |   68 |  47.1 |41.8 |  0.89
   0 |   70 |  47.8 |41.7 |  0.87
(22 rows)

FIFO tests:
 numkeep | numsnaps | unpatched | patched | ratio
-+--+---+-+---
   0 |1 |  32.3 |32.1 |  0.99
   0 |2 |  33.4 |31.6 |  0.95
   0 |4 |  34.0 |31.4 |  0.92
   0 |6 |  35.4 |33.2 |  0.94
   0 |8 |  34.8 |31.9 |  0.92
   0 |   10 |  35.4 |40.2 |  1.14
   0 |   15 |  35.4 |40.3 |  1.14
   0 |   20 |  35.6 |43.8 |  1.23
   0 |   25 |  35.4 |42.4 |  1.20
   0 |   30 |  36.0 |43.3 |  1.20
   0 |   35 |  36.4 |45.1 |  1.24
   0 |   40 |  36.9 |46.6 |  1.26
   0 |   45 |  37.7 |45.3 |  1.20
   0 |   50 |  37.2 |43.9 |  1.18
   0 |   55 |  38.4 |46.8 |  1.22
   0 |   58 |  37.6 |45.0 |  1.20
   0 |   60 |  37.7 |46.6 |  1.24
   0 |   62 |  38.4 |46.5 |  1.21
   0 |   64 |  48.7 |47.6 |  0.98
   0 |   66 |  48.2 |45.8 |  0.95
   0 |   68 |  48.5 |47.5 |  0.98
   0 |   70 |  48.4 |47.3 |  0.98
(22 rows)

Let's recap the behavior:

Without patch
-

For each different kind of resource, there's an array that holds up to 
64 entries. In ResourceOwnerForget(), the array is scanned linearly. If 
the array fills up, we replace the array with a hash table. After 
switching, all operations use the hash table.


With patch
--

There is one array that holds up to 8 entries. It is shared by all 
resources. In ResourceOwnerForget(), the array is always scanned.


If the array fills up, all the entries are moved to a hash table, 
freeing up space in the array, and the new entry is added to the array. 
So the array is used together with the hash table, like an LRU cache of 
the most recently remembered entries.



Why this change? I was afraid that now that all different resources 
share the same data structure, remembering e.g. a lot of locks at the 
beginning of a transaction would cause the switch to the hash table, 
making all subsequent remember/forget operations, like for buffer pins, 
slower. That kind of interference seems bad. By continuing to use the 
array for the recently-remembered entries, we avoid that problem. The 
[numkeep, numsnaps] = [65, 70] test is in that regime, and the patched 
version was significantly faster.


Because the array is now always scanned, I felt that it needs to be 
small, to avoid wasting much time scanning for entries that have already 
been moved to the hash table. That's why I made it just 8 entries.


Perhaps 8 entries is too small, after all? Linearly scanning an array is 
very fast. To test that, I bumped up RESOWNER_ARRAY_SIZE to 64, and ran 
the test again:


 numkeep | numsnaps | lifo_time_ns | fifo_time_ns
-+--+--+--
   0 |1 | 35.4 | 31.5
   0 |2 | 32.3 | 32.3
   0 |4 | 32.8 | 31.0
   0 |6 | 34.5 |   

Re: ResourceOwner refactoring

2021-01-20 Thread Michael Paquier
On Thu, Jan 21, 2021 at 12:11:37AM +0200, Heikki Linnakangas wrote:
> Summary: In the the worst scenario, the patched version is still 24% slower
> than unpatched. But many other scenarios are now faster with the patch.

Is there a reason explaining the sudden drop for numsnaps within the
[10,60] range?  The gap looks deeper with a low numkeep.
--
Michael


signature.asc
Description: PGP signature


RE: ResourceOwner refactoring

2021-01-20 Thread kuroda.hay...@fujitsu.com
Dear Heikki,

I tested in the same situation, and I confirmed that almost same results are 
returned.
(results are at the end of the e-mail)

You think that these results are acceptable
because resowners own many resources(more than 64) in general
and it's mainly faster in such a situation, isn't it?

I cannot distinguish correctly, but sounds good.

test result

unpatched

 numkeep | numsnaps | lifo_time_ns | fifo_time_ns 
-+--+--+--
   0 |1 | 68.5 | 69.9
   0 |5 | 73.2 | 76.8
   0 |   10 | 70.6 | 74.7
   0 |   60 | 68.0 | 75.6
   0 |   70 | 91.3 | 94.8
   0 |  100 | 89.0 | 89.1
   0 | 1000 | 97.9 | 98.9
   0 |1 |116.0 |115.9
   9 |   10 | 74.7 | 76.6
   9 |  100 | 80.8 | 80.1
   9 | 1000 | 86.0 | 86.2
   9 |1 |116.1 |116.8
  65 |   70 | 84.7 | 85.3
  65 |  100 | 80.5 | 80.3
  65 | 1000 | 86.3 | 86.2
  65 |1 |115.4 |115.9
(16 rows)

patched

 numkeep | numsnaps | lifo_time_ns | fifo_time_ns 
-+--+--+--
   0 |1 | 62.4 | 62.6
   0 |5 | 68.0 | 66.9
   0 |   10 | 73.6 | 78.1
   0 |   60 | 82.3 | 87.2
   0 |   70 | 83.0 | 89.1
   0 |  100 | 82.8 | 87.9
   0 | 1000 | 88.2 | 96.6
   0 |1 |119.6 |124.5
   9 |   10 | 62.0 | 62.8
   9 |  100 | 75.3 | 78.0
   9 | 1000 | 82.6 | 89.3
   9 |1 |116.6 |122.6
  65 |   70 | 66.7 | 66.4
  65 |  100 | 74.6 | 77.2
  65 | 1000 | 82.1 | 88.2
  65 |1 |118.0 |124.1
(16 rows)

Best Regards,
Hayato Kuroda
FUJITSU LIMITED



Re: ResourceOwner refactoring

2021-01-20 Thread Heikki Linnakangas

On 19/01/2021 11:09, Heikki Linnakangas wrote:

On 18/01/2021 18:11, Robert Haas wrote:

On Mon, Jan 18, 2021 at 11:11 AM Robert Haas  wrote:

On Mon, Jan 18, 2021 at 10:19 AM Heikki Linnakangas  wrote:

On 18/01/2021 16:34, Alvaro Herrera wrote:

So according to your performance benchmark, we're willing to accept a
30% performance loss on an allegedly common operation -- numkeep=0
numsnaps=10 becomes 49.8ns from 37.6ns.  That seems a bit shocking.
Maybe you can claim that these operations aren't exactly hot spots, and
so the fact that we remain in the same power-of-ten is sufficient.  Is
that the argument?


That's right. The fast path is fast, and that's important. The slow path
becomes 30% slower, but that's acceptable.


I don't know whether a 30% slowdown will hurt anybody, but it seems
like kind of a lot, and I'm not sure I understand what corresponding
benefit we're getting.


The benefit is to make it easy for extensions to use resource owners to
track whatever resources they need to track. And arguably, the new
mechanism is nicer for built-in code, too.

I'll see if I can optimize the slow paths, to make it more palatable.


Ok, here's a new set of patches, and new test results. I replaced the 
hash function with a cheaper one. I also added the missing wrappers that 
Alvaro and Hayato Kuroda commented on, and fixed the typos that Michael 
Paquier pointed out.


In the test script, I increased the number of iterations used in the 
tests, to make them run longer and produce more stable results. There is 
still a fair amount of jitter in the results, so take any particular 
number with a grain of salt, but the overall trend is repeatable.


The results now look like this:

Unpatched
-

postgres=# \i snaptest.sql
 numkeep | numsnaps | lifo_time_ns | fifo_time_ns
-+--+--+--
   0 |1 | 32.7 | 32.3
   0 |5 | 33.0 | 32.9
   0 |   10 | 34.1 | 35.5
   0 |   60 | 32.5 | 38.3
   0 |   70 | 47.6 | 48.9
   0 |  100 | 47.9 | 49.3
   0 | 1000 | 52.9 | 52.7
   0 |1 | 61.7 | 62.4
   9 |   10 | 38.4 | 37.6
   9 |  100 | 42.3 | 42.3
   9 | 1000 | 43.9 | 45.0
   9 |1 | 62.2 | 62.5
  65 |   70 | 42.4 | 42.9
  65 |  100 | 43.2 | 43.9
  65 | 1000 | 44.0 | 45.1
  65 |1 | 62.4 | 62.6
(16 rows)

Patched
---

postgres=# \i snaptest.sql
 numkeep | numsnaps | lifo_time_ns | fifo_time_ns
-+--+--+--
   0 |1 | 32.8 | 34.2
   0 |5 | 33.8 | 32.2
   0 |   10 | 37.2 | 40.2
   0 |   60 | 40.2 | 45.1
   0 |   70 | 40.9 | 48.4
   0 |  100 | 41.9 | 45.2
   0 | 1000 | 49.0 | 55.6
   0 |1 | 62.4 | 67.2
   9 |   10 | 33.6 | 33.0
   9 |  100 | 39.6 | 39.7
   9 | 1000 | 42.2 | 45.0
   9 |1 | 60.7 | 63.9
  65 |   70 | 32.5 | 33.6
  65 |  100 | 37.5 | 39.7
  65 | 1000 | 42.3 | 46.3
  65 |1 | 61.9 | 64.8
(16 rows)

For easier side-by-side comparison, here are the same numbers with the 
patched and unpatched results side by side, and their ratio (ratio < 1 
means the patched version is faster):


LIFO tests:
 numkeep | numsnaps | unpatched | patched | ratio
-+--+---+-+---
   0 |1 |  32.7 |32.8 |  1.00
   0 |5 |  33.0 |33.8 |  1.02
   0 |   10 |  34.1 |37.2 |  1.09
   0 |   60 |  32.5 |40.2 |  1.24
   0 |   70 |  47.6 |40.9 |  0.86
   0 |  100 |  47.9 |41.9 |  0.87
   0 | 1000 |  52.9 |49.0 |  0.93
   0 |1 |  61.7 |62.4 |  1.01
   9 |   10 |  38.4 |33.6 |  0.88
   9 |  100 |  42.3 |39.6 |  0.94
   9 | 1000 |  43.9 |42.2 |  0.96
   9 |1 |  62.2 |60.7 |  0.98
  65 |   70 |  42.4 |32.5 |  0.77
  65 |  100 |  43.2 |37.5 |  0.87
  65 | 1000 |  44.0 |42.3 |  0.96
  65 |1 |  62.4 |61.9 |  0.99
(16 rows)


FIFO tests:
 numkeep | numsnaps | unpatched | patched | ratio
-+--+---+-+---
   0 |1 |  32.3 |34.2 |  1.06
   0 |5 |  32.9 |32.2 |  0.98
   0 |   10 |  35.5 |40.2 |  1.13
  

Re: ResourceOwner refactoring

2021-01-19 Thread Heikki Linnakangas

On 18/01/2021 16:34, Alvaro Herrera wrote:

On 2021-Jan-18, Heikki Linnakangas wrote:


+static ResourceOwnerFuncs jit_funcs =
+{
+   /* relcache references */
+   .name = "LLVM JIT context",
+   .phase = RESOURCE_RELEASE_BEFORE_LOCKS,
+   .ReleaseResource = ResOwnerReleaseJitContext,
+   .PrintLeakWarning = ResOwnerPrintJitContextLeakWarning
+};


I think you mean jit_resowner_funcs here; "jit_funcs" is a bit
excessively vague.  Also, why did you choose not to define
ResourceOwnerRememberJIT?  You do that in other modules and it seems
better.


I did it in modules that had more than one ResourceOwnerRemeber/Forget 
call. Didn't seem worth it in functions like IncrTupleDescRefCount(), 
for example.


Hayato Kuroda also pointed that out, though. So perhaps it's better to 
be consistent, to avoid the confusion. I'll add the missing wrappers.


- Heikki




Re: ResourceOwner refactoring

2021-01-19 Thread Heikki Linnakangas

On 18/01/2021 18:11, Robert Haas wrote:

On Mon, Jan 18, 2021 at 11:11 AM Robert Haas  wrote:

On Mon, Jan 18, 2021 at 10:19 AM Heikki Linnakangas  wrote:

On 18/01/2021 16:34, Alvaro Herrera wrote:

So according to your performance benchmark, we're willing to accept a
30% performance loss on an allegedly common operation -- numkeep=0
numsnaps=10 becomes 49.8ns from 37.6ns.  That seems a bit shocking.
Maybe you can claim that these operations aren't exactly hot spots, and
so the fact that we remain in the same power-of-ten is sufficient.  Is
that the argument?


That's right. The fast path is fast, and that's important. The slow path
becomes 30% slower, but that's acceptable.


I don't know whether a 30% slowdown will hurt anybody, but it seems
like kind of a lot, and I'm not sure I understand what corresponding
benefit we're getting.


The benefit is to make it easy for extensions to use resource owners to 
track whatever resources they need to track. And arguably, the new 
mechanism is nicer for built-in code, too.


I'll see if I can optimize the slow paths, to make it more palatable.

- Heikki




Re: ResourceOwner refactoring

2021-01-18 Thread Michael Paquier
On Mon, Jan 18, 2021 at 02:22:33PM +0200, Heikki Linnakangas wrote:
> diff --git a/src/common/cryptohash_openssl.c b/src/common/cryptohash_openssl.c
> index 551ec392b60..642e72d8c0f 100644
> --- a/src/common/cryptohash_openssl.c
> +++ b/src/common/cryptohash_openssl.c
[...]
> +/* ResourceOwner callbacks to hold JitContexts  */

Slight copy-paste error here.

>   /*
>* Ensure, while the spinlock's not yet held, that there's a free
> -  * refcount entry.
> +  * refcount entry and that the resoure owner has room to remember the
> +  * pin.
s/resoure/resource/.
--
Michael


signature.asc
Description: PGP signature


Re: ResourceOwner refactoring

2021-01-18 Thread Robert Haas
On Mon, Jan 18, 2021 at 11:11 AM Robert Haas  wrote:
> On Mon, Jan 18, 2021 at 10:19 AM Heikki Linnakangas  wrote:
> > On 18/01/2021 16:34, Alvaro Herrera wrote:
> > > So according to your performance benchmark, we're willing to accept a
> > > 30% performance loss on an allegedly common operation -- numkeep=0
> > > numsnaps=10 becomes 49.8ns from 37.6ns.  That seems a bit shocking.
> > > Maybe you can claim that these operations aren't exactly hot spots, and
> > > so the fact that we remain in the same power-of-ten is sufficient.  Is
> > > that the argument?
> >
> > That's right. The fast path is fast, and that's important. The slow path
> > becomes 30% slower, but that's acceptable.

Sorry for the empty message.

I don't know whether a 30% slowdown will hurt anybody, but it seems
like kind of a lot, and I'm not sure I understand what corresponding
benefit we're getting.

-- 
Robert Haas
EDB: http://www.enterprisedb.com




Re: ResourceOwner refactoring

2021-01-18 Thread Robert Haas
On Mon, Jan 18, 2021 at 10:19 AM Heikki Linnakangas  wrote:
>
> On 18/01/2021 16:34, Alvaro Herrera wrote:
> > So according to your performance benchmark, we're willing to accept a
> > 30% performance loss on an allegedly common operation -- numkeep=0
> > numsnaps=10 becomes 49.8ns from 37.6ns.  That seems a bit shocking.
> > Maybe you can claim that these operations aren't exactly hot spots, and
> > so the fact that we remain in the same power-of-ten is sufficient.  Is
> > that the argument?
>
> That's right. The fast path is fast, and that's important. The slow path
> becomes 30% slower, but that's acceptable.
>
> - Heikki
>
>


-- 
Robert Haas
EDB: http://www.enterprisedb.com




Re: ResourceOwner refactoring

2021-01-18 Thread Heikki Linnakangas

On 18/01/2021 16:34, Alvaro Herrera wrote:

So according to your performance benchmark, we're willing to accept a
30% performance loss on an allegedly common operation -- numkeep=0
numsnaps=10 becomes 49.8ns from 37.6ns.  That seems a bit shocking.
Maybe you can claim that these operations aren't exactly hot spots, and
so the fact that we remain in the same power-of-ten is sufficient.  Is
that the argument?


That's right. The fast path is fast, and that's important. The slow path 
becomes 30% slower, but that's acceptable.


- Heikki




Re: ResourceOwner refactoring

2021-01-18 Thread Alvaro Herrera
On 2021-Jan-18, Heikki Linnakangas wrote:

> +static ResourceOwnerFuncs jit_funcs =
> +{
> + /* relcache references */
> + .name = "LLVM JIT context",
> + .phase = RESOURCE_RELEASE_BEFORE_LOCKS,
> + .ReleaseResource = ResOwnerReleaseJitContext,
> + .PrintLeakWarning = ResOwnerPrintJitContextLeakWarning
> +};

I think you mean jit_resowner_funcs here; "jit_funcs" is a bit
excessively vague.  Also, why did you choose not to define
ResourceOwnerRememberJIT?  You do that in other modules and it seems
better.

> +static ResourceOwnerFuncs catcache_funcs =
> +{
> + /* catcache references */
> + .name = "catcache reference",
> + .phase = RESOURCE_RELEASE_AFTER_LOCKS,
> + .ReleaseResource = ResOwnerReleaseCatCache,
> + .PrintLeakWarning = ResOwnerPrintCatCacheLeakWarning
> +};
> +
> +static ResourceOwnerFuncs catlistref_funcs =
> +{
> + /* catcache-list pins */
> + .name = "catcache list reference",
> + .phase = RESOURCE_RELEASE_AFTER_LOCKS,
> + .ReleaseResource = ResOwnerReleaseCatCacheList,
> + .PrintLeakWarning = ResOwnerPrintCatCacheListLeakWarning
> +};

Similar naming issue here, I say.


> diff --git a/src/common/cryptohash_openssl.c b/src/common/cryptohash_openssl.c
> index 551ec392b60..642e72d8c0f 100644
> --- a/src/common/cryptohash_openssl.c
> +++ b/src/common/cryptohash_openssl.c
> @@ -60,6 +59,21 @@ struct pg_cryptohash_ctx
>  #endif
>  };
>  
> +/* ResourceOwner callbacks to hold JitContexts  */
> +#ifndef FRONTEND
> +static void ResOwnerReleaseCryptoHash(Datum res);
> +static void ResOwnerPrintCryptoHashLeakWarning(Datum res);

The comment is wrong -- "Crypohashes", not "JitContexts".


So according to your performance benchmark, we're willing to accept a
30% performance loss on an allegedly common operation -- numkeep=0
numsnaps=10 becomes 49.8ns from 37.6ns.  That seems a bit shocking.
Maybe you can claim that these operations aren't exactly hot spots, and
so the fact that we remain in the same power-of-ten is sufficient.  Is
that the argument?

-- 
Álvaro Herrera39°49'30"S 73°17'W
"The Postgresql hackers have what I call a "NASA space shot" mentality.
 Quite refreshing in a world of "weekend drag racer" developers."
(Scott Marlowe)




Re: ResourceOwner refactoring

2021-01-18 Thread Heikki Linnakangas

On 18/01/2021 09:49, kuroda.hay...@fujitsu.com wrote:

Dear Heikki,

I apologize for sending again.


I will check another ResourceOwnerEnlarge() if I have a time.


I checked all ResourceOwnerEnlarge() types after applying patch 0001.
(searched by "grep -rI ResourceOwnerEnlarge")
No problem was found.


Great, thanks!

Here are more details on the performance testing I did:

Unpatched
--

postgres=# \i snaptest.sql
 numkeep | numsnaps | lifo_time_ns | fifo_time_ns
-+--+--+--
   0 |1 | 38.2 | 34.8
   0 |5 | 35.7 | 35.4
   0 |   10 | 37.6 | 37.6
   0 |   60 | 35.4 | 39.2
   0 |   70 | 55.0 | 53.8
   0 |  100 | 48.6 | 48.9
   0 | 1000 | 54.8 | 57.0
   0 |1 | 63.9 | 67.1
   9 |   10 | 39.3 | 38.8
   9 |  100 | 44.0 | 42.5
   9 | 1000 | 45.8 | 47.1
   9 |1 | 64.2 | 66.8
  65 |   70 | 45.7 | 43.7
  65 |  100 | 42.9 | 43.7
  65 | 1000 | 46.9 | 45.7
  65 |1 | 65.0 | 64.5
(16 rows)


Patched


postgres=# \i snaptest.sql
 numkeep | numsnaps | lifo_time_ns | fifo_time_ns
-+--+--+--
   0 |1 | 35.3 | 33.8
   0 |5 | 34.8 | 34.6
   0 |   10 | 49.8 | 51.4
   0 |   60 | 53.1 | 57.2
   0 |   70 | 53.2 | 59.6
   0 |  100 | 53.1 | 58.2
   0 | 1000 | 63.1 | 69.7
   0 |1 | 83.3 | 83.5
   9 |   10 | 35.0 | 35.1
   9 |  100 | 55.4 | 54.1
   9 | 1000 | 56.8 | 60.3
   9 |1 | 88.6 | 82.0
  65 |   70 | 36.4 | 35.1
  65 |  100 | 52.4 | 53.0
  65 | 1000 | 55.8 | 59.4
  65 |1 | 79.3 | 80.3
(16 rows)

About the test:

Each test call Register/UnregisterSnapshot in a loop. numsnaps is the 
number of snapshots that are registered in each iteration. For exmaple, 
on the second line with numkeep=0 and numnaps=5, each iteration in the 
LIFO test does essentially:


rs[0] = RegisterSnapshot()
rs[1] = RegisterSnapshot()
rs[2] = RegisterSnapshot()
rs[3] = RegisterSnapshot()
rs[4] = RegisterSnapshot()
UnregisterSnapshot(rs[4]);
UnregisterSnapshot(rs[3]);
UnregisterSnapshot(rs[2]);
UnregisterSnapshot(rs[1]);
UnregisterSnapshot(rs[0]);

In the FIFO tests, the Unregister calls are made in reverse order.

When numkeep is non zero, that many snapshots are registered once at the 
beginning of the test, and released only after the test loop. So this 
simulates the scenario that you remember a bunch of resources and hold 
them for a long time, and while holding them, you do many more 
remember/forget calls.


Based on this test, this patch makes things slightly slower overall. 
However, *not* in the cases that matter the most I believe. That's the 
cases where the (numsnaps - numkeep) is small, so that the hot action 
happens in the array and the hashing is not required. In my testing 
earlier, about 95% of the ResourceOwnerRemember calls in the regression 
tests fall into that category.


There are a few simple things we could do speed this up, if needed. A 
different hash function might be cheaper, for example. And we could 
inline the fast paths of the ResourceOwnerEnlarge and 
ResourceOwnerRemember() into the callers. But I didn't do those things 
as part of this patch, because it wouldn't be a fair comparison; we 
could do those with the old implementation too. And unless we really 
need the speed, it's more readable this way.


I'm OK with these results. Any objections?

Attached are the patches again. Same as previous version, except for a 
couple of little comment changes. And a third patch containing the 
source for the performance test.


- Heikki
>From ca5ef92d30d6d2d77e6758da3ae60d075451d5c1 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas 
Date: Wed, 13 Jan 2021 12:21:28 +0200
Subject: [PATCH v5 1/2] Move a few ResourceOwnerEnlarge() calls for safety and
 clarity.

These are functions where quite a lot of things happen between the
ResourceOwnerEnlarge and ResourceOwnerRemember calls. It's important that
there are no unrelated ResourceOwnerRemember() calls in the code
inbetween, otherwise the entry reserved by the ResourceOwnerEnlarge() call
might be used up by the intervening ResourceOwnerRemember() and not be
available at the intended ResourceOwnerRemember() call anymore. The longer
the code path between them is, the harder it is to verify that.

In 

RE: ResourceOwner refactoring

2021-01-17 Thread kuroda.hay...@fujitsu.com
Dear Heikki,

I apologize for sending again.

> I will check another ResourceOwnerEnlarge() if I have a time.

I checked all ResourceOwnerEnlarge() types after applying patch 0001.
(searched by "grep -rI ResourceOwnerEnlarge")
No problem was found.

Hayato Kuroda
FUJITSU LIMITED



RE: ResourceOwner refactoring

2021-01-15 Thread kuroda.hay...@fujitsu.com
Dear Heikki,

> Hmm. ResOwnerReleaseTupleDesc() does exist, those functions are needed 
> for the callbacks. I think you meant the wrappers around 
> ResourceOwnerRemember and ResourceOwnerForget, like 
> ResourceOwnerRememberCatCacheRef(). I admit those are not fully 
> consistent: I didn't bother with the wrapper functions when there is 
> only one caller.

Yeah, I meant it. And I prefer your policy.

> Hmm. ResOwnerReleaseTupleDesc() does exist, those functions are needed 
> for the callbacks. I think you meant the wrappers around 
> ResourceOwnerRemember and ResourceOwnerForget, like 
> ResourceOwnerRememberCatCacheRef(). I admit those are not fully 
> consistent: I didn't bother with the wrapper functions when there is 
> only one caller.

Good job. I confirmed your fixes, and I confirmed it looks fine.
I will check another ResourceOwnerEnlarge() if I have a time.

> I've been working on performance testing too.

I'm looking forward to seeing it.

Best regards,
Hayato Kuroda
FUJITSU LIMITED



Re: ResourceOwner refactoring

2021-01-14 Thread Heikki Linnakangas

On 14/01/2021 12:15, kuroda.hay...@fujitsu.com wrote:

I put some comments.


Thanks for the review!


Throughout, some components don’t have helper functions.
(e.g. catcache has ResOwnerReleaseCatCache, but tupdesc doesn't.)
I think it should be unified.


Hmm. ResOwnerReleaseTupleDesc() does exist, those functions are needed 
for the callbacks. I think you meant the wrappers around 
ResourceOwnerRemember and ResourceOwnerForget, like 
ResourceOwnerRememberCatCacheRef(). I admit those are not fully 
consistent: I didn't bother with the wrapper functions when there is 
only one caller.



[catcache.c]

+/* support for catcache refcount management */
+static inline void
+ResourceOwnerEnlargeCatCacheRefs(ResourceOwner owner)
+{
+   ResourceOwnerEnlarge(owner);
+}


This function is not needed.


Removed.


[syscache.c]

-static CatCache *SysCache[SysCacheSize];
+ CatCache *SysCache[SysCacheSize];


Is it right? Compilation is done even if this variable is static...


Fixed. (It was a leftover from when I was playing with Kyotaro's 
"catcachebench" tool from another thread).



[fd.c, dsm.c]
In these files helper functions are implemented as the define directive.
Could you explain the reason? For the performance?


No particular reason. I turned them all into macros for consistency.


Previously, this was OK, because resources AAA and BBB were kept in
separate arrays. But after this patch, it's not guaranteed that the
ResourceOwnerRememberBBB() will find an empty slot.

I don't think we do that currently, but to be sure, I'm planning to grep
ResourceOwnerRemember and look at each call carefullly. And perhaps we
can add an assertion for this, although I'm not sure where.


Indeed, but I think this line works well, isn't it?


Assert(owner->narr < RESOWNER_ARRAY_SIZE);


That catches cases where you actually overrun the array, but it doesn't 
catch unsafe patterns when there happens to be enough space left in the 
array. For example, if you have code like this:


/* Make sure there's room for one more entry, but remember *two* things */
ResourceOwnerEnlarge();
ResourceOwnerRemember(foo);
ResourceOwnerRemember(bar);

That is not safe, but it would only fail the assertion if the first 
ResourceOwnerRemember() call happens to consume the last remaining slot 
in the array.



I checked all the callers of ResourceOwnerEnlarge() to see if they're 
safe. A couple of places seemed a bit suspicious. I fixed them by moving 
the ResourceOwnerEnlarge() calls closer to the ResourceOwnerRemember() 
calls, so that it's now easier to see that they are correct. See first 
attached patch.


The second patch is an updated version of the main patch, fixing all the 
little things you and Michael pointed out since the last patch version.


I've been working on performance testing too. I'll post more numbers 
later, but preliminary result from some micro-benchmarking suggests that 
the new code is somewhat slower, except in the common case that the 
object to remember and forget fits in the array. When running the 
regression test suite, about 96% of ResourceOwnerForget() calls fit in 
the array. I think that's acceptable.


- Heikki
>From 915ef70c8e227fe1c9d403bf6414d54b6673ddae Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas 
Date: Wed, 13 Jan 2021 12:21:28 +0200
Subject: [PATCH v4 1/2] Move a few ResourceOwnerEnlarge() calls for safety and
 clarity.

These are functions where quite a lot of things happen between the
ResourceOwnerEnlarge and ResourceOwnerRemember calls. It's important that
there are no unrelated ResourceOwnerRemember() calls in the code
inbetween, otherwise the entry reserved by the ResourceOwnerEnlarge() call
might be used up by the intervening ResourceOwnerRemember() and not be
available at the intended ResourceOwnerRemember() call anymore. The longer
the code path between them is, the harder it is to verify that.

In bufmgr.c, there is a function similar to ResourceOwnerEnlarge(),
to ensure that the private refcount array has enough space. The
ReservePrivateRefCountEntry() calls, analogous to ResourceOwnerEnlarge(),
were made at different places than the ResourceOwnerEnlarge() calls.
Move the ResourceOwnerEnlarge() calls together with the
ReservePrivateRefCountEntry() calls for consistency.
---
 src/backend/storage/buffer/bufmgr.c   | 39 +++
 src/backend/storage/buffer/localbuf.c |  3 +++
 src/backend/utils/cache/catcache.c| 13 ++---
 3 files changed, 28 insertions(+), 27 deletions(-)

diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 561c212092f..cde869e7d64 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -738,9 +738,6 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 
 	*hit = false;
 
-	/* Make sure we will have room to remember the buffer pin */
-	ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
-
 	isExtend = (blockNum == P_NEW);
 
 	

RE: ResourceOwner refactoring

2021-01-14 Thread kuroda.hay...@fujitsu.com
Hi,

I put some comments.

Throughout, some components don’t have helper functions.
(e.g. catcache has ResOwnerReleaseCatCache, but tupdesc doesn't.)
I think it should be unified.

[catcache.c]
> +/* support for catcache refcount management */
> +static inline void
> +ResourceOwnerEnlargeCatCacheRefs(ResourceOwner owner)
> +{
> +   ResourceOwnerEnlarge(owner);
> +}

This function is not needed.

[syscache.c]
> -static CatCache *SysCache[SysCacheSize];
> + CatCache *SysCache[SysCacheSize];

Is it right? Compilation is done even if this variable is static...

[fd.c, dsm.c]
In these files helper functions are implemented as the define directive.
Could you explain the reason? For the performance?

> Previously, this was OK, because resources AAA and BBB were kept in 
> separate arrays. But after this patch, it's not guaranteed that the 
> ResourceOwnerRememberBBB() will find an empty slot.
>
> I don't think we do that currently, but to be sure, I'm planning to grep 
> ResourceOwnerRemember and look at each call carefullly. And perhaps we 
> can add an assertion for this, although I'm not sure where.

Indeed, but I think this line works well, isn't it?

>   Assert(owner->narr < RESOWNER_ARRAY_SIZE);

Best regards,
Hayato Kuroda
FUJITSU LIMITED



RE: ResourceOwner refactoring

2021-01-12 Thread kuroda.hay...@fujitsu.com
Dear Heikki,

Thank you for rebasing it, I confirmed it can be applied.
I will check the source.

Now I put the very elementary comment.
ResourceOwnerEnlarge(), ResourceOwnerRemember(), and ResourceOwnerForget()
are exported routines.
They should put below L418.

Best regards,
Hayato Kuroda
FUJITSU LIMITED



Re: ResourceOwner refactoring

2021-01-12 Thread Michael Paquier
On Wed, Jan 13, 2021 at 09:18:57AM +0200, Heikki Linnakangas wrote:
> --- a/src/common/cryptohash_openssl.c
> +++ b/src/common/cryptohash_openssl.c
> +static ResourceOwnerFuncs cryptohash_funcs =
> +{
> + /* relcache references */
> + .name = "LLVM JIT context",
> + .phase = RESOURCE_RELEASE_BEFORE_LOCKS,
> + .ReleaseResource = ResOwnerReleaseCryptoHash,
> + .PrintLeakWarning = ResOwnerPrintCryptoHashLeakWarning,
> +};
> +#endif

Looks like a copy-paste error here.
--
Michael


signature.asc
Description: PGP signature


Re: ResourceOwner refactoring

2021-01-12 Thread Heikki Linnakangas

On 13/01/2021 03:55, kuroda.hay...@fujitsu.com wrote:

Dear Heikki,

I'm also interested in this patch, but it cannot be applied to the current 
HEAD...

$ git apply ~/v2-0001-Make-resowners-more-easily-extensible.patch
error: patch failed: src/common/cryptohash_openssl.c:57
error: src/common/cryptohash_openssl.c: patch does not apply
error: patch failed: src/include/utils/resowner_private.h:1
error: src/include/utils/resowner_private.h: patch does not apply


Here's a rebased version. Thanks!

- Heikki
>From 31b1b4661823cf38b2d4c5931f96c477b6441271 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas 
Date: Wed, 16 Dec 2020 17:26:03 +0200
Subject: [PATCH v3 1/1] Make resowners more easily extensible.

Use a single array and hash, instead of one for each object kind.
---
 src/backend/access/common/tupdesc.c   |   42 +-
 src/backend/jit/jit.c |2 -
 src/backend/jit/llvm/llvmjit.c|   40 +-
 src/backend/storage/buffer/bufmgr.c   |   43 +-
 src/backend/storage/buffer/localbuf.c |2 +-
 src/backend/storage/file/fd.c |   44 +-
 src/backend/storage/ipc/dsm.c |   44 +-
 src/backend/storage/lmgr/lock.c   |2 +-
 src/backend/utils/cache/catcache.c|   98 ++-
 src/backend/utils/cache/plancache.c   |   50 +-
 src/backend/utils/cache/relcache.c|   39 +-
 src/backend/utils/cache/syscache.c|2 +-
 src/backend/utils/resowner/README |   19 +-
 src/backend/utils/resowner/resowner.c | 1139 +++--
 src/backend/utils/time/snapmgr.c  |   38 +-
 src/common/cryptohash_openssl.c   |   45 +-
 src/include/storage/buf_internals.h   |   15 +
 src/include/utils/catcache.h  |3 -
 src/include/utils/plancache.h |2 +
 src/include/utils/resowner.h  |   41 +-
 src/include/utils/resowner_private.h  |  105 ---
 21 files changed, 795 insertions(+), 1020 deletions(-)
 delete mode 100644 src/include/utils/resowner_private.h

diff --git a/src/backend/access/common/tupdesc.c b/src/backend/access/common/tupdesc.c
index 902f59440cd..7fa44d5f7ee 100644
--- a/src/backend/access/common/tupdesc.c
+++ b/src/backend/access/common/tupdesc.c
@@ -29,9 +29,21 @@
 #include "utils/acl.h"
 #include "utils/builtins.h"
 #include "utils/datum.h"
-#include "utils/resowner_private.h"
+#include "utils/resowner.h"
 #include "utils/syscache.h"
 
+/* ResourceOwner callbacks to hold tupledesc references  */
+static void ResOwnerReleaseTupleDesc(Datum res);
+static void ResOwnerPrintTupleDescLeakWarning(Datum res);
+
+static ResourceOwnerFuncs tupdesc_resowner_funcs =
+{
+	/* relcache references */
+	.name = "tupdesc reference",
+	.phase = RESOURCE_RELEASE_AFTER_LOCKS,
+	.ReleaseResource = ResOwnerReleaseTupleDesc,
+	.PrintLeakWarning = ResOwnerPrintTupleDescLeakWarning
+};
 
 /*
  * CreateTemplateTupleDesc
@@ -376,9 +388,10 @@ IncrTupleDescRefCount(TupleDesc tupdesc)
 {
 	Assert(tupdesc->tdrefcount >= 0);
 
-	ResourceOwnerEnlargeTupleDescs(CurrentResourceOwner);
+	ResourceOwnerEnlarge(CurrentResourceOwner);
 	tupdesc->tdrefcount++;
-	ResourceOwnerRememberTupleDesc(CurrentResourceOwner, tupdesc);
+	ResourceOwnerRemember(CurrentResourceOwner, PointerGetDatum(tupdesc),
+		  _resowner_funcs);
 }
 
 /*
@@ -394,7 +407,8 @@ DecrTupleDescRefCount(TupleDesc tupdesc)
 {
 	Assert(tupdesc->tdrefcount > 0);
 
-	ResourceOwnerForgetTupleDesc(CurrentResourceOwner, tupdesc);
+	ResourceOwnerForget(CurrentResourceOwner, PointerGetDatum(tupdesc),
+		_resowner_funcs);
 	if (--tupdesc->tdrefcount == 0)
 		FreeTupleDesc(tupdesc);
 }
@@ -925,3 +939,23 @@ BuildDescFromLists(List *names, List *types, List *typmods, List *collations)
 
 	return desc;
 }
+
+
+/*
+ * ResourceOwner callbacks
+ */
+static void
+ResOwnerReleaseTupleDesc(Datum res)
+{
+	DecrTupleDescRefCount((TupleDesc) DatumGetPointer(res));
+}
+
+static void
+ResOwnerPrintTupleDescLeakWarning(Datum res)
+{
+	TupleDesc tupdesc = (TupleDesc) DatumGetPointer(res);
+
+	elog(WARNING,
+		 "TupleDesc reference leak: TupleDesc %p (%u,%d) still referenced",
+		 tupdesc, tupdesc->tdtypeid, tupdesc->tdtypmod);
+}
diff --git a/src/backend/jit/jit.c b/src/backend/jit/jit.c
index 2da300e000d..1aa04d173b4 100644
--- a/src/backend/jit/jit.c
+++ b/src/backend/jit/jit.c
@@ -26,7 +26,6 @@
 #include "jit/jit.h"
 #include "miscadmin.h"
 #include "utils/fmgrprotos.h"
-#include "utils/resowner_private.h"
 
 /* GUCs */
 bool		jit_enabled = true;
@@ -140,7 +139,6 @@ jit_release_context(JitContext *context)
 	if (provider_successfully_loaded)
 		provider.release_context(context);
 
-	ResourceOwnerForgetJIT(context->resowner, PointerGetDatum(context));
 	pfree(context);
 }
 
diff --git a/src/backend/jit/llvm/llvmjit.c b/src/backend/jit/llvm/llvmjit.c
index b0789a5fb80..64c7fd92bc7 100644
--- a/src/backend/jit/llvm/llvmjit.c
+++ b/src/backend/jit/llvm/llvmjit.c
@@ -40,7 +40,7 @@
 #include "portability/instr_time.h"
 #include "storage/ipc.h"
 #include "utils/memutils.h"
-#include "utils/resowner_private.h"
+#include 

RE: ResourceOwner refactoring

2021-01-12 Thread kuroda.hay...@fujitsu.com
Dear Heikki, 

I'm also interested in this patch, but it cannot be applied to the current 
HEAD...

$ git apply ~/v2-0001-Make-resowners-more-easily-extensible.patch
error: patch failed: src/common/cryptohash_openssl.c:57
error: src/common/cryptohash_openssl.c: patch does not apply
error: patch failed: src/include/utils/resowner_private.h:1
error: src/include/utils/resowner_private.h: patch does not apply

Best Regards,
Hayato Kuroda
FUJITSU LIMITED



Re: ResourceOwner refactoring

2020-12-16 Thread Heikki Linnakangas

On 26/11/2020 10:52, Kyotaro Horiguchi wrote:

+   for (int i = 0; i < owner->narr; i++)
{
+   if (owner->arr[i].kind->phase == phase)
+   {
+   Datum   value = owner->arr[i].item;
+   ResourceOwnerFuncs *kind = owner->arr[i].kind;
+
+   if (printLeakWarnings)
+   kind->PrintLeakWarning(value);
+   kind->ReleaseResource(value);
+   found = true;
+   }
+   /*
+* If any resources were released, check again because some of 
the
+* elements might have moved by the callbacks. We don't want to
+* miss them.
+*/
+   } while (found && owner->narr > 0);

Coundn't that missing be avoided by just not incrementing i if found?


Hmm, perhaps. ResourceOwnerForget() can move an entry from the end of 
the array to the beginning, but if it's removing the entry that we're 
just looking at, it probably can't move anything before that entry. I'm 
not very comfortable with that, though. What if the callback releases 
something else as a side effect?


This isn't super-critical for performance, and given that the array is 
very small, it's very cheap to loop through it. So I'm inclined to keep 
it safe.



+   /*
+* Like with the array, we must check again after we reach the
+* end, if any callbacks were called. XXX: We could probably
+* stipulate that the callbacks may not do certain thing, like
+* remember more references in the same resource owner, to avoid
+* that.
+*/

If I read this patch correctly, ResourceOwnerForget doesn't seem to do
such a thing for hash?


Hmm, true. I tried removing the loop and hit another issue, however: if 
the same resource has been remembered twice in the same resource owner, 
the callback might remove different reference to it than what we're 
looking at. So we need to loop anyway to handle that. Also, what if the 
callback remembers some other resource in the same resowner, causing the 
hash to grow? I'm not sure if any of the callbacks currently do that, 
but better safe than sorry. I updated the code and the comments accordingly.



Here's an updated version of this patch. I fixed the bit rot, and 
addressed all the other comment issues and such that people pointed out 
(thanks!).


TODO:

1. Performance testing. We discussed this a little bit, but I haven't 
done any more testing.


2. Before this patch, the ResourceOwnerEnlarge() calls enlarged the 
array for the particular "kind" of resource, but now they are all stored 
in the same structure. That could lead to trouble if we do something 
like this:


ResourceOwnerEnlargeAAA()
ResourceOwnerEnlargeBBB()
ResourceOwnerRememberAAA()
ResourceOwnerRememberBBB()

Previously, this was OK, because resources AAA and BBB were kept in 
separate arrays. But after this patch, it's not guaranteed that the 
ResourceOwnerRememberBBB() will find an empty slot.


I don't think we do that currently, but to be sure, I'm planning to grep 
ResourceOwnerRemember and look at each call carefullly. And perhaps we 
can add an assertion for this, although I'm not sure where.


- Heikki
>From 71a372b9a82a5b50a40386b403d256b4f6fac794 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas 
Date: Wed, 16 Dec 2020 17:26:03 +0200
Subject: [PATCH v2 1/1] Make resowners more easily extensible.

Use a single array and hash, instead of one for each object kind.
---
 src/backend/access/common/tupdesc.c   |   42 +-
 src/backend/jit/jit.c |2 -
 src/backend/jit/llvm/llvmjit.c|   40 +-
 src/backend/storage/buffer/bufmgr.c   |   43 +-
 src/backend/storage/buffer/localbuf.c |2 +-
 src/backend/storage/file/fd.c |   44 +-
 src/backend/storage/ipc/dsm.c |   44 +-
 src/backend/storage/lmgr/lock.c   |2 +-
 src/backend/utils/cache/catcache.c|   98 ++-
 src/backend/utils/cache/plancache.c   |   50 +-
 src/backend/utils/cache/relcache.c|   39 +-
 src/backend/utils/cache/syscache.c|2 +-
 src/backend/utils/resowner/README |   19 +-
 src/backend/utils/resowner/resowner.c | 1139 +++--
 src/backend/utils/time/snapmgr.c  |   38 +-
 src/common/cryptohash_openssl.c   |   45 +-
 src/include/storage/buf_internals.h   |   15 +
 src/include/utils/catcache.h  |3 -
 src/include/utils/plancache.h |2 +
 src/include/utils/resowner.h  |   41 +-
 src/include/utils/resowner_private.h  |  105 ---
 21 files changed, 795 insertions(+), 1020 deletions(-)
 delete mode 100644 src/include/utils/resowner_private.h

diff --git a/src/backend/access/common/tupdesc.c b/src/backend/access/common/tupdesc.c
index 30c30cf3a2e..c099f04f551 

Re: ResourceOwner refactoring

2020-11-26 Thread Kyotaro Horiguchi
At Thu, 19 Nov 2020 17:27:18 +0800, Julien Rouhaud  wrote 
in 
> On Thu, Nov 19, 2020 at 10:16 AM Michael Paquier  wrote:
> >
> > On Wed, Nov 18, 2020 at 10:50:08AM +0200, Heikki Linnakangas wrote:
> > > If RESOWNER_ARRAY_STATS is increased to 16, all the lookups fit in the
> > > array. But I haven't done any benchmarking to see which is faster.
> >
> > My gut tells me that your guess is right, but it would be better to be
> > sure.
> >
> > > BTW, I think there would be an easy win in the hashing codepath, by 
> > > changing
> > > to a cheaper hash function. Currently, with or without this patch, we use
> > > hash_any(). Changing that to murmurhash32() or something similar would be 
> > > a
> > > drop-in replacement.
> >
> > Good idea.
> 
> +1, and +1 for this refactoring.

+1 for making the interface more generic. I thought a similar thing
when I added an resowner array for WaitEventSet (not committed yet).
About performance, though I'm not sure about, there's no reason not to
do this as far as the resowner mechanism doesn't get slower, and +2 if
gets faster.

> I just saw a minor issue in a comment while reviewing the patch:
> 
> [...]
> + /* Is there space in the hash? If not, enlarge it. */
> 
>   /* Double the capacity of the array (capacity must stay a power of 2!) */
> - newcap = (oldcap > 0) ? oldcap * 2 : RESARRAY_INIT_SIZE;
> [...]
> 
> The existing comment is kept as-is, but it should mention that it's
> now the hash capacity that is increased.

+   /* And release old array. */
+   pfree(oldhash);

:p


+   for (int i = 0; i < owner->narr; i++)
{
+   if (owner->arr[i].kind->phase == phase)
+   {
+   Datum   value = owner->arr[i].item;
+   ResourceOwnerFuncs *kind = owner->arr[i].kind;
+
+   if (printLeakWarnings)
+   kind->PrintLeakWarning(value);
+   kind->ReleaseResource(value);
+   found = true;
+   }
+   /*
+* If any resources were released, check again because some of 
the
+* elements might have moved by the callbacks. We don't want to
+* miss them.
+*/
+   } while (found && owner->narr > 0);

Coundn't that missing be avoided by just not incrementing i if found?


+   /*
+* Like with the array, we must check again after we reach the
+* end, if any callbacks were called. XXX: We could probably
+* stipulate that the callbacks may not do certain thing, like
+* remember more references in the same resource owner, to avoid
+* that.
+*/

If I read this patch correctly, ResourceOwnerForget doesn't seem to do
such a thing for hash?

regards.

-- 
Kyotaro Horiguchi
NTT Open Source Software Center




Re: ResourceOwner refactoring

2020-11-25 Thread Michael Paquier
On Thu, Nov 19, 2020 at 06:40:19PM +0800, Craig Ringer wrote:
> [off-list for now]

Looks like you have missed something here.
--
Michael


signature.asc
Description: PGP signature


Re: ResourceOwner refactoring

2020-11-19 Thread Craig Ringer
[off-list for now]

On Tue, Nov 17, 2020 at 10:21 PM Heikki Linnakangas  wrote:

>
> 2. It's difficult for extensions to use. There is a callback mechanism
> for extensions, but it's much less convenient to use than the built-in
> functions. The pgcrypto code uses the callbacks currently, and Michael's
> patch [2] would move that support for tracking OpenSSL contexts to the
> core, which makes it a lot more convenient for pgcrypto. Wouldn't it be
> nice if extensions could have the same ergonomics as built-in code, if
> they need to track external resources?
>

Yes, in general I'm a big fan of making life easier for extensions (see
postscript), and this looks helpful. It's certainly fairly clear to read.
I'm in-principle in favour, though I haven't read the old resowner code
closely enough to have a strong opinion.

I haven't done thorough performance testing of this, but with some quick
> testing with Kyotaro's "catcachebench" to stress-test syscache lookups,
> this performs a little better. In that test, all the activity happens in
> the small array portion, but I believe that's true for most use cases.
>
> Thoughts? Can anyone suggest test scenarios to verify the performance of
> this?
>

I didn't think of much really.

I have tossed together a patch on top of yours that adds some
systemtap/dtrace tracepoints and provides a demo systemtap script that
shows some basic stats collection done using them. My intention was to make
it easier to see where the work in the resource owner code was being done.
But after I did the initial version I realised that in this case it
probably doesn't add a lot of value over using targeted profiling of only
functions in resowner.c and their callees which is easy enough using perf
and regular DWARF debuginfo. So I didn't land up polishing it and adding
stats for the other counters. It's attached anyway, in case it's
interesting or useful to anyone.

P.S. At some point I want to tackle some things similar to what you've done
here for other kinds of backend resources. In particular, to allow
extensions to register their own heavyweight lock types - with the ability
to handle lock timeouts, and add callbacks in the deadlock detector to
allow extensions to register dependency edges that are not natively visible
to the deadlock detector and to choose victim priorities. Also some
enhancements to how pg_depend tracking can be used by extensions. And I
want to help get the proposed custom ProcSignal changes in too. I think
there's a whole lot to be done to make extensions easier and safer to
author in general, like providing a simple way to do error trapping and
recovery in an extension mainloop without copy/pasting a bunch of
PostgresMain code, better default signal handlers, startup/shutdown that
shares more with user backends, etc. Right now it's quite tricky to get
bgworkers to behave well. 
From 9f3e6978b1bbfeb8ca3f1cac42e86c4731143404 Mon Sep 17 00:00:00 2001
From: Craig Ringer 
Date: Wed, 18 Nov 2020 14:30:08 +0800
Subject: [PATCH] Poc of systemtap probes for resowner timings

---
 resowner.stp  | 106 ++
 src/backend/utils/probes.d|  22 ++
 src/backend/utils/resowner/resowner.c |  96 +--
 3 files changed, 216 insertions(+), 8 deletions(-)
 create mode 100644 resowner.stp

diff --git a/resowner.stp b/resowner.stp
new file mode 100644
index 00..24b37b39e6
--- /dev/null
+++ b/resowner.stp
@@ -0,0 +1,106 @@
+# PATH="$(dirname $(realpath $(find build/ -name postgres -type f))):$PATH" /usr/local/systemtap/bin/stap -v ./resowner.stp
+
+/*
+.mark("resowner__created") $arg1:long $arg2:long
+.mark("resowner__delete__done") $arg1:long $arg2:long
+.mark("resowner__delete__start") $arg1:long $arg2:long
+.mark("resowner__enlarge__done") $arg1:long $arg2:long $arg3:long
+.mark("resowner__enlarge__skipped") $arg1:long $arg2:long
+.mark("resowner__enlarge__start") $arg1:long $arg2:long $arg3:long $arg4:long
+.mark("resowner__forget__done") $arg1:long $arg2:long $arg3:long $arg4:long $arg5:long
+.mark("resowner__forget__start") $arg1:long $arg2:long $arg3:long $arg4:long
+.mark("resowner__new__parent__done") $arg1:long
+.mark("resowner__new__parent__start") $arg1:long $arg2:long $arg3:long
+.mark("resowner__release__all__done") $arg1:long $arg2:long $arg3:long
+.mark("resowner__release__all__hash")
+.mark("resowner__release__all__start") $arg1:long $arg2:long $arg3:long $arg4:long $arg5:long
+.mark("resowner__release__done") $arg1:long $arg2:long
+.mark("resowner__release__plancacherefs__done") $arg1:long $arg2:long
+.mark("resowner__release__plancacherefs__start") $arg1:long $arg2:long
+.mark("resowner__release__start") $arg1:long $arg2:long $arg3:long $arg4:long $arg5:long
+.mark("resowner__remembered") $arg1:long $arg2:long $arg3:long $arg4:long
+*/
+
+@define probe_prefix %( sprintf("[%6d] %20s %20s %8x %20s", pid(), application_names[pid()], $$name, owner, owner_name) %)
+
+probe pg = 

Re: ResourceOwner refactoring

2020-11-19 Thread Julien Rouhaud
On Thu, Nov 19, 2020 at 10:16 AM Michael Paquier  wrote:
>
> On Wed, Nov 18, 2020 at 10:50:08AM +0200, Heikki Linnakangas wrote:
> > If RESOWNER_ARRAY_STATS is increased to 16, all the lookups fit in the
> > array. But I haven't done any benchmarking to see which is faster.
>
> My gut tells me that your guess is right, but it would be better to be
> sure.
>
> > BTW, I think there would be an easy win in the hashing codepath, by changing
> > to a cheaper hash function. Currently, with or without this patch, we use
> > hash_any(). Changing that to murmurhash32() or something similar would be a
> > drop-in replacement.
>
> Good idea.

+1, and +1 for this refactoring.

I just saw a minor issue in a comment while reviewing the patch:

[...]
+ /* Is there space in the hash? If not, enlarge it. */

  /* Double the capacity of the array (capacity must stay a power of 2!) */
- newcap = (oldcap > 0) ? oldcap * 2 : RESARRAY_INIT_SIZE;
[...]

The existing comment is kept as-is, but it should mention that it's
now the hash capacity that is increased.




Re: ResourceOwner refactoring

2020-11-18 Thread Michael Paquier
On Wed, Nov 18, 2020 at 10:50:08AM +0200, Heikki Linnakangas wrote:
> If RESOWNER_ARRAY_STATS is increased to 16, all the lookups fit in the
> array. But I haven't done any benchmarking to see which is faster.

My gut tells me that your guess is right, but it would be better to be
sure.

> BTW, I think there would be an easy win in the hashing codepath, by changing
> to a cheaper hash function. Currently, with or without this patch, we use
> hash_any(). Changing that to murmurhash32() or something similar would be a
> drop-in replacement.

Good idea.
--
Michael


signature.asc
Description: PGP signature


Re: ResourceOwner refactoring

2020-11-18 Thread Heikki Linnakangas

On 18/11/2020 10:06, Michael Paquier wrote:

On Tue, Nov 17, 2020 at 04:21:29PM +0200, Heikki Linnakangas wrote:

Attached patch refactors the ResourceOwner internals to do that.


+ * Size of the small fixed-size array to hold most-recently remembered 
resources.
   */
-#define RESARRAY_INIT_SIZE 16
+#define RESOWNER_ARRAY_SIZE 8
Why did you choose this size for the initial array?


Just a guess. The old init size was 16 Datums. The entries in the new 
array are twice as large, Datum+pointer.


The "RESOWNER_STATS" #ifdef blocks can be enabled to check how many 
lookups fit in the array. With pgbench, RESOWNER_ARRAY_SIZE 8:


RESOWNER STATS: lookups: array 235, hash 32

If RESOWNER_ARRAY_STATS is increased to 16, all the lookups fit in the 
array. But I haven't done any benchmarking to see which is faster.


BTW, I think there would be an easy win in the hashing codepath, by 
changing to a cheaper hash function. Currently, with or without this 
patch, we use hash_any(). Changing that to murmurhash32() or something 
similar would be a drop-in replacement.


- Heikki




Re: ResourceOwner refactoring

2020-11-18 Thread Michael Paquier
On Tue, Nov 17, 2020 at 04:21:29PM +0200, Heikki Linnakangas wrote:
> 2. It's difficult for extensions to use. There is a callback mechanism for
> extensions, but it's much less convenient to use than the built-in
> functions. The pgcrypto code uses the callbacks currently, and Michael's
> patch [2] would move that support for tracking OpenSSL contexts to the core,
> which makes it a lot more convenient for pgcrypto. Wouldn't it be nice if
> extensions could have the same ergonomics as built-in code, if they need to
> track external resources?

+1.  True that the current interface is a bit hard to grasp, one can
easily miss that showing reference leaks is very important if an
allocation happens out of the in-core palloc machinery at commit time.
(The patch you are referring to is not really popular, but that does
not prevent this thread to move on on its own.)

> Attached patch refactors the ResourceOwner internals to do that.

+ * Size of the small fixed-size array to hold most-recently remembered 
resources.
  */
-#define RESARRAY_INIT_SIZE 16
+#define RESOWNER_ARRAY_SIZE 8
Why did you choose this size for the initial array?

+extern const char *ResourceOwnerGetName(ResourceOwner owner);
This is only used in resowner.c, at one place.

@@ -140,7 +139,6 @@ jit_release_context(JitContext *context)
if (provider_successfully_loaded)
provider.release_context(context);

-   ResourceOwnerForgetJIT(context->resowner, PointerGetDatum(context));
[...]
+   ResourceOwnerForget(context->resowner, PointerGetDatum(context), 
_funcs);
This moves the JIT context release from jit.c to llvm.c.  I think
that's indeed more consistent, and correct.  It would be better to
check this one with Andres, though.

> I haven't done thorough performance testing of this, but with some quick
> testing with Kyotaro's "catcachebench" to stress-test syscache lookups, this
> performs a little better. In that test, all the activity happens in the
> small array portion, but I believe that's true for most use cases.



> Thoughts? Can anyone suggest test scenarios to verify the performance of
> this?

Playing with catcache.c is the first thing that came to my mind.
After that some micro-benchmarking with DSM or snapshots?  I am not
sure if we would notice a difference in a real-life scenario, say even
a pgbench -S with prepared queries.
--
Michael


signature.asc
Description: PGP signature


ResourceOwner refactoring

2020-11-17 Thread Heikki Linnakangas
Two recent patches that I have reviewed have reminded me that the 
ResourceOwner interface is a bit clunky. There are two issues:


1. Performance. It's fast enough in most use, but when I was testing 
Kyotaro's catcache patches [1], the Resowner functions to track catcache 
references nevertheless showed up, accounting for about 20% of the CPU 
time used by catcache lookups.


2. It's difficult for extensions to use. There is a callback mechanism 
for extensions, but it's much less convenient to use than the built-in 
functions. The pgcrypto code uses the callbacks currently, and Michael's 
patch [2] would move that support for tracking OpenSSL contexts to the 
core, which makes it a lot more convenient for pgcrypto. Wouldn't it be 
nice if extensions could have the same ergonomics as built-in code, if 
they need to track external resources?


Attached patch refactors the ResourceOwner internals to do that.

The current code in HEAD has a separate array for each "kind" of object 
that can be tracked. The number of different kinds of objects has grown 
over the years, currently there is support for tracking buffers, files, 
catcache, relcache and plancache references, tupledescs, snapshots, DSM 
segments and LLVM JIT contexts. And locks, which are a bit special.


In the patch, I'm using the same array to hold all kinds of objects, and 
carry another field with each tracked object, to tell what kind of an 
object it is. An extension can define a new object kind, by defining 
Release and PrintLeakWarning callback functions for it. The code in 
resowner.c is now much more generic, as it doesn't need to know about 
all the different object kinds anymore (with a couple of exceptions). In 
the new scheme, there is one small fixed-size array to hold a few most 
recently-added references, that is linear-searched, and older entries 
are moved to a hash table.


I haven't done thorough performance testing of this, but with some quick 
testing with Kyotaro's "catcachebench" to stress-test syscache lookups, 
this performs a little better. In that test, all the activity happens in 
the small array portion, but I believe that's true for most use cases.


Thoughts? Can anyone suggest test scenarios to verify the performance of 
this?


[1] 
https://www.postgresql.org/message-id/20201106.172958.1103727352904717607.horikyota.ntt%40gmail.com


[2] https://www.postgresql.org/message-id/20201113031429.gb1...@paquier.xyz

- Heikki
diff --git a/src/backend/access/common/tupdesc.c b/src/backend/access/common/tupdesc.c
index 30c30cf3a2e..c099f04f551 100644
--- a/src/backend/access/common/tupdesc.c
+++ b/src/backend/access/common/tupdesc.c
@@ -29,9 +29,21 @@
 #include "utils/acl.h"
 #include "utils/builtins.h"
 #include "utils/datum.h"
-#include "utils/resowner_private.h"
+#include "utils/resowner.h"
 #include "utils/syscache.h"
 
+/* ResourceOwner callbacks to hold tupledesc references  */
+static void ResOwnerReleaseTupleDesc(Datum res);
+static void ResOwnerPrintTupleDescLeakWarning(Datum res);
+
+static ResourceOwnerFuncs tupdesc_resowner_funcs =
+{
+	/* relcache references */
+	.name = "tupdesc reference",
+	.phase = RESOURCE_RELEASE_AFTER_LOCKS,
+	.ReleaseResource = ResOwnerReleaseTupleDesc,
+	.PrintLeakWarning = ResOwnerPrintTupleDescLeakWarning
+};
 
 /*
  * CreateTemplateTupleDesc
@@ -376,9 +388,10 @@ IncrTupleDescRefCount(TupleDesc tupdesc)
 {
 	Assert(tupdesc->tdrefcount >= 0);
 
-	ResourceOwnerEnlargeTupleDescs(CurrentResourceOwner);
+	ResourceOwnerEnlarge(CurrentResourceOwner);
 	tupdesc->tdrefcount++;
-	ResourceOwnerRememberTupleDesc(CurrentResourceOwner, tupdesc);
+	ResourceOwnerRemember(CurrentResourceOwner, PointerGetDatum(tupdesc),
+		  _resowner_funcs);
 }
 
 /*
@@ -394,7 +407,8 @@ DecrTupleDescRefCount(TupleDesc tupdesc)
 {
 	Assert(tupdesc->tdrefcount > 0);
 
-	ResourceOwnerForgetTupleDesc(CurrentResourceOwner, tupdesc);
+	ResourceOwnerForget(CurrentResourceOwner, PointerGetDatum(tupdesc),
+		_resowner_funcs);
 	if (--tupdesc->tdrefcount == 0)
 		FreeTupleDesc(tupdesc);
 }
@@ -925,3 +939,23 @@ BuildDescFromLists(List *names, List *types, List *typmods, List *collations)
 
 	return desc;
 }
+
+
+/*
+ * ResourceOwner callbacks
+ */
+static void
+ResOwnerReleaseTupleDesc(Datum res)
+{
+	DecrTupleDescRefCount((TupleDesc) DatumGetPointer(res));
+}
+
+static void
+ResOwnerPrintTupleDescLeakWarning(Datum res)
+{
+	TupleDesc tupdesc = (TupleDesc) DatumGetPointer(res);
+
+	elog(WARNING,
+		 "TupleDesc reference leak: TupleDesc %p (%u,%d) still referenced",
+		 tupdesc, tupdesc->tdtypeid, tupdesc->tdtypmod);
+}
diff --git a/src/backend/jit/jit.c b/src/backend/jit/jit.c
index 5ca3f922fed..c44080b9742 100644
--- a/src/backend/jit/jit.c
+++ b/src/backend/jit/jit.c
@@ -26,7 +26,6 @@
 #include "jit/jit.h"
 #include "miscadmin.h"
 #include "utils/fmgrprotos.h"
-#include "utils/resowner_private.h"
 
 /* GUCs */
 bool		jit_enabled = true;
@@ -140,7 +139,6 @@ jit_release_context(JitContext *context)
 	if