Hi,

On Tue, Apr 28, 2026 at 8:28 PM shveta malik <[email protected]> wrote:
>
> On Wed, Apr 29, 2026 at 7:12 AM Ajin Cherian <[email protected]> wrote:
>
> > One small comment. The includes need to be in alphabetical order.
> > injection_point.h should come after fmgroids.h
> >
> > +#include "utils/injection_point.h"
> > #include "utils/syscache.h"
>
> Also there is a trailing whitespace issue while applying the patch.
> Other than these, the patch looks good.

Fixed. Please find the attached v5 patch.

The fix is needed only for PG16 and later, not PG15 or PG14. The bug was
introduced by b7ae03953690 [1] in PG16, which added a table_open() call in
pg_get_publication_tables(). PG15 and earlier only use get_rel_namespace()
and syscache lookups, both of which gracefully handle dropped relations
(returning InvalidOid/false rather than erroring).

I verified the bug and the fix on all affected branches. Please find the
attached version-specific patches for backpatching.

Thank you!

[1] b7ae03953690 - Ignore dropped and generated columns from the column list

--
Bharath Rupireddy
Amazon Web Services: https://aws.amazon.com

From 843e1aa3afb9a09bed1ffe2aaa9953fd6c2f88d1 Mon Sep 17 00:00:00 2001 From: Bharath Rupireddy <[email protected]> Date: Wed, 29 Apr 2026 05:25:56 +0000 Subject: [PATCH v5] PG16 - Fix pg_get_publication_tables race with concurrent DROP TABLE pg_get_publication_tables() collects table OIDs without locks on the first call, then opens each table on subsequent calls. If a table is dropped in between, the function errors with "could not open relation with OID". This is common in environments where many tables are being created and dropped while pg_publication_tables view is queried, such as with FOR ALL TABLES publications. The bug was introduced by b7ae03953690 in PG16. Fix by skipping concurrently dropped tables instead of erroring out. Tables created after the list is built are simply not present in the result set, which is expected point-in-time behavior. Author: Bharath Rupireddy <[email protected]> Reviewed-by: Bertrand Drouvot <[email protected]> Reviewed-by: shveta malik <[email protected]> Reviewed-by: Ajin Cherian <[email protected]> Discussion: https://www.postgresql.org/message-id/CALj2ACVYYooWH-5tJ6cPKkU%2BmutVxwb_z4S%2BqAi-zdrFqxXE2Q%40mail.gmail.com Backpatch-through: 16 --- src/backend/catalog/pg_publication.c | 38 +++++++++++++++++++++++----- src/tools/pgindent/typedefs.list | 1 + 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/src/backend/catalog/pg_publication.c b/src/backend/catalog/pg_publication.c index c488b6370b6..e773308539b 100644 --- a/src/backend/catalog/pg_publication.c +++ b/src/backend/catalog/pg_publication.c @@ -56,6 +56,13 @@ typedef struct static void publication_translate_columns(Relation targetrel, List *columns, int *natts, AttrNumber **attrs); +/* State for pg_get_publication_tables SRF */ +typedef struct +{ + List *table_infos; /* list of published_rel */ + int curr_idx; /* current index into table_infos */ +} publication_tables_state; + /* * Check if relation can be in given publication and throws appropriate * 
error if not. @@ -1058,7 +1065,7 @@ pg_get_publication_tables(PG_FUNCTION_ARGS) { #define NUM_PUBLICATION_TABLES_ELEM 4 FuncCallContext *funcctx; - List *table_infos = NIL; + publication_tables_state *ptstate; /* stuff done only on the first call of the function */ if (SRF_IS_FIRSTCALL()) @@ -1066,6 +1073,7 @@ pg_get_publication_tables(PG_FUNCTION_ARGS) TupleDesc tupdesc; MemoryContext oldcontext; ArrayType *arr; + List *table_infos = NIL; Datum *elems; int nelems, i; @@ -1165,24 +1173,36 @@ pg_get_publication_tables(PG_FUNCTION_ARGS) funcctx->tuple_desc = BlessTupleDesc(tupdesc); funcctx->user_fctx = (void *) table_infos; + /* Store the state to be used across SRF calls. */ + ptstate = palloc_object(publication_tables_state); + ptstate->table_infos = table_infos; + ptstate->curr_idx = 0; + funcctx->user_fctx = ptstate; + MemoryContextSwitchTo(oldcontext); } /* stuff done on every call of the function */ funcctx = SRF_PERCALL_SETUP(); - table_infos = (List *) funcctx->user_fctx; + ptstate = (publication_tables_state *) funcctx->user_fctx; - if (funcctx->call_cntr < list_length(table_infos)) + while (ptstate->curr_idx < list_length(ptstate->table_infos)) { HeapTuple pubtuple = NULL; HeapTuple rettuple; Publication *pub; - published_rel *table_info = (published_rel *) list_nth(table_infos, funcctx->call_cntr); + published_rel *table_info = (published_rel *) list_nth(ptstate->table_infos, ptstate->curr_idx); Oid relid = table_info->relid; Oid schemaid = get_rel_namespace(relid); Datum values[NUM_PUBLICATION_TABLES_ELEM] = {0}; bool nulls[NUM_PUBLICATION_TABLES_ELEM] = {0}; + ptstate->curr_idx++; + + /* Skip if the relation has been concurrently dropped. */ + if (!OidIsValid(schemaid)) + continue; + /* * Form tuple with appropriate data. */ @@ -1225,12 +1245,18 @@ pg_get_publication_tables(PG_FUNCTION_ARGS) /* Show all columns when the column list is not specified. 
*/ if (nulls[2]) { - Relation rel = table_open(relid, AccessShareLock); + Relation rel = try_table_open(relid, AccessShareLock); int nattnums = 0; int16 *attnums; - TupleDesc desc = RelationGetDescr(rel); + TupleDesc desc; int i; + /* Skip if the relation has been concurrently dropped. */ + if (rel == NULL) + continue; + + desc = RelationGetDescr(rel); + attnums = (int16 *) palloc(desc->natts * sizeof(int16)); for (i = 0; i < desc->natts; i++) diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index db43f32bcdd..8a93cc8dfc4 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -3652,6 +3652,7 @@ pthread_mutex_t pthread_once_t pthread_t ptrdiff_t +publication_tables_state published_rel pull_var_clause_context pull_varattnos_context -- 2.47.3
v5-0001-Fix-pg_get_publication_tables-race-with-concurren.patch
Description: Binary data
From 4960baa358c41c908742437d4b087aca7c46e1fd Mon Sep 17 00:00:00 2001 From: Bharath Rupireddy <[email protected]> Date: Wed, 29 Apr 2026 04:41:09 +0000 Subject: [PATCH v5] PG18 - Fix pg_get_publication_tables race with concurrent DROP TABLE pg_get_publication_tables() collects table OIDs without locks on the first call, then opens each table on subsequent calls. If a table is dropped in between, the function errors with "could not open relation with OID". This is common in environments where many tables are being created and dropped while pg_publication_tables view is queried, such as with FOR ALL TABLES publications. The bug was introduced by b7ae03953690 in PG16. Fix by skipping concurrently dropped tables instead of erroring out. Tables created after the list is built are simply not present in the result set, which is expected point-in-time behavior. Also add an injection point to test this issue predictably. Author: Bharath Rupireddy <[email protected]> Reviewed-by: Bertrand Drouvot <[email protected]> Reviewed-by: shveta malik <[email protected]> Reviewed-by: Ajin Cherian <[email protected]> Discussion: https://www.postgresql.org/message-id/CALj2ACVYYooWH-5tJ6cPKkU%2BmutVxwb_z4S%2BqAi-zdrFqxXE2Q%40mail.gmail.com Backpatch-through: 16 --- src/backend/catalog/pg_publication.c | 42 ++++++++++++++--- src/test/subscription/t/100_bugs.pl | 68 ++++++++++++++++++++++++++++ src/tools/pgindent/typedefs.list | 1 + 3 files changed, 104 insertions(+), 7 deletions(-) diff --git a/src/backend/catalog/pg_publication.c b/src/backend/catalog/pg_publication.c index d6f94db5d99..15a361bc346 100644 --- a/src/backend/catalog/pg_publication.c +++ b/src/backend/catalog/pg_publication.c @@ -36,6 +36,7 @@ #include "utils/builtins.h" #include "utils/catcache.h" #include "utils/fmgroids.h" +#include "utils/injection_point.h" #include "utils/lsyscache.h" #include "utils/rel.h" #include "utils/syscache.h" @@ -48,6 +49,13 @@ typedef struct * table. 
*/ } published_rel; +/* State for pg_get_publication_tables SRF */ +typedef struct +{ + List *table_infos; /* list of published_rel */ + int curr_idx; /* current index into table_infos */ +} publication_tables_state; + /* * Check if relation can be in given publication and throws appropriate * error if not. @@ -1118,7 +1126,7 @@ pg_get_publication_tables(PG_FUNCTION_ARGS) { #define NUM_PUBLICATION_TABLES_ELEM 4 FuncCallContext *funcctx; - List *table_infos = NIL; + publication_tables_state *ptstate; /* stuff done only on the first call of the function */ if (SRF_IS_FIRSTCALL()) @@ -1126,6 +1134,7 @@ pg_get_publication_tables(PG_FUNCTION_ARGS) TupleDesc tupdesc; MemoryContext oldcontext; ArrayType *arr; + List *table_infos = NIL; Datum *elems; int nelems, i; @@ -1222,26 +1231,39 @@ pg_get_publication_tables(PG_FUNCTION_ARGS) PG_NODE_TREEOID, -1, 0); funcctx->tuple_desc = BlessTupleDesc(tupdesc); - funcctx->user_fctx = table_infos; + + /* Store the state to be used across SRF calls. */ + ptstate = palloc_object(publication_tables_state); + ptstate->table_infos = table_infos; + ptstate->curr_idx = 0; + funcctx->user_fctx = ptstate; MemoryContextSwitchTo(oldcontext); + + INJECTION_POINT("pg-get-publication-tables-after-list-built", NULL); } /* stuff done on every call of the function */ funcctx = SRF_PERCALL_SETUP(); - table_infos = (List *) funcctx->user_fctx; + ptstate = (publication_tables_state *) funcctx->user_fctx; - if (funcctx->call_cntr < list_length(table_infos)) + while (ptstate->curr_idx < list_length(ptstate->table_infos)) { HeapTuple pubtuple = NULL; HeapTuple rettuple; Publication *pub; - published_rel *table_info = (published_rel *) list_nth(table_infos, funcctx->call_cntr); + published_rel *table_info = (published_rel *) list_nth(ptstate->table_infos, ptstate->curr_idx); Oid relid = table_info->relid; Oid schemaid = get_rel_namespace(relid); Datum values[NUM_PUBLICATION_TABLES_ELEM] = {0}; bool nulls[NUM_PUBLICATION_TABLES_ELEM] = {0}; + 
ptstate->curr_idx++; + + /* Skip if the relation has been concurrently dropped. */ + if (!OidIsValid(schemaid)) + continue; + /* * Form tuple with appropriate data. */ @@ -1284,12 +1306,18 @@ pg_get_publication_tables(PG_FUNCTION_ARGS) /* Show all columns when the column list is not specified. */ if (nulls[2]) { - Relation rel = table_open(relid, AccessShareLock); + Relation rel = try_table_open(relid, AccessShareLock); int nattnums = 0; int16 *attnums; - TupleDesc desc = RelationGetDescr(rel); + TupleDesc desc; int i; + /* Skip if the relation has been concurrently dropped. */ + if (rel == NULL) + continue; + + desc = RelationGetDescr(rel); + attnums = (int16 *) palloc(desc->natts * sizeof(int16)); for (i = 0; i < desc->natts; i++) diff --git a/src/test/subscription/t/100_bugs.pl b/src/test/subscription/t/100_bugs.pl index 50223054918..a9238f4a616 100644 --- a/src/test/subscription/t/100_bugs.pl +++ b/src/test/subscription/t/100_bugs.pl @@ -23,6 +23,9 @@ my $node_publisher = PostgreSQL::Test::Cluster->new('publisher'); $node_publisher->init(allows_streaming => 'logical'); $node_publisher->start; +my $injection_points_supported = + $node_publisher->check_extension('injection_points'); + my $node_subscriber = PostgreSQL::Test::Cluster->new('subscriber'); $node_subscriber->init; $node_subscriber->start; @@ -605,4 +608,69 @@ $node_publisher->safe_psql('postgres', "DROP DATABASE regress_db"); $node_publisher->stop('fast'); +# BUG: pg_get_publication_tables() race with concurrent DROP TABLE. +# +# pg_get_publication_tables() collects table OIDs without locks on the first +# call, then opens each table on subsequent calls. If a table is dropped in +# between, the function errors with "could not open relation with OID". 
+if ($injection_points_supported != 0) +{ + $node_publisher->append_conf('postgresql.conf', + "shared_preload_libraries = 'injection_points'"); + $node_publisher->start(); + + my $pub_db = 'concurrent_drop_table_db'; + + $node_publisher->safe_psql('postgres', "CREATE DATABASE $pub_db"); + + $node_publisher->safe_psql( + $pub_db, qq{ + CREATE EXTENSION injection_points; + CREATE PUBLICATION pub_all FOR ALL TABLES; + CREATE TABLE t_dropme (id int, data text); + }); + + # Pause pg_get_publication_tables() after building the table OID list. + $node_publisher->safe_psql($pub_db, + "SELECT injection_points_attach('pg-get-publication-tables-after-list-built', 'wait');" + ); + + # Background session queries pg_publication_tables view; it will block at + # the injection point. + my $bgpsql = + $node_publisher->background_psql($pub_db, on_error_stop => 0); + $bgpsql->query_until( + qr/querying_publication_tables/, + qq{\\echo querying_publication_tables +SELECT count(*) FROM pg_publication_tables WHERE pubname = 'pub_all'; +}); + + $node_publisher->wait_for_event('client backend', + 'pg-get-publication-tables-after-list-built'); + + # Drop the table while pg_get_publication_tables() is paused with injection + # point. + $node_publisher->safe_psql($pub_db, "DROP TABLE t_dropme"); + + # Resume and detach. + $node_publisher->safe_psql($pub_db, + "SELECT injection_points_wakeup('pg-get-publication-tables-after-list-built'); + SELECT injection_points_detach('pg-get-publication-tables-after-list-built');" + ); + + # Verify the background session completed without error. + my (undef, $bg_err) = $bgpsql->query("SELECT 1"); + $bgpsql->quit; + + is($bg_err, 0, + "pg_publication_tables handles concurrently dropped tables"); + + # Cleanup. 
+ $node_publisher->safe_psql($pub_db, "DROP PUBLICATION pub_all"); + $node_publisher->safe_psql($pub_db, "DROP EXTENSION injection_points"); + $node_publisher->safe_psql('postgres', "DROP DATABASE $pub_db"); + + $node_publisher->stop('fast'); +} + done_testing(); diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 38620a595c1..a15fc465ced 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -3969,6 +3969,7 @@ pthread_mutex_t pthread_once_t pthread_t ptrdiff_t +publication_tables_state published_rel pull_var_clause_context pull_varattnos_context -- 2.47.3
From bc8f759dcbb26ece509817a6017024f41258f2e8 Mon Sep 17 00:00:00 2001 From: Bharath Rupireddy <[email protected]> Date: Wed, 29 Apr 2026 04:57:08 +0000 Subject: [PATCH v5] PG17 - Fix pg_get_publication_tables race with concurrent DROP TABLE pg_get_publication_tables() collects table OIDs without locks on the first call, then opens each table on subsequent calls. If a table is dropped in between, the function errors with "could not open relation with OID". This is common in environments where many tables are being created and dropped while pg_publication_tables view is queried, such as with FOR ALL TABLES publications. The bug was introduced by b7ae03953690 in PG16. Fix by skipping concurrently dropped tables instead of erroring out. Tables created after the list is built are simply not present in the result set, which is expected point-in-time behavior. Also add an injection point to test this issue predictably. Author: Bharath Rupireddy <[email protected]> Reviewed-by: Bertrand Drouvot <[email protected]> Reviewed-by: shveta malik <[email protected]> Reviewed-by: Ajin Cherian <[email protected]> Discussion: https://www.postgresql.org/message-id/CALj2ACVYYooWH-5tJ6cPKkU%2BmutVxwb_z4S%2BqAi-zdrFqxXE2Q%40mail.gmail.com Backpatch-through: 16 --- src/backend/catalog/pg_publication.c | 41 +++++++++++++--- src/test/subscription/t/100_bugs.pl | 72 ++++++++++++++++++++++++++++ src/tools/pgindent/typedefs.list | 1 + 3 files changed, 108 insertions(+), 6 deletions(-) diff --git a/src/backend/catalog/pg_publication.c b/src/backend/catalog/pg_publication.c index 0602398a545..b4dc01a9e4a 100644 --- a/src/backend/catalog/pg_publication.c +++ b/src/backend/catalog/pg_publication.c @@ -36,6 +36,7 @@ #include "utils/builtins.h" #include "utils/catcache.h" #include "utils/fmgroids.h" +#include "utils/injection_point.h" #include "utils/lsyscache.h" #include "utils/rel.h" #include "utils/syscache.h" @@ -51,6 +52,13 @@ typedef struct static void publication_translate_columns(Relation 
targetrel, List *columns, int *natts, AttrNumber **attrs); +/* State for pg_get_publication_tables SRF */ +typedef struct +{ + List *table_infos; /* list of published_rel */ + int curr_idx; /* current index into table_infos */ +} publication_tables_state; + /* * Check if relation can be in given publication and throws appropriate * error if not. @@ -1053,7 +1061,7 @@ pg_get_publication_tables(PG_FUNCTION_ARGS) { #define NUM_PUBLICATION_TABLES_ELEM 4 FuncCallContext *funcctx; - List *table_infos = NIL; + publication_tables_state *ptstate; /* stuff done only on the first call of the function */ if (SRF_IS_FIRSTCALL()) @@ -1061,6 +1069,7 @@ pg_get_publication_tables(PG_FUNCTION_ARGS) TupleDesc tupdesc; MemoryContext oldcontext; ArrayType *arr; + List *table_infos = NIL; Datum *elems; int nelems, i; @@ -1160,24 +1169,38 @@ pg_get_publication_tables(PG_FUNCTION_ARGS) funcctx->tuple_desc = BlessTupleDesc(tupdesc); funcctx->user_fctx = (void *) table_infos; + /* Store the state to be used across SRF calls. 
*/ + ptstate = palloc_object(publication_tables_state); + ptstate->table_infos = table_infos; + ptstate->curr_idx = 0; + funcctx->user_fctx = ptstate; + MemoryContextSwitchTo(oldcontext); + + INJECTION_POINT("pg-get-publication-tables-after-list-built"); } /* stuff done on every call of the function */ funcctx = SRF_PERCALL_SETUP(); - table_infos = (List *) funcctx->user_fctx; + ptstate = (publication_tables_state *) funcctx->user_fctx; - if (funcctx->call_cntr < list_length(table_infos)) + while (ptstate->curr_idx < list_length(ptstate->table_infos)) { HeapTuple pubtuple = NULL; HeapTuple rettuple; Publication *pub; - published_rel *table_info = (published_rel *) list_nth(table_infos, funcctx->call_cntr); + published_rel *table_info = (published_rel *) list_nth(ptstate->table_infos, ptstate->curr_idx); Oid relid = table_info->relid; Oid schemaid = get_rel_namespace(relid); Datum values[NUM_PUBLICATION_TABLES_ELEM] = {0}; bool nulls[NUM_PUBLICATION_TABLES_ELEM] = {0}; + ptstate->curr_idx++; + + /* Skip if the relation has been concurrently dropped. */ + if (!OidIsValid(schemaid)) + continue; + /* * Form tuple with appropriate data. */ @@ -1220,12 +1243,18 @@ pg_get_publication_tables(PG_FUNCTION_ARGS) /* Show all columns when the column list is not specified. */ if (nulls[2]) { - Relation rel = table_open(relid, AccessShareLock); + Relation rel = try_table_open(relid, AccessShareLock); int nattnums = 0; int16 *attnums; - TupleDesc desc = RelationGetDescr(rel); + TupleDesc desc; int i; + /* Skip if the relation has been concurrently dropped. 
*/ + if (rel == NULL) + continue; + + desc = RelationGetDescr(rel); + attnums = (int16 *) palloc(desc->natts * sizeof(int16)); for (i = 0; i < desc->natts; i++) diff --git a/src/test/subscription/t/100_bugs.pl b/src/test/subscription/t/100_bugs.pl index 17accd11d93..18ffdf4944d 100644 --- a/src/test/subscription/t/100_bugs.pl +++ b/src/test/subscription/t/100_bugs.pl @@ -23,6 +23,13 @@ my $node_publisher = PostgreSQL::Test::Cluster->new('publisher'); $node_publisher->init(allows_streaming => 'logical'); $node_publisher->start; +my $injection_points_supported = 0; +if ($node_publisher->safe_psql('postgres', + "SELECT count(*) > 0 FROM pg_available_extensions WHERE name = 'injection_points';") eq 't') +{ + $injection_points_supported = 1; +} + my $node_subscriber = PostgreSQL::Test::Cluster->new('subscriber'); $node_subscriber->init; $node_subscriber->start; @@ -597,4 +604,69 @@ $node_publisher->safe_psql('postgres', "DROP DATABASE regress_db"); $node_publisher->stop('fast'); +# BUG: pg_get_publication_tables() race with concurrent DROP TABLE. +# +# pg_get_publication_tables() collects table OIDs without locks on the first +# call, then opens each table on subsequent calls. If a table is dropped in +# between, the function errors with "could not open relation with OID". +if ($injection_points_supported != 0) +{ + $node_publisher->append_conf('postgresql.conf', + "shared_preload_libraries = 'injection_points'"); + $node_publisher->start(); + + my $pub_db = 'concurrent_drop_table_db'; + + $node_publisher->safe_psql('postgres', "CREATE DATABASE $pub_db"); + + $node_publisher->safe_psql( + $pub_db, qq{ + CREATE EXTENSION injection_points; + CREATE PUBLICATION pub_all FOR ALL TABLES; + CREATE TABLE t_dropme (id int, data text); + }); + + # Pause pg_get_publication_tables() after building the table OID list. 
+ $node_publisher->safe_psql($pub_db, + "SELECT injection_points_attach('pg-get-publication-tables-after-list-built', 'wait');" + ); + + # Background session queries pg_publication_tables view; it will block at + # the injection point. + my $bgpsql = + $node_publisher->background_psql($pub_db, on_error_stop => 0); + $bgpsql->query_until( + qr/querying_publication_tables/, + qq{\\echo querying_publication_tables +SELECT count(*) FROM pg_publication_tables WHERE pubname = 'pub_all'; +}); + + $node_publisher->wait_for_event('client backend', + 'pg-get-publication-tables-after-list-built'); + + # Drop the table while pg_get_publication_tables() is paused with injection + # point. + $node_publisher->safe_psql($pub_db, "DROP TABLE t_dropme"); + + # Resume and detach. + $node_publisher->safe_psql($pub_db, + "SELECT injection_points_wakeup('pg-get-publication-tables-after-list-built'); + SELECT injection_points_detach('pg-get-publication-tables-after-list-built');" + ); + + # Verify the background session completed without error. + my (undef, $bg_err) = $bgpsql->query("SELECT 1"); + $bgpsql->quit; + + is($bg_err, 0, + "pg_publication_tables handles concurrently dropped tables"); + + # Cleanup. + $node_publisher->safe_psql($pub_db, "DROP PUBLICATION pub_all"); + $node_publisher->safe_psql($pub_db, "DROP EXTENSION injection_points"); + $node_publisher->safe_psql('postgres', "DROP DATABASE $pub_db"); + + $node_publisher->stop('fast'); +} + done_testing(); diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 84d88f895d6..056596b8c62 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -3804,6 +3804,7 @@ pthread_mutex_t pthread_once_t pthread_t ptrdiff_t +publication_tables_state published_rel pull_var_clause_context pull_varattnos_context -- 2.47.3
