Hi,

On Mon, Apr 27, 2026 at 3:01 AM Bertrand Drouvot
<[email protected]> wrote:
>
> I've 2 comments:

Thank you for reviewing!

> 1/ What about having just one curr_idx increment? (right after list_nth(),
> before the skip checks). I think that would be less error-prone if new skip
> conditions are added later.

Done.

> 2/ I think that the test is racy and could also succeed even without the 
> fixes.
> Indeed, I think that the drops can complete before any concurrent polling
> happens (I can see it by adding a pg_sleep(2) before the first poll in the DO
> block). What about using an injection point to ensure a relation is removed
> during the polling?

I initially considered an injection point but chose polling since the
TAP test reproduced the bug consistently with hundreds of tables on my
dev system. I've now added an injection point for predictability.

I adjusted the commit message a bit. Please find the attached v3 patch
for further review. Thank you!

--
Bharath Rupireddy
Amazon Web Services: https://aws.amazon.com
From 9424990e60dec6a41146ef7ad2b1fb2503a617c5 Mon Sep 17 00:00:00 2001
From: Bharath Rupireddy <[email protected]>
Date: Mon, 27 Apr 2026 19:17:11 +0000
Subject: [PATCH v3] Fix pg_get_publication_tables race with concurrent DROP
 TABLE

pg_get_publication_tables() collects table OIDs without locks on
the first call, then opens each table on subsequent calls. If a
table is dropped in between, the function errors with
"could not open relation with OID". This is common in environments
where many tables are being created and dropped while
pg_publication_tables view is queried, such as with
FOR ALL TABLES publications.

Fix by skipping concurrently dropped tables instead of erroring
out. Tables created after the list is built are simply not present
in the result set, which is expected point-in-time behavior.

Also add an injection point to test this issue predictably.

Author: Bharath Rupireddy <[email protected]>
Reviewed-by: Bertrand Drouvot <[email protected]>
Reviewed-by: shveta malik <[email protected]>
Discussion: https://www.postgresql.org/message-id/CALj2ACVYYooWH-5tJ6cPKkU%2BmutVxwb_z4S%2BqAi-zdrFqxXE2Q%40mail.gmail.com
Backpatch-through: 14
---
 src/backend/catalog/pg_publication.c | 42 +++++++++++++++----
 src/test/subscription/t/100_bugs.pl  | 63 ++++++++++++++++++++++++++++
 src/tools/pgindent/typedefs.list     |  1 +
 3 files changed, 99 insertions(+), 7 deletions(-)

diff --git a/src/backend/catalog/pg_publication.c b/src/backend/catalog/pg_publication.c
index a43d385c605..22e6bb54afe 100644
--- a/src/backend/catalog/pg_publication.c
+++ b/src/backend/catalog/pg_publication.c
@@ -38,6 +38,7 @@
 #include "utils/fmgroids.h"
 #include "utils/lsyscache.h"
 #include "utils/rel.h"
+#include "utils/injection_point.h"
 #include "utils/syscache.h"
 
 /* Records association between publication and published table */
@@ -48,6 +49,13 @@ typedef struct
 								 * table. */
 } published_rel;
 
+/* State for pg_get_publication_tables SRF */
+typedef struct
+{
+	List	   *table_infos;	/* list of published_rel */
+	int			curr_idx;		/* current index into table_infos */
+} publication_tables_state;
+
 /*
  * Check if relation can be in given publication and throws appropriate
  * error if not.
@@ -1408,13 +1416,14 @@ pg_get_publication_tables(FunctionCallInfo fcinfo, ArrayType *pubnames,
 {
 #define NUM_PUBLICATION_TABLES_ELEM	4
 	FuncCallContext *funcctx;
-	List	   *table_infos = NIL;
+	publication_tables_state *ptstate;
 
 	/* stuff done only on the first call of the function */
 	if (SRF_IS_FIRSTCALL())
 	{
 		TupleDesc	tupdesc;
 		MemoryContext oldcontext;
+		List	   *table_infos = NIL;
 		Datum	   *elems;
 		int			nelems,
 					i;
@@ -1537,26 +1546,39 @@ pg_get_publication_tables(FunctionCallInfo fcinfo, ArrayType *pubnames,
 
 		TupleDescFinalize(tupdesc);
 		funcctx->tuple_desc = BlessTupleDesc(tupdesc);
-		funcctx->user_fctx = table_infos;
+
+		/* Store the state to be used across SRF calls */
+		ptstate = palloc_object(publication_tables_state);
+		ptstate->table_infos = table_infos;
+		ptstate->curr_idx = 0;
+		funcctx->user_fctx = ptstate;
 
 		MemoryContextSwitchTo(oldcontext);
+
+		INJECTION_POINT("pg-get-publication-tables-build-list", NULL);
 	}
 
 	/* stuff done on every call of the function */
 	funcctx = SRF_PERCALL_SETUP();
-	table_infos = (List *) funcctx->user_fctx;
+	ptstate = (publication_tables_state *) funcctx->user_fctx;
 
-	if (funcctx->call_cntr < list_length(table_infos))
+	while (ptstate->curr_idx < list_length(ptstate->table_infos))
 	{
 		HeapTuple	pubtuple = NULL;
 		HeapTuple	rettuple;
 		Publication *pub;
-		published_rel *table_info = (published_rel *) list_nth(table_infos, funcctx->call_cntr);
+		published_rel *table_info = (published_rel *) list_nth(ptstate->table_infos, ptstate->curr_idx);
 		Oid			relid = table_info->relid;
 		Oid			schemaid = get_rel_namespace(relid);
 		Datum		values[NUM_PUBLICATION_TABLES_ELEM] = {0};
 		bool		nulls[NUM_PUBLICATION_TABLES_ELEM] = {0};
 
+		ptstate->curr_idx++;
+
+		/* Skip if the relation has been concurrently dropped. */
+		if (!OidIsValid(schemaid))
+			continue;
+
 		/*
 		 * Form tuple with appropriate data.
 		 */
@@ -1599,12 +1621,18 @@ pg_get_publication_tables(FunctionCallInfo fcinfo, ArrayType *pubnames,
 		/* Show all columns when the column list is not specified. */
 		if (nulls[2])
 		{
-			Relation	rel = table_open(relid, AccessShareLock);
+			Relation	rel = try_table_open(relid, AccessShareLock);
 			int			nattnums = 0;
 			int16	   *attnums;
-			TupleDesc	desc = RelationGetDescr(rel);
+			TupleDesc	desc;
 			int			i;
 
+			/* Skip if the relation has been concurrently dropped. */
+			if (rel == NULL)
+				continue;
+
+			desc = RelationGetDescr(rel);
+
 			attnums = palloc_array(int16, desc->natts);
 
 			for (i = 0; i < desc->natts; i++)
diff --git a/src/test/subscription/t/100_bugs.pl b/src/test/subscription/t/100_bugs.pl
index a23035e23fe..80e1ef12e66 100644
--- a/src/test/subscription/t/100_bugs.pl
+++ b/src/test/subscription/t/100_bugs.pl
@@ -23,6 +23,9 @@ my $node_publisher = PostgreSQL::Test::Cluster->new('publisher');
 $node_publisher->init(allows_streaming => 'logical');
 $node_publisher->start;
 
+my $injection_points_supported =
+  $node_publisher->check_extension('injection_points');
+
 my $node_subscriber = PostgreSQL::Test::Cluster->new('subscriber');
 $node_subscriber->init;
 $node_subscriber->start;
@@ -605,4 +608,64 @@ $node_publisher->safe_psql('postgres', "DROP DATABASE regress_db");
 
 $node_publisher->stop('fast');
 
+# pg_get_publication_tables race with concurrent DROP TABLE.
+if ($injection_points_supported != 0)
+{
+	$node_publisher->append_conf('postgresql.conf',
+		"shared_preload_libraries = 'injection_points'");
+	$node_publisher->start();
+
+	my $pub_db = 'concurrent_drop_table_db';
+
+	$node_publisher->safe_psql('postgres', "CREATE DATABASE $pub_db");
+
+	$node_publisher->safe_psql(
+		$pub_db, qq{
+		CREATE EXTENSION injection_points;
+		CREATE PUBLICATION pub_all FOR ALL TABLES;
+		CREATE TABLE t_dropme (id int, data text);
+	});
+
+	# Pause pg_get_publication_tables after building the table OID list.
+	$node_publisher->safe_psql($pub_db,
+		"SELECT injection_points_attach('pg-get-publication-tables-build-list', 'wait');"
+	);
+
+	# Background session queries pg_publication_tables; it will block at
+	# the injection point.
+	my $bgpsql =
+	  $node_publisher->background_psql($pub_db, on_error_stop => 0);
+	$bgpsql->query_until(
+		qr/querying_publication_tables/,
+		qq{\\echo querying_publication_tables
+SELECT count(*) FROM pg_publication_tables WHERE pubname = 'pub_all';
+});
+
+	$node_publisher->wait_for_event('client backend',
+		'pg-get-publication-tables-build-list');
+
+	# Drop the table while the SRF is paused.
+	$node_publisher->safe_psql($pub_db, "DROP TABLE t_dropme");
+
+	# Resume and detach.
+	$node_publisher->safe_psql($pub_db,
+		"SELECT injection_points_wakeup('pg-get-publication-tables-build-list');
+		 SELECT injection_points_detach('pg-get-publication-tables-build-list');"
+	);
+
+	# Verify the background session completed without error.
+	my (undef, $bg_err) = $bgpsql->query("SELECT 1");
+	$bgpsql->quit;
+
+	is($bg_err, 0,
+		"pg_publication_tables handles concurrently dropped tables");
+
+	# Cleanup
+	$node_publisher->safe_psql($pub_db, "DROP PUBLICATION pub_all");
+	$node_publisher->safe_psql($pub_db, "DROP EXTENSION injection_points");
+	$node_publisher->safe_psql('postgres', "DROP DATABASE $pub_db");
+
+	$node_publisher->stop('fast');
+}
+
 done_testing();
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 9f1dd55213d..f7ebf0339e8 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -4175,6 +4175,7 @@ pthread_mutex_t
 pthread_once_t
 pthread_t
 ptrdiff_t
+publication_tables_state
 published_rel
 pull_var_clause_context
 pull_varattnos_context
-- 
2.47.3

Reply via email to