From 22760046a3b5dcc1e6123fa413d5ce86190724d0 Mon Sep 17 00:00:00 2001
From: Shlok Kyal <shlok.kyal.oss@gmail.com>
Date: Mon, 3 Mar 2025 12:11:42 +0530
Subject: [PATCH v7] Restrict copying of invalidated replication slots.

Previously, invalidated logical and physical replication slots could
be copied using the pg_copy_logical_replication_slot and
pg_copy_physical_replication_slot functions. Replication slots
that were invalidated for reasons other than WAL removal retained their
restart_lsn. This meant that a new slot copied from an invalidated
slot could have a restart_lsn pointing to a WAL segment that might
have already been removed.

This commit restricts the copying of invalidated replication slots.

Backpatch to v16, where slots could retain their restart_lsn when
invalidated for reasons other than WAL removal.

Author: Shlok Kyal <shlok.kyal.oss@gmail.com>
Reviewed-by: vignesh C <vignesh21@gmail.com>
Reviewed-by: Zhijie Hou <houzj.fnst@fujitsu.com>
Reviewed-by: Peter Smith <smithpb2250@gmail.com>
Reviewed-by: Masahiko Sawada <sawada.mshk@gmail.com>
Discussion: https://postgr.es/m/CANhcyEU65aH0VYnLiu%3DOhNNxhnhNhwcXBeT-jvRe1OiJTo_Ayg%40mail.gmail.com
Backpatch-through: 16
---
 doc/src/sgml/func.sgml                        |  6 ++--
 src/backend/replication/slotfuncs.c           | 31 ++++++++++++++++++-
 .../t/035_standby_logical_decoding.pl         |  9 ++++++
 3 files changed, 43 insertions(+), 3 deletions(-)

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 51dd8ad6571..cda85e811ed 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -29486,7 +29486,8 @@ postgres=# SELECT '0/0'::pg_lsn + pd.segment_number * ps.setting::int + :offset
         The copied physical slot starts to reserve WAL from the same <acronym>LSN</acronym> as the
         source slot.
         <parameter>temporary</parameter> is optional. If <parameter>temporary</parameter>
-        is omitted, the same value as the source slot is used.
+        is omitted, the same value as the source slot is used. Copy of an
+        invalidated slot is not allowed.
        </para></entry>
       </row>
 
@@ -29511,7 +29512,8 @@ postgres=# SELECT '0/0'::pg_lsn + pd.segment_number * ps.setting::int + :offset
         The <literal>failover</literal> option of the source logical slot
         is not copied and is set to <literal>false</literal> by default. This
         is to avoid the risk of being unable to continue logical replication
-        after failover to standby where the slot is being synchronized.
+        after failover to standby where the slot is being synchronized. Copy of
+        an invalidated slot is not allowed.
        </para></entry>
       </row>
 
diff --git a/src/backend/replication/slotfuncs.c b/src/backend/replication/slotfuncs.c
index 146eef5871e..999c14c920d 100644
--- a/src/backend/replication/slotfuncs.c
+++ b/src/backend/replication/slotfuncs.c
@@ -678,6 +678,13 @@ copy_replication_slot(FunctionCallInfo fcinfo, bool logical_slot)
 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
 				 errmsg("cannot copy a replication slot that doesn't reserve WAL")));
 
+	/* Cannot copy an invalidated replication slot */
+	if (first_slot_contents.data.invalidated != RS_INVAL_NONE)
+		ereport(ERROR,
+				errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				errmsg("cannot copy invalidated replication slot \"%s\"",
+					   NameStr(*src_name)));
+
 	/* Overwrite params from optional arguments */
 	if (PG_NARGS() >= 3)
 		temporary = PG_GETARG_BOOL(2);
@@ -687,7 +694,22 @@ copy_replication_slot(FunctionCallInfo fcinfo, bool logical_slot)
 		plugin = NameStr(*(PG_GETARG_NAME(3)));
 	}
 
-	/* Create new slot and acquire it */
+	/*
+	 * Create new slot and acquire it
+	 *
+	 * After slot is created there can be a race condition with function
+	 * InvalidateObsoleteReplicationSlots, when the copied slot appear before
+	 * source slot in array ReplicationSlotCtl->replication_slots.
+	 *
+	 * If just after slot creation, the source slot is invalidated due to
+	 * some operation. The execution of InvalidateObsoleteReplicationSlots will
+	 * wait for this function to release the copied slot. Then the source slot
+	 * will be invalidated. So, the copying of source slot is successful in this
+	 * case.
+	 *
+	 * The wal_status of copied slot is set to lost immediately and hence will
+	 * not have any harm.
+	 */
 	if (logical_slot)
 	{
 		/*
@@ -779,6 +801,13 @@ copy_replication_slot(FunctionCallInfo fcinfo, bool logical_slot)
 							NameStr(*src_name)),
 					 errhint("Retry when the source replication slot's confirmed_flush_lsn is valid.")));
 
+		/* Check if source slot became invalidated during the copy operation */
+		if (second_slot_contents.data.invalidated != RS_INVAL_NONE)
+			ereport(ERROR,
+					errmsg("cannot copy replication slot \"%s\"",
+						   NameStr(*src_name)),
+					errdetail("The source replication slot was invalidated during the copy operation."));
+
 		/* Install copied values again */
 		SpinLockAcquire(&MyReplicationSlot->mutex);
 		MyReplicationSlot->effective_xmin = copy_effective_xmin;
diff --git a/src/test/recovery/t/035_standby_logical_decoding.pl b/src/test/recovery/t/035_standby_logical_decoding.pl
index 8903177d883..44046e6a12a 100644
--- a/src/test/recovery/t/035_standby_logical_decoding.pl
+++ b/src/test/recovery/t/035_standby_logical_decoding.pl
@@ -591,6 +591,15 @@ $handle =
 check_pg_recvlogical_stderr($handle,
 	"can no longer access replication slot \"vacuum_full_activeslot\"");
 
+# Attempt to copy an invalidated logical replication slot
+($result, $stdout, $stderr) = $node_standby->psql(
+	'postgres',
+	qq[select pg_copy_logical_replication_slot('vacuum_full_inactiveslot', 'vacuum_full_inactiveslot_copy');],
+	replication => 'database');
+ok( $stderr =~
+	  /ERROR:  cannot copy invalidated replication slot "vacuum_full_inactiveslot"/,
+	"invalidated slot cannot be copied");
+
 # Turn hot_standby_feedback back on
 change_hot_standby_feedback_and_wait_for_xmins(1, 1);
 
-- 
2.34.1

