From b4cac3e24c150c58d3c724e1dc67f5db5d305962 Mon Sep 17 00:00:00 2001
From: Nisha Moond <nisha.moond412@gmail.com>
Date: Wed, 29 Jan 2025 12:12:00 +0530
Subject: [PATCH v65 4/4] Add TAP test for slot invalidation based on inactive
 timeout.

This patch adds the same test, but places it under PG_TEST_EXTRA instead
of using injection points.

Since the minimum value for GUC 'idle_replication_slot_timeout' is one minute,
the test takes more than a minute to complete and is disabled by default.
Use PG_TEST_EXTRA=idle_replication_slot_timeout with "make" to run the test.
---
 .cirrus.tasks.yml                             |   2 +-
 doc/src/sgml/regress.sgml                     |  10 +
 src/test/recovery/README                      |   5 +
 src/test/recovery/meson.build                 |   1 +
 .../045_invalidate_inactive_slots_pg_extra.pl | 208 ++++++++++++++++++
 5 files changed, 225 insertions(+), 1 deletion(-)
 create mode 100644 src/test/recovery/t/045_invalidate_inactive_slots_pg_extra.pl

diff --git a/.cirrus.tasks.yml b/.cirrus.tasks.yml
index 18e944ca89..8d3c13fcee 100644
--- a/.cirrus.tasks.yml
+++ b/.cirrus.tasks.yml
@@ -20,7 +20,7 @@ env:
   MTEST_ARGS: --print-errorlogs --no-rebuild -C build
   PGCTLTIMEOUT: 120 # avoids spurious failures during parallel tests
   TEMP_CONFIG: ${CIRRUS_WORKING_DIR}/src/tools/ci/pg_ci_base.conf
-  PG_TEST_EXTRA: kerberos ldap ssl libpq_encryption load_balance
+  PG_TEST_EXTRA: kerberos ldap ssl libpq_encryption load_balance idle_replication_slot_timeout
 
 
 # What files to preserve in case tests fail
diff --git a/doc/src/sgml/regress.sgml b/doc/src/sgml/regress.sgml
index 7c474559bd..f5b1f2f353 100644
--- a/doc/src/sgml/regress.sgml
+++ b/doc/src/sgml/regress.sgml
@@ -347,6 +347,16 @@ make check-world PG_TEST_EXTRA='kerberos ldap ssl load_balance libpq_encryption'
       </para>
      </listitem>
     </varlistentry>
+
+    <varlistentry>
+     <term><literal>idle_replication_slot_timeout</literal></term>
+     <listitem>
+      <para>
+       Runs the test <filename>src/test/recovery/t/045_invalidate_inactive_slots_pg_extra.pl</filename>.
+       Not enabled by default because it is time consuming.
+      </para>
+     </listitem>
+    </varlistentry>
    </variablelist>
 
    Tests for features that are not supported by the current build
diff --git a/src/test/recovery/README b/src/test/recovery/README
index 896df0ad05..5c066fc41f 100644
--- a/src/test/recovery/README
+++ b/src/test/recovery/README
@@ -30,4 +30,9 @@ PG_TEST_EXTRA=wal_consistency_checking
 to the "make" command.  This is resource-intensive, so it's not done
 by default.
 
+If you want to test idle_replication_slot_timeout, add
+PG_TEST_EXTRA=idle_replication_slot_timeout
+to the "make" command. This test takes over a minute, so it's not done
+by default.
+
 See src/test/perl/README for more info about running these tests.
diff --git a/src/test/recovery/meson.build b/src/test/recovery/meson.build
index 057bcde143..0a037b4b65 100644
--- a/src/test/recovery/meson.build
+++ b/src/test/recovery/meson.build
@@ -53,6 +53,7 @@ tests += {
       't/042_low_level_backup.pl',
       't/043_no_contrecord_switch.pl',
       't/044_invalidate_inactive_slots.pl',
+      't/045_invalidate_inactive_slots_pg_extra.pl',
     ],
   },
 }
diff --git a/src/test/recovery/t/045_invalidate_inactive_slots_pg_extra.pl b/src/test/recovery/t/045_invalidate_inactive_slots_pg_extra.pl
new file mode 100644
index 0000000000..577f69d05d
--- /dev/null
+++ b/src/test/recovery/t/045_invalidate_inactive_slots_pg_extra.pl
@@ -0,0 +1,208 @@
+# Copyright (c) 2025, PostgreSQL Global Development Group
+
+# Test for replication slots invalidation
+use strict;
+use warnings FATAL => 'all';
+
+use PostgreSQL::Test::Utils;
+use PostgreSQL::Test::Cluster;
+use Test::More;
+
+# The test sleeps for over a minute to exceed the idle timeout. Run it only if
+# idle_replication_slot_timeout is specified in PG_TEST_EXTRA.
+if (  !$ENV{PG_TEST_EXTRA}
+	|| $ENV{PG_TEST_EXTRA} !~ /\bidle_replication_slot_timeout\b/)
+{
+	plan skip_all =>
+	  'test idle_replication_slot_timeout not enabled in PG_TEST_EXTRA';
+}
+
+# =============================================================================
+# Testcase start
+#
+# Test invalidation of streaming standby slot and logical failover slot on the
+# primary due to idle timeout. Also, test logical failover slot synced to
+# the standby from the primary doesn't get invalidated on its own, but gets the
+# invalidated state from the primary.
+
+# Initialize primary
+my $primary = PostgreSQL::Test::Cluster->new('primary');
+$primary->init(allows_streaming => 'logical');
+
+# Avoid unpredictable checkpoints; set the idle timeout to its minimum
+$primary->append_conf(
+	'postgresql.conf', qq{
+checkpoint_timeout = 1h
+idle_replication_slot_timeout = 1
+});
+$primary->start;
+
+# Take backup
+my $backup_name = 'my_backup';
+$primary->backup($backup_name);
+
+# Create the logical failover slot on the primary; fail at once on any error
+$primary->safe_psql('postgres',
+	q{SELECT pg_create_logical_replication_slot('sync_slot1', 'test_decoding', false, false, true);}
+);
+
+# Create standby1's slot on the primary
+$primary->safe_psql(
+	'postgres', qq[
+    SELECT pg_create_physical_replication_slot(slot_name := 'sb_slot1', immediately_reserve := true);
+]);
+
+# Create standby2's slot on the primary
+$primary->safe_psql(
+	'postgres', qq[
+    SELECT pg_create_physical_replication_slot(slot_name := 'sb_slot2', immediately_reserve := true);
+]);
+
+# Create standby1
+my $standby1 = PostgreSQL::Test::Cluster->new('standby1');
+$standby1->init_from_backup($primary, $backup_name, has_streaming => 1);
+
+my $connstr = $primary->connstr;
+$standby1->append_conf(
+	'postgresql.conf', qq(
+hot_standby_feedback = on
+primary_slot_name = 'sb_slot1'
+idle_replication_slot_timeout = 1
+primary_conninfo = '$connstr dbname=postgres'
+));
+$standby1->start;
+
+# Wait until the standby has replayed enough data
+$primary->wait_for_catchup($standby1);
+
+# Create standby2
+my $standby2 = PostgreSQL::Test::Cluster->new('standby2');
+$standby2->init_from_backup($primary, $backup_name, has_streaming => 1);
+
+$connstr = $primary->connstr;
+$standby2->append_conf(
+	'postgresql.conf', qq(
+hot_standby_feedback = on
+primary_slot_name = 'sb_slot2'
+idle_replication_slot_timeout = 1
+primary_conninfo = '$connstr dbname=postgres'
+));
+$standby2->start;
+
+# Wait until the standby has replayed enough data
+$primary->wait_for_catchup($standby2);
+
+# idle_replication_slot_timeout is already set on standby1 (in its config), so
+# the checkpoint below verifies that synced slots are not invalidated there.
+
+# Make the standby2's slot on the primary inactive
+$standby2->stop;
+
+# Sync the primary slots to the standby
+$standby1->safe_psql('postgres', "SELECT pg_sync_replication_slots();");
+
+# Confirm that the logical failover slot is created on the standby
+is( $standby1->safe_psql(
+		'postgres',
+		q{SELECT count(*) = 1 FROM pg_replication_slots
+		  WHERE slot_name = 'sync_slot1' AND synced
+			AND NOT temporary
+			AND invalidation_reason IS NULL;}
+	),
+	't',
+	'logical slot sync_slot1 is synced to standby');
+
+# Give enough time for inactive_since to exceed the timeout
+sleep(61);
+
+# On standby, synced slots are not invalidated by the idle timeout
+# until the invalidation state is propagated from the primary.
+$standby1->safe_psql('postgres', "CHECKPOINT");
+is( $standby1->safe_psql(
+		'postgres',
+		q{SELECT count(*) = 1 FROM pg_replication_slots
+		  WHERE slot_name = 'sync_slot1'
+			AND invalidation_reason IS NULL;}
+	),
+	't',
+	'check that synced slot sync_slot1 has not been invalidated on standby');
+
+# Remember the current size of the primary's log, used as the search offset
+my $logstart = -s $primary->logfile;
+
+# Wait for the logical failover slot to get invalidated on the primary. Nobody
+# has acquired it yet and 61 seconds have already elapsed, so it must be
+# invalidated due to idle timeout; wait another 10 seconds for reliability.
+wait_for_slot_invalidation($primary, 'sync_slot1', $logstart, 10);
+
+# Re-sync the primary slots to the standby. Note that the primary slot was
+# already invalidated (above) due to idle timeout. The standby must just
+# sync the invalidated state.
+$standby1->safe_psql('postgres', "SELECT pg_sync_replication_slots();");
+
+is( $standby1->safe_psql(
+		'postgres',
+		q{SELECT count(*) = 1 FROM pg_replication_slots
+		  WHERE slot_name = 'sync_slot1'
+			AND invalidation_reason = 'idle_timeout';}
+	),
+	"t",
+	'check that invalidation of synced slot sync_slot1 is synced on standby');
+
+# By now standby2's slot must be invalidated due to idle timeout,
+# check for invalidation.
+wait_for_slot_invalidation($primary, 'sb_slot2', $logstart, 1);
+
+# Testcase end
+# =============================================================================
+
+# Wait for the slot to become idle and get invalidated (see
+# trigger_slot_invalidation), then verify it can no longer be acquired.
+sub wait_for_slot_invalidation
+{
+	my ($node, $slot, $offset, $wait_time_secs) = @_;
+	my $node_name = $node->name;
+
+	trigger_slot_invalidation($node, $slot, $offset, $wait_time_secs);
+
+	# Check that an invalidated slot cannot be acquired (only stderr is examined)
+	my ($result, $stdout, $stderr) = $node->psql(
+		'postgres', qq[
+			SELECT pg_replication_slot_advance('$slot', '0/1');
+	]);
+	like( $stderr,
+		qr/can no longer access replication slot "\Q$slot\E"/,
+		"detected error upon trying to acquire invalidated slot $slot on node $node_name"
+	) or die
+	  "could not detect error upon trying to acquire invalidated slot $slot on node $node_name";
+}
+
+# Trigger slot invalidation with a checkpoint after sleeping $wait_time_secs
+# seconds, then confirm it in the server log and in pg_replication_slots.
+sub trigger_slot_invalidation
+{
+	my ($node, $slot, $offset, $wait_time_secs) = @_;
+	my $node_name = $node->name;
+
+	# Give enough time for inactive_since to exceed the timeout
+	sleep($wait_time_secs);
+
+	# Run a checkpoint
+	$node->safe_psql('postgres', "CHECKPOINT");
+
+	# The slot's invalidation should be logged
+	$node->wait_for_log(
+		qr/invalidating obsolete replication slot "\Q$slot\E"/, $offset);
+
+	# Check that the invalidation reason is 'idle_timeout'
+	$node->poll_query_until(
+		'postgres', qq[
+		SELECT COUNT(slot_name) = 1 FROM pg_replication_slots
+			WHERE slot_name = '$slot' AND
+			invalidation_reason = 'idle_timeout';
+	])
+	  or die
+	  "Timed out while waiting for invalidation reason of slot $slot to be set on node $node_name";
+}
+
+done_testing();
-- 
2.34.1

