From 0680bf499ea28db064b37fc5d820250d866e0df9 Mon Sep 17 00:00:00 2001
From: Zhijie Hou <houzj.fnst@fujitsu.com>
Date: Tue, 16 Sep 2025 11:24:20 +0800
Subject: [PATCH v2] Stabilize the tests in 035_conflicts

The test used VACUUM to remove the deleted tuple, but that might not be stable
because a concurrent bgwriter or checkpoint could lock the page where the
deleted tuple exists. The test has already confirmed that the replication
slot's xmin has advanced, which should be sufficient to prove that the feature
works correctly. This commit removes these unstable VACUUM tests.

Additionally, the test has a check to verify the resumption of retention for
conflict-relevant information after setting max_retention_duration to 0.
However, in some cases, the apply worker resumes retention immediately after the
inactive slot is removed from the synchronized_standby_slots configuration,
prior to setting max_retention_duration to 0. This can occur if the system
applies remote changes in less than 1ms, leading to test timeouts while waiting
for the resumption log at a later position. To ensure stability, this commit
delays the removal of synchronized_standby_slots until after
max_retention_duration has been set to 0.
---
 src/test/subscription/t/035_conflicts.pl | 31 +++++++-----------------
 1 file changed, 9 insertions(+), 22 deletions(-)

diff --git a/src/test/subscription/t/035_conflicts.pl b/src/test/subscription/t/035_conflicts.pl
index f2aee0f70df..21e992cea50 100644
--- a/src/test/subscription/t/035_conflicts.pl
+++ b/src/test/subscription/t/035_conflicts.pl
@@ -342,15 +342,6 @@ ok( $node_A->poll_query_until(
 	),
 	"the xmin value of slot 'pg_conflict_detection' is updated on Node A");
 
-# Confirm that the dead tuple can be removed now
-($cmdret, $stdout, $stderr) = $node_A->psql(
-	'postgres', qq(VACUUM (verbose) public.tab;)
-);
-
-ok( $stderr =~
-	  qr/1 removed, 1 remain, 0 are dead but not yet removable/,
-	'the deleted column is removed');
-
 ###############################################################################
 # Ensure that the deleted tuple needed to detect an update_deleted conflict is
 # accessible via a sequential table scan.
@@ -555,13 +546,6 @@ if ($injection_points_supported != 0)
 		"the xmin value of slot 'pg_conflict_detection' is updated on subscriber"
 	);
 
-	# Confirm that the dead tuple can be removed now
-	($cmdret, $stdout, $stderr) =
-	  $node_A->psql('postgres', qq(VACUUM (verbose) public.tab;));
-
-	ok($stderr =~ qr/1 removed, 0 remain, 0 are dead but not yet removable/,
-		'the deleted column is removed');
-
 	# Get the commit timestamp for the publisher's update
 	my $pub_ts = $node_B->safe_psql('postgres',
 		"SELECT pg_xact_commit_timestamp(xmin) from tab where a=1;");
@@ -625,12 +609,6 @@ $result = $node_A->safe_psql('postgres',
 	"SELECT subretentionactive FROM pg_subscription WHERE subname='$subname_AB';");
 is($result, qq(f), 'retention is inactive');
 
-# Drop the physical slot and reset the synchronized_standby_slots setting
-$node_B->safe_psql('postgres',
-	"SELECT * FROM pg_drop_replication_slot('blocker');");
-$node_B->adjust_conf('postgresql.conf', 'synchronized_standby_slots', "''");
-$node_B->reload;
-
 ###############################################################################
 # Check that dead tuple retention resumes when the max_retention_duration is set
 # 0.
@@ -642,6 +620,15 @@ $log_offset = -s $node_A->logfile;
 $node_A->safe_psql('postgres',
 	"ALTER SUBSCRIPTION $subname_AB SET (max_retention_duration = 0);");
 
+# Drop the physical slot and reset the synchronized_standby_slots setting. We
+# change this after setting max_retention_duration to 0, ensuring the apply
+# worker does not resume prematurely without noticing the updated
+# max_retention_duration value.
+$node_B->safe_psql('postgres',
+	"SELECT * FROM pg_drop_replication_slot('blocker');");
+$node_B->adjust_conf('postgresql.conf', 'synchronized_standby_slots', "''");
+$node_B->reload;
+
 # Confirm that the retention resumes
 $node_A->wait_for_log(
 	qr/logical replication worker for subscription "tap_sub_a_b" will resume retaining the information for detecting conflicts
-- 
2.51.0.windows.1

