From ce0425e698b9da5201a1bbd6565b1087c065c2c0 Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Sun, 5 Apr 2026 14:46:47 +0300
Subject: [PATCH v3 2/3] Use WAIT FOR LSN in
 PostgreSQL::Test::Cluster::wait_for_catchup()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When the standby is passed as a PostgreSQL::Test::Cluster instance,
use the WAIT FOR LSN command on the standby server to implement
wait_for_catchup() for replay, write, and flush modes.  This is more
efficient than polling pg_stat_replication on the upstream, as the
WAIT FOR LSN command uses a latch-based wakeup mechanism.

The optimization applies when:
- The standby is passed as a Cluster object (not just a name string)
- The mode is 'replay', 'write', or 'flush' (not 'sent')

Rather than pre-checking pg_is_in_recovery() on the standby (which
would add an extra round-trip on every call), we issue WAIT FOR LSN
directly and handle the 'not_in_recovery' result as a signal to fall
back to polling.

For 'sent' mode, when the standby is passed as a string (e.g., a
subscription name for logical replication), when the standby has been
promoted, or when WAIT FOR LSN is interrupted by a recovery conflict,
the function falls back to the original polling-based approach using
pg_stat_replication on the upstream.  The recovery conflict fallback
is necessary because some conflicts are unavoidable — for example,
ResolveRecoveryConflictWithTablespace() kills all backends
unconditionally, regardless of what they are doing.

The recovery conflict detection matches the English error message
"conflict with recovery", which is reliable because the test suite
runs with LC_MESSAGES=C.

Discussion: https://postgr.es/m/CABPTF7UiArgW-sXj9CNwRzUhYOQrevLzkYcgBydmX5oDes1sjg%40mail.gmail.com
Author: Xuneng Zhou <xunengzhou@gmail.com>
Reviewed-by: Alexander Korotkov <aekorotkov@gmail.com>
Reviewed-by: Chao Li <li.evan.chao@gmail.com>
Reviewed-by: Alvaro Herrera <alvherre@kurilemu.de>
---
 src/test/perl/PostgreSQL/Test/Cluster.pm | 94 +++++++++++++++++++++++-
 1 file changed, 93 insertions(+), 1 deletion(-)

diff --git a/src/test/perl/PostgreSQL/Test/Cluster.pm b/src/test/perl/PostgreSQL/Test/Cluster.pm
index 54e6b646e8f..f51b8a65e58 100644
--- a/src/test/perl/PostgreSQL/Test/Cluster.pm
+++ b/src/test/perl/PostgreSQL/Test/Cluster.pm
@@ -3355,6 +3355,15 @@ If you pass an explicit value of target_lsn, it should almost always be
 the primary's write LSN; so this parameter is seldom needed except when
 querying some intermediate replication node rather than the primary.
 
+When the standby is passed as a PostgreSQL::Test::Cluster instance, this
+function attempts to use the WAIT FOR LSN command on the standby for modes
+replay, write, and flush.  This is more efficient than polling
+pg_stat_replication on the upstream, as WAIT FOR LSN uses a latch-based
+wakeup mechanism.  If the standby has been promoted, or if the session is
+interrupted by a recovery conflict, it falls back to polling.  For 'sent'
+mode, or when the standby is passed as a string (e.g., a subscription
+name), polling is used directly.
+
 If there is no active replication connection from this peer, waits until
 poll_query_until timeout.
 
@@ -3374,10 +3383,13 @@ sub wait_for_catchup
 	  . join(', ', keys(%valid_modes))
 	  unless exists($valid_modes{$mode});
 
-	# Allow passing of a PostgreSQL::Test::Cluster instance as shorthand
+	# Keep a reference to the standby node if passed as an object, so we can
+	# use WAIT FOR LSN on it later.
+	my $standby_node;
 	if (blessed($standby_name)
 		&& $standby_name->isa("PostgreSQL::Test::Cluster"))
 	{
+		$standby_node = $standby_name;
 		$standby_name = $standby_name->name;
 	}
 	if (!defined($target_lsn))
@@ -3402,6 +3414,86 @@ sub wait_for_catchup
 	  . $self->name . "\n";
 	# Before release 12 walreceiver just set the application name to
 	# "walreceiver"
+
+	# Use WAIT FOR LSN on the standby when:
+	# - The standby was passed as a Cluster object (so we can connect to it)
+	# - The mode is replay, write, or flush (not 'sent')
+	# This is more efficient than polling pg_stat_replication on the upstream,
+	# as WAIT FOR LSN uses a latch-based wakeup mechanism.
+	#
+	# We skip the pg_is_in_recovery() pre-check and just attempt WAIT FOR
+	# LSN directly.  If the standby was promoted, it returns 'not_in_recovery'
+	# and we fall back to polling.
+	if (defined($standby_node) && ($mode ne 'sent'))
+	{
+		# Map mode names to WAIT FOR LSN mode names
+		my %mode_map = (
+			'replay' => 'standby_replay',
+			'write' => 'standby_write',
+			'flush' => 'standby_flush',);
+		my $wait_mode = $mode_map{$mode};
+		my $timeout = $PostgreSQL::Test::Utils::timeout_default;
+		my $wait_query =
+		  qq[WAIT FOR LSN '${target_lsn}' WITH (MODE '${wait_mode}', timeout '${timeout}s', no_throw);];
+
+		# Try WAIT FOR LSN. If it succeeds, we're done. If it returns
+		# 'not_in_recovery' (standby was promoted), fall back to polling.
+		# If the session is interrupted (e.g., killed by recovery conflict),
+		# fall back to polling on the upstream which is immune to standby-
+		# side conflicts.
+		my $output;
+		local $@;
+		my $wait_succeeded = eval {
+			$output = $standby_node->safe_psql('postgres', $wait_query);
+			chomp($output);
+			1;
+		};
+
+		if ($wait_succeeded && $output eq 'success')
+		{
+			print "done\n";
+			return;
+		}
+
+		# 'not_in_recovery' means the standby was promoted; fall back to
+		# polling pg_stat_replication on the upstream.
+		if ($wait_succeeded && $output eq 'not_in_recovery')
+		{
+			diag
+			  "WAIT FOR LSN returned 'not_in_recovery', falling back to polling";
+		}
+		# 'timeout' is a hard failure — no point falling back to polling.
+		elsif ($wait_succeeded)
+		{
+			my $details = $self->safe_psql('postgres',
+				"SELECT * FROM pg_catalog.pg_stat_replication");
+			diag qq(WAIT FOR LSN returned '$output'
+pg_stat_replication on upstream:
+${details});
+			croak
+			  "WAIT FOR LSN '$wait_mode' to '$target_lsn' returned '$output'";
+		}
+		# WAIT FOR LSN was interrupted.  Fall back to polling if this
+		# looks like a recovery conflict.  We match the English error
+		# message "conflict with recovery" which is reliable because the
+		# test suite runs with LC_MESSAGES=C.  Other errors should fail
+		# immediately rather than being masked by a silent fallback.
+		elsif ($@ =~ /conflict with recovery/i)
+		{
+			diag qq(WAIT FOR LSN interrupted, falling back to polling:
+$@);
+		}
+		else
+		{
+			croak "WAIT FOR LSN failed: $@";
+		}
+	}
+
+	# Fall back to polling pg_stat_replication on the upstream for:
+	# - 'sent' mode (no corresponding WAIT FOR LSN mode)
+	# - When standby_name is a string (e.g., subscription name)
+	# - When the standby is no longer in recovery (was promoted)
+	# - When WAIT FOR LSN was interrupted (e.g., killed by a recovery conflict)
 	my $query = qq[SELECT '$target_lsn' <= ${mode}_lsn AND state = 'streaming'
          FROM pg_catalog.pg_stat_replication
          WHERE application_name IN ('$standby_name', 'walreceiver')];
-- 
2.39.5 (Apple Git-154)