From 6213bc603c00d1277bdb1808f5879bdf5a735946 Mon Sep 17 00:00:00 2001
From: Asim R P <apraveen@pivotal.io>
Date: Fri, 6 Sep 2019 18:32:47 +0530
Subject: [PATCH v2 6/6] TAP test to demonstrate remote fault injector
 interface

The test verifies that a commit on master waits until a synchronous
standby has flushed WAL up to or beyond the commit LSN.  A new fault
point 'standby_flush' is defined in WAL receiver.  When it is enabled
with type 'skip', the WAL receiver responds with a stale flush LSN value.

The test invokes faultinjector interface on master to inject the fault
on standby.  The faultinjector interface can connect to a remote server
based on hostname and port.

Note: the test depends on non-core Perl module IPC::Run.  The
IPC::Run::start interface is used to start a psql session in background.
If this is not acceptable, I am sure there are in-core options to
achieve background subprocesses, however, with my limited Perl skills
(as evident from the test), this seems to serve the purpose as a first
attempt.
---
 src/backend/replication/walreceiver.c |  13 ++++
 src/test/recovery/t/007_sync_rep.pl   | 125 +++++++++++++++++++++++++++++++++-
 2 files changed, 137 insertions(+), 1 deletion(-)

diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index 6abc780778..8e03a82c5c 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -64,6 +64,7 @@
 #include "storage/pmsignal.h"
 #include "storage/procarray.h"
 #include "utils/builtins.h"
+#include "utils/faultinjector.h"
 #include "utils/guc.h"
 #include "utils/pg_lsn.h"
 #include "utils/ps_status.h"
@@ -999,6 +1000,18 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr)
 static void
 XLogWalRcvFlush(bool dying)
 {
+#ifdef FAULT_INJECTOR
+	if (SIMPLE_FAULT_INJECTOR("standby_flush") == FaultInjectorTypeSkip)
+	{
+		/* Skip flush but respond to master. */
+		if (!dying)
+		{
+			XLogWalRcvSendReply(false, false);
+			XLogWalRcvSendHSFeedback(false);
+		}
+		return;	
+	}
+#endif
 	if (LogstreamResult.Flush < LogstreamResult.Write)
 	{
 		WalRcvData *walrcv = WalRcv;
diff --git a/src/test/recovery/t/007_sync_rep.pl b/src/test/recovery/t/007_sync_rep.pl
index 05803bed4e..e21085f511 100644
--- a/src/test/recovery/t/007_sync_rep.pl
+++ b/src/test/recovery/t/007_sync_rep.pl
@@ -3,7 +3,9 @@ use strict;
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 11;
+use Test::More tests => 15;
+
+use IPC::Run qw( start pump finish );
 
 # Query checking sync_priority and sync_state of each standby
 my $check_sql =
@@ -45,6 +47,123 @@ sub start_standby_and_wait
 	return;
 }
 
+# Start an interactive psql session in the background, connected to the
+# "postgres" database at the given host and port.  Returns a hash
+# reference with the IPC::Run harness ("harness") and references to the
+# session's stdin, stdout and stderr buffers ("in", "out", "err").
+sub startpsql
+{
+	my ($host, $port) = @_;
+	unless (defined($host) && defined($port))
+	{
+		die "host and port must be specified";
+	}
+
+	# -X keeps the user's ~/.psqlrc from altering the session's output.
+	my @psql = ('psql', '-X', '-d', 'postgres', '-h', $host, '-p', $port);
+
+	# The buffers must outlive this sub; the harness keeps using them.
+	my ($in, $out, $err) = ('', '', '');
+	my %ret = (
+		harness => start(\@psql, \$in, \$out, \$err),
+		in      => \$in,
+		out     => \$out,
+		err     => \$err);
+	return \%ret;
+}
+
+# Send $sql to the background psql session created by startpsql(),
+# returning once psql's stdin has consumed all of it.  Output is not
+# waited for; use getResults() to collect it.
+sub sendSQL
+{
+	my ($session, $sql) = @_;
+	my ($stdin, $stdout, $stderr) = @{$session}{qw(in out err)};
+
+	# Start from empty output and error buffers so that they end up
+	# holding only what this SQL command produces.
+	${$stdout} = "";
+	${$stderr} = "";
+
+	# IPC::Run feeds whatever is stored in the input buffer to the psql
+	# child process, draining the buffer as it goes.
+	${$stdin} = $sql;
+	pump $session->{harness} while length ${$stdin};
+}
+
+# Pump the background psql session until its stdout matches $pattern and
+# return the accumulated stdout.  Dies as soon as psql writes anything
+# to stderr instead of waiting forever for output that may never come.
+sub getResults
+{
+	my ($session, $pattern) = @_;
+	my ($outref, $errref) = @{$session}{qw(out err)};
+
+	# Any stderr output is fatal below, so stop waiting once it shows up.
+	pump $session->{harness}
+	  while ($$outref !~ /$pattern/ && !length $$errref);
+	die "psql failed:\n", $$errref if length $$errref;
+	return $$outref;
+}
+
+# This test injects a fault in a standby by invoking faultinjector
+# interface on master.  The fault causes standby to respond with stale
+# flush LSN value, simulating the case that it has not caught up.  If
+# the standby is synchronous, commits on master should wait until
+# standby confirms it has flushed WAL up to or beyond the commit LSN.
+sub test_sync_commit
+{
+	my ($master, $standby) = @_;
+
+	# Inject fault remotely on the standby such that it replies with the
+	# same LSN as the last time, in spite of having received newer WAL
+	# records from master.
+	my ($cmdret, $stdout, $stderr) =
+	  $master->psql('postgres', 'create extension faultinjector;', on_error_die => 1);
+	my $sql = sprintf(
+		"select inject_fault_infinite('standby_flush', 'skip', '%s', %d)",
+		$standby->host, $standby->port);
+	($cmdret, $stdout, $stderr) = $master->psql('postgres', $sql);
+	like($stdout, qr/Success/, 'inject skip fault in standby');
+
+	# Commit a transaction on master; the master backend should wait
+	# because the standby hasn't acknowledged flushing the commit LSN.
+	my $background_psql = startpsql($master->host, $master->port);
+	sendSQL $background_psql, "create table test_sync_commit(a int);\n";
+
+	# Checkpoint so as to advance sent_lsn.  Due to the fault, flush_lsn
+	# of this standby (matched by application_name) should not advance.
+	($cmdret, $stdout, $stderr) =
+	  $master->psql('postgres', 'checkpoint;', on_error_die => 1);
+	($cmdret, $stdout, $stderr) = $master->psql(
+		'postgres',
+		qq(select case when sent_lsn > flush_lsn then 'Success'
+ else 'Failure' end from pg_stat_replication
+ where application_name = '@{[ $standby->name ]}'),
+		on_error_die => 1);
+	like($stdout, qr/Success/, 'master WAL has moved ahead of standby');
+
+	# Verify that the create table transaction started in background
+	# is waiting for sync rep.
+	($cmdret, $stdout, $stderr) = $master->psql(
+		'postgres',
+		qq(select query from pg_stat_activity where wait_event = 'SyncRep'),
+		on_error_die => 1);
+	like($stdout, qr/create table test_sync_commit/, 'commit waits for standby');
+
+	# Remove the fault from standby so that it starts responding with
+	# the real write and flush LSN values.
+	$sql =~ s/skip/reset/;
+	$sql =~ s/_infinite//;
+	($cmdret, $stdout, $stderr) = $master->psql('postgres', $sql);
+	like($stdout, qr/Success/, ' fault removed from standby');
+
+	# Wait for the create table transaction to commit, then let finish
+	# close the background psql's stdin so the session does not leak.
+	getResults($background_psql, 'CREATE TABLE');
+	finish $background_psql->{harness};
+}
+
 # Initialize master node
 my $node_master = get_new_node('master');
 $node_master->init(allows_streaming => 1);
@@ -96,6 +215,10 @@ standby3|1|potential),
 	'asterisk in synchronous_standby_names',
 	'*');
 
+# Now that standby1 is considered synchronous, check if commits made
+# on master wait for standby1 to catch up.
+test_sync_commit($node_master, $node_standby_1);
+
 # Stop and start standbys to rearrange the order of standbys
 # in WalSnd array. Now, if standbys have the same priority,
 # standby2 is selected preferentially and standby3 is next.
-- 
2.14.3 (Apple Git-98)

