From 113be17c13255362e90bcc1c4b30096472f14b08 Mon Sep 17 00:00:00 2001
From: Osumi Takamichi <osumi.takamichi@fujitsu.com>
Date: Sun, 4 Apr 2021 01:02:59 +0000
Subject: [PATCH v06] Safeguard for archive recovery not to miss data

This prevents the server from starting up when it
detects WAL generated with wal_level=minimal during archive recovery.
This is done regardless of the value of EnableHotStandby,
because we do not expect any valid scenario in which archive recovery
goes through a period of wal_level=minimal.

The motivation of this patch is to protect users from ending up with a replica
that could be missing data in standby mode, or with a server that is missing data in recovery mode.

Author: Takamichi Osumi <osumi.takamichi@fujitsu.com>
Reviewed-by: Laurenz Albe <laurenz.albe@cybertec.at>
Reviewed-by: Fujii Masao <masao.fujii@oss.nttdata.com>
Reviewed-by: David Steele <david@pgmasters.net>
Reviewed-by: Kyotaro Horiguchi <horikyota.ntt@gmail.com>
Discussion: https://www.postgresql.org/message-id/OSBPR01MB4888CBE1DA08818FD2D90ED8EDF90%40OSBPR01MB4888.jpnprd01.prod.outlook.com
---
 src/backend/access/transam/xlog.c           |  13 ++--
 src/test/recovery/t/024_archive_recovery.pl | 103 ++++++++++++++++++++++++++++
 2 files changed, 108 insertions(+), 8 deletions(-)
 create mode 100644 src/test/recovery/t/024_archive_recovery.pl

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 6f8810e..27d9ec9 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -6403,9 +6403,11 @@ CheckRequiredParameterValues(void)
 	 */
 	if (ArchiveRecoveryRequested && ControlFile->wal_level == WAL_LEVEL_MINIMAL)
 	{
-		ereport(WARNING,
-				(errmsg("WAL was generated with wal_level=minimal, data may be missing"),
-				 errhint("This happens if you temporarily set wal_level=minimal without taking a new base backup.")));
+		ereport(FATAL,
+				(errmsg("WAL was generated with wal_level=minimal, cannot continue recovering"),
+				 errdetail("This happens if you temporarily set wal_level=minimal on the server."),
+				 errhint("Use a backup taken after setting wal_level to higher than minimal "
+						 "or recover to the point in time before wal_level was changed to minimal even though it may cause data loss.")));
 	}
 
 	/*
@@ -6414,11 +6416,6 @@ CheckRequiredParameterValues(void)
 	 */
 	if (ArchiveRecoveryRequested && EnableHotStandby)
 	{
-		if (ControlFile->wal_level < WAL_LEVEL_REPLICA)
-			ereport(ERROR,
-					(errmsg("hot standby is not possible because wal_level was not set to \"replica\" or higher on the primary server"),
-					 errhint("Either set wal_level to \"replica\" on the primary, or turn off hot_standby here.")));
-
 		/* We ignore autovacuum_max_workers when we make this test. */
 		RecoveryRequiresIntParameter("max_connections",
 									 MaxConnections,
diff --git a/src/test/recovery/t/024_archive_recovery.pl b/src/test/recovery/t/024_archive_recovery.pl
new file mode 100644
index 0000000..b75c81c
--- /dev/null
+++ b/src/test/recovery/t/024_archive_recovery.pl
@@ -0,0 +1,103 @@
+# Test that archive recovery is prohibited when the server detects WAL generated with wal_level=minimal
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 5;
+use Time::HiRes qw(usleep);
+
+# Initialize a node
+my $node = get_new_node('orig');
+my $backup_name = 'my_backup';
+my $replica_config = q[
+wal_level = replica
+archive_mode = on
+max_wal_senders = 10
+hot_standby = off
+];
+
+# Start up the server with wal_level = replica
+$node->init(has_archiving => 1);
+$node->append_conf('postgresql.conf', $replica_config);
+$node->start;
+
+# Check the wal_level and get a backup
+check_wal_level('replica', 'wal_level is replica at first');
+$node->backup($backup_name);
+
+# Change the wal_level from replica to minimal
+$node->append_conf(
+	'postgresql.conf', q[
+wal_level = minimal
+archive_mode = off
+max_wal_senders = 0
+]);
+$node->restart;
+check_wal_level('minimal', 'wal_level has become minimal');
+
+# Set wal_level back to replica and ensure that
+# the WAL file containing the record about the change of wal_level
+# to minimal has been archived, by checking pg_stat_archiver
+$node->append_conf('postgresql.conf', $replica_config);
+$node->restart;
+check_wal_level('replica', 'wal_level went back to replica again');
+my $walfile_to_be_archived = $node->safe_psql('postgres',
+	"SELECT pg_walfile_name(pg_current_wal_lsn());");
+$node->safe_psql('postgres', 'SELECT pg_switch_wal()');
+my $archive_wait_query
+  = "SELECT '$walfile_to_be_archived' <= last_archived_wal FROM pg_stat_archiver;";
+$node->poll_query_until('postgres', $archive_wait_query)
+  or die "Timed out while waiting for WAL segment to be archived";
+$node->stop;
+
+# Execute an archive recovery in standby mode
+my $new_node = get_new_node('new_node');
+$new_node->init_from_backup(
+	$node, $backup_name,
+	has_restoring => 1);
+run_log(
+	['pg_ctl','-D', $new_node->data_dir, '-l',
+	 $new_node->logfile, 'start']);
+
+# Wait up to 180s for postgres to terminate
+foreach my $i (0 .. 1800)
+{
+    last if !-f $new_node->data_dir . '/postmaster.pid';
+    usleep(100_000);
+}
+
+# Confirm that the archive recovery fails with an error
+my $logfile = slurp_file($new_node->logfile());
+ok( $logfile =~
+      qr/FATAL:  WAL was generated with wal_level=minimal, cannot continue recovering/,
+    'Archive recovery fails in standby mode with WAL generated during wal_level=minimal');
+
+# The same protection should also apply to recovery mode (standby => 0)
+my $another_node = get_new_node('another_node');
+$another_node->init_from_backup(
+	$node, $backup_name,
+	has_restoring => 1, standby => 0);
+run_log(
+	['pg_ctl', '-D', $another_node->data_dir, '-l',
+	 $another_node->logfile, 'start']);
+
+foreach my $i (0 .. 1800)
+{
+    last if !-f $another_node->data_dir . '/postmaster.pid';
+    usleep(100_000);
+}
+
+my $log = slurp_file($another_node->logfile());
+ok( $log =~
+	qr/FATAL:  WAL was generated with wal_level=minimal, cannot continue recovering/,
+    'Archive recovery fails in recovery mode with WAL generated during wal_level=minimal');
+
+sub check_wal_level    # Test helper: emits exactly one Test::More result per call.
+{
+	my ($target_wal_level, $explanation) = @_;    # expected "show wal_level" output, test description
+
+	is( $node->safe_psql(    # uses the file-scoped $node; the node is not passed as an argument
+			'postgres', q{show wal_level}),
+        $target_wal_level,
+        $explanation);
+}
-- 
2.2.0

