From ec08206273397d271002c38fdf4108cb250407ed Mon Sep 17 00:00:00 2001
From: Osumi Takamichi <osumi.takamichi@fujitsu.com>
Date: Wed, 31 Mar 2021 13:29:38 +0000
Subject: [PATCH v05] Safeguard for archive recovery not to miss data

Make the server refuse to start up when it detects WAL generated
with wal_level=minimal during archive recovery.
This is done regardless of the value of EnableHotStandby,
because we do not think there is any legitimate scenario in which
archive recovery passes through a period of wal_level=minimal.

The motivation of this patch is to protect users from ending up with a replica
that could be missing data in standby mode, or with a server that is missing data in recovery mode.

Author: Takamichi Osumi <osumi.takamichi@fujitsu.com>
Reviewed-by: Laurenz Albe <laurenz.albe@cybertec.at>
Reviewed-by: Fujii Masao <masao.fujii@oss.nttdata.com>
Reviewed-by: David Steele <david@pgmasters.net>
Reviewed-by: Kyotaro Horiguchi <horikyota.ntt@gmail.com>
Discussion: https://www.postgresql.org/message-id/OSBPR01MB4888CBE1DA08818FD2D90ED8EDF90%40OSBPR01MB4888.jpnprd01.prod.outlook.com
---
 src/backend/access/transam/xlog.c           |  13 ++--
 src/test/recovery/t/024_archive_recovery.pl | 108 ++++++++++++++++++++++++++++
 2 files changed, 113 insertions(+), 8 deletions(-)
 create mode 100644 src/test/recovery/t/024_archive_recovery.pl

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 6f8810e..363ade5 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -6403,9 +6403,11 @@ CheckRequiredParameterValues(void)
 	 */
 	if (ArchiveRecoveryRequested && ControlFile->wal_level == WAL_LEVEL_MINIMAL)
 	{
-		ereport(WARNING,
-				(errmsg("WAL was generated with wal_level=minimal, data may be missing"),
-				 errhint("This happens if you temporarily set wal_level=minimal without taking a new base backup.")));
+		ereport(FATAL,
+				(errmsg("WAL was generated with wal_level=minimal, cannot continue recovering"),
+				 errdetail("This happens if you temporarily set wal_level=minimal on the server."),
+				 errhint("Use a backup taken after setting wal_level to higher than minimal "
+						 "or recover to the point in time before wal_level becomes minimal even though it causes data loss")));
 	}
 
 	/*
@@ -6414,11 +6416,6 @@ CheckRequiredParameterValues(void)
 	 */
 	if (ArchiveRecoveryRequested && EnableHotStandby)
 	{
-		if (ControlFile->wal_level < WAL_LEVEL_REPLICA)
-			ereport(ERROR,
-					(errmsg("hot standby is not possible because wal_level was not set to \"replica\" or higher on the primary server"),
-					 errhint("Either set wal_level to \"replica\" on the primary, or turn off hot_standby here.")));
-
 		/* We ignore autovacuum_max_workers when we make this test. */
 		RecoveryRequiresIntParameter("max_connections",
 									 MaxConnections,
diff --git a/src/test/recovery/t/024_archive_recovery.pl b/src/test/recovery/t/024_archive_recovery.pl
new file mode 100644
index 0000000..a9cafbc
--- /dev/null
+++ b/src/test/recovery/t/024_archive_recovery.pl
@@ -0,0 +1,108 @@
+# Test that archive recovery is refused when the server detects WAL generated with wal_level=minimal
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 7;
+use Config;
+use Time::HiRes qw(usleep);
+
+# Set up the original node.  Its WAL history will contain a period of
+# wal_level=minimal between the base backup and the final shutdown.
+my $node = get_new_node('orig');
+my $backup_name = 'my_backup';
+my $conf_file = 'postgresql.conf';
+my $archiving_settings = q[
+wal_level = replica
+archive_mode = on
+max_wal_senders = 10
+hot_standby = off
+];
+my $minimal_settings = q[
+wal_level = minimal
+archive_mode = off
+max_wal_senders = 0
+];
+
+# Phase 1: run with wal_level = replica and take the base backup
+$node->init(has_archiving => 1);
+$node->append_conf($conf_file, $archiving_settings);
+$node->start();
+check_wal_level('replica', 'wal_level is replica at first');
+$node->backup($backup_name);
+
+# Phase 2: restart with wal_level = minimal
+$node->append_conf($conf_file, $minimal_settings);
+$node->restart();
+check_wal_level('minimal', 'wal_level has become minimal');
+
+# Phase 3: return to wal_level = replica, then shut the node down
+$node->append_conf($conf_file, $archiving_settings);
+$node->restart();
+check_wal_level('replica', 'wal_level went back to replica again');
+$node->stop();
+
+# Archive recovery in standby mode, starting from the backup taken before
+# the wal_level=minimal period, must refuse to continue.
+my $new_node = get_new_node('new_node');
+$new_node->init_from_backup($node, $backup_name, has_restoring => 1);
+
+# The node is expected to have been set up in standby mode
+my $pgdata = $new_node->data_dir;
+ok(-f "$pgdata/standby.signal", 'standby.signal was created');
+
+# Start the server with pg_ctl.  The result of the attempt is judged from
+# the server log below.
+run_log(
+	[ 'pg_ctl', '-D', $pgdata, '-l', $new_node->logfile, 'start' ]);
+
+# Wait up to 180s for postgres to terminate
+foreach (0 .. 1800)
+{
+	last if !-f "$pgdata/postmaster.pid";
+	usleep(100_000);
+}
+
+# Confirm that the archive recovery fails with the expected error; on a
+# mismatch like() reports the log contents it was given.
+my $logfile = slurp_file($new_node->logfile);
+like(
+	$logfile,
+	qr/FATAL:  WAL was generated with wal_level=minimal, cannot continue recovering/,
+	'Archive recovery fails in standby mode with WAL generated during wal_level=minimal');
+
+# The same protection must apply to archive recovery in recovery mode.
+# NOTE(review): assumes init_from_backup honors standby => 0 -- confirm.
+my $another_node = get_new_node('another_node');
+$another_node->init_from_backup($node, $backup_name,
+	has_restoring => 1, standby => 0);
+
+my $path = $another_node->data_dir;
+ok(-f "$path/recovery.signal", 'recovery.signal was created');
+
+# Start via pg_ctl; the outcome is judged from the server log below
+run_log(
+	[ 'pg_ctl', '-D', $path, '-l', $another_node->logfile, 'start' ]);
+
+# Wait up to 180s for postgres to terminate
+foreach (0 .. 1800)
+{
+	last if !-f "$path/postmaster.pid";
+	usleep(100_000);
+}
+
+my $log = slurp_file($another_node->logfile);
+like(
+	$log,
+	qr/FATAL:  WAL was generated with wal_level=minimal, cannot continue recovering/,
+	'Archive recovery fails in recovery mode with WAL generated during wal_level=minimal');
+
+# Run "show wal_level" on $node and report, as one test named $test_name,
+# whether the result equals $expected_level.
+sub check_wal_level
+{
+	my ($expected_level, $test_name) = @_;
+	my $actual_level = $node->safe_psql('postgres', q{show wal_level});
+
+	return is($actual_level, $expected_level, $test_name);
+}
-- 
2.2.0

