From 86d1ba8eb5d1f764c47ef71569f1112c1e1687aa Mon Sep 17 00:00:00 2001
From: Osumi Takamichi <osumi.takamichi@fujitsu.com>
Date: Mon, 25 Jan 2021 07:52:37 +0000
Subject: [PATCH v04] Safeguard for archive recovery not to miss data

This makes the server refuse to start when it detects WAL generated with
wal_level=minimal during archive recovery.
This is done regardless of the value of EnableHotStandby,
because we do not think there is any legitimate scenario in which
archive recovery goes through a period of wal_level=minimal.

The motivation of this patch is to protect users from ending up with a replica
that could miss data in standby mode, or with a server that could miss data in recovery mode.

Author: Takamichi Osumi <osumi.takamichi@fujitsu.com>
Reviewed-by: Laurenz Albe <laurenz.albe@cybertec.at>
Discussion: https://www.postgresql.org/message-id/OSBPR01MB4888CBE1DA08818FD2D90ED8EDF90%40OSBPR01MB4888.jpnprd01.prod.outlook.com
---
 src/backend/access/transam/xlog.c           |  68 +++++++++---------
 src/test/recovery/t/022_archive_recovery.pl | 108 ++++++++++++++++++++++++++++
 2 files changed, 140 insertions(+), 36 deletions(-)
 create mode 100644 src/test/recovery/t/022_archive_recovery.pl

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 470e113..6d6fcc6 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -6329,44 +6329,40 @@ RecoveryRequiresIntParameter(const char *param_name, int currValue, int minValue
 static void
 CheckRequiredParameterValues(void)
 {
-	/*
-	 * For archive recovery, the WAL must be generated with at least 'replica'
-	 * wal_level.
-	 */
-	if (ArchiveRecoveryRequested && ControlFile->wal_level == WAL_LEVEL_MINIMAL)
+	if (ArchiveRecoveryRequested)
 	{
-		ereport(WARNING,
-				(errmsg("WAL was generated with wal_level=minimal, data may be missing"),
-				 errhint("This happens if you temporarily set wal_level=minimal without taking a new base backup.")));
-	}
+		/*
+		 * For archive recovery, the WAL must be generated with at least 'replica'
+		 * wal_level; otherwise data may be missing, so refuse to continue.
+		 */
+		if (ControlFile->wal_level == WAL_LEVEL_MINIMAL)
+			ereport(FATAL,
+					(errmsg("WAL was generated with wal_level=minimal, cannot continue recovering"),
+					 errdetail("This happens if you temporarily set wal_level=minimal on the server."),
+					 errhint("Run recovery again from a new base backup taken after setting wal_level higher than minimal.")));
 
-	/*
-	 * For Hot Standby, the WAL must be generated with 'replica' mode, and we
-	 * must have at least as many backend slots as the primary.
-	 */
-	if (ArchiveRecoveryRequested && EnableHotStandby)
-	{
-		if (ControlFile->wal_level < WAL_LEVEL_REPLICA)
-			ereport(ERROR,
-					(errmsg("hot standby is not possible because wal_level was not set to \"replica\" or higher on the primary server"),
-					 errhint("Either set wal_level to \"replica\" on the primary, or turn off hot_standby here.")));
-
-		/* We ignore autovacuum_max_workers when we make this test. */
-		RecoveryRequiresIntParameter("max_connections",
-									 MaxConnections,
-									 ControlFile->MaxConnections);
-		RecoveryRequiresIntParameter("max_worker_processes",
-									 max_worker_processes,
-									 ControlFile->max_worker_processes);
-		RecoveryRequiresIntParameter("max_wal_senders",
-									 max_wal_senders,
-									 ControlFile->max_wal_senders);
-		RecoveryRequiresIntParameter("max_prepared_transactions",
-									 max_prepared_xacts,
-									 ControlFile->max_prepared_xacts);
-		RecoveryRequiresIntParameter("max_locks_per_transaction",
-									 max_locks_per_xact,
-									 ControlFile->max_locks_per_xact);
+		/*
+		 * For Hot Standby, we must have at least as many backend slots as the primary.
+		 */
+		if (EnableHotStandby)
+		{
+			/* We ignore autovacuum_max_workers when we make this test. */
+			RecoveryRequiresIntParameter("max_connections",
+										 MaxConnections,
+										 ControlFile->MaxConnections);
+			RecoveryRequiresIntParameter("max_worker_processes",
+										 max_worker_processes,
+										 ControlFile->max_worker_processes);
+			RecoveryRequiresIntParameter("max_wal_senders",
+										 max_wal_senders,
+										 ControlFile->max_wal_senders);
+			RecoveryRequiresIntParameter("max_prepared_transactions",
+										 max_prepared_xacts,
+										 ControlFile->max_prepared_xacts);
+			RecoveryRequiresIntParameter("max_locks_per_transaction",
+										 max_locks_per_xact,
+										 ControlFile->max_locks_per_xact);
+		}
 	}
 }
 
diff --git a/src/test/recovery/t/022_archive_recovery.pl b/src/test/recovery/t/022_archive_recovery.pl
new file mode 100644
index 0000000..1e46539
--- /dev/null
+++ b/src/test/recovery/t/022_archive_recovery.pl
@@ -0,0 +1,108 @@
+# Verify that archive recovery fails when the server detects WAL generated with wal_level=minimal
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 7;
+use Config;
+use Time::HiRes qw(usleep);
+
+# Create the original node and the settings applied whenever wal_level is replica
+my $node = get_new_node('orig');
+my $backup_name = 'my_backup';
+my $replica_config = q[
+wal_level = replica
+archive_mode = on
+max_wal_senders = 10
+hot_standby = off
+];
+
+# Start up the server with wal_level = replica and archiving enabled
+$node->init(has_archiving => 1);
+$node->append_conf('postgresql.conf', $replica_config);
+$node->start;
+
+# Check the wal_level and take a backup while wal_level is still replica
+check_wal_level('replica', 'wal_level is replica at first');
+$node->backup($backup_name);
+
+# Change the wal_level from replica to minimal by appending new settings
+$node->append_conf(
+	'postgresql.conf', q[
+wal_level = minimal
+archive_mode = off
+max_wal_senders = 0
+]);
+$node->restart;
+check_wal_level('minimal', 'wal_level has become minimal');
+
+# Set the wal_level back to replica, verify it, then stop the node
+$node->append_conf('postgresql.conf', $replica_config);
+$node->restart;
+check_wal_level('replica', 'wal_level went back to replica again');
+$node->stop;
+
+# Set up a new node from the backup to run archive recovery in standby mode
+my $new_node = get_new_node('new_node');
+$new_node->init_from_backup(
+	$node, $backup_name,
+	has_restoring => 1);
+
+# Check that standby.signal exists
+my $pgdata = $new_node->data_dir;
+ok (-f "${pgdata}/standby.signal", 'standby.signal was created');
+
+run_log(
+	[
+	 'pg_ctl',               '-D', $new_node->data_dir, '-l',
+	 $new_node->logfile, 'start'
+	]);
+
+# Wait up to 180s for postmaster.pid to disappear, i.e. for postgres to terminate
+foreach my $i (0 .. 1800)
+{
+    last if !-f $new_node->data_dir . '/postmaster.pid';
+    usleep(100_000);
+}
+
+# Confirm that the archive recovery failed with the expected FATAL message
+my $logfile = slurp_file($new_node->logfile());
+ok( $logfile =~
+      qr/FATAL:  WAL was generated with wal_level=minimal, cannot continue recovering/,
+    'Archive recovery fails in standby mode with WAL generated during wal_level=minimal');
+
+# This protection should also apply to recovery mode (standby => 0)
+my $another_node = get_new_node('another_node');
+$another_node->init_from_backup(
+	$node, $backup_name,
+	has_restoring => 1, standby => 0);
+
+# Check that recovery.signal exists
+my $path = $another_node->data_dir;
+ok (-f "${path}/recovery.signal", 'recovery.signal was created');
+run_log(
+	[
+	 'pg_ctl',               '-D', $another_node->data_dir, '-l',
+	 $another_node->logfile, 'start'
+	]);
+
+foreach my $i (0 .. 1800)
+{
+    last if !-f $another_node->data_dir . '/postmaster.pid';
+    usleep(100_000);
+}
+
+my $log = slurp_file($another_node->logfile());
+ok( $log =~
+	qr/FATAL:  WAL was generated with wal_level=minimal, cannot continue recovering/,
+    'Archive recovery fails in recovery mode with WAL generated during wal_level=minimal');
+
+# Assert that $node currently reports the given wal_level; the second
+# argument is used as the test description.
+sub check_wal_level
+{
+	my ($expected_level, $test_name) = @_;
+	my $current_level = $node->safe_psql('postgres', q{show wal_level});
+
+	is($current_level, $expected_level, $test_name);
+}
-- 
2.2.0

