Hello

Currently, during point-in-time recovery with recovery_target_action = 'pause',
we print the following log lines:

> LOG: recovery has paused
> HINT: Execute pg_wal_replay_resume() to continue.

My colleague told me that this is a terrible moment for such a hint: to
continue what, exactly? It sounds like "to continue replay", similar to the
normal pg_wal_replay_pause()/pg_wal_replay_resume() behavior. We have just a
small note in the documentation:

> The paused state can be resumed by using pg_wal_replay_resume() (see Table 
> 9.81), which then causes recovery to end.

But I think this is an important place, and it can be improved.

Also, the database does not respond to promote signals at this stage.
The test in the attached patch 0001 will fail.

Patch 0002 contains my proposed ideas:
- introduce separate messages for a pause due to a pg_wal_replay_pause() call
and for a pause due to recovery_target_action.
- check for standby triggers only in the recovery_target_action case - I am not
sure this would be safe in the pg_wal_replay_pause() call case.

Maybe a more verbose hint would be appropriate:

> Execute pg_promote() to end recovery, or shut down the server, change the
> recovery target settings to a later target, and restart to continue recovery.

Thoughts?

regards, Sergei
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 3813eadfb4..5ab09917c7 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -860,7 +860,7 @@ static void validateRecoveryParameters(void);
 static void exitArchiveRecovery(TimeLineID endTLI, XLogRecPtr endOfLog);
 static bool recoveryStopsBefore(XLogReaderState *record);
 static bool recoveryStopsAfter(XLogReaderState *record);
-static void recoveryPausesHere(void);
+static void recoveryPausesHere(bool isRecoveryTargetAction);
 static bool recoveryApplyDelay(XLogReaderState *record);
 static void SetLatestXTime(TimestampTz xtime);
 static void SetCurrentChunkStartTime(TimestampTz xtime);
@@ -5912,20 +5912,28 @@ recoveryStopsAfter(XLogReaderState *record)
  * anyone cares about server power consumption in.
  */
 static void
-recoveryPausesHere(void)
+recoveryPausesHere(bool isRecoveryTargetAction)
 {
 	/* Don't pause unless users can connect! */
 	if (!LocalHotStandbyActive)
 		return;
 
-	ereport(LOG,
-			(errmsg("recovery has paused"),
-			 errhint("Execute pg_wal_replay_resume() to continue.")));
+	if (isRecoveryTargetAction)
+		ereport(LOG,
+				(errmsg("recovery has paused"),
+				errhint("Execute pg_wal_replay_resume() to promote.")));
+	else
+		ereport(LOG,
+				(errmsg("recovery has paused"),
+				errhint("Execute pg_wal_replay_resume() to continue.")));
 
 	while (RecoveryIsPaused())
 	{
 		pg_usleep(1000000L);	/* 1000 ms */
 		HandleStartupProcInterrupts();
+		/* handle promote requests */
+		if (isRecoveryTargetAction && CheckForStandbyTrigger())
+			SetRecoveryPause(false);
 	}
 }
 
@@ -7096,7 +7104,7 @@ StartupXLOG(void)
 				 * adding another spinlock cycle to prevent that.
 				 */
 				if (((volatile XLogCtlData *) XLogCtl)->recoveryPause)
-					recoveryPausesHere();
+					recoveryPausesHere(false);
 
 				/*
 				 * Have we reached our recovery target?
@@ -7121,7 +7129,7 @@ StartupXLOG(void)
 					 * work.
 					 */
 					if (((volatile XLogCtlData *) XLogCtl)->recoveryPause)
-						recoveryPausesHere();
+						recoveryPausesHere(false);
 				}
 
 				/* Setup error traceback support for ereport() */
@@ -7295,7 +7303,7 @@ StartupXLOG(void)
 
 					case RECOVERY_TARGET_ACTION_PAUSE:
 						SetRecoveryPause(true);
-						recoveryPausesHere();
+						recoveryPausesHere(true);
 
 						/* drop into promote */
 
diff --git a/src/test/recovery/t/003_recovery_targets.pl b/src/test/recovery/t/003_recovery_targets.pl
index fd14bab208..85afc71c66 100644
--- a/src/test/recovery/t/003_recovery_targets.pl
+++ b/src/test/recovery/t/003_recovery_targets.pl
@@ -167,3 +167,22 @@ foreach my $i (0..100)
 $logfile = slurp_file($node_standby->logfile());
 ok($logfile =~ qr/FATAL:  recovery ended before configured recovery target was reached/,
 	'recovery end before target reached is a fatal error');
+
+# react to promote on recovery_target_action = pause
+
+$node_standby = get_new_node('standby_9');
+$node_standby->init_from_backup($node_master, 'my_backup',
+								has_restoring => 1, standby => 1);
+$node_standby->append_conf('postgresql.conf',
+						   "recovery_target_name = '$recovery_name'");
+$node_standby->append_conf('postgresql.conf',
+						   "recovery_target_action = 'pause'");
+$node_standby->start;
+
+# Wait until standby has replayed enough data
+my $caughtup_query =
+  "SELECT '$lsn4'::pg_lsn <= pg_last_wal_replay_lsn()";
+$node_standby->poll_query_until('postgres', $caughtup_query)
+  or die "Timed out while waiting for standby to catch up";
+
+$node_standby->promote;

Reply via email to