On Thu, Oct 22, 2020 at 7:50 PM Dilip Kumar <dilipbal...@gmail.com> wrote:
>
> On Thu, Oct 22, 2020 at 6:59 AM Kyotaro Horiguchi
> <horikyota....@gmail.com> wrote:
> >
> > At Wed, 21 Oct 2020 11:14:24 -0400, Robert Haas <robertmh...@gmail.com> 
> > wrote in
> > > On Wed, Oct 21, 2020 at 7:16 AM Dilip Kumar <dilipbal...@gmail.com> wrote:
> > > > One idea could be, if the recovery process is waiting for WAL and a
> > > > recovery pause is requested then we can assume that the recovery is
> > > > paused because before processing the next wal it will always check
> > > > whether the recovery pause is requested or not.
> > ..
> > > However, it might be better to implement this by having the system
> > > absorb the pause immediately when it's in this state, rather than
> > > trying to detect this state and treat it specially.
> >
> > The paused state is shown in pg_stat_activity.wait_event and it is
> > strange that pg_is_wal_replay_paused() is inconsistent with the
> > column.
>
> Right
>
> To make them consistent, we need to call recoveryPausesHere()
> > at the end of WaitForWALToBecomeAvailable() and let
> > pg_wal_replay_pause() call WakeupRecovery().
> >
> > I think we don't need a separate function to find the state.
>
> The idea makes sense to me.  I will try to change the patch as per the
> suggestion.

Here is the patch based on this idea.

-- 
Regards,
Dilip Kumar
EnterpriseDB: http://www.enterprisedb.com
From 08ab47fdae6007f847804929b63de4bba22de495 Mon Sep 17 00:00:00 2001
From: Dilip Kumar <dilipkumar@localhost.localdomain>
Date: Tue, 20 Oct 2020 14:10:14 +0530
Subject: [PATCH v1] pg_is_wal_replay_paused will wait for recovery to pause

Currently, pg_is_wal_replay_paused, just check whether the recovery
pause is requested or not but it doesn't actually wait for recovery
to get paused.  With this patch if recovery pause is not requested
the api will return false otherwise it will wait for recovery to get
paused.
---
 src/backend/access/transam/xlog.c      | 48 +++++++++++++++++++++++++++++-----
 src/backend/access/transam/xlogfuncs.c | 33 ++++++++++++++++++++---
 src/include/access/xlog.h              | 11 +++++++-
 3 files changed, 82 insertions(+), 10 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 52a67b1..ff595f7 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -714,8 +714,8 @@ typedef struct XLogCtlData
 	 * only relevant for replication or archive recovery
 	 */
 	TimestampTz currentChunkStartTime;
-	/* Are we requested to pause recovery? */
-	bool		recoveryPause;
+	/* Recovery pause state */
+	RecoveryPauseState		recoveryPause;
 
 	/*
 	 * lastFpwDisableRecPtr points to the start of the last replayed
@@ -5994,6 +5994,16 @@ recoveryStopsAfter(XLogReaderState *record)
 	return false;
 }
 
+static void
+CheckAndSetRecoveryPause(void)
+{
+	/* If recovery pause is requested then set it paused */
+	SpinLockAcquire(&XLogCtl->info_lck);
+	if (XLogCtl->recoveryPause == RECOVERY_PAUSE_REQUESTED)
+		XLogCtl->recoveryPause = RECOVERY_PAUSED;
+	SpinLockRelease(&XLogCtl->info_lck);
+}
+
 /*
  * Wait until shared recoveryPause flag is cleared.
  *
@@ -6025,12 +6035,20 @@ recoveryPausesHere(bool endOfRecovery)
 				(errmsg("recovery has paused"),
 				 errhint("Execute pg_wal_replay_resume() to continue.")));
 
-	while (RecoveryIsPaused())
+	while (RecoveryPauseRequested())
 	{
+
 		HandleStartupProcInterrupts();
+
 		if (CheckForStandbyTrigger())
 			return;
 		pgstat_report_wait_start(WAIT_EVENT_RECOVERY_PAUSE);
+
+		/*
+		 * While we are in the loop, user might resume and pause again so set
+		 * this everytime.
+		 */
+		CheckAndSetRecoveryPause();
 		pg_usleep(1000000L);	/* 1000 ms */
 		pgstat_report_wait_end();
 	}
@@ -6039,17 +6057,29 @@ recoveryPausesHere(bool endOfRecovery)
 bool
 RecoveryIsPaused(void)
 {
+	bool	recoveryPause;
+
+	SpinLockAcquire(&XLogCtl->info_lck);
+	recoveryPause = (XLogCtl->recoveryPause == RECOVERY_PAUSED) ? true : false;
+	SpinLockRelease(&XLogCtl->info_lck);
+
+	return recoveryPause;
+}
+
+bool
+RecoveryPauseRequested(void)
+{
 	bool		recoveryPause;
 
 	SpinLockAcquire(&XLogCtl->info_lck);
-	recoveryPause = XLogCtl->recoveryPause;
+	recoveryPause = (XLogCtl->recoveryPause != RECOVERY_IN_PROGRESS) ? true : false;
 	SpinLockRelease(&XLogCtl->info_lck);
 
 	return recoveryPause;
 }
 
 void
-SetRecoveryPause(bool recoveryPause)
+SetRecoveryPause(RecoveryPauseState recoveryPause)
 {
 	SpinLockAcquire(&XLogCtl->info_lck);
 	XLogCtl->recoveryPause = recoveryPause;
@@ -7423,7 +7453,7 @@ StartupXLOG(void)
 						proc_exit(3);
 
 					case RECOVERY_TARGET_ACTION_PAUSE:
-						SetRecoveryPause(true);
+						SetRecoveryPause(RECOVERY_PAUSE_REQUESTED);
 						recoveryPausesHere(true);
 
 						/* drop into promote */
@@ -12510,6 +12540,12 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 				elog(ERROR, "unexpected WAL source %d", currentSource);
 		}
 
+		/* test for recovery pause if user has requested the pause */
+		if (((volatile XLogCtlData *) XLogCtl)->recoveryPause)
+			recoveryPausesHere(false);
+
+		now = GetCurrentTimestamp();
+
 		/*
 		 * This possibly-long loop needs to handle interrupts of startup
 		 * process.
diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c
index 290658b..a82528f 100644
--- a/src/backend/access/transam/xlogfuncs.c
+++ b/src/backend/access/transam/xlogfuncs.c
@@ -538,7 +538,10 @@ pg_wal_replay_pause(PG_FUNCTION_ARGS)
 				 errhint("%s cannot be executed after promotion is triggered.",
 						 "pg_wal_replay_pause()")));
 
-	SetRecoveryPause(true);
+	SetRecoveryPause(RECOVERY_PAUSE_REQUESTED);
+
+	/* wake up the recovery process so that it can process the pause request */
+	WakeupRecovery();
 
 	PG_RETURN_VOID();
 }
@@ -565,7 +568,7 @@ pg_wal_replay_resume(PG_FUNCTION_ARGS)
 				 errhint("%s cannot be executed after promotion is triggered.",
 						 "pg_wal_replay_resume()")));
 
-	SetRecoveryPause(false);
+	SetRecoveryPause(RECOVERY_IN_PROGRESS);
 
 	PG_RETURN_VOID();
 }
@@ -582,7 +585,31 @@ pg_is_wal_replay_paused(PG_FUNCTION_ARGS)
 				 errmsg("recovery is not in progress"),
 				 errhint("Recovery control functions can only be executed during recovery.")));
 
-	PG_RETURN_BOOL(RecoveryIsPaused());
+	if (!RecoveryPauseRequested())
+		PG_RETURN_BOOL(false);
+
+	/* loop until the recovery is actually paused */
+	while(!RecoveryIsPaused())
+	{
+		pg_usleep(10000L);	/* wait for 10 msec */
+
+		/* meanwhile if recovery is resume requested then return false */
+		if (!RecoveryPauseRequested())
+			PG_RETURN_BOOL(false);
+
+		/*
+		 * If recovery is not in progress anymore then report an error this
+		 * could happen if the standby is promoted while we were waiting for
+		 * recovery to get paused.
+		 */
+		if (!RecoveryInProgress())
+			ereport(ERROR,
+					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+					errmsg("recovery is not in progress"),
+					errhint("Recovery control functions can only be executed during recovery.")));			
+	}
+
+	PG_RETURN_BOOL(true);
 }
 
 /*
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 221af87..cd7904e 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -174,6 +174,14 @@ typedef enum RecoveryState
 	RECOVERY_STATE_DONE			/* currently in production */
 } RecoveryState;
 
+/* Recovery pause states */
+typedef enum RecoveryPauseState
+{
+	RECOVERY_IN_PROGRESS = 0,
+	RECOVERY_PAUSE_REQUESTED,
+	RECOVERY_PAUSED,
+} RecoveryPauseState;
+
 extern PGDLLIMPORT int wal_level;
 
 /* Is WAL archiving enabled (always or only while server is running normally)? */
@@ -311,7 +319,8 @@ extern XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI);
 extern XLogRecPtr GetXLogInsertRecPtr(void);
 extern XLogRecPtr GetXLogWriteRecPtr(void);
 extern bool RecoveryIsPaused(void);
-extern void SetRecoveryPause(bool recoveryPause);
+extern bool RecoveryPauseRequested(void);
+extern void SetRecoveryPause(RecoveryPauseState recoveryPause);
 extern TimestampTz GetLatestXTime(void);
 extern TimestampTz GetCurrentChunkReplayStartTime(void);
 
-- 
1.8.3.1

Reply via email to