On Mon, Aug 29, 2016 at 11:16 PM, Fujii Masao <masao.fu...@gmail.com> wrote:
> You seem to add another entry for this patch into CommitFest.
> Either of them needs to be removed.
> https://commitfest.postgresql.org/10/698/

Indeed. I just removed this one.

> This patch prevents pg_stop_backup from starting while pg_start_backup
> is running. OTOH, we also should prevent pg_start_backup from starting
> until "previous" pg_stop_backup has completed? What happens if
> pg_start_backup starts while pg_stop_backup is running?
> As far as I read the current code, ISTM that there is no need to do that,
> but it's better to do the double check.

I don't agree about doing that. The on-disk presence of the
backup_label file is what prevents pg_start_backup to run should
pg_stop_backup have already decremented its counters but not unlinked
yet the backup_label, so this insurance is really enough IMO. One good
reason to keep pg_stop_backup as-is in its failure handling is that if
for example it fails to remove the backup_label file, it is still
possible to do again a backup by just removing manually the existing
backup_label file *without* restarting the server. If you have an
in-memory state it would not be possible to fallback to this method
and you'd need a method to clean up the in-memory state.

Now, well, with the patch as well as HEAD, it could be possible that
the backup counters are decremented, but pg_stop_backup *fails* to
remove the backup_label file which would prevent any subsequent
pg_start_backup to run, because there is still a backup_label file, as
well as any other pg_stop_backup to run, because there is no backup
running per the in-memory state. We could improve the in-memory state
by:
- Having an extra exclusive backup status to mark an exclusive backup
as stopping, say EXCLUSIVE_BACKUP_STOPPING. Then mark the exclusive
backup status as such when do_pg_stop_backup starts.
- delaying counter decrementation in pg_stop_backup to the moment when
the backup_label file has been removed.
- Taking an extra time the exclusive WAL insert lock after
backup_label is removed, and decrement the counters.
- During the time the backup_label file is removed, we need an error
callback to switch back to BACKUP_RUNNING in case of error, similarly
to do_pg_start_backup.
Which is more or less the attached patch. Now, if pg_stop_backup
fails, this has the disadvantage to force a server restart to clean up
the in-memory state, instead of just removing manually the existing
backup_file.

For those reasons I still strongly recommend using v3, and not meddle
with the error handling of pg_stop_backup. Because, well, that's
actually not necessary, and this would just hurt the error handling of
exclusive backups.
-- 
Michael
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index acd95aa..ad04e3e 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -460,6 +460,28 @@ typedef union WALInsertLockPadded
 } WALInsertLockPadded;
 
 /*
+ * State of an exclusive backup
+ *
+ * EXCLUSIVE_BACKUP_NONE means that there is no exclusive backup actually
+ * running, to be more precise pg_start_backup() is not being executed for
+ * an exclusive backup and there is exclusive backup in progress.
+ * EXCLUSIVE_BACKUP_STARTING means that pg_start_backup() is running and
+ * that is is not done yet.
+ * EXCLUSIVE_BACKUP_STOPPING means that pg_stop_backup() is running and
+ * that is is not done yet.
+ * EXCLUSIVE_BACKUP_IN_PROGRESS means that pg_start_backup() has finished
+ * running and an exclusive backup is in progress. pg_stop_backup() is
+ * needed to finish it.
+ */
+typedef enum ExclusiveBackupState
+{
+	EXCLUSIVE_BACKUP_NONE = 0,
+	EXCLUSIVE_BACKUP_STARTING,
+	EXCLUSIVE_BACKUP_STOPPING,
+	EXCLUSIVE_BACKUP_IN_PROGRESS
+} ExclusiveBackupState;
+
+/*
  * Shared state data for WAL insertion.
  */
 typedef struct XLogCtlInsert
@@ -500,13 +522,14 @@ typedef struct XLogCtlInsert
 	bool		fullPageWrites;
 
 	/*
-	 * exclusiveBackup is true if a backup started with pg_start_backup() is
-	 * in progress, and nonExclusiveBackups is a counter indicating the number
+	 * exclusiveBackupState is changed to EXCLUSIVE_BACKUP_STARTING for the
+	 * duration of pg_start_backup(), then to EXCLUSIVE_BACKUP_IN_PROGRESS once
+	 * it is done, and nonExclusiveBackups is a counter indicating the number
 	 * of streaming base backups currently in progress. forcePageWrites is set
 	 * to true when either of these is non-zero. lastBackupStart is the latest
 	 * checkpoint redo location used as a starting point for an online backup.
 	 */
-	bool		exclusiveBackup;
+	ExclusiveBackupState exclusiveBackupState;
 	int			nonExclusiveBackups;
 	XLogRecPtr	lastBackupStart;
 
@@ -842,6 +865,7 @@ static void xlog_outrec(StringInfo buf, XLogReaderState *record);
 #endif
 static void xlog_outdesc(StringInfo buf, XLogReaderState *record);
 static void pg_start_backup_callback(int code, Datum arg);
+static void pg_stop_backup_callback(int code, Datum arg);
 static bool read_backup_label(XLogRecPtr *checkPointLoc,
 				  bool *backupEndRequired, bool *backupFromStandby);
 static bool read_tablespace_map(List **tablespaces);
@@ -9831,7 +9855,21 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
 	WALInsertLockAcquireExclusive();
 	if (exclusive)
 	{
-		if (XLogCtl->Insert.exclusiveBackup)
+		if (XLogCtl->Insert.exclusiveBackupState == EXCLUSIVE_BACKUP_STARTING)
+		{
+			WALInsertLockRelease();
+			ereport(ERROR,
+					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+					 errmsg("a backup is already starting")));
+		}
+		if (XLogCtl->Insert.exclusiveBackupState == EXCLUSIVE_BACKUP_STOPPING)
+		{
+			WALInsertLockRelease();
+			ereport(ERROR,
+					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+					 errmsg("a backup is currently stopping")));
+		}
+		if (XLogCtl->Insert.exclusiveBackupState == EXCLUSIVE_BACKUP_IN_PROGRESS)
 		{
 			WALInsertLockRelease();
 			ereport(ERROR,
@@ -9839,7 +9877,7 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
 					 errmsg("a backup is already in progress"),
 					 errhint("Run pg_stop_backup() and try again.")));
 		}
-		XLogCtl->Insert.exclusiveBackup = true;
+		XLogCtl->Insert.exclusiveBackupState = EXCLUSIVE_BACKUP_STARTING;
 	}
 	else
 		XLogCtl->Insert.nonExclusiveBackups++;
@@ -10094,7 +10132,7 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
 		{
 			/*
 			 * Check for existing backup label --- implies a backup is already
-			 * running.  (XXX given that we checked exclusiveBackup above,
+			 * running.  (XXX given that we checked exclusiveBackupState above,
 			 * maybe it would be OK to just unlink any such label file?)
 			 */
 			if (stat(BACKUP_LABEL_FILE, &stat_buf) != 0)
@@ -10176,6 +10214,16 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
 	PG_END_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, (Datum) BoolGetDatum(exclusive));
 
 	/*
+	 * Mark that start phase has correctly finished for an exclusive backup.
+	 */
+	if (exclusive)
+	{
+		WALInsertLockAcquireExclusive();
+		XLogCtl->Insert.exclusiveBackupState = EXCLUSIVE_BACKUP_IN_PROGRESS;
+		WALInsertLockRelease();
+	}
+
+	/*
 	 * We're done.  As a convenience, return the starting WAL location.
 	 */
 	if (starttli_p)
@@ -10193,8 +10241,8 @@ pg_start_backup_callback(int code, Datum arg)
 	WALInsertLockAcquireExclusive();
 	if (exclusive)
 	{
-		Assert(XLogCtl->Insert.exclusiveBackup);
-		XLogCtl->Insert.exclusiveBackup = false;
+		Assert(XLogCtl->Insert.exclusiveBackupState == EXCLUSIVE_BACKUP_STARTING);
+		XLogCtl->Insert.exclusiveBackupState = EXCLUSIVE_BACKUP_NONE;
 	}
 	else
 	{
@@ -10202,7 +10250,7 @@ pg_start_backup_callback(int code, Datum arg)
 		XLogCtl->Insert.nonExclusiveBackups--;
 	}
 
-	if (!XLogCtl->Insert.exclusiveBackup &&
+	if (XLogCtl->Insert.exclusiveBackupState == EXCLUSIVE_BACKUP_NONE &&
 		XLogCtl->Insert.nonExclusiveBackups == 0)
 	{
 		XLogCtl->Insert.forcePageWrites = false;
@@ -10211,6 +10259,19 @@ pg_start_backup_callback(int code, Datum arg)
 }
 
 /*
+ * Error cleanup callback for pg_stop_backup, should be used only for
+ * exclusive backups.
+ */
+static void
+pg_stop_backup_callback(int code, Datum arg)
+{
+	/* Update backup status on failure */
+	WALInsertLockAcquireExclusive();
+	XLogCtl->Insert.exclusiveBackupState = EXCLUSIVE_BACKUP_IN_PROGRESS;
+	WALInsertLockRelease();
+}
+
+/*
  * do_pg_stop_backup is the workhorse of the user-visible pg_stop_backup()
  * function.
 
@@ -10273,19 +10334,103 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
 				 errhint("wal_level must be set to \"replica\" or \"logical\" at server start.")));
 
 	/*
-	 * OK to update backup counters and forcePageWrites
+	 * Remove the backup_label file for an exclusive backup. Before doing
+	 * anything the status of the backup is switched to ensure that there
+	 * is no concurrent operation during this operation. Backup counters
+	 * are updated after this phase to be able to rollback in case of
+	 * failure.
 	 */
-	WALInsertLockAcquireExclusive();
 	if (exclusive)
 	{
-		if (!XLogCtl->Insert.exclusiveBackup)
+		WALInsertLockAcquireExclusive();
+		if (XLogCtl->Insert.exclusiveBackupState == EXCLUSIVE_BACKUP_NONE)
 		{
 			WALInsertLockRelease();
 			ereport(ERROR,
 					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
 					 errmsg("exclusive backup not in progress")));
 		}
-		XLogCtl->Insert.exclusiveBackup = false;
+		if (XLogCtl->Insert.exclusiveBackupState == EXCLUSIVE_BACKUP_STARTING)
+		{
+			WALInsertLockRelease();
+			ereport(ERROR,
+					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+					 errmsg("exclusive backup currently starting")));
+		}
+		if (XLogCtl->Insert.exclusiveBackupState == EXCLUSIVE_BACKUP_STOPPING)
+		{
+			WALInsertLockRelease();
+			ereport(ERROR,
+					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+					 errmsg("exclusive backup currently stopping")));
+		}
+		XLogCtl->Insert.exclusiveBackupState = EXCLUSIVE_BACKUP_STOPPING;
+		WALInsertLockRelease();
+
+		/* remove backup_label */
+		PG_ENSURE_ERROR_CLEANUP(pg_stop_backup_callback, (Datum) 0);
+		{
+			/*
+			 * Read the existing label file into memory.
+			 */
+			struct stat statbuf;
+			int			r;
+
+			if (stat(BACKUP_LABEL_FILE, &statbuf))
+			{
+				/* should not happen per the upper checks */
+				if (errno != ENOENT)
+					ereport(ERROR,
+							(errcode_for_file_access(),
+							 errmsg("could not stat file \"%s\": %m",
+									BACKUP_LABEL_FILE)));
+				ereport(ERROR,
+						(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+						 errmsg("a backup is not in progress")));
+			}
+
+			lfp = AllocateFile(BACKUP_LABEL_FILE, "r");
+			if (!lfp)
+			{
+				ereport(ERROR,
+						(errcode_for_file_access(),
+						 errmsg("could not read file \"%s\": %m",
+								BACKUP_LABEL_FILE)));
+			}
+			labelfile = palloc(statbuf.st_size + 1);
+			r = fread(labelfile, statbuf.st_size, 1, lfp);
+			labelfile[statbuf.st_size] = '\0';
+
+			/*
+			 * Close and remove the backup label file
+			 */
+			if (r != 1 || ferror(lfp) || FreeFile(lfp))
+				ereport(ERROR,
+						(errcode_for_file_access(),
+						 errmsg("could not read file \"%s\": %m",
+								BACKUP_LABEL_FILE)));
+			if (unlink(BACKUP_LABEL_FILE) != 0)
+				ereport(ERROR,
+						(errcode_for_file_access(),
+						 errmsg("could not remove file \"%s\": %m",
+								BACKUP_LABEL_FILE)));
+
+			/*
+			 * Remove tablespace_map file if present, it is created only if there
+			 * are tablespaces.
+			 */
+			unlink(TABLESPACE_MAP);
+		}
+		PG_END_ENSURE_ERROR_CLEANUP(pg_stop_backup_callback, (Datum) 0);
+	}
+
+	/*
+	 * OK to update backup counters and forcePageWrites
+	 */
+	WALInsertLockAcquireExclusive();
+	if (exclusive)
+	{
+		XLogCtl->Insert.exclusiveBackupState = EXCLUSIVE_BACKUP_NONE;
 	}
 	else
 	{
@@ -10299,66 +10444,13 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
 		XLogCtl->Insert.nonExclusiveBackups--;
 	}
 
-	if (!XLogCtl->Insert.exclusiveBackup &&
+	if (XLogCtl->Insert.exclusiveBackupState == EXCLUSIVE_BACKUP_NONE &&
 		XLogCtl->Insert.nonExclusiveBackups == 0)
 	{
 		XLogCtl->Insert.forcePageWrites = false;
 	}
 	WALInsertLockRelease();
 
-	if (exclusive)
-	{
-		/*
-		 * Read the existing label file into memory.
-		 */
-		struct stat statbuf;
-		int			r;
-
-		if (stat(BACKUP_LABEL_FILE, &statbuf))
-		{
-			if (errno != ENOENT)
-				ereport(ERROR,
-						(errcode_for_file_access(),
-						 errmsg("could not stat file \"%s\": %m",
-								BACKUP_LABEL_FILE)));
-			ereport(ERROR,
-					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-					 errmsg("a backup is not in progress")));
-		}
-
-		lfp = AllocateFile(BACKUP_LABEL_FILE, "r");
-		if (!lfp)
-		{
-			ereport(ERROR,
-					(errcode_for_file_access(),
-					 errmsg("could not read file \"%s\": %m",
-							BACKUP_LABEL_FILE)));
-		}
-		labelfile = palloc(statbuf.st_size + 1);
-		r = fread(labelfile, statbuf.st_size, 1, lfp);
-		labelfile[statbuf.st_size] = '\0';
-
-		/*
-		 * Close and remove the backup label file
-		 */
-		if (r != 1 || ferror(lfp) || FreeFile(lfp))
-			ereport(ERROR,
-					(errcode_for_file_access(),
-					 errmsg("could not read file \"%s\": %m",
-							BACKUP_LABEL_FILE)));
-		if (unlink(BACKUP_LABEL_FILE) != 0)
-			ereport(ERROR,
-					(errcode_for_file_access(),
-					 errmsg("could not remove file \"%s\": %m",
-							BACKUP_LABEL_FILE)));
-
-		/*
-		 * Remove tablespace_map file if present, it is created only if there
-		 * are tablespaces.
-		 */
-		unlink(TABLESPACE_MAP);
-	}
-
 	/*
 	 * Read and parse the START WAL LOCATION line (this code is pretty crude,
 	 * but we are not expecting any variability in the file format).
@@ -10595,7 +10687,7 @@ do_pg_abort_backup(void)
 	Assert(XLogCtl->Insert.nonExclusiveBackups > 0);
 	XLogCtl->Insert.nonExclusiveBackups--;
 
-	if (!XLogCtl->Insert.exclusiveBackup &&
+	if (XLogCtl->Insert.exclusiveBackupState == EXCLUSIVE_BACKUP_NONE &&
 		XLogCtl->Insert.nonExclusiveBackups == 0)
 	{
 		XLogCtl->Insert.forcePageWrites = false;
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to