The cascading replication patch made a change to the way WAL files restored from archive are handled. Since then, when a WAL file is restored from archive, it's copied under the correct filename to pg_xlog. Aside from supporting cascading replication, this has the advantage that if the archive subsequently goes offline, and the standby is restarted, it can still recover back up to the point where it was before. It also means that you can take an offline backup of the standby, and pg_xlog includes all the files required to restore.

However, timeline history files are still not retained. When a standby restores a timeline history file from the archive, it's restored under a temporary filename, and discarded after it's read. That means that if the latest checkpoint is on a WAL segment that includes an earlier timeline switch, you again need the archive to be online to restore from that, or you get an error like:

LOG:  unexpected timeline ID 1 in log file 0, segment 3, offset 0

This is a pre-existing issue in 9.2. In git master, it also means that if a standby follows a master through the archive, a cascading standby won't find the timeline history files in the 1st standby, and won't be able to follow timeline switches.

The straightforward fix is to treat timeline history files the same as WAL files, and copy them into pg_xlog when they're restored from the archive. Patch attached, and a test script I used to test this. Barring objections, I'll commit the patch tomorrow.

- Heikki
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index ae370fb..920bcc8 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -648,6 +648,7 @@ static bool XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
 			 bool randAccess);
 static int	emode_for_corrupt_record(int emode, XLogRecPtr RecPtr);
 static void XLogFileClose(void);
+static void KeepFileRestoredFromArchive(char * path, char *xlogfname);
 static bool RestoreArchivedFile(char *path, const char *xlogfname,
 					const char *recovername, off_t expectedSize);
 static void ExecuteRecoveryCommand(char *command, char *commandName,
@@ -2841,76 +2842,7 @@ XLogFileRead(uint32 log, uint32 seg, int emode, TimeLineID tli,
 	 * xlog segment (if any) with the archival version.
 	 */
 	if (source == XLOG_FROM_ARCHIVE)
-	{
-		char		xlogfpath[MAXPGPATH];
-		bool		reload = false;
-		struct stat statbuf;
-
-		XLogFilePath(xlogfpath, tli, log, seg);
-		if (stat(xlogfpath, &statbuf) == 0)
-		{
-			char oldpath[MAXPGPATH];
-#ifdef WIN32
-			static unsigned int deletedcounter = 1;
-			/*
-			 * On Windows, if another process (e.g a walsender process) holds
-			 * the file open in FILE_SHARE_DELETE mode, unlink will succeed,
-			 * but the file will still show up in directory listing until the
-			 * last handle is closed, and we cannot rename the new file in its
-			 * place until that. To avoid that problem, rename the old file to
-			 * a temporary name first. Use a counter to create a unique
-			 * filename, because the same file might be restored from the
-			 * archive multiple times, and a walsender could still be holding
-			 * onto an old deleted version of it.
-			 */
-			snprintf(oldpath, MAXPGPATH, "%s.deleted%u",
-					 xlogfpath, deletedcounter++);
-			if (rename(xlogfpath, oldpath) != 0)
-			{
-				ereport(ERROR,
-						(errcode_for_file_access(),
-						 errmsg("could not rename file \"%s\" to \"%s\": %m",
-								xlogfpath, oldpath)));
-			}
-#else
-			strncpy(oldpath, xlogfpath, MAXPGPATH);
-#endif
-			if (unlink(oldpath) != 0)
-				ereport(FATAL,
-						(errcode_for_file_access(),
-						 errmsg("could not remove file \"%s\": %m",
-								xlogfpath)));
-			reload = true;
-		}
-
-		if (rename(path, xlogfpath) < 0)
-			ereport(ERROR,
-					(errcode_for_file_access(),
-					 errmsg("could not rename file \"%s\" to \"%s\": %m",
-							path, xlogfpath)));
-
-		/*
-		 * Set path to point at the new file in pg_xlog.
-		 */
-		strncpy(path, xlogfpath, MAXPGPATH);
-
-		/*
-		 * Create .done file forcibly to prevent the restored segment from
-		 * being archived again later.
-		 */
-		XLogArchiveForceDone(xlogfname);
-
-		/*
-		 * If the existing segment was replaced, since walsenders might have
-		 * it open, request them to reload a currently-open segment.
-		 */
-		if (reload)
-			WalSndRqstFileReload();
-
-		/* Signal walsender that new WAL has arrived */
-		if (AllowCascadeReplication())
-			WalSndWakeup();
-	}
+		KeepFileRestoredFromArchive(path, xlogfname);
 
 	fd = BasicOpenFile(path, O_RDONLY | PG_BINARY, 0);
 	if (fd >= 0)
@@ -3025,6 +2957,88 @@ XLogFileClose(void)
 }
 
 /*
+ * A file was restored from the archive under a temporary filename (path),
+ * and now we want to keep it. Rename it under the permanent filename in
+ * pg_xlog (xlogfname), replacing any existing file with the same name.
+ */
+static void
+KeepFileRestoredFromArchive(char *path, char *xlogfname)
+{
+	bool		reload = false;
+	struct stat statbuf;
+	char		xlogfpath[MAXPGPATH];
+
+	snprintf(xlogfpath, MAXPGPATH, XLOGDIR "/%s", xlogfname);
+
+	if (stat(xlogfpath, &statbuf) == 0)
+	{
+		char oldpath[MAXPGPATH];
+#ifdef WIN32
+		static unsigned int deletedcounter = 1;
+		/*
+		 * On Windows, if another process (e.g. a walsender process) holds
+		 * the file open in FILE_SHARE_DELETE mode, unlink will succeed,
+		 * but the file will still show up in directory listing until the
+		 * last handle is closed, and we cannot rename the new file in its
+		 * place until that. To avoid that problem, rename the old file to
+		 * a temporary name first. Use a counter to create a unique
+		 * filename, because the same file might be restored from the
+		 * archive multiple times, and a walsender could still be holding
+		 * onto an old deleted version of it.
+		 */
+		snprintf(oldpath, MAXPGPATH, "%s.deleted%u",
+				 xlogfpath, deletedcounter++);
+		if (rename(xlogfpath, oldpath) != 0)
+		{
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not rename file \"%s\" to \"%s\": %m",
+							xlogfpath, oldpath)));
+		}
+#else
+		strncpy(oldpath, xlogfpath, MAXPGPATH);
+#endif
+		if (unlink(oldpath) != 0)
+			ereport(FATAL,
+					(errcode_for_file_access(),
+					 errmsg("could not remove file \"%s\": %m",
+							xlogfpath)));
+		reload = true;
+	}
+
+	if (rename(path, xlogfpath) < 0)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not rename file \"%s\" to \"%s\": %m",
+						path, xlogfpath)));
+
+	/*
+	 * Set path to point at the new file in pg_xlog.
+	 */
+	strncpy(path, xlogfpath, MAXPGPATH);
+
+	/*
+	 * Create .done file forcibly to prevent the restored segment from
+	 * being archived again later.
+	 */
+	XLogArchiveForceDone(xlogfname);
+
+	/*
+	 * If the existing file was replaced, since walsenders might have it
+	 * open, request them to reload a currently-open segment. This is only
+	 * required for WAL segments, walsenders don't hold other files open,
+	 * but there's no harm in doing this too often, and we don't know what
+	 * kind of a file we're dealing with here.
+	 */
+	if (reload)
+		WalSndRqstFileReload();
+
+	/* Signal walsender that new WAL has arrived */
+	if (AllowCascadeReplication())
+		WalSndWakeup();
+}
+
+/*
  * Attempt to retrieve the specified file from off-line archival storage.
  * If successful, fill "path" with its complete path (note that this will be
  * a temp file name that doesn't follow the normal naming convention), and
@@ -4356,6 +4370,7 @@ readTimeLineHistory(TimeLineID targetTLI)
 	char		histfname[MAXFNAMELEN];
 	char		fline[MAXPGPATH];
 	FILE	   *fd;
+	bool		fromArchive = false;
 
 	/* Timeline 1 does not have a history file, so no need to check */
 	if (targetTLI == 1)
@@ -4364,7 +4379,8 @@ readTimeLineHistory(TimeLineID targetTLI)
 	if (InArchiveRecovery)
 	{
 		TLHistoryFileName(histfname, targetTLI);
-		RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0);
+		fromArchive =
+			RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0);
 	}
 	else
 		TLHistoryFilePath(path, targetTLI);
@@ -4433,6 +4449,13 @@ readTimeLineHistory(TimeLineID targetTLI)
 			(errmsg_internal("history of timeline %u is %s",
 							 targetTLI, nodeToString(result))));
 
+	/*
+	 * If the history file was fetched from archive, save it in pg_xlog for
+	 * future reference.
+	 */
+	if (fromArchive)
+		KeepFileRestoredFromArchive(path, histfname);
+
 	return result;
 }
 

Attachment: history-files-not-kept-in-pg_xlog.sh
Description: Bourne shell script

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to