> > I guess we still have to pick one or the other, but I don't really
> > know how to do that, since both methods seem to be relatively fine,
> > and the scenarios where one is better than the other all feel a little
> > bit contrived. I guess if no clear consensus emerges in the next week
> > or so, I'll just pick one and commit it. Not quite sure yet how I'll
> > do the picking, but we seem to all agree that something is better than
> > nothing, so hopefully nobody will be too sad if I make an arbitrary
> > decision. And if some clear agreement emerges before then, even
> > better.
>
> I will be happy to see this fixed either way.

+1

> > I agree, but it should probably be something like DEBUG3 instead of
> > LOG.
>
> I will update it in the next patch.

Updated log level to DEBUG3 and rebased the patch. PFA patch.

Thanks,
Dipesh
From e0ce2b85f9122963d820d005ca093e6c667ca7e6 Mon Sep 17 00:00:00 2001
From: Dipesh Pandit <dipesh.pan...@enterprisedb.com>
Date: Wed, 8 Sep 2021 21:47:16 +0530
Subject: [PATCH] keep trying the next file approach

---
 src/backend/access/transam/xlog.c        |  20 ++++
 src/backend/access/transam/xlogarchive.c |  23 ++++
 src/backend/postmaster/pgarch.c          | 195 ++++++++++++++++++++++++++-----
 src/include/access/xlogdefs.h            |   2 +
 src/include/postmaster/pgarch.h          |   4 +
 5 files changed, 217 insertions(+), 27 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index e51a7a7..7dd4b96 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -9285,6 +9285,16 @@ CreateCheckPoint(int flags)
 	RemoveOldXlogFiles(_logSegNo, RedoRecPtr, recptr);
 
 	/*
+	 * Force the archiver to perform a directory scan.
+	 *
+	 * Ordinarily, this should not be needed, but it seems like a good idea
+	 * to make sure we scan the archive_status directory every once in a
+	 * while to make sure we haven't left anything behind.  Calling it here
+	 * ensures we do a directory scan at least once per checkpoint.
+	 */
+	PgArchForceDirScan();
+
+	/*
 	 * Make more log segments if needed.  (Do this after recycling old log
 	 * segments, since that may supply some of the needed files.)
 	 */
@@ -9650,6 +9660,16 @@ CreateRestartPoint(int flags)
 	RemoveOldXlogFiles(_logSegNo, RedoRecPtr, endptr);
 
 	/*
+	 * Force the archiver to perform a directory scan.
+	 *
+	 * Ordinarily, this should not be needed, but it seems like a good idea
+	 * to make sure we scan the archive_status directory every once in a
+	 * while to make sure we haven't left anything behind.  Calling it here
+	 * ensures we do a directory scan at least once per restartpoint.
+	 */
+	PgArchForceDirScan();
+
+	/*
 	 * Make more log segments if needed.  (Do this after recycling old log
 	 * segments, since that may supply some of the needed files.)
 	 */
diff --git a/src/backend/access/transam/xlogarchive.c b/src/backend/access/transam/xlogarchive.c
index 26b023e..0969f42 100644
--- a/src/backend/access/transam/xlogarchive.c
+++ b/src/backend/access/transam/xlogarchive.c
@@ -489,6 +489,29 @@ XLogArchiveNotify(const char *xlog)
 		return;
 	}
 
+	/* Force a directory scan if we are archiving anything but a regular
+	 * WAL file or if this WAL file is being created out-of-order.
+	 */
+	if (!IsXLogFileName(xlog))
+		PgArchForceDirScan();
+	else
+	{
+		TimeLineID tli;
+		XLogSegNo this_segno;
+		XLogSegNo arch_segno;
+
+		XLogFromFileName(xlog, &tli, &this_segno, wal_segment_size);
+		arch_segno = PgArchGetCurrentSegno();
+
+		/*
+		 * We must use <= because the archiver may have just completed a
+		 * directory scan and found a later segment (but hasn't updated
+		 * shared memory yet).
+		 */
+		if (this_segno <= arch_segno)
+			PgArchForceDirScan();
+	}
+
 	/* Notify archiver that it's got something to do */
 	if (IsUnderPostmaster)
 		PgArchWakeup();
diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c
index 74a7d7c..84ba1e0 100644
--- a/src/backend/postmaster/pgarch.c
+++ b/src/backend/postmaster/pgarch.c
@@ -47,6 +47,7 @@
 #include "storage/proc.h"
 #include "storage/procsignal.h"
 #include "storage/shmem.h"
+#include "storage/spin.h"
 #include "utils/guc.h"
 #include "utils/ps_status.h"
 
@@ -76,6 +77,20 @@
 typedef struct PgArchData
 {
 	int			pgprocno;		/* pgprocno of archiver process */
+
+	/*
+	 * Forces a directory scan in pgarch_readyXlog().  Protected by
+	 * arch_lck.
+	 */
+	bool		force_dir_scan;
+
+	/*
+	 * Current archiver state.  Protected by arch_lck.
+	 */
+	TimeLineID	lastTli;
+	XLogSegNo	lastSegNo;
+
+	slock_t		arch_lck;
 } PgArchData;
 
 
@@ -103,6 +118,7 @@ static bool pgarch_readyXlog(char *xlog);
 static void pgarch_archiveDone(char *xlog);
 static void pgarch_die(int code, Datum arg);
 static void HandlePgArchInterrupts(void);
+static bool higher_arch_priority(const char *a, const char *b);
 
 /* Report shared memory space needed by PgArchShmemInit */
 Size
@@ -129,6 +145,9 @@ PgArchShmemInit(void)
 		/* First time through, so initialize */
 		MemSet(PgArch, 0, PgArchShmemSize());
 		PgArch->pgprocno = INVALID_PGPROCNO;
+		PgArch->lastTli = MaxTimeLineID;
+		PgArch->lastSegNo = MaxXLogSegNo;
+		SpinLockInit(&PgArch->arch_lck);
 	}
 }
 
@@ -596,30 +615,93 @@ pgarch_archiveXlog(char *xlog)
  * larger ID; the net result being that past timelines are given higher
  * priority for archiving.  This seems okay, or at least not obviously worth
  * changing.
+ *
+ * WAL files are generated in a specific order of log segment number. The
+ * directory scan for each WAL file can be minimised by identifying the next
+ * WAL file in the sequence. This can be achieved by maintaining log segment
+ * number and timeline ID corresponding to WAL file currently being archived.
+ * The log segment number of current WAL file can be incremented by '1' to
+ * point to the next WAL file in a sequence. Full directory scan can be avoided
+ * by checking the availability of next WAL file. "xlogState" specifies the
+ * segment number and timeline ID corresponding to the next WAL file.
+ *
+ * However, a full directory scan is performed in some special cases where it
+ * requires us to archive files which takes precedence over the next anticipated
+ * log segment. For example, history file takes precedence over archiving WAL
+ * files on older timeline or an older WAL file which is being left out because
+ * corresponding .ready file is created out of order or archiving a backup
+ * history file created during backup.
+ *
+ * Returns "true" if a segment is ready for archival, "xlog" represents the
+ * name of the segment.
  */
 static bool
 pgarch_readyXlog(char *xlog)
 {
-	/*
-	 * open xlog status directory and read through list of xlogs that have the
-	 * .ready suffix, looking for earliest file. It is possible to optimise
-	 * this code, though only a single file is expected on the vast majority
-	 * of calls, so....
-	 */
+	char		basename[MAX_XFN_CHARS + 1];
 	char		XLogArchiveStatusDir[MAXPGPATH];
 	DIR		   *rldir;
 	struct dirent *rlde;
 	bool		found = false;
-	bool		historyFound = false;
+	bool		force_dir_scan;
+	TimeLineID	lastTli;
+	XLogSegNo	lastSegNo;
+
+	/* Obtain current archiver state and reset force_dir_scan. */
+	SpinLockAcquire(&PgArch->arch_lck);
+
+	/*
+	 * Scan archive status directory if it is directed by shared memory flag or
+	 * until we find the first log segment.
+	 */
+	force_dir_scan = PgArch->force_dir_scan ||
+					 PgArch->lastSegNo == MaxXLogSegNo;
+	lastTli = PgArch->lastTli;
+	lastSegNo = PgArch->lastSegNo;
+
+	PgArch->force_dir_scan = false;
+
+	SpinLockRelease(&PgArch->arch_lck);
 
+	/* Try to skip the directory scan if possible. */
+	if (!force_dir_scan)
+	{
+		struct stat	st;
+		char		readyfile[MAXPGPATH];
+
+		Assert(lastTli != MaxTimeLineID);
+		Assert(lastSegNo != MaxXLogSegNo);
+
+		lastSegNo++;
+		XLogFileName(basename, lastTli, lastSegNo, wal_segment_size);
+
+		StatusFilePath(readyfile, basename, ".ready");
+		if (stat(readyfile, &st) == 0)
+		{
+			strcpy(xlog, basename);
+
+			SpinLockAcquire(&PgArch->arch_lck);
+			PgArch->lastSegNo++;
+			SpinLockRelease(&PgArch->arch_lck);
+
+			return true;
+		}
+		else if (errno != ENOENT)
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not stat \"%s\": %m", readyfile)));
+	}
+
+	/*
+	 * Open the archive status directory and read through the list of files
+	 * with the .ready suffix, looking for the earliest file.
+	 */
 	snprintf(XLogArchiveStatusDir, MAXPGPATH, XLOGDIR "/archive_status");
 	rldir = AllocateDir(XLogArchiveStatusDir);
 
 	while ((rlde = ReadDir(rldir, XLogArchiveStatusDir)) != NULL)
 	{
 		int			basenamelen = (int) strlen(rlde->d_name) - 6;
-		char		basename[MAX_XFN_CHARS + 1];
-		bool		ishistory;
 
 		/* Ignore entries with unexpected number of characters */
 		if (basenamelen < MIN_XFN_CHARS ||
@@ -638,35 +720,67 @@ pgarch_readyXlog(char *xlog)
 		memcpy(basename, rlde->d_name, basenamelen);
 		basename[basenamelen] = '\0';
 
-		/* Is this a history file? */
-		ishistory = IsTLHistoryFileName(basename);
-
-		/*
-		 * Consume the file to archive.  History files have the highest
-		 * priority.  If this is the first file or the first history file
-		 * ever, copy it.  In the presence of a history file already chosen as
-		 * target, ignore all other files except history files which have been
-		 * generated for an older timeline than what is already chosen as
-		 * target to archive.
-		 */
-		if (!found || (ishistory && !historyFound))
+		if (!found || higher_arch_priority(basename, xlog))
 		{
 			strcpy(xlog, basename);
 			found = true;
-			historyFound = ishistory;
-		}
-		else if (ishistory || !historyFound)
-		{
-			if (strcmp(basename, xlog) < 0)
-				strcpy(xlog, basename);
 		}
 	}
 	FreeDir(rldir);
 
+	if (found)
+	{
+		SpinLockAcquire(&PgArch->arch_lck);
+
+		if (IsXLogFileName(xlog))
+			XLogFromFileName(xlog, &PgArch->lastTli, &PgArch->lastSegNo, wal_segment_size);
+		else
+			PgArch->force_dir_scan = true;
+
+		SpinLockRelease(&PgArch->arch_lck);
+
+		ereport(DEBUG3,
+				errmsg("directory scan to archive write ahead log file \"%s\"", xlog));
+	}
+
 	return found;
 }
 
 /*
+ * higher_arch_priority
+ *
+ * Compares archival priority of the two file names.  If "a" has a higher
+ * priority than "b", true is returned.  If "b" has a higher priority than
+ * "a" false is returned.
+ */
+static bool
+higher_arch_priority(const char *a, const char *b)
+{
+	bool a_ishistory = IsTLHistoryFileName(a);
+	bool b_ishistory = IsTLHistoryFileName(b);
+	bool a_isbackuphistory = IsBackupHistoryFileName(a);
+	bool b_isbackuphistory = IsBackupHistoryFileName(b);
+
+	/*
+	 * Timeline history files have a higher priority than everything else.
+	 * Backup history files are given the second highest priority so that
+	 * the archiver picks them up when a directory scan is forced.
+	 */
+	if (a_ishistory || b_ishistory)
+	{
+		if (a_ishistory != b_ishistory)
+			return a_ishistory;
+	}
+	else if (a_isbackuphistory || b_isbackuphistory)
+	{
+		if (a_isbackuphistory != b_isbackuphistory)
+			return a_isbackuphistory;
+	}
+
+	return (strcmp(a, b) < 0);
+}
+
+/*
  * pgarch_archiveDone
  *
  * Emit notification that an xlog file has been successfully archived.
@@ -716,3 +830,30 @@ HandlePgArchInterrupts(void)
 		ProcessConfigFile(PGC_SIGHUP);
 	}
 }
+
+/*
+ * PgArchForceDirScan
+ *
+ * When called, the next call to pgarch_readyXlog() will perform a
+ * directory scan.  This is useful for ensuring that imporant files such
+ * as timeline history files are archived as quickly as possible.
+ */
+void
+PgArchForceDirScan(void)
+{
+	SpinLockAcquire(&PgArch->arch_lck);
+	PgArch->force_dir_scan = true;
+	SpinLockRelease(&PgArch->arch_lck);
+}
+
+XLogSegNo
+PgArchGetCurrentSegno(void)
+{
+	XLogSegNo ret;
+
+	SpinLockAcquire(&PgArch->arch_lck);
+	ret = PgArch->lastSegNo;
+	SpinLockRelease(&PgArch->arch_lck);
+
+	return ret;
+}
diff --git a/src/include/access/xlogdefs.h b/src/include/access/xlogdefs.h
index 60348d1..6122891 100644
--- a/src/include/access/xlogdefs.h
+++ b/src/include/access/xlogdefs.h
@@ -46,6 +46,7 @@ typedef uint64 XLogRecPtr;
  * XLogSegNo - physical log file sequence number.
  */
 typedef uint64 XLogSegNo;
+#define MaxXLogSegNo    ((XLogSegNo) 0xFFFFFFFFFFFFFFFF)
 
 /*
  * TimeLineID (TLI) - identifies different database histories to prevent
@@ -57,6 +58,7 @@ typedef uint64 XLogSegNo;
  * sequence that was generated in the previous incarnation.
  */
 typedef uint32 TimeLineID;
+#define MaxTimeLineID	((TimeLineID) 0xFFFFFFFF)
 
 /*
  * Replication origin id - this is located in this file to avoid having to
diff --git a/src/include/postmaster/pgarch.h b/src/include/postmaster/pgarch.h
index 1e47a14..483f012 100644
--- a/src/include/postmaster/pgarch.h
+++ b/src/include/postmaster/pgarch.h
@@ -13,6 +13,8 @@
 #ifndef _PGARCH_H
 #define _PGARCH_H
 
+#include "access/xlogdefs.h"
+
 /* ----------
  * Archiver control info.
  *
@@ -31,5 +33,7 @@ extern void PgArchShmemInit(void);
 extern bool PgArchCanRestart(void);
 extern void PgArchiverMain(void) pg_attribute_noreturn();
 extern void PgArchWakeup(void);
+extern void PgArchForceDirScan(void);
+extern XLogSegNo PgArchGetCurrentSegno(void);
 
 #endif							/* _PGARCH_H */
-- 
1.8.3.1

Reply via email to