Hi,

> I agree, I missed this part. The .history file should be given higher
preference.
> I will take care of it in the next patch.

Archiver does not have access to shared memory and the current timeline ID
is not available at archiver. In order to keep track of timeline switch we
have
to push a notification from backend to archiver.  Backend can send a signal
to notify archiver about the timeline change. Archiver can register this
notification and perform a full directory scan to make sure that archiving
history files take precedence over archiving WAL files.

> If a history file is found we are not updating curFileTLI and
> nextLogSegNo, so it will attempt the previously found segment.  This
> is fine because it will not find that segment and it will rescan the
> directory.  But I think we can do better, instead of searching the
> same old segment in the previous timeline we can search that old
> segment in the new TL so that if the TL switch happened within the
> segment then we will find the segment and we will avoid the directory
> search.

This could have been done with the approach mentioned in patch v1 but now
considering archiving history file takes precedence over WAL files we cannot
update the "curFileTLI" whenever a history file is found.

> So everytime archiver will start with searching segno=0 in timeline=0.
> Instead of doing this can't we first scan the directory and once we
> get the first segment to archive then only we can start predicting the
> next wal segment?

Done.

> This comment is a bit confusing with the name of the variable
nextLogSegNo.
> I think the name of the variable is appropriate here, but maybe we can
reword
> the comment something like:

Done.

I have incorporated these changes and updated a new patch. PFA, patch v2.

Thanks,
Dipesh
From 22b0e8fb9c778fbfdc945a647f82f6bbd8d6ec0a Mon Sep 17 00:00:00 2001
From: Dipesh Pandit <dipesh.pan...@enterprisedb.com>
Date: Wed, 30 Jun 2021 14:05:58 +0530
Subject: [PATCH] mitigate directory scan for WAL archiver

WAL archiver scans the status directory to identify the next WAL file
that needs to be archived. This directory scan can be minimised by
maintaining the log segment number of current file which is being
archived and incrementing it by '1' to get the next WAL file.
Archiver can check the availability of next file and in case if the
file is not available then it should fall-back to directory scan to
get the oldest WAL file.

If there is a timeline switch then backend sends a notification to
archiver. Archiver registers the timeline switch and performs a full
directory scan to make sure that archiving history files takes
precedence over archiving WAL files
---
 src/backend/access/transam/xlog.c        |   6 ++
 src/backend/access/transam/xlogarchive.c |  10 +++
 src/backend/postmaster/pgarch.c          | 124 ++++++++++++++++++++++++++++---
 src/include/access/xlogarchive.h         |   1 +
 src/include/postmaster/pgarch.h          |   1 +
 5 files changed, 133 insertions(+), 9 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index c7c928f..40969a9 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -8130,6 +8130,12 @@ StartupXLOG(void)
 	WalSndWakeup();
 
 	/*
+	 * If archiver is active, send notification that timeline has switched.
+	 */
+	if (XLogArchivingActive() && ArchiveRecoveryRequested)
+		XLogArchiveNotifyTLISwitch();
+
+	/*
 	 * If this was a promotion, request an (online) checkpoint now. This isn't
 	 * required for consistency, but the last restartpoint might be far back,
 	 * and in case of a crash, recovering from it might take a longer than is
diff --git a/src/backend/access/transam/xlogarchive.c b/src/backend/access/transam/xlogarchive.c
index 26b023e..1968872 100644
--- a/src/backend/access/transam/xlogarchive.c
+++ b/src/backend/access/transam/xlogarchive.c
@@ -507,6 +507,16 @@ XLogArchiveNotifySeg(XLogSegNo segno)
 }
 
 /*
+ * Signal archiver to notify timeline switch
+ */
+void
+XLogArchiveNotifyTLISwitch(void)
+{
+	if (IsUnderPostmaster)
+		PgArchNotifyTLISwitch();
+}
+
+/*
  * XLogArchiveForceDone
  *
  * Emit notification forcibly that an XLOG segment file has been successfully
diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c
index 74a7d7c..e23b9bc 100644
--- a/src/backend/postmaster/pgarch.c
+++ b/src/backend/postmaster/pgarch.c
@@ -90,12 +90,22 @@ static PgArchData *PgArch = NULL;
  * Flags set by interrupt handlers for later service in the main loop.
  */
 static volatile sig_atomic_t ready_to_stop = false;
+static volatile sig_atomic_t timeline_switch = false;
+
+/*
+ * Log segment number to get next WAL file in a sequence.
+ */
+static XLogSegNo nextLogSegNo = 0;
+
+/* Flag to specify a full directory scan to find next log file */
+static bool dirScan = true;
 
 /* ----------
  * Local function forward declarations
  * ----------
  */
 static void pgarch_waken_stop(SIGNAL_ARGS);
+static void pgarch_timeline_switch(SIGNAL_ARGS);
 static void pgarch_MainLoop(void);
 static void pgarch_ArchiverCopyLoop(void);
 static bool pgarch_archiveXlog(char *xlog);
@@ -169,10 +179,11 @@ PgArchiverMain(void)
 {
 	/*
 	 * Ignore all signals usually bound to some action in the postmaster,
-	 * except for SIGHUP, SIGTERM, SIGUSR1, SIGUSR2, and SIGQUIT.
+	 * except for SIGHUP, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, and SIGQUIT.
 	 */
 	pqsignal(SIGHUP, SignalHandlerForConfigReload);
-	pqsignal(SIGINT, SIG_IGN);
+	/* Archiver is notified by backend if there is a timeline switch */
+	pqsignal(SIGINT, pgarch_timeline_switch);
 	pqsignal(SIGTERM, SignalHandlerForShutdownRequest);
 	/* SIGQUIT handler was already set up by InitPostmasterChild */
 	pqsignal(SIGALRM, SIG_IGN);
@@ -221,6 +232,23 @@ PgArchWakeup(void)
 		SetLatch(&ProcGlobal->allProcs[arch_pgprocno].procLatch);
 }
 
+/*
+ * Called by backend process to notify a timeline switch.
+ */
+void
+PgArchNotifyTLISwitch(void)
+{
+	int			arch_pgprocno = PgArch->pgprocno;
+
+	if (arch_pgprocno != INVALID_PGPROCNO)
+	{
+		int		archiver_pid = ProcGlobal->allProcs[arch_pgprocno].pid;
+
+		if (kill(archiver_pid, SIGINT) < 0)
+			elog(ERROR, "could not notify timeline change to archiver");
+	}
+}
+
 
 /* SIGUSR2 signal handler for archiver process */
 static void
@@ -236,6 +264,23 @@ pgarch_waken_stop(SIGNAL_ARGS)
 }
 
 /*
+ * Interrupt handler for archiver
+ *
+ * There is a timeline switch and we have been notified by backend.
+ */
+static void
+pgarch_timeline_switch(SIGNAL_ARGS)
+{
+	int			save_errno = errno;
+
+	/* Set the flag to register a timeline switch */
+	timeline_switch = true;
+	SetLatch(MyLatch);
+
+	errno = save_errno;
+}
+
+/*
  * pgarch_MainLoop
  *
  * Main loop for archiver
@@ -336,6 +381,13 @@ pgarch_ArchiverCopyLoop(void)
 		int			failures = 0;
 		int			failures_orphan = 0;
 
+		if (timeline_switch)
+		{
+			/* Perform a full directory scan in next cycle */
+			dirScan = true;
+			timeline_switch = false;
+		}
+
 		for (;;)
 		{
 			struct stat stat_buf;
@@ -411,6 +463,10 @@ pgarch_ArchiverCopyLoop(void)
 				/* successful */
 				pgarch_archiveDone(xlog);
 
+				/* Increment log segment number to point to the next WAL file */
+				if (!IsTLHistoryFileName(xlog))
+					nextLogSegNo++;
+
 				/*
 				 * Tell the collector about the WAL file that we successfully
 				 * archived
@@ -596,29 +652,63 @@ pgarch_archiveXlog(char *xlog)
  * larger ID; the net result being that past timelines are given higher
  * priority for archiving.  This seems okay, or at least not obviously worth
  * changing.
+ *
+ * WAL files are generated in a specific order of log segment number. The
+ * directory scan for each WAL file can be minimised by identifying the next
+ * WAL file in the sequence. This can be achieved by maintaining log segment
+ * number and timeline ID corresponding to WAL file currently being archived.
+ * The log segment number of current WAL file can be incremented by '1' upon
+ * successful archival to point to the next WAL file. The full directory scan
+ * can be avoided by checking the availability of next WAL file.
+ *
+ * However, a full directory scan is performed in case if there is a timeline
+ * switch to make sure that archiving history file takes precedence over
+ * archiving WAL files from older timeline.
  */
 static bool
 pgarch_readyXlog(char *xlog)
 {
-	/*
-	 * open xlog status directory and read through list of xlogs that have the
-	 * .ready suffix, looking for earliest file. It is possible to optimise
-	 * this code, though only a single file is expected on the vast majority
-	 * of calls, so....
-	 */
+	char		basename[MAX_XFN_CHARS + 1];
+	char		xlogready[MAXPGPATH];
 	char		XLogArchiveStatusDir[MAXPGPATH];
 	DIR		   *rldir;
 	struct dirent *rlde;
+	struct stat	st;
+	static TimeLineID curFileTLI = 0;
 	bool		found = false;
 	bool		historyFound = false;
 
+	if (!dirScan)
+	{
+		/*
+		 * We already have the next anticipated log segment and timeline, check
+		 * if this WAL is ready to be archived. If yes, skip the directory
+		 * scan.
+		 */
+		XLogFileName(basename, curFileTLI, nextLogSegNo, wal_segment_size);
+		StatusFilePath(xlogready, basename, ".ready");
+
+		if (stat(xlogready, &st) == 0)
+		{
+			strcpy(xlog, basename);
+			return true;
+		}
+	}
+
+	/*
+	 * Fall-back to directory scan
+	 *
+	 * open xlog status directory and read through list of xlogs that have the
+	 * .ready suffix, looking for earliest file. It is possible to optimise
+	 * this code, though only a single file is expected on the vast majority
+	 * of calls, so....
+	 */
 	snprintf(XLogArchiveStatusDir, MAXPGPATH, XLOGDIR "/archive_status");
 	rldir = AllocateDir(XLogArchiveStatusDir);
 
 	while ((rlde = ReadDir(rldir, XLogArchiveStatusDir)) != NULL)
 	{
 		int			basenamelen = (int) strlen(rlde->d_name) - 6;
-		char		basename[MAX_XFN_CHARS + 1];
 		bool		ishistory;
 
 		/* Ignore entries with unexpected number of characters */
@@ -661,6 +751,22 @@ pgarch_readyXlog(char *xlog)
 				strcpy(xlog, basename);
 		}
 	}
+
+	/*
+	 * Found the oldest WAL, reset timeline ID and log segment number to generate
+	 * the next WAL file in the sequence.
+	 */
+	if (found && !historyFound)
+	{
+		XLogFromFileName(xlog, &curFileTLI, &nextLogSegNo, wal_segment_size);
+		ereport(LOG,
+				(errmsg("directory scan to archive write-ahead log file \"%s\"",
+						xlog)));
+
+		/* Disable full directory scan until there is a timeline switch */
+		dirScan = false;
+	}
+
 	FreeDir(rldir);
 
 	return found;
diff --git a/src/include/access/xlogarchive.h b/src/include/access/xlogarchive.h
index 3edd1a9..51e2796 100644
--- a/src/include/access/xlogarchive.h
+++ b/src/include/access/xlogarchive.h
@@ -25,6 +25,7 @@ extern void ExecuteRecoveryCommand(const char *command, const char *commandName,
 extern void KeepFileRestoredFromArchive(const char *path, const char *xlogfname);
 extern void XLogArchiveNotify(const char *xlog);
 extern void XLogArchiveNotifySeg(XLogSegNo segno);
+extern void XLogArchiveNotifyTLISwitch(void);
 extern void XLogArchiveForceDone(const char *xlog);
 extern bool XLogArchiveCheckDone(const char *xlog);
 extern bool XLogArchiveIsBusy(const char *xlog);
diff --git a/src/include/postmaster/pgarch.h b/src/include/postmaster/pgarch.h
index 1e47a14..e6d2e18 100644
--- a/src/include/postmaster/pgarch.h
+++ b/src/include/postmaster/pgarch.h
@@ -31,5 +31,6 @@ extern void PgArchShmemInit(void);
 extern bool PgArchCanRestart(void);
 extern void PgArchiverMain(void) pg_attribute_noreturn();
 extern void PgArchWakeup(void);
+extern void PgArchNotifyTLISwitch(void);
 
 #endif							/* _PGARCH_H */
-- 
1.8.3.1

Reply via email to