Hi, > I agree, I missed this part. The .history file should be given higher preference. > I will take care of it in the next patch.
Archiver does not have access to shared memory and the current timeline ID is not available at archiver. In order to keep track of timeline switch we have to push a notification from backend to archiver. Backend can send a signal to notify archiver about the timeline change. Archiver can register this notification and perform a full directory scan to make sure that archiving history files take precedence over archiving WAL files. > If a history file is found we are not updating curFileTLI and > nextLogSegNo, so it will attempt the previously found segment. This > is fine because it will not find that segment and it will rescan the > directory. But I think we can do better, instead of searching the > same old segment in the previous timeline we can search that old > segment in the new TL so that if the TL switch happened within the > segment then we will find the segment and we will avoid the directory > search. This could have been done with the approach mentioned in patch v1 but now considering archiving history file takes precedence over WAL files we cannot update the "curFileTLI" whenever a history file is found. > So everytime archiver will start with searching segno=0 in timeline=0. > Instead of doing this can't we first scan the directory and once we > get the first segment to archive then only we can start predicting the > next wal segment? Done. > This comment is a bit confusing with the name of the variable nextLogSegNo. > I think the name of the variable is appropriate here, but maybe we can reword > the comment something like: Done. I have incorporated these changes and updated a new patch. PFA, patch v2. Thanks, Dipesh
From 22b0e8fb9c778fbfdc945a647f82f6bbd8d6ec0a Mon Sep 17 00:00:00 2001 From: Dipesh Pandit <dipesh.pan...@enterprisedb.com> Date: Wed, 30 Jun 2021 14:05:58 +0530 Subject: [PATCH] mitigate directory scan for WAL archiver WAL archiver scans the status directory to identify the next WAL file that needs to be archived. This directory scan can be minimised by maintaining the log segment number of current file which is being archived and incrementing it by '1' to get the next WAL file. Archiver can check the availability of next file and in case if the file is not available then it should fall-back to directory scan to get the oldest WAL file. If there is a timeline switch then backend sends a notification to archiver. Archiver registers the timeline switch and performs a full directory scan to make sure that archiving history files takes precedence over archiving WAL files --- src/backend/access/transam/xlog.c | 6 ++ src/backend/access/transam/xlogarchive.c | 10 +++ src/backend/postmaster/pgarch.c | 124 ++++++++++++++++++++++++++++--- src/include/access/xlogarchive.h | 1 + src/include/postmaster/pgarch.h | 1 + 5 files changed, 133 insertions(+), 9 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index c7c928f..40969a9 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -8130,6 +8130,12 @@ StartupXLOG(void) WalSndWakeup(); /* + * If archiver is active, send notification that timeline has switched. + */ + if (XLogArchivingActive() && ArchiveRecoveryRequested) + XLogArchiveNotifyTLISwitch(); + + /* * If this was a promotion, request an (online) checkpoint now. This isn't * required for consistency, but the last restartpoint might be far back, * and in case of a crash, recovering from it might take a longer than is diff --git a/src/backend/access/transam/xlogarchive.c b/src/backend/access/transam/xlogarchive.c index 26b023e..1968872 100644 --- a/src/backend/access/transam/xlogarchive.c +++ b/src/backend/access/transam/xlogarchive.c @@ -507,6 +507,16 @@ XLogArchiveNotifySeg(XLogSegNo segno) } /* + * Signal archiver to notify timeline switch + */ +void +XLogArchiveNotifyTLISwitch(void) +{ + if (IsUnderPostmaster) + PgArchNotifyTLISwitch(); +} + +/* * XLogArchiveForceDone * * Emit notification forcibly that an XLOG segment file has been successfully diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c index 74a7d7c..e23b9bc 100644 --- a/src/backend/postmaster/pgarch.c +++ b/src/backend/postmaster/pgarch.c @@ -90,12 +90,22 @@ static PgArchData *PgArch = NULL; * Flags set by interrupt handlers for later service in the main loop. */ static volatile sig_atomic_t ready_to_stop = false; +static volatile sig_atomic_t timeline_switch = false; + +/* + * Log segment number to get next WAL file in a sequence. + */ +static XLogSegNo nextLogSegNo = 0; + +/* Flag to specify a full directory scan to find next log file */ +static bool dirScan = true; /* ---------- * Local function forward declarations * ---------- */ static void pgarch_waken_stop(SIGNAL_ARGS); +static void pgarch_timeline_switch(SIGNAL_ARGS); static void pgarch_MainLoop(void); static void pgarch_ArchiverCopyLoop(void); static bool pgarch_archiveXlog(char *xlog); @@ -169,10 +179,11 @@ PgArchiverMain(void) { /* * Ignore all signals usually bound to some action in the postmaster, - * except for SIGHUP, SIGTERM, SIGUSR1, SIGUSR2, and SIGQUIT. + * except for SIGHUP, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, and SIGQUIT. */ pqsignal(SIGHUP, SignalHandlerForConfigReload); - pqsignal(SIGINT, SIG_IGN); + /* Archiver is notified by backend if there is a timeline switch */ + pqsignal(SIGINT, pgarch_timeline_switch); pqsignal(SIGTERM, SignalHandlerForShutdownRequest); /* SIGQUIT handler was already set up by InitPostmasterChild */ pqsignal(SIGALRM, SIG_IGN); @@ -221,6 +232,23 @@ PgArchWakeup(void) SetLatch(&ProcGlobal->allProcs[arch_pgprocno].procLatch); } +/* + * Called by backend process to notify a timeline switch. + */ +void +PgArchNotifyTLISwitch(void) +{ + int arch_pgprocno = PgArch->pgprocno; + + if (arch_pgprocno != INVALID_PGPROCNO) + { + int archiver_pid = ProcGlobal->allProcs[arch_pgprocno].pid; + + if (kill(archiver_pid, SIGINT) < 0) + elog(ERROR, "could not notify timeline change to archiver"); + } +} + /* SIGUSR2 signal handler for archiver process */ static void @@ -236,6 +264,23 @@ pgarch_waken_stop(SIGNAL_ARGS) } /* + * Interrupt handler for archiver + * + * There is a timeline switch and we have been notified by backend. + */ +static void +pgarch_timeline_switch(SIGNAL_ARGS) +{ + int save_errno = errno; + + /* Set the flag to register a timeline switch */ + timeline_switch = true; + SetLatch(MyLatch); + + errno = save_errno; +} + +/* * pgarch_MainLoop * * Main loop for archiver @@ -336,6 +381,13 @@ pgarch_ArchiverCopyLoop(void) int failures = 0; int failures_orphan = 0; + if (timeline_switch) + { + /* Perform a full directory scan in next cycle */ + dirScan = true; + timeline_switch = false; + } + for (;;) { struct stat stat_buf; @@ -411,6 +463,10 @@ pgarch_ArchiverCopyLoop(void) /* successful */ pgarch_archiveDone(xlog); + /* Increment log segment number to point to the next WAL file */ + if (!IsTLHistoryFileName(xlog)) + nextLogSegNo++; + /* * Tell the collector about the WAL file that we successfully * archived @@ -596,29 +652,63 @@ pgarch_archiveXlog(char *xlog) * larger ID; the net result being that past timelines are given higher * priority for archiving. This seems okay, or at least not obviously worth * changing. + * + * WAL files are generated in a specific order of log segment number. The + * directory scan for each WAL file can be minimised by identifying the next + * WAL file in the sequence. This can be achieved by maintaining log segment + * number and timeline ID corresponding to WAL file currently being archived. + * The log segment number of current WAL file can be incremented by '1' upon + * successful archival to point to the next WAL file. The full directory scan + * can be avoided by checking the availability of next WAL file. + * + * However, a full directory scan is performed in case if there is a timeline + * switch to make sure that archiving history file takes precedence over + * archiving WAL files from older timeline. */ static bool pgarch_readyXlog(char *xlog) { - /* - * open xlog status directory and read through list of xlogs that have the - * .ready suffix, looking for earliest file. It is possible to optimise - * this code, though only a single file is expected on the vast majority - * of calls, so.... - */ + char basename[MAX_XFN_CHARS + 1]; + char xlogready[MAXPGPATH]; char XLogArchiveStatusDir[MAXPGPATH]; DIR *rldir; struct dirent *rlde; + struct stat st; + static TimeLineID curFileTLI = 0; bool found = false; bool historyFound = false; + if (!dirScan) + { + /* + * We already have the next anticipated log segment and timeline, check + * if this WAL is ready to be archived. If yes, skip the directory + * scan. + */ + XLogFileName(basename, curFileTLI, nextLogSegNo, wal_segment_size); + StatusFilePath(xlogready, basename, ".ready"); + + if (stat(xlogready, &st) == 0) + { + strcpy(xlog, basename); + return true; + } + } + + /* + * Fall-back to directory scan + * + * open xlog status directory and read through list of xlogs that have the + * .ready suffix, looking for earliest file. It is possible to optimise + * this code, though only a single file is expected on the vast majority + * of calls, so.... + */ snprintf(XLogArchiveStatusDir, MAXPGPATH, XLOGDIR "/archive_status"); rldir = AllocateDir(XLogArchiveStatusDir); while ((rlde = ReadDir(rldir, XLogArchiveStatusDir)) != NULL) { int basenamelen = (int) strlen(rlde->d_name) - 6; - char basename[MAX_XFN_CHARS + 1]; bool ishistory; /* Ignore entries with unexpected number of characters */ @@ -661,6 +751,22 @@ pgarch_readyXlog(char *xlog) strcpy(xlog, basename); } } + + /* + * Found the oldest WAL, reset timeline ID and log segment number to generate + * the next WAL file in the sequence. + */ + if (found && !historyFound) + { + XLogFromFileName(xlog, &curFileTLI, &nextLogSegNo, wal_segment_size); + ereport(LOG, + (errmsg("directory scan to archive write-ahead log file \"%s\"", + xlog))); + + /* Disable full directory scan until there is a timeline switch */ + dirScan = false; + } + FreeDir(rldir); return found; diff --git a/src/include/access/xlogarchive.h b/src/include/access/xlogarchive.h index 3edd1a9..51e2796 100644 --- a/src/include/access/xlogarchive.h +++ b/src/include/access/xlogarchive.h @@ -25,6 +25,7 @@ extern void ExecuteRecoveryCommand(const char *command, const char *commandName, extern void KeepFileRestoredFromArchive(const char *path, const char *xlogfname); extern void XLogArchiveNotify(const char *xlog); extern void XLogArchiveNotifySeg(XLogSegNo segno); +extern void XLogArchiveNotifyTLISwitch(void); extern void XLogArchiveForceDone(const char *xlog); extern bool XLogArchiveCheckDone(const char *xlog); extern bool XLogArchiveIsBusy(const char *xlog); diff --git a/src/include/postmaster/pgarch.h b/src/include/postmaster/pgarch.h index 1e47a14..e6d2e18 100644 --- a/src/include/postmaster/pgarch.h +++ b/src/include/postmaster/pgarch.h @@ -31,5 +31,6 @@ extern void PgArchShmemInit(void); extern bool PgArchCanRestart(void); extern void PgArchiverMain(void) pg_attribute_noreturn(); extern void PgArchWakeup(void); +extern void PgArchNotifyTLISwitch(void); #endif /* _PGARCH_H */ -- 1.8.3.1