Fujii Masao wrote:
> On second thought, the following lines seem to be necessary just after
> calling XLogPageRead() since it reads new WAL file from another source.
>
>> if (readSource == XLOG_FROM_STREAM || readSource == XLOG_FROM_ARCHIVE)
>> emode = PANIC;
>> else
>> emode = emode_arg;
Yep.
Here's an updated patch, with these changes since the last patch:
* Fix a bug that caused a spurious PANIC in archive recovery when the WAL
ends in the middle of a WAL record that continues across a WAL segment boundary.
* If a corrupt WAL record is found in the archive or in WAL streamed from
the master in standby mode, throw a WARNING instead of a PANIC, and keep
trying. In archive recovery (i.e. standby_mode=off) it's still a PANIC. We
could make it a WARNING too, which would give the pre-9.0 behavior of starting
up the server on corruption. I prefer PANIC, but the discussion is still going on.
* Small code changes to the handling of failedSources, inspired by your
comment. No change in functionality.
This is also available in my git repository at
git://git.postgresql.org/git/users/heikki/postgres.git, branch "xlogchanges"
--
Heikki Linnakangas
EnterpriseDB http://www.enterprisedb.com
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index e57f22e..4aa1870 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -450,20 +450,33 @@ static uint32 openLogSeg = 0;
static uint32 openLogOff = 0;
/*
+ * Codes indicating where we got a WAL file from during recovery, or where
+ * to attempt to get one.
+ */
+#define XLOG_FROM_ARCHIVE (1<<0) /* Restored using restore_command */
+#define XLOG_FROM_PG_XLOG (1<<1) /* Existing file in pg_xlog */
+#define XLOG_FROM_STREAM (1<<2) /* Streamed from master */
+
+/*
* These variables are used similarly to the ones above, but for reading
* the XLOG. Note, however, that readOff generally represents the offset
* of the page just read, not the seek position of the FD itself, which
* will be just past that page. readLen indicates how much of the current
- * page has been read into readBuf.
+ * page has been read into readBuf, and readSource indicates where we got
+ * the currently open file from.
*/
static int readFile = -1;
static uint32 readId = 0;
static uint32 readSeg = 0;
static uint32 readOff = 0;
static uint32 readLen = 0;
+static int readSource = 0; /* XLOG_FROM_* code */
-/* Is the currently open segment being streamed from primary? */
-static bool readStreamed = false;
+/*
+ * Keeps track of which sources we've tried to read the current WAL
+ * record from and failed.
+ */
+static int failedSources = 0;
/* Buffer for currently read page (XLOG_BLCKSZ bytes) */
static char *readBuf = NULL;
@@ -517,11 +530,12 @@ static bool InstallXLogFileSegment(uint32 *log, uint32 *seg, char *tmppath,
bool find_free, int *max_advance,
bool use_lock);
static int XLogFileRead(uint32 log, uint32 seg, int emode, TimeLineID tli,
- bool fromArchive, bool notexistOk);
+ int source, bool notexistOk);
static int XLogFileReadAnyTLI(uint32 log, uint32 seg, int emode,
- bool fromArchive);
+ int sources);
static bool XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
bool randAccess);
+static int emode_for_corrupt_record(int endofwalmode);
static void XLogFileClose(void);
static bool RestoreArchivedFile(char *path, const char *xlogfname,
const char *recovername, off_t expectedSize);
@@ -2573,7 +2587,7 @@ XLogFileOpen(uint32 log, uint32 seg)
*/
static int
XLogFileRead(uint32 log, uint32 seg, int emode, TimeLineID tli,
- bool fromArchive, bool notfoundOk)
+ int source, bool notfoundOk)
{
char xlogfname[MAXFNAMELEN];
char activitymsg[MAXFNAMELEN + 16];
@@ -2582,23 +2596,28 @@ XLogFileRead(uint32 log, uint32 seg, int emode, TimeLineID tli,
XLogFileName(xlogfname, tli, log, seg);
- if (fromArchive)
+ switch (source)
{
- /* Report recovery progress in PS display */
- snprintf(activitymsg, sizeof(activitymsg), "waiting for %s",
- xlogfname);
- set_ps_display(activitymsg, false);
+ case XLOG_FROM_ARCHIVE:
+ /* Report recovery progress in PS display */
+ snprintf(activitymsg, sizeof(activitymsg), "waiting for %s",
+ xlogfname);
+ set_ps_display(activitymsg, false);
- restoredFromArchive = RestoreArchivedFile(path, xlogfname,
- "RECOVERYXLOG",
- XLogSegSize);
- if (!restoredFromArchive)
- return -1;
- }
- else
- {
- XLogFilePath(path, tli, log, seg);
- restoredFromArchive = false;
+ restoredFromArchive = RestoreArchivedFile(path, xlogfname,
+ "RECOVERYXLOG",
+ XLogSegSize);
+ if (!restoredFromArchive)
+ return -1;
+ break;
+
+ case XLOG_FROM_PG_XLOG:
+ XLogFilePath(path, tli, log, seg);
+ restoredFromArchive = false;
+ break;
+
+ default:
+ elog(ERROR, "invalid XLogFileRead source %d", source);
}
fd = BasicOpenFile(path, O_RDONLY | PG_BINARY, 0);
@@ -2612,6 +2631,8 @@ XLogFileRead(uint32 log, uint32 seg, int emode, TimeLineID tli,
xlogfname);
set_ps_display(activitymsg, false);
+ readSource = source;
+
return fd;
}
if (errno != ENOENT || !notfoundOk) /* unexpected failure? */
@@ -2630,7 +2651,7 @@ XLogFileRead(uint32 log, uint32 seg, int emode, TimeLineID tli,
* searched in pg_xlog if not found in archive.
*/
static int
-XLogFileReadAnyTLI(uint32 log, uint32 seg, int emode, bool fromArchive)
+XLogFileReadAnyTLI(uint32 log, uint32 seg, int emode, int sources)
{
char path[MAXPGPATH];
ListCell *cell;
@@ -2653,20 +2674,19 @@ XLogFileReadAnyTLI(uint32 log, uint32 seg, int emode, bool fromArchive)
if (tli < curFileTLI)
break; /* don't bother looking at too-old TLIs */
- fd = XLogFileRead(log, seg, emode, tli, fromArchive, true);
- if (fd != -1)
- return fd;
+ if (sources & XLOG_FROM_ARCHIVE)
+ {
+ fd = XLogFileRead(log, seg, emode, tli, XLOG_FROM_ARCHIVE, true);
+ if (fd != -1)
+ {
+ elog(DEBUG1, "got WAL segment from archive");
+ return fd;
+ }
+ }
- /*
- * If not in StandbyMode, fall back to searching pg_xlog. In
- * StandbyMode we're streaming segments from the primary to pg_xlog,
- * and we mustn't confuse the (possibly partial) segments in pg_xlog
- * with complete segments ready to be applied. We rather wait for the
- * records to arrive through streaming.
- */
- if (!StandbyMode && fromArchive)
+ if (sources & XLOG_FROM_PG_XLOG)
{
- fd = XLogFileRead(log, seg, emode, tli, false, true);
+ fd = XLogFileRead(log, seg, emode, tli, XLOG_FROM_PG_XLOG, true);
if (fd != -1)
return fd;
}
@@ -3520,7 +3540,7 @@ RecordIsValid(XLogRecord *record, XLogRecPtr recptr, int emode)
* the returned record pointer always points there.
*/
static XLogRecord *
-ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt)
+ReadRecord(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt)
{
XLogRecord *record;
char *buffer;
@@ -3530,17 +3550,6 @@ ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt)
total_len;
uint32 targetRecOff;
uint32 pageHeaderSize;
- int emode;
-
- /*
- * We don't expect any invalid records during streaming recovery: we
- * should never hit the end of WAL because we wait for it to be streamed.
- * Therefore treat any broken WAL as PANIC, instead of failing over.
- */
- if (StandbyMode)
- emode = PANIC;
- else
- emode = emode_arg;
if (readBuf == NULL)
{
@@ -3593,6 +3602,9 @@ ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt)
randAccess = true; /* allow curFileTLI to go backwards too */
}
+ /* This is the first try to read this page. */
+ failedSources = 0;
+retry:
/* Read the page containing the record */
if (!XLogPageRead(RecPtr, emode, fetching_ckpt, randAccess))
return NULL;
@@ -3611,7 +3623,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt)
}
else if (targetRecOff < pageHeaderSize)
{
- ereport(emode,
+ ereport(emode_for_corrupt_record(emode),
(errmsg("invalid record offset at %X/%X",
RecPtr->xlogid, RecPtr->xrecoff)));
goto next_record_is_invalid;
@@ -3619,7 +3631,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt)
if ((((XLogPageHeader) readBuf)->xlp_info & XLP_FIRST_IS_CONTRECORD) &&
targetRecOff == pageHeaderSize)
{
- ereport(emode,
+ ereport(emode_for_corrupt_record(emode),
(errmsg("contrecord is requested by %X/%X",
RecPtr->xlogid, RecPtr->xrecoff)));
goto next_record_is_invalid;
@@ -3634,7 +3646,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt)
{
if (record->xl_len != 0)
{
- ereport(emode,
+ ereport(emode_for_corrupt_record(emode),
(errmsg("invalid xlog switch record at %X/%X",
RecPtr->xlogid, RecPtr->xrecoff)));
goto next_record_is_invalid;
@@ -3642,7 +3654,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt)
}
else if (record->xl_len == 0)
{
- ereport(emode,
+ ereport(emode_for_corrupt_record(emode),
(errmsg("record with zero length at %X/%X",
RecPtr->xlogid, RecPtr->xrecoff)));
goto next_record_is_invalid;
@@ -3651,14 +3663,14 @@ ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt)
record->xl_tot_len > SizeOfXLogRecord + record->xl_len +
XLR_MAX_BKP_BLOCKS * (sizeof(BkpBlock) + BLCKSZ))
{
- ereport(emode,
+ ereport(emode_for_corrupt_record(emode),
(errmsg("invalid record length at %X/%X",
RecPtr->xlogid, RecPtr->xrecoff)));
goto next_record_is_invalid;
}
if (record->xl_rmid > RM_MAX_ID)
{
- ereport(emode,
+ ereport(emode_for_corrupt_record(emode),
(errmsg("invalid resource manager ID %u at %X/%X",
record->xl_rmid, RecPtr->xlogid, RecPtr->xrecoff)));
goto next_record_is_invalid;
@@ -3671,7 +3683,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt)
*/
if (!XLByteLT(record->xl_prev, *RecPtr))
{
- ereport(emode,
+ ereport(emode_for_corrupt_record(emode),
(errmsg("record with incorrect prev-link %X/%X at %X/%X",
record->xl_prev.xlogid, record->xl_prev.xrecoff,
RecPtr->xlogid, RecPtr->xrecoff)));
@@ -3687,7 +3699,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt)
*/
if (!XLByteEQ(record->xl_prev, ReadRecPtr))
{
- ereport(emode,
+ ereport(emode_for_corrupt_record(emode),
(errmsg("record with incorrect prev-link %X/%X at %X/%X",
record->xl_prev.xlogid, record->xl_prev.xrecoff,
RecPtr->xlogid, RecPtr->xrecoff)));
@@ -3716,7 +3728,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt)
{
readRecordBufSize = 0;
/* We treat this as a "bogus data" condition */
- ereport(emode,
+ ereport(emode_for_corrupt_record(emode),
(errmsg("record length %u at %X/%X too long",
total_len, RecPtr->xlogid, RecPtr->xrecoff)));
goto next_record_is_invalid;
@@ -3756,7 +3768,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt)
/* Check that the continuation record looks valid */
if (!(((XLogPageHeader) readBuf)->xlp_info & XLP_FIRST_IS_CONTRECORD))
{
- ereport(emode,
+ ereport(emode_for_corrupt_record(emode),
(errmsg("there is no contrecord flag in log file %u, segment %u, offset %u",
readId, readSeg, readOff)));
goto next_record_is_invalid;
@@ -3766,7 +3778,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt)
if (contrecord->xl_rem_len == 0 ||
total_len != (contrecord->xl_rem_len + gotlen))
{
- ereport(emode,
+ ereport(emode_for_corrupt_record(emode),
(errmsg("invalid contrecord length %u in log file %u, segment %u, offset %u",
contrecord->xl_rem_len,
readId, readSeg, readOff)));
@@ -3784,7 +3796,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt)
contrecord->xl_rem_len);
break;
}
- if (!RecordIsValid(record, *RecPtr, emode))
+ if (!RecordIsValid(record, *RecPtr, emode_for_corrupt_record(emode)))
goto next_record_is_invalid;
pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) readBuf);
EndRecPtr.xlogid = readId;
@@ -3798,7 +3810,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt)
}
/* Record does not cross a page boundary */
- if (!RecordIsValid(record, *RecPtr, emode))
+ if (!RecordIsValid(record, *RecPtr, emode_for_corrupt_record(emode)))
goto next_record_is_invalid;
EndRecPtr.xlogid = RecPtr->xlogid;
EndRecPtr.xrecoff = RecPtr->xrecoff + MAXALIGN(total_len);
@@ -3824,13 +3836,20 @@ ReadRecord(XLogRecPtr *RecPtr, int emode_arg, bool fetching_ckpt)
}
return (XLogRecord *) buffer;
-next_record_is_invalid:;
+next_record_is_invalid:
+ failedSources |= readSource;
+
if (readFile >= 0)
{
close(readFile);
readFile = -1;
}
- return NULL;
+
+ /* In standby-mode, keep trying */
+ if (StandbyMode)
+ goto retry;
+ else
+ return NULL;
}
/*
@@ -8731,10 +8750,15 @@ StartupProcessMain(void)
/*
* Read the XLOG page containing RecPtr into readBuf (if not read already).
- * Returns true if successful, false otherwise or fails if emode is PANIC.
+ * Returns true if the page is read successfully.
*
* This is responsible for restoring files from archive as needed, as well
* as for waiting for the requested WAL record to arrive in standby mode.
+ *
+ * 'emode' specifies the log level used for reporting "file not found" or
+ * "end of WAL" situations in archive recovery, or in standby mode if a trigger
+ * file is found. If set to WARNING or below, XLogPageRead() returns false
+ * in those situations, otherwise the ereport() will cause an error exit.
*/
static bool
XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
@@ -8746,13 +8770,14 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
uint32 targetRecOff;
uint32 targetId;
uint32 targetSeg;
+ static pg_time_t last_fail_time = 0;
XLByteToSeg(*RecPtr, targetId, targetSeg);
targetPageOff = ((RecPtr->xrecoff % XLogSegSize) / XLOG_BLCKSZ) * XLOG_BLCKSZ;
targetRecOff = RecPtr->xrecoff % XLOG_BLCKSZ;
/* Fast exit if we have read the record in the current buffer already */
- if (targetId == readId && targetSeg == readSeg &&
+ if (failedSources == 0 && targetId == readId && targetSeg == readSeg &&
targetPageOff == readOff && targetRecOff < readLen)
return true;
@@ -8764,18 +8789,18 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
{
close(readFile);
readFile = -1;
+ readSource = 0;
}
XLByteToSeg(*RecPtr, readId, readSeg);
+retry:
/* See if we need to retrieve more data */
if (readFile < 0 ||
- (readStreamed && !XLByteLT(*RecPtr, receivedUpto)))
+ (readSource == XLOG_FROM_STREAM && !XLByteLT(*RecPtr, receivedUpto)))
{
if (StandbyMode)
{
- bool last_restore_failed = false;
-
/*
* In standby mode, wait for the requested record to become
* available, either via restore_command succeeding to restore the
@@ -8800,15 +8825,16 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
{
readFile =
XLogFileRead(readId, readSeg, PANIC,
- recoveryTargetTLI, false, false);
+ recoveryTargetTLI,
+ XLOG_FROM_PG_XLOG, false);
switched_segment = true;
- readStreamed = true;
+ readSource = XLOG_FROM_STREAM;
}
break;
}
if (CheckForStandbyTrigger())
- goto next_record_is_invalid;
+ goto triggered;
/*
* When streaming is active, we want to react quickly when
@@ -8818,6 +8844,9 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
}
else
{
+ int sources;
+ pg_time_t now;
+
/*
* Until walreceiver manages to reconnect, poll the
* archive.
@@ -8830,48 +8859,73 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
/* Reset curFileTLI if random fetch. */
if (randAccess)
curFileTLI = 0;
- readFile = XLogFileReadAnyTLI(readId, readSeg, DEBUG2, true);
- switched_segment = true;
- readStreamed = false;
- if (readFile != -1)
- {
- elog(DEBUG1, "got WAL segment from archive");
- break;
- }
/*
- * If we succeeded restoring some segments from archive
- * since the last connection attempt (or we haven't tried
- * streaming yet, retry immediately. But if we haven't,
- * assume the problem is persistent, so be less
- * aggressive.
+ * Try to restore the file from archive, or read an
+ * existing file from pg_xlog.
*/
- if (last_restore_failed)
+ sources = XLOG_FROM_ARCHIVE | XLOG_FROM_PG_XLOG;
+ if (!(sources & ~failedSources))
{
/*
- * Check to see if the trigger file exists. Note that
- * we do this only after failure, so when you create
- * the trigger file, we still finish replaying as much
- * as we can before failover.
+ * We've exhausted all options for retrieving the
+ * file. Retry ...
*/
- if (CheckForStandbyTrigger())
- goto next_record_is_invalid;
- pg_usleep(5000000L); /* 5 seconds */
+ failedSources = 0;
+
+ /*
+ * ... but sleep first if it hasn't been long since
+ * last attempt.
+ */
+ now = (pg_time_t) time(NULL);
+ if ((now - last_fail_time) < 5)
+ {
+ pg_usleep(1000000L * (5 - (now - last_fail_time)));
+ now = (pg_time_t) time(NULL);
+ }
+ last_fail_time = now;
+
+ /*
+ * If primary_conninfo is set, launch walreceiver to
+ * try to stream the missing WAL, before retrying
+ * to restore from archive/pg_xlog.
+ *
+ * If fetching_ckpt is TRUE, RecPtr points to the
+ * initial checkpoint location. In that case, we use
+ * RedoStartLSN as the streaming start position instead
+ * of RecPtr, so that when we later jump backwards to
+ * start redo at RedoStartLSN, we will have the logs
+ * streamed already.
+ */
+ if (PrimaryConnInfo)
+ {
+ RequestXLogStreaming(
+ fetching_ckpt ? RedoStartLSN : *RecPtr,
+ PrimaryConnInfo);
+ continue;
+ }
}
- last_restore_failed = true;
+ /* Don't try to read from a source that just failed */
+ sources &= ~failedSources;
+ readFile = XLogFileReadAnyTLI(readId, readSeg, DEBUG2,
+ sources);
+ switched_segment = true;
+ if (readFile != -1)
+ break;
/*
- * Nope, not found in archive. Try to stream it.
- *
- * If fetching_ckpt is TRUE, RecPtr points to the initial
- * checkpoint location. In that case, we use RedoStartLSN
- * as the streaming start position instead of RecPtr, so
- * that when we later jump backwards to start redo at
- * RedoStartLSN, we will have the logs streamed already.
+ * Nope, not found in archive and/or pg_xlog.
*/
- if (PrimaryConnInfo)
- RequestXLogStreaming(fetching_ckpt ? RedoStartLSN : *RecPtr,
- PrimaryConnInfo);
+ failedSources |= sources;
+
+ /*
+ * Check to see if the trigger file exists. Note that
+ * we do this only after failure, so when you create
+ * the trigger file, we still finish replaying as much
+ * as we can from archive and pg_xlog before failover.
+ */
+ if (CheckForStandbyTrigger())
+ goto triggered;
}
/*
@@ -8886,13 +8940,18 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
/* In archive or crash recovery. */
if (readFile < 0)
{
+ int sources;
/* Reset curFileTLI if random fetch. */
if (randAccess)
curFileTLI = 0;
+
+ sources = XLOG_FROM_PG_XLOG;
+ if (InArchiveRecovery)
+ sources |= XLOG_FROM_ARCHIVE;
+
readFile = XLogFileReadAnyTLI(readId, readSeg, emode,
- InArchiveRecovery);
+ sources);
switched_segment = true;
- readStreamed = false;
if (readFile < 0)
return false;
}
@@ -8900,8 +8959,8 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
}
/*
- * At this point, we have the right segment open and we know the requested
- * record is in it.
+ * At this point, we have the right segment open and if we're streaming
+ * we know the requested record is in it.
*/
Assert(readFile != -1);
@@ -8911,7 +8970,7 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
* requested record has been received, but this is for the benefit of
* future calls, to allow quick exit at the top of this function.
*/
- if (readStreamed)
+ if (readSource == XLOG_FROM_STREAM)
{
if (RecPtr->xlogid != receivedUpto.xlogid ||
(RecPtr->xrecoff / XLOG_BLCKSZ) != (receivedUpto.xrecoff / XLOG_BLCKSZ))
@@ -8936,13 +8995,14 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
readOff = 0;
if (read(readFile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
{
- ereport(emode,
+ ereport(emode_for_corrupt_record(emode),
(errcode_for_file_access(),
errmsg("could not read from log file %u, segment %u, offset %u: %m",
readId, readSeg, readOff)));
goto next_record_is_invalid;
}
- if (!ValidXLOGHeader((XLogPageHeader) readBuf, emode))
+ if (!ValidXLOGHeader((XLogPageHeader) readBuf,
+ emode_for_corrupt_record(emode)))
goto next_record_is_invalid;
}
@@ -8950,7 +9010,7 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
readOff = targetPageOff;
if (lseek(readFile, (off_t) readOff, SEEK_SET) < 0)
{
- ereport(emode,
+ ereport(emode_for_corrupt_record(emode),
(errcode_for_file_access(),
errmsg("could not seek in log file %u, segment %u to offset %u: %m",
readId, readSeg, readOff)));
@@ -8958,13 +9018,13 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
}
if (read(readFile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
{
- ereport(emode,
+ ereport(emode_for_corrupt_record(emode),
(errcode_for_file_access(),
errmsg("could not read from log file %u, segment %u, offset %u: %m",
readId, readSeg, readOff)));
goto next_record_is_invalid;
}
- if (!ValidXLOGHeader((XLogPageHeader) readBuf, emode))
+ if (!ValidXLOGHeader((XLogPageHeader) readBuf, emode_for_corrupt_record(emode)))
goto next_record_is_invalid;
Assert(targetId == readId);
@@ -8975,16 +9035,67 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
return true;
next_record_is_invalid:
+ failedSources |= readSource;
+
if (readFile >= 0)
close(readFile);
readFile = -1;
- readStreamed = false;
readLen = 0;
+ readSource = 0;
+
+ /* In standby-mode, keep trying */
+ if (StandbyMode)
+ goto retry;
+ else
+ return false;
+
+triggered:
+ if (readFile >= 0)
+ close(readFile);
+ readFile = -1;
+ readLen = 0;
+ readSource = 0;
return false;
}
/*
+ * Determine what log level should be used to report a corrupt WAL record
+ * in the current WAL page, previously read by XLogPageRead().
+ *
+ * 'emode' is the error mode that would be used to report a file-not-found
+ * or legitimate end-of-WAL situation. It is upgraded to WARNING or PANIC
+ * if a corrupt record is not expected at this point.
+ */
+static int
+emode_for_corrupt_record(int emode)
+{
+ /*
+ * We don't expect any invalid records in archive or in records streamed
+ * from master. Files in the archive should be complete, and we should
+ * never hit the end of WAL because we stop and wait for more WAL to
+ * arrive before replaying it.
+ *
+ * In standby mode, throw a WARNING and keep retrying. If we're lucky
+ * it's a transient error and will go away by itself, and in any case
+ * it's better to keep the standby open for any possible read-only
+ * queries. In PITR, however, stop recovery immediately, rather than
+ * fail over.
+ */
+ if (readSource == XLOG_FROM_STREAM || readSource == XLOG_FROM_ARCHIVE)
+ {
+ if (StandbyMode)
+ {
+ if (emode < WARNING)
+ emode = WARNING;
+ }
+ else
+ emode = PANIC;
+ }
+ return emode;
+}
+
+/*
* Check to see if the trigger file exists. If it does, request postmaster
* to shut down walreceiver, wait for it to exit, remove the trigger
* file, and return true.
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers