Antonin Houska <a...@cybertec.at> wrote:

> Alvaro Herrera <alvhe...@2ndquadrant.com> wrote:
> 
> > Not sure about the walsize; maybe it can be a member in XLogReadPos, and
> > given to XLogReadInitPos()?  (Maybe rename XLogReadPos as
> > XLogReadContext or something like that, indicating it's not just the
> > read position.)
> 
> As pointed out by others, XLogReadPos is not necessary. So if XLogRead()
> receives XLogReaderState instead, it can get the segment size from there.

Eventually I found out that it's good to have a separate structure for the
read position because walsender calls the XLogRead() function directly, not
via the XLOG reader. Currently the structure name is XLogSegment (maybe
someone can propose better name) and it's a member of XLogReaderState. No
field of the new structure is duplicated now.

The next version of the patch is attached.

-- 
Antonin Houska
Web: https://www.cybertec-postgresql.com

This change is not necessary for the XLogRead() refactoring itself, but I
noticed the problem while working on it. Not sure it's worth a separate CF
entry.

diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c
index 9196aa3aae..8cb551a837 100644
--- a/src/backend/access/transam/xlogreader.c
+++ b/src/backend/access/transam/xlogreader.c
@@ -35,6 +35,7 @@ static bool ValidXLogRecordHeader(XLogReaderState *state, XLogRecPtr RecPtr,
 					  XLogRecPtr PrevRecPtr, XLogRecord *record, bool randAccess);
 static bool ValidXLogRecord(XLogReaderState *state, XLogRecord *record,
 				XLogRecPtr recptr);
+static void XLogReaderInvalReadState(XLogReaderState *state);
 static int ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr,
 				 int reqLen);
 static void report_invalid_record(XLogReaderState *state, const char *fmt,...) pg_attribute_printf(2, 3);
@@ -620,7 +621,7 @@ err:
 /*
  * Invalidate the xlogreader's read state to force a re-read.
  */
-void
+static void
 XLogReaderInvalReadState(XLogReaderState *state)
 {
 	state->readSegNo = 0;
diff --git a/src/include/access/xlogreader.h b/src/include/access/xlogreader.h
index f3bae0bf49..2d3c067135 100644
--- a/src/include/access/xlogreader.h
+++ b/src/include/access/xlogreader.h
@@ -212,9 +212,6 @@ extern struct XLogRecord *XLogReadRecord(XLogReaderState *state,
 extern bool XLogReaderValidatePageHeader(XLogReaderState *state,
 							 XLogRecPtr recptr, char *phdr);
 
-/* Invalidate read state */
-extern void XLogReaderInvalReadState(XLogReaderState *state);
-
 #ifdef FRONTEND
 extern XLogRecPtr XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr);
 #endif							/* FRONTEND */
The timeline information is available to caller via XLogReaderState. Now that
XLogRead() is gonna be (sometimes) responsible for determining the TLI, it
would have to be added the (TimeLineID *) argument too, just to be consistent
with the current coding style. Since XLogRead() updates also other
position-specific fields of XLogReaderState, it seems simpler if we remove the
output argument from XLogPageReadCB and always report the TLI via
XLogReaderState.

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 19b32e21df..5723aa54a7 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -886,8 +886,7 @@ static int XLogFileRead(XLogSegNo segno, int emode, TimeLineID tli,
 			 int source, bool notfoundOk);
 static int	XLogFileReadAnyTLI(XLogSegNo segno, int emode, int source);
 static int XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
-			 int reqLen, XLogRecPtr targetRecPtr, char *readBuf,
-			 TimeLineID *readTLI);
+			 int reqLen, XLogRecPtr targetRecPtr, char *readBuf);
 static bool WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 							bool fetching_ckpt, XLogRecPtr tliRecPtr);
 static int	emode_for_corrupt_record(int emode, XLogRecPtr RecPtr);
@@ -11509,7 +11508,7 @@ CancelBackup(void)
  */
 static int
 XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen,
-			 XLogRecPtr targetRecPtr, char *readBuf, TimeLineID *readTLI)
+			 XLogRecPtr targetRecPtr, char *readBuf)
 {
 	XLogPageReadPrivate *private =
 	(XLogPageReadPrivate *) xlogreader->private_data;
@@ -11626,7 +11625,7 @@ retry:
 	Assert(targetPageOff == readOff);
 	Assert(reqLen <= readLen);
 
-	*readTLI = curFileTLI;
+	xlogreader->readPageTLI = curFileTLI;
 
 	/*
 	 * Check the page header immediately, so that we can retry immediately if
diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c
index 8cb551a837..244fc7d634 100644
--- a/src/backend/access/transam/xlogreader.c
+++ b/src/backend/access/transam/xlogreader.c
@@ -558,7 +558,7 @@ ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen)
 
 		readLen = state->read_page(state, targetSegmentPtr, XLOG_BLCKSZ,
 								   state->currRecPtr,
-								   state->readBuf, &state->readPageTLI);
+								   state->readBuf);
 		if (readLen < 0)
 			goto err;
 
@@ -576,7 +576,7 @@ ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen)
 	 */
 	readLen = state->read_page(state, pageptr, Max(reqLen, SizeOfXLogShortPHD),
 							   state->currRecPtr,
-							   state->readBuf, &state->readPageTLI);
+							   state->readBuf);
 	if (readLen < 0)
 		goto err;
 
@@ -595,7 +595,7 @@ ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen)
 	{
 		readLen = state->read_page(state, pageptr, XLogPageHeaderSize(hdr),
 								   state->currRecPtr,
-								   state->readBuf, &state->readPageTLI);
+								   state->readBuf);
 		if (readLen < 0)
 			goto err;
 	}
diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c
index 10a663bae6..cba180912b 100644
--- a/src/backend/access/transam/xlogutils.c
+++ b/src/backend/access/transam/xlogutils.c
@@ -909,12 +909,12 @@ XLogReadDetermineTimeline(XLogReaderState *state, XLogRecPtr wantPage, uint32 wa
  */
 int
 read_local_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr,
-					 int reqLen, XLogRecPtr targetRecPtr, char *cur_page,
-					 TimeLineID *pageTLI)
+					 int reqLen, XLogRecPtr targetRecPtr, char *cur_page)
 {
 	XLogRecPtr	read_upto,
 				loc;
 	int			count;
+	TimeLineID	pageTLI;
 
 	loc = targetPagePtr + reqLen;
 
@@ -934,7 +934,7 @@ read_local_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr,
 		else
 			read_upto = GetXLogReplayRecPtr(&ThisTimeLineID);
 
-		*pageTLI = ThisTimeLineID;
+		pageTLI = ThisTimeLineID;
 
 		/*
 		 * Check which timeline to get the record from.
@@ -991,7 +991,7 @@ read_local_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr,
 			 * nothing cares so long as the timeline doesn't go backwards.  We
 			 * should read the page header instead; FIXME someday.
 			 */
-			*pageTLI = state->currTLI;
+			pageTLI = state->currTLI;
 
 			/* No need to wait on a historical timeline */
 			break;
@@ -1022,8 +1022,9 @@ read_local_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr,
 	 * as 'count', read the whole page anyway. It's guaranteed to be
 	 * zero-padded up to the page boundary if it's incomplete.
 	 */
-	XLogRead(cur_page, state->wal_segment_size, *pageTLI, targetPagePtr,
+	XLogRead(cur_page, state->wal_segment_size, pageTLI, targetPagePtr,
 			 XLOG_BLCKSZ);
+	state->readPageTLI = pageTLI;
 
 	/* number of valid bytes in the buffer */
 	return count;
diff --git a/src/backend/replication/logical/logicalfuncs.c b/src/backend/replication/logical/logicalfuncs.c
index d974400d6e..d1cf80d441 100644
--- a/src/backend/replication/logical/logicalfuncs.c
+++ b/src/backend/replication/logical/logicalfuncs.c
@@ -116,10 +116,10 @@ check_permissions(void)
 
 int
 logical_read_local_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr,
-							 int reqLen, XLogRecPtr targetRecPtr, char *cur_page, TimeLineID *pageTLI)
+							 int reqLen, XLogRecPtr targetRecPtr, char *cur_page)
 {
 	return read_local_xlog_page(state, targetPagePtr, reqLen,
-								targetRecPtr, cur_page, pageTLI);
+								targetRecPtr, cur_page);
 }
 
 /*
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index 09c8b5a5b3..4296fe8fee 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -763,7 +763,7 @@ StartReplication(StartReplicationCmd *cmd)
  */
 static int
 logical_read_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr, int reqLen,
-					   XLogRecPtr targetRecPtr, char *cur_page, TimeLineID *pageTLI)
+					   XLogRecPtr targetRecPtr, char *cur_page)
 {
 	XLogRecPtr	flushptr;
 	int			count;
diff --git a/src/bin/pg_rewind/parsexlog.c b/src/bin/pg_rewind/parsexlog.c
index 04a3535dfb..ecb0533436 100644
--- a/src/bin/pg_rewind/parsexlog.c
+++ b/src/bin/pg_rewind/parsexlog.c
@@ -50,8 +50,7 @@ typedef struct XLogPageReadPrivate
 
 static int SimpleXLogPageRead(XLogReaderState *xlogreader,
 				   XLogRecPtr targetPagePtr,
-				   int reqLen, XLogRecPtr targetRecPtr, char *readBuf,
-				   TimeLineID *pageTLI);
+				   int reqLen, XLogRecPtr targetRecPtr, char *readBuf);
 
 /*
  * Read WAL from the datadir/pg_wal, starting from 'startpoint' on timeline
@@ -239,8 +238,7 @@ findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, int tliIndex,
 /* XLogreader callback function, to read a WAL page */
 static int
 SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
-				   int reqLen, XLogRecPtr targetRecPtr, char *readBuf,
-				   TimeLineID *pageTLI)
+				   int reqLen, XLogRecPtr targetRecPtr, char *readBuf)
 {
 	XLogPageReadPrivate *private = (XLogPageReadPrivate *) xlogreader->private_data;
 	uint32		targetPageOff;
@@ -323,7 +321,7 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
 
 	Assert(targetSegNo == xlogreadsegno);
 
-	*pageTLI = targetHistory[private->tliIndex].tli;
+	xlogreader->readPageTLI = targetHistory[private->tliIndex].tli;
 	return XLOG_BLCKSZ;
 }
 
diff --git a/src/bin/pg_waldump/pg_waldump.c b/src/bin/pg_waldump/pg_waldump.c
index d37e9f0817..2283c553b5 100644
--- a/src/bin/pg_waldump/pg_waldump.c
+++ b/src/bin/pg_waldump/pg_waldump.c
@@ -423,7 +423,7 @@ XLogDumpXLogRead(const char *directory, TimeLineID timeline_id,
  */
 static int
 XLogDumpReadPage(XLogReaderState *state, XLogRecPtr targetPagePtr, int reqLen,
-				 XLogRecPtr targetPtr, char *readBuff, TimeLineID *curFileTLI)
+				 XLogRecPtr targetPtr, char *readBuff)
 {
 	XLogDumpPrivate *private = state->private_data;
 	int			count = XLOG_BLCKSZ;
diff --git a/src/include/access/xlogreader.h b/src/include/access/xlogreader.h
index 2d3c067135..fade6b2e7d 100644
--- a/src/include/access/xlogreader.h
+++ b/src/include/access/xlogreader.h
@@ -34,8 +34,7 @@ typedef int (*XLogPageReadCB) (XLogReaderState *xlogreader,
 							   XLogRecPtr targetPagePtr,
 							   int reqLen,
 							   XLogRecPtr targetRecPtr,
-							   char *readBuf,
-							   TimeLineID *pageTLI);
+							   char *readBuf);
 
 typedef struct
 {
@@ -95,9 +94,8 @@ struct XLogReaderState
 	 * actual WAL record it's interested in.  In that case, targetRecPtr can
 	 * be used to determine which timeline to read the page from.
 	 *
-	 * The callback shall set *pageTLI to the TLI of the file the page was
-	 * read from.  It is currently used only for error reporting purposes, to
-	 * reconstruct the name of the WAL file where an error occurred.
+	 * The callback shall set ->readPageTLI to the TLI of the file the page
+	 * was read from.
 	 */
 	XLogPageReadCB read_page;
 
diff --git a/src/include/access/xlogutils.h b/src/include/access/xlogutils.h
index 0ab5ba62f5..d3aaad839c 100644
--- a/src/include/access/xlogutils.h
+++ b/src/include/access/xlogutils.h
@@ -49,8 +49,7 @@ extern void FreeFakeRelcacheEntry(Relation fakerel);
 
 extern int read_local_xlog_page(XLogReaderState *state,
 					 XLogRecPtr targetPagePtr, int reqLen,
-					 XLogRecPtr targetRecPtr, char *cur_page,
-					 TimeLineID *pageTLI);
+					 XLogRecPtr targetRecPtr, char *cur_page);
 
 extern void XLogReadDetermineTimeline(XLogReaderState *state,
 						  XLogRecPtr wantPage, uint32 wantLength);
diff --git a/src/include/replication/logicalfuncs.h b/src/include/replication/logicalfuncs.h
index 3fb7ad5d67..f6fcbc615e 100644
--- a/src/include/replication/logicalfuncs.h
+++ b/src/include/replication/logicalfuncs.h
@@ -14,6 +14,6 @@
 extern int logical_read_local_xlog_page(XLogReaderState *state,
 							 XLogRecPtr targetPagePtr,
 							 int reqLen, XLogRecPtr targetRecPtr,
-							 char *cur_page, TimeLineID *pageTLI);
+							 char *cur_page);
 
 #endif
Introduce XLogSegment structure.

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 5723aa54a7..59df789674 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -4296,7 +4296,7 @@ ReadRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr, int emode,
 			XLByteToSeg(xlogreader->latestPagePtr, segno, wal_segment_size);
 			offset = XLogSegmentOffset(xlogreader->latestPagePtr,
 									   wal_segment_size);
-			XLogFileName(fname, xlogreader->readPageTLI, segno,
+			XLogFileName(fname, xlogreader->seg.tli, segno,
 						 wal_segment_size);
 			ereport(emode_for_corrupt_record(emode,
 											 RecPtr ? RecPtr : EndRecPtr),
@@ -7340,7 +7340,7 @@ StartupXLOG(void)
 	 * and we were reading the old WAL from a segment belonging to a higher
 	 * timeline.
 	 */
-	EndOfLogTLI = xlogreader->readPageTLI;
+	EndOfLogTLI = xlogreader->seg.tli;
 
 	/*
 	 * Complain if we did not roll forward far enough to render the backup
@@ -11625,7 +11625,7 @@ retry:
 	Assert(targetPageOff == readOff);
 	Assert(reqLen <= readLen);
 
-	xlogreader->readPageTLI = curFileTLI;
+	xlogreader->seg.tli = curFileTLI;
 
 	/*
 	 * Check the page header immediately, so that we can retry immediately if
diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c
index 244fc7d634..98cc5d6d9f 100644
--- a/src/backend/access/transam/xlogreader.c
+++ b/src/backend/access/transam/xlogreader.c
@@ -95,7 +95,9 @@ XLogReaderAllocate(int wal_segment_size, XLogPageReadCB pagereadfunc,
 		return NULL;
 	}
 
-	state->wal_segment_size = wal_segment_size;
+	/* Initialize segment pointer. */
+	XLogSegmentInit(&state->seg, wal_segment_size);
+
 	state->read_page = pagereadfunc;
 	/* system_identifier initialized to zeroes above */
 	state->private_data = private_data;
@@ -489,8 +491,8 @@ XLogReadRecord(XLogReaderState *state, XLogRecPtr RecPtr, char **errormsg)
 		(record->xl_info & ~XLR_INFO_MASK) == XLOG_SWITCH)
 	{
 		/* Pretend it extends to end of segment */
-		state->EndRecPtr += state->wal_segment_size - 1;
-		state->EndRecPtr -= XLogSegmentOffset(state->EndRecPtr, state->wal_segment_size);
+		state->EndRecPtr += state->seg.size - 1;
+		state->EndRecPtr -= XLogSegmentOffset(state->EndRecPtr, state->seg.size);
 	}
 
 	if (DecodeXLogRecord(state, record, errormsg))
@@ -532,12 +534,12 @@ ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen)
 
 	Assert((pageptr % XLOG_BLCKSZ) == 0);
 
-	XLByteToSeg(pageptr, targetSegNo, state->wal_segment_size);
-	targetPageOff = XLogSegmentOffset(pageptr, state->wal_segment_size);
+	XLByteToSeg(pageptr, targetSegNo, state->seg.size);
+	targetPageOff = XLogSegmentOffset(pageptr, state->seg.size);
 
 	/* check whether we have all the requested data already */
-	if (targetSegNo == state->readSegNo && targetPageOff == state->readOff &&
-		reqLen <= state->readLen)
+	if (targetSegNo == state->seg.num &&
+		targetPageOff == state->seg.off && reqLen <= state->readLen)
 		return state->readLen;
 
 	/*
@@ -552,7 +554,7 @@ ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen)
 	 * record is.  This is so that we can check the additional identification
 	 * info that is present in the first page's "long" header.
 	 */
-	if (targetSegNo != state->readSegNo && targetPageOff != 0)
+	if (targetSegNo != state->seg.num && targetPageOff != 0)
 	{
 		XLogRecPtr	targetSegmentPtr = pageptr - targetPageOff;
 
@@ -607,8 +609,8 @@ ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen)
 		goto err;
 
 	/* update read state information */
-	state->readSegNo = targetSegNo;
-	state->readOff = targetPageOff;
+	state->seg.num = targetSegNo;
+	state->seg.off = targetPageOff;
 	state->readLen = readLen;
 
 	return readLen;
@@ -624,8 +626,8 @@ err:
 static void
 XLogReaderInvalReadState(XLogReaderState *state)
 {
-	state->readSegNo = 0;
-	state->readOff = 0;
+	state->seg.num = 0;
+	state->seg.off = 0;
 	state->readLen = 0;
 }
 
@@ -744,16 +746,16 @@ XLogReaderValidatePageHeader(XLogReaderState *state, XLogRecPtr recptr,
 
 	Assert((recptr % XLOG_BLCKSZ) == 0);
 
-	XLByteToSeg(recptr, segno, state->wal_segment_size);
-	offset = XLogSegmentOffset(recptr, state->wal_segment_size);
+	XLByteToSeg(recptr, segno, state->seg.size);
+	offset = XLogSegmentOffset(recptr, state->seg.size);
 
-	XLogSegNoOffsetToRecPtr(segno, offset, state->wal_segment_size, recaddr);
+	XLogSegNoOffsetToRecPtr(segno, offset, state->seg.size, recaddr);
 
 	if (hdr->xlp_magic != XLOG_PAGE_MAGIC)
 	{
 		char		fname[MAXFNAMELEN];
 
-		XLogFileName(fname, state->readPageTLI, segno, state->wal_segment_size);
+		XLogFileName(fname, state->seg.tli, segno, state->seg.size);
 
 		report_invalid_record(state,
 							  "invalid magic number %04X in log segment %s, offset %u",
@@ -767,7 +769,7 @@ XLogReaderValidatePageHeader(XLogReaderState *state, XLogRecPtr recptr,
 	{
 		char		fname[MAXFNAMELEN];
 
-		XLogFileName(fname, state->readPageTLI, segno, state->wal_segment_size);
+		XLogFileName(fname, state->seg.tli, segno, state->seg.size);
 
 		report_invalid_record(state,
 							  "invalid info bits %04X in log segment %s, offset %u",
@@ -800,7 +802,7 @@ XLogReaderValidatePageHeader(XLogReaderState *state, XLogRecPtr recptr,
 								  fhdrident_str, sysident_str);
 			return false;
 		}
-		else if (longhdr->xlp_seg_size != state->wal_segment_size)
+		else if (longhdr->xlp_seg_size != state->seg.size)
 		{
 			report_invalid_record(state,
 								  "WAL file is from different database system: incorrect segment size in page header");
@@ -817,7 +819,7 @@ XLogReaderValidatePageHeader(XLogReaderState *state, XLogRecPtr recptr,
 	{
 		char		fname[MAXFNAMELEN];
 
-		XLogFileName(fname, state->readPageTLI, segno, state->wal_segment_size);
+		XLogFileName(fname, state->seg.tli, segno, state->seg.size);
 
 		/* hmm, first page of file doesn't have a long header? */
 		report_invalid_record(state,
@@ -837,7 +839,7 @@ XLogReaderValidatePageHeader(XLogReaderState *state, XLogRecPtr recptr,
 	{
 		char		fname[MAXFNAMELEN];
 
-		XLogFileName(fname, state->readPageTLI, segno, state->wal_segment_size);
+		XLogFileName(fname, state->seg.tli, segno, state->seg.size);
 
 		report_invalid_record(state,
 							  "unexpected pageaddr %X/%X in log segment %s, offset %u",
@@ -862,7 +864,7 @@ XLogReaderValidatePageHeader(XLogReaderState *state, XLogRecPtr recptr,
 		{
 			char		fname[MAXFNAMELEN];
 
-			XLogFileName(fname, state->readPageTLI, segno, state->wal_segment_size);
+			XLogFileName(fname, state->seg.tli, segno, state->seg.size);
 
 			report_invalid_record(state,
 								  "out-of-sequence timeline ID %u (after %u) in log segment %s, offset %u",
@@ -1006,6 +1008,19 @@ out:
 
 #endif							/* FRONTEND */
 
+/*
+ * Initialize the passed segment pointer.
+ */
+void
+XLogSegmentInit(XLogSegment *seg, int size)
+{
+	seg->file = -1;
+	seg->num = 0;
+	seg->off = 0;
+	seg->tli = 0;
+	seg->dir = NULL;
+	seg->size = size;
+}
 
 /* ----------------------------------------
  * Functions for decoding the data and block references in a record.
diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c
index cba180912b..836c2e2927 100644
--- a/src/backend/access/transam/xlogutils.c
+++ b/src/backend/access/transam/xlogutils.c
@@ -802,8 +802,8 @@ XLogRead(char *buf, int segsize, TimeLineID tli, XLogRecPtr startptr,
 void
 XLogReadDetermineTimeline(XLogReaderState *state, XLogRecPtr wantPage, uint32 wantLength)
 {
-	const XLogRecPtr lastReadPage = state->readSegNo *
-	state->wal_segment_size + state->readOff;
+	const XLogRecPtr lastReadPage = state->seg.num *
+	state->seg.size + state->seg.off;
 
 	Assert(wantPage != InvalidXLogRecPtr && wantPage % XLOG_BLCKSZ == 0);
 	Assert(wantLength <= XLOG_BLCKSZ);
@@ -847,8 +847,8 @@ XLogReadDetermineTimeline(XLogReaderState *state, XLogRecPtr wantPage, uint32 wa
 	if (state->currTLIValidUntil != InvalidXLogRecPtr &&
 		state->currTLI != ThisTimeLineID &&
 		state->currTLI != 0 &&
-		((wantPage + wantLength) / state->wal_segment_size) <
-		(state->currTLIValidUntil / state->wal_segment_size))
+		((wantPage + wantLength) / state->seg.size) <
+		(state->currTLIValidUntil / state->seg.size))
 		return;
 
 	/*
@@ -870,11 +870,11 @@ XLogReadDetermineTimeline(XLogReaderState *state, XLogRecPtr wantPage, uint32 wa
 		 */
 		List	   *timelineHistory = readTimeLineHistory(ThisTimeLineID);
 
-		XLogRecPtr	endOfSegment = (((wantPage / state->wal_segment_size) + 1)
-									* state->wal_segment_size) - 1;
+		XLogRecPtr	endOfSegment = (((wantPage / state->seg.size) + 1)
+									* state->seg.size) - 1;
 
-		Assert(wantPage / state->wal_segment_size ==
-			   endOfSegment / state->wal_segment_size);
+		Assert(wantPage / state->seg.size ==
+			   endOfSegment / state->seg.size);
 
 		/*
 		 * Find the timeline of the last LSN on the segment containing
@@ -1022,9 +1022,9 @@ read_local_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr,
 	 * as 'count', read the whole page anyway. It's guaranteed to be
 	 * zero-padded up to the page boundary if it's incomplete.
 	 */
-	XLogRead(cur_page, state->wal_segment_size, pageTLI, targetPagePtr,
+	XLogRead(cur_page, state->seg.size, state->seg.tli, targetPagePtr,
 			 XLOG_BLCKSZ);
-	state->readPageTLI = pageTLI;
+	state->seg.tli = pageTLI;
 
 	/* number of valid bytes in the buffer */
 	return count;
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index 4296fe8fee..6dfb525e1a 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -128,16 +128,7 @@ bool		log_replication_commands = false;
  */
 bool		wake_wal_senders = false;
 
-/*
- * These variables are used similarly to openLogFile/SegNo/Off,
- * but for walsender to read the XLOG.
- */
-static int	sendFile = -1;
-static XLogSegNo sendSegNo = 0;
-static uint32 sendOff = 0;
-
-/* Timeline ID of the currently open file */
-static TimeLineID curFileTimeLine = 0;
+static XLogSegment *sendSeg = NULL;
 
 /*
  * These variables keep track of the state of the timeline we're currently
@@ -285,6 +276,10 @@ InitWalSender(void)
 
 	/* Initialize empty timestamp buffer for lag tracking. */
 	lag_tracker = MemoryContextAllocZero(TopMemoryContext, sizeof(LagTracker));
+
+	/* Make sure we can remember the current read position in XLOG. */
+	sendSeg = (XLogSegment *) MemoryContextAlloc(TopMemoryContext, sizeof(XLogSegment));
+	XLogSegmentInit(sendSeg, wal_segment_size);
 }
 
 /*
@@ -301,10 +296,10 @@ WalSndErrorCleanup(void)
 	ConditionVariableCancelSleep();
 	pgstat_report_wait_end();
 
-	if (sendFile >= 0)
+	if (sendSeg->file >= 0)
 	{
-		close(sendFile);
-		sendFile = -1;
+		close(sendSeg->file);
+		sendSeg->file = -1;
 	}
 
 	if (MyReplicationSlot != NULL)
@@ -2378,15 +2373,16 @@ retry:
 
 		startoff = XLogSegmentOffset(recptr, wal_segment_size);
 
-		if (sendFile < 0 || !XLByteInSeg(recptr, sendSegNo, wal_segment_size))
+		if (sendSeg->file < 0 ||
+			!XLByteInSeg(recptr, sendSeg->num, sendSeg->size))
 		{
 			char		path[MAXPGPATH];
 
 			/* Switch to another logfile segment */
-			if (sendFile >= 0)
-				close(sendFile);
+			if (sendSeg->file >= 0)
+				close(sendSeg->file);
 
-			XLByteToSeg(recptr, sendSegNo, wal_segment_size);
+			XLByteToSeg(recptr, sendSeg->num, sendSeg->size);
 
 			/*-------
 			 * When reading from a historic timeline, and there is a timeline
@@ -2414,20 +2410,20 @@ retry:
 			 * used portion of the old segment is copied to the new file.
 			 *-------
 			 */
-			curFileTimeLine = sendTimeLine;
+			sendSeg->tli = sendTimeLine;
 			if (sendTimeLineIsHistoric)
 			{
 				XLogSegNo	endSegNo;
 
 				XLByteToSeg(sendTimeLineValidUpto, endSegNo, wal_segment_size);
-				if (sendSegNo == endSegNo)
-					curFileTimeLine = sendTimeLineNextTLI;
+				if (sendSeg->num == endSegNo)
+					sendSeg->tli = sendTimeLineNextTLI;
 			}
 
-			XLogFilePath(path, curFileTimeLine, sendSegNo, wal_segment_size);
+			XLogFilePath(path, sendSeg->tli, sendSeg->num, wal_segment_size);
 
-			sendFile = BasicOpenFile(path, O_RDONLY | PG_BINARY);
-			if (sendFile < 0)
+			sendSeg->file = BasicOpenFile(path, O_RDONLY | PG_BINARY);
+			if (sendSeg->file < 0)
 			{
 				/*
 				 * If the file is not found, assume it's because the standby
@@ -2438,26 +2434,26 @@ retry:
 					ereport(ERROR,
 							(errcode_for_file_access(),
 							 errmsg("requested WAL segment %s has already been removed",
-									XLogFileNameP(curFileTimeLine, sendSegNo))));
+									XLogFileNameP(sendSeg->tli, sendSeg->num))));
 				else
 					ereport(ERROR,
 							(errcode_for_file_access(),
 							 errmsg("could not open file \"%s\": %m",
 									path)));
 			}
-			sendOff = 0;
+			sendSeg->off = 0;
 		}
 
 		/* Need to seek in the file? */
-		if (sendOff != startoff)
+		if (sendSeg->off != startoff)
 		{
-			if (lseek(sendFile, (off_t) startoff, SEEK_SET) < 0)
+			if (lseek(sendSeg->file, (off_t) startoff, SEEK_SET) < 0)
 				ereport(ERROR,
 						(errcode_for_file_access(),
 						 errmsg("could not seek in log segment %s to offset %u: %m",
-								XLogFileNameP(curFileTimeLine, sendSegNo),
+								XLogFileNameP(sendSeg->tli, sendSeg->num),
 								startoff)));
-			sendOff = startoff;
+			sendSeg->off = startoff;
 		}
 
 		/* How many bytes are within this segment? */
@@ -2467,29 +2463,29 @@ retry:
 			segbytes = nbytes;
 
 		pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
-		readbytes = read(sendFile, p, segbytes);
+		readbytes = read(sendSeg->file, p, segbytes);
 		pgstat_report_wait_end();
 		if (readbytes < 0)
 		{
 			ereport(ERROR,
 					(errcode_for_file_access(),
 					 errmsg("could not read from log segment %s, offset %u, length %zu: %m",
-							XLogFileNameP(curFileTimeLine, sendSegNo),
-							sendOff, (Size) segbytes)));
+							XLogFileNameP(sendSeg->tli, sendSeg->num),
+							sendSeg->off, (Size) segbytes)));
 		}
 		else if (readbytes == 0)
 		{
 			ereport(ERROR,
 					(errcode(ERRCODE_DATA_CORRUPTED),
 					 errmsg("could not read from log segment %s, offset %u: read %d of %zu",
-							XLogFileNameP(curFileTimeLine, sendSegNo),
-							sendOff, readbytes, (Size) segbytes)));
+							XLogFileNameP(sendSeg->tli, sendSeg->num),
+							sendSeg->off, readbytes, (Size) segbytes)));
 		}
 
 		/* Update state for read */
 		recptr += readbytes;
 
-		sendOff += readbytes;
+		sendSeg->off += readbytes;
 		nbytes -= readbytes;
 		p += readbytes;
 	}
@@ -2520,10 +2516,10 @@ retry:
 		walsnd->needreload = false;
 		SpinLockRelease(&walsnd->mutex);
 
-		if (reload && sendFile >= 0)
+		if (reload && sendSeg->file >= 0)
 		{
-			close(sendFile);
-			sendFile = -1;
+			close(sendSeg->file);
+			sendSeg->file = -1;
 
 			goto retry;
 		}
@@ -2689,9 +2685,9 @@ XLogSendPhysical(void)
 	if (sendTimeLineIsHistoric && sendTimeLineValidUpto <= sentPtr)
 	{
 		/* close the current file. */
-		if (sendFile >= 0)
-			close(sendFile);
-		sendFile = -1;
+		if (sendSeg->file >= 0)
+			close(sendSeg->file);
+		sendSeg->file = -1;
 
 		/* Send CopyDone */
 		pq_putmessage_noblock('c', NULL, 0);
diff --git a/src/bin/pg_rewind/parsexlog.c b/src/bin/pg_rewind/parsexlog.c
index ecb0533436..a8a0cc4da2 100644
--- a/src/bin/pg_rewind/parsexlog.c
+++ b/src/bin/pg_rewind/parsexlog.c
@@ -321,7 +321,7 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
 
 	Assert(targetSegNo == xlogreadsegno);
 
-	xlogreader->readPageTLI = targetHistory[private->tliIndex].tli;
+	xlogreader->seg.tli = targetHistory[private->tliIndex].tli;
 	return XLOG_BLCKSZ;
 }
 
diff --git a/src/bin/pg_waldump/pg_waldump.c b/src/bin/pg_waldump/pg_waldump.c
index 2283c553b5..b31b6cdcaf 100644
--- a/src/bin/pg_waldump/pg_waldump.c
+++ b/src/bin/pg_waldump/pg_waldump.c
@@ -1098,6 +1098,9 @@ main(int argc, char **argv)
 	if (!xlogreader_state)
 		fatal_error("out of memory");
 
+	/* Finalize the segment pointer. */
+	xlogreader_state->seg.dir = private.inpath;
+
 	/* first find a valid recptr to start from */
 	first_record = XLogFindNextRecord(xlogreader_state, private.startptr);
 
diff --git a/src/include/access/xlogreader.h b/src/include/access/xlogreader.h
index fade6b2e7d..0d801d5903 100644
--- a/src/include/access/xlogreader.h
+++ b/src/include/access/xlogreader.h
@@ -27,6 +27,20 @@
 
 #include "access/xlogrecord.h"
 
+/*
+ * Position in XLOG file while reading it.
+ */
+typedef struct XLogSegment
+{
+	int			file;			/* segment file descriptor */
+	XLogSegNo	num;			/* segment number */
+	uint32		off;			/* offset in the segment */
+	TimeLineID	tli;			/* timeline ID of the currently open file */
+
+	char	   *dir;			/* directory (only needed by frontends) */
+	int			size;			/* segment size */
+} XLogSegment;
+
 typedef struct XLogReaderState XLogReaderState;
 
 /* Function type definition for the read_page callback */
@@ -73,11 +87,6 @@ struct XLogReaderState
 	 */
 
 	/*
-	 * Segment size of the to-be-parsed data (mandatory).
-	 */
-	int			wal_segment_size;
-
-	/*
 	 * Data input callback (mandatory).
 	 *
 	 * This callback shall read at least reqLen valid bytes of the xlog page
@@ -94,8 +103,8 @@ struct XLogReaderState
 	 * actual WAL record it's interested in.  In that case, targetRecPtr can
 	 * be used to determine which timeline to read the page from.
 	 *
-	 * The callback shall set ->readPageTLI to the TLI of the file the page
-	 * was read from.
+	 * The callback shall set ->seg.tli to the TLI of the file the page was
+	 * read from.
 	 */
 	XLogPageReadCB read_page;
 
@@ -150,10 +159,8 @@ struct XLogReaderState
 	char	   *readBuf;
 	uint32		readLen;
 
-	/* last read segment, segment offset, TLI for data currently in readBuf */
-	XLogSegNo	readSegNo;
-	uint32		readOff;
-	TimeLineID	readPageTLI;
+	/* last read XLOG position for data currently in readBuf */
+	XLogSegment seg;
 
 	/*
 	 * beginning of prior page read, and its TLI.  Doesn't necessarily
@@ -214,6 +221,8 @@ extern bool XLogReaderValidatePageHeader(XLogReaderState *state,
 extern XLogRecPtr XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr);
 #endif							/* FRONTEND */
 
+extern void XLogSegmentInit(XLogSegment *seg, int size);
+
 /* Functions for decoding an XLogRecord */
 
 extern bool DecodeXLogRecord(XLogReaderState *state, XLogRecord *record,
Use only xlogreader.c:XLogRead().

The implementations in xlogutils.c and walsender.c are just renamed now, to be
removed by the following diff.

diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c
index 98cc5d6d9f..1044d4e4dd 100644
--- a/src/backend/access/transam/xlogreader.c
+++ b/src/backend/access/transam/xlogreader.c
@@ -17,6 +17,8 @@
  */
 #include "postgres.h"
 
+#include <unistd.h>
+
 #include "access/transam.h"
 #include "access/xlogrecord.h"
 #include "access/xlog_internal.h"
@@ -26,6 +28,7 @@
 #include "replication/origin.h"
 
 #ifndef FRONTEND
+#include "pgstat.h"
 #include "utils/memutils.h"
 #endif
 
@@ -1022,6 +1025,134 @@ XLogSegmentInit(XLogSegment *seg, int size)
 	seg->size = size;
 }
 
+#ifndef FRONTEND
+/*
+ * Backend should have wal_segment_size variable initialized, segsize is not
+ * used.
+ */
+#define XLogFileNameCommon(tli, num, segsize) XLogFileNameP((tli), (num))
+#define xlr_error(...) ereport(ERROR, (errcode_for_file_access(), errmsg(__VA_ARGS__)))
+#else
+static char xlr_error_msg[MAXFNAMELEN];
+#define XLogFileNameCommon(tli, num, segsize) (XLogFileName(xlr_error_msg, (tli), (num), (segsize)),\
+											   xlr_error_msg)
+#include "fe_utils/logging.h"
+/*
+ * Frontend application (currently only pg_waldump.c) cannot catch and further
+ * process errors, so they simply treat them as fatal.
+ */
+#define xlr_error(...) do {pg_log_fatal(__VA_ARGS__); exit(EXIT_FAILURE); } while(0)
+#endif							/* FRONTEND */
+
+/*
+ * Read 'count' bytes from WAL into 'buf', starting at location 'startptr'. If
+ * tli is passed, get the data from timeline *tli. 'pos' is the current
+ * position in the XLOG file and openSegment is a callback that opens the next
+ * segment for reading.
+ *
+ * XXX probably this should be improved to suck data directly from the
+ * WAL buffers when possible.
+ */
+void
+XLogRead(char *buf, XLogRecPtr startptr, Size count,
+		 TimeLineID *tli, XLogSegment *seg, XLogOpenSegment openSegment)
+{
+	char	   *p;
+	XLogRecPtr	recptr;
+	Size		nbytes;
+
+	p = buf;
+	recptr = startptr;
+	nbytes = count;
+
+	while (nbytes > 0)
+	{
+		uint32		startoff;
+		int			segbytes;
+		int			readbytes;
+
+		startoff = XLogSegmentOffset(recptr, seg->size);
+
+		if (seg->file < 0 ||
+			!XLByteInSeg(recptr, seg->num, seg->size) ||
+			(tli != NULL && *tli != seg->tli))
+		{
+			XLogSegNo	nextSegNo;
+
+			/* Switch to another logfile segment */
+			if (seg->file >= 0)
+				close(seg->file);
+
+			XLByteToSeg(recptr, nextSegNo, seg->size);
+
+			/* Open the next segment in the caller's way. */
+			openSegment(nextSegNo, tli, seg);
+
+			/*
+			 * If the function is called by the XLOG reader, the reader will
+			 * eventually set both "num" and "off". However we need to care
+			 * about them too because the function can also be used directly,
+			 * see walsender.c.
+			 */
+			seg->num = nextSegNo;
+			seg->off = 0;
+		}
+
+		/* Need to seek in the file? */
+		if (seg->off != startoff)
+		{
+			if (lseek(seg->file, (off_t) startoff, SEEK_SET) < 0)
+				xlr_error("could not seek in log segment %s to offset %u: %m",
+						  XLogFileNameCommon(seg->tli, seg->num, seg->size),
+						  startoff);
+			seg->off = startoff;
+		}
+
+		/* How many bytes are within this segment? */
+		if (nbytes > (seg->size - startoff))
+			segbytes = seg->size - startoff;
+		else
+			segbytes = nbytes;
+
+#ifndef FRONTEND
+		pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
+#endif
+
+		readbytes = read(seg->file, p, segbytes);
+
+#ifndef FRONTEND
+		pgstat_report_wait_end();
+#endif
+		if (readbytes < 0)
+		{
+			xlr_error("could not read from log segment %s, offset %u, length %zu: %m",
+					  XLogFileNameCommon(seg->tli, seg->num, seg->size),
+					  seg->off,
+					  (Size) segbytes);
+		}
+		else if (readbytes == 0)
+		{
+			xlr_error("could not read from log segment %s, offset %u: read %d of %zu",
+					  XLogFileNameCommon(seg->tli, seg->num, seg->size),
+					  seg->off,
+					  readbytes,
+					  (Size) segbytes);
+		}
+
+		/* Update state for read */
+		recptr += readbytes;
+		nbytes -= readbytes;
+		p += readbytes;
+
+		/*
+		 * If the function is called by the XLOG reader, the reader will
+		 * eventually set this field. However we need to care about it too
+		 * because the function can also be used directly (see walsender.c).
+		 */
+		seg->off += readbytes;
+	}
+}
+
 /* ----------------------------------------
  * Functions for decoding the data and block references in a record.
  * ----------------------------------------
diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c
index 836c2e2927..f4a90a602c 100644
--- a/src/backend/access/transam/xlogutils.c
+++ b/src/backend/access/transam/xlogutils.c
@@ -653,8 +653,8 @@ XLogTruncateRelation(RelFileNode rnode, ForkNumber forkNum,
  * frontend).  Probably these should be merged at some point.
  */
 static void
-XLogRead(char *buf, int segsize, TimeLineID tli, XLogRecPtr startptr,
-		 Size count)
+XLogReadOld(char *buf, int segsize, TimeLineID tli, XLogRecPtr startptr,
+			Size count)
 {
 	char	   *p;
 	XLogRecPtr	recptr;
@@ -897,6 +897,35 @@ XLogReadDetermineTimeline(XLogReaderState *state, XLogRecPtr wantPage, uint32 wa
 }
 
 /*
+ * Callback for XLogRead() to open the next segment.
+ */
+static void
+read_local_xlog_page_open_segment(XLogSegNo nextSegNo, TimeLineID *tli,
+								  XLogSegment *seg)
+{
+	char		path[MAXPGPATH];
+
+	XLogFilePath(path, *tli, nextSegNo, seg->size);
+	seg->file = BasicOpenFile(path, O_RDONLY | PG_BINARY);
+
+	if (seg->file < 0)
+	{
+		if (errno == ENOENT)
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("requested WAL segment %s has already been removed",
+							path)));
+		else
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not open file \"%s\": %m",
+							path)));
+	}
+
+	seg->tli = *tli;
+}
+
+/*
  * read_page callback for reading local xlog files
  *
  * Public because it would likely be very helpful for someone writing another
@@ -1022,10 +1051,8 @@ read_local_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr,
 	 * as 'count', read the whole page anyway. It's guaranteed to be
 	 * zero-padded up to the page boundary if it's incomplete.
 	 */
-	XLogRead(cur_page, state->seg.size, state->seg.tli, targetPagePtr,
-			 XLOG_BLCKSZ);
-	state->seg.tli = pageTLI;
-
+	XLogRead(cur_page, targetPagePtr, XLOG_BLCKSZ, &pageTLI,
+			 &state->seg, read_local_xlog_page_open_segment);
 	/* number of valid bytes in the buffer */
 	return count;
 }
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index 6dfb525e1a..afafd4082e 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -247,7 +247,9 @@ static void LagTrackerWrite(XLogRecPtr lsn, TimestampTz local_flush_time);
 static TimeOffset LagTrackerRead(int head, XLogRecPtr lsn, TimestampTz now);
 static bool TransactionIdInRecentPast(TransactionId xid, uint32 epoch);
 
-static void XLogRead(char *buf, XLogRecPtr startptr, Size count);
+static void WalSndOpenSegment(XLogSegNo nextSegNo, TimeLineID *tli,
+				  XLogSegment *seg);
+static void XLogReadOld(char *buf, XLogRecPtr startptr, Size count);
 
 
 /* Initialize walsender process before entering the main command loop */
@@ -782,7 +784,8 @@ logical_read_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr, int req
 		count = flushptr - targetPagePtr;	/* part of the page available */
 
 	/* now actually read the data, we know it's there */
-	XLogRead(cur_page, targetPagePtr, XLOG_BLCKSZ);
+	XLogRead(cur_page, targetPagePtr, XLOG_BLCKSZ, NULL, sendSeg,
+			 WalSndOpenSegment);
 
 	return count;
 }
@@ -2353,7 +2356,7 @@ WalSndKill(int code, Datum arg)
  * more than one.
  */
 static void
-XLogRead(char *buf, XLogRecPtr startptr, Size count)
+XLogReadOld(char *buf, XLogRecPtr startptr, Size count)
 {
 	char	   *p;
 	XLogRecPtr	recptr;
@@ -2527,6 +2530,76 @@ retry:
 }
 
 /*
+ * Callback for XLogRead() to open the next segment.
+ */
+void
+WalSndOpenSegment(XLogSegNo nextSegNo, TimeLineID *tli, XLogSegment *seg)
+{
+	char		path[MAXPGPATH];
+
+	/*
+	 * The timeline is determined below, caller should not do anything about
+	 * it.
+	 */
+	Assert(tli == NULL);
+
+	/*-------
+	 * When reading from a historic timeline, and there is a timeline switch
+	 * within this segment, read from the WAL segment belonging to the new
+	 * timeline.
+	 *
+	 * For example, imagine that this server is currently on timeline 5, and
+	 * we're streaming timeline 4. The switch from timeline 4 to 5 happened at
+	 * 0/13002088. In pg_wal, we have these files:
+	 *
+	 * ...
+	 * 000000040000000000000012
+	 * 000000040000000000000013
+	 * 000000050000000000000013
+	 * 000000050000000000000014
+	 * ...
+	 *
+	 * In this situation, when requested to send the WAL from segment 0x13, on
+	 * timeline 4, we read the WAL from file 000000050000000000000013. Archive
+	 * recovery prefers files from newer timelines, so if the segment was
+	 * restored from the archive on this server, the file belonging to the old
+	 * timeline, 000000040000000000000013, might not exist. Their contents are
+	 * equal up to the switchpoint, because at a timeline switch, the used
+	 * portion of the old segment is copied to the new file.  -------
+	 */
+	seg->tli = sendTimeLine;
+	if (sendTimeLineIsHistoric)
+	{
+		XLogSegNo	endSegNo;
+
+		XLByteToSeg(sendTimeLineValidUpto, endSegNo, seg->size);
+		if (seg->num == endSegNo)
+			seg->tli = sendTimeLineNextTLI;
+	}
+
+	XLogFilePath(path, seg->tli, nextSegNo, seg->size);
+	seg->file = BasicOpenFile(path, O_RDONLY | PG_BINARY);
+
+	if (seg->file < 0)
+	{
+		/*
+		 * If the file is not found, assume it's because the standby asked for
+		 * a too old WAL segment that has already been removed or recycled.
+		 */
+		if (errno == ENOENT)
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("requested WAL segment %s has already been removed",
+							XLogFileNameP(seg->tli, seg->num))));
+		else
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not open file \"%s\": %m",
+							path)));
+	}
+}
+
+/*
  * Send out the WAL in its normal physical/stored form.
  *
  * Read up to MAX_SEND_SIZE bytes of WAL that's been flushed to disk,
@@ -2543,6 +2616,7 @@ XLogSendPhysical(void)
 	XLogRecPtr	startptr;
 	XLogRecPtr	endptr;
 	Size		nbytes;
+	XLogSegNo	segno;
 
 	/* If requested switch the WAL sender to the stopping state. */
 	if (got_STOPPING)
@@ -2758,7 +2832,48 @@ XLogSendPhysical(void)
 	 * calls.
 	 */
 	enlargeStringInfo(&output_message, nbytes);
-	XLogRead(&output_message.data[output_message.len], startptr, nbytes);
+
+retry:
+	XLogRead(&output_message.data[output_message.len], startptr, nbytes,
+			 NULL,				/* WalSndOpenSegment will determine TLI */
+			 sendSeg,
+			 WalSndOpenSegment);
+
+	/*
+	 * After reading into the buffer, check that what we read was valid. We do
+	 * this after reading, because even though the segment was present when we
+	 * opened it, it might get recycled or removed while we read it. The
+	 * read() succeeds in that case, but the data we tried to read might
+	 * already have been overwritten with new WAL records.
+	 */
+	XLByteToSeg(startptr, segno, wal_segment_size);
+	CheckXLogRemoved(segno, ThisTimeLineID);
+
+	/*
+	 * During recovery, the currently-open WAL file might be replaced with the
+	 * file of the same name retrieved from archive. So we always need to
+	 * check what we read was valid after reading into the buffer. If it's
+	 * invalid, we try to open and read the file again.
+	 */
+	if (am_cascading_walsender)
+	{
+		WalSnd	   *walsnd = MyWalSnd;
+		bool		reload;
+
+		SpinLockAcquire(&walsnd->mutex);
+		reload = walsnd->needreload;
+		walsnd->needreload = false;
+		SpinLockRelease(&walsnd->mutex);
+
+		if (reload && sendSeg->file >= 0)
+		{
+			close(sendSeg->file);
+			sendSeg->file = -1;
+
+			goto retry;
+		}
+	}
+
 	output_message.len += nbytes;
 	output_message.data[output_message.len] = '\0';
 
diff --git a/src/bin/pg_waldump/pg_waldump.c b/src/bin/pg_waldump/pg_waldump.c
index b31b6cdcaf..caf5533aeb 100644
--- a/src/bin/pg_waldump/pg_waldump.c
+++ b/src/bin/pg_waldump/pg_waldump.c
@@ -296,6 +296,45 @@ identify_target_directory(XLogDumpPrivate *private, char *directory,
 		fatal_error("could not find any WAL file");
 }
 
+static void
+XLogDumpOpenSegment(XLogSegNo nextSegNo, TimeLineID *tli, XLogSegment *seg)
+{
+	char		fname[MAXPGPATH];
+	int			tries;
+
+	XLogFileName(fname, *tli, nextSegNo, seg->size);
+
+	/*
+	 * In follow mode there is a short period of time after the server has
+	 * written the end of the previous file before the new file is available.
+	 * So we loop for 5 seconds looking for the file to appear before giving
+	 * up.
+	 */
+	for (tries = 0; tries < 10; tries++)
+	{
+		seg->file = open_file_in_directory(seg->dir, fname);
+		if (seg->file >= 0)
+			break;
+		if (errno == ENOENT)
+		{
+			int			save_errno = errno;
+
+			/* File not there yet, try again */
+			pg_usleep(500 * 1000);
+
+			errno = save_errno;
+			continue;
+		}
+		/* Any other error, fall through and fail */
+		break;
+	}
+
+	if (seg->file < 0)
+		fatal_error("could not find file \"%s\": %s",
+					fname, strerror(errno));
+	seg->tli = *tli;
+}
+
 /*
  * Read count bytes from a segment file in the specified directory, for the
  * given timeline, containing the specified record pointer; store the data in
@@ -441,8 +480,8 @@ XLogDumpReadPage(XLogReaderState *state, XLogRecPtr targetPagePtr, int reqLen,
 		}
 	}
 
-	XLogDumpXLogRead(private->inpath, private->timeline, targetPagePtr,
-					 readBuff, count);
+	XLogRead(readBuff, targetPagePtr, count, &private->timeline,
+			 &state->seg, XLogDumpOpenSegment);
 
 	return count;
 }
diff --git a/src/include/access/xlogreader.h b/src/include/access/xlogreader.h
index 0d801d5903..5ba89a7035 100644
--- a/src/include/access/xlogreader.h
+++ b/src/include/access/xlogreader.h
@@ -221,7 +221,22 @@ extern bool XLogReaderValidatePageHeader(XLogReaderState *state,
 extern XLogRecPtr XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr);
 #endif							/* FRONTEND */
 
+/*
+ * Callback to open the specified XLOG segment nextSegNo in timeline *tli for
+ * reading, and assign the descriptor to ->file. BasicOpenFile() is the
+ * preferred way to open the segment file in backend code, whereas open(2)
+ * should be used in frontend.
+ *
+ * If NULL is passed for tli, the callback must determine the timeline
+ * itself. In any case it's supposed to eventually set ->tli.
+ */
+typedef void (*XLogOpenSegment) (XLogSegNo nextSegNo, TimeLineID *tli,
+								 XLogSegment *seg);
+
 extern void XLogSegmentInit(XLogSegment *seg, int size);
+extern void XLogRead(char *buf, XLogRecPtr startptr, Size count,
+		 TimeLineID *tli, XLogSegment *seg,
+		 XLogOpenSegment openSegment);
 
 /* Functions for decoding an XLogRecord */
 
Remove the old implemenations of XLogRead().

Done in a separate patch because the diff looks harder to read if one function
(XLogRead) is removed and another one (the XLogOpenSegment callback) is added
nearby at the same time (the addition and removal of code can get mixed in the diff).

diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c
index f4a90a602c..3e531be9b0 100644
--- a/src/backend/access/transam/xlogutils.c
+++ b/src/backend/access/transam/xlogutils.c
@@ -17,14 +17,11 @@
  */
 #include "postgres.h"
 
-#include <unistd.h>
-
 #include "access/timeline.h"
 #include "access/xlog.h"
 #include "access/xlog_internal.h"
 #include "access/xlogutils.h"
 #include "miscadmin.h"
-#include "pgstat.h"
 #include "storage/smgr.h"
 #include "utils/guc.h"
 #include "utils/hsearch.h"
@@ -640,128 +637,6 @@ XLogTruncateRelation(RelFileNode rnode, ForkNumber forkNum,
 }
 
 /*
- * Read 'count' bytes from WAL into 'buf', starting at location 'startptr'
- * in timeline 'tli'.
- *
- * Will open, and keep open, one WAL segment stored in the static file
- * descriptor 'sendFile'. This means if XLogRead is used once, there will
- * always be one descriptor left open until the process ends, but never
- * more than one.
- *
- * XXX This is very similar to pg_waldump's XLogDumpXLogRead and to XLogRead
- * in walsender.c but for small differences (such as lack of elog() in
- * frontend).  Probably these should be merged at some point.
- */
-static void
-XLogReadOld(char *buf, int segsize, TimeLineID tli, XLogRecPtr startptr,
-			Size count)
-{
-	char	   *p;
-	XLogRecPtr	recptr;
-	Size		nbytes;
-
-	/* state maintained across calls */
-	static int	sendFile = -1;
-	static XLogSegNo sendSegNo = 0;
-	static TimeLineID sendTLI = 0;
-	static uint32 sendOff = 0;
-
-	Assert(segsize == wal_segment_size);
-
-	p = buf;
-	recptr = startptr;
-	nbytes = count;
-
-	while (nbytes > 0)
-	{
-		uint32		startoff;
-		int			segbytes;
-		int			readbytes;
-
-		startoff = XLogSegmentOffset(recptr, segsize);
-
-		/* Do we need to switch to a different xlog segment? */
-		if (sendFile < 0 || !XLByteInSeg(recptr, sendSegNo, segsize) ||
-			sendTLI != tli)
-		{
-			char		path[MAXPGPATH];
-
-			if (sendFile >= 0)
-				close(sendFile);
-
-			XLByteToSeg(recptr, sendSegNo, segsize);
-
-			XLogFilePath(path, tli, sendSegNo, segsize);
-
-			sendFile = BasicOpenFile(path, O_RDONLY | PG_BINARY);
-
-			if (sendFile < 0)
-			{
-				if (errno == ENOENT)
-					ereport(ERROR,
-							(errcode_for_file_access(),
-							 errmsg("requested WAL segment %s has already been removed",
-									path)));
-				else
-					ereport(ERROR,
-							(errcode_for_file_access(),
-							 errmsg("could not open file \"%s\": %m",
-									path)));
-			}
-			sendOff = 0;
-			sendTLI = tli;
-		}
-
-		/* Need to seek in the file? */
-		if (sendOff != startoff)
-		{
-			if (lseek(sendFile, (off_t) startoff, SEEK_SET) < 0)
-			{
-				char		path[MAXPGPATH];
-				int			save_errno = errno;
-
-				XLogFilePath(path, tli, sendSegNo, segsize);
-				errno = save_errno;
-				ereport(ERROR,
-						(errcode_for_file_access(),
-						 errmsg("could not seek in log segment %s to offset %u: %m",
-								path, startoff)));
-			}
-			sendOff = startoff;
-		}
-
-		/* How many bytes are within this segment? */
-		if (nbytes > (segsize - startoff))
-			segbytes = segsize - startoff;
-		else
-			segbytes = nbytes;
-
-		pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
-		readbytes = read(sendFile, p, segbytes);
-		pgstat_report_wait_end();
-		if (readbytes <= 0)
-		{
-			char		path[MAXPGPATH];
-			int			save_errno = errno;
-
-			XLogFilePath(path, tli, sendSegNo, segsize);
-			errno = save_errno;
-			ereport(ERROR,
-					(errcode_for_file_access(),
-					 errmsg("could not read from log segment %s, offset %u, length %lu: %m",
-							path, sendOff, (unsigned long) segbytes)));
-		}
-
-		/* Update state for read */
-		recptr += readbytes;
-
-		sendOff += readbytes;
-		nbytes -= readbytes;
-		p += readbytes;
-	}
-}
-
-/*
  * Determine which timeline to read an xlog page from and set the
  * XLogReaderState's currTLI to that timeline ID.
  *
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index afafd4082e..c6a1992471 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -249,7 +249,6 @@ static bool TransactionIdInRecentPast(TransactionId xid, uint32 epoch);
 
 static void WalSndOpenSegment(XLogSegNo nextSegNo, TimeLineID *tli,
 				  XLogSegment *seg);
-static void XLogReadOld(char *buf, XLogRecPtr startptr, Size count);
 
 
 /* Initialize walsender process before entering the main command loop */
@@ -2345,191 +2344,6 @@ WalSndKill(int code, Datum arg)
 }
 
 /*
- * Read 'count' bytes from WAL into 'buf', starting at location 'startptr'
- *
- * XXX probably this should be improved to suck data directly from the
- * WAL buffers when possible.
- *
- * Will open, and keep open, one WAL segment stored in the global file
- * descriptor sendFile. This means if XLogRead is used once, there will
- * always be one descriptor left open until the process ends, but never
- * more than one.
- */
-static void
-XLogReadOld(char *buf, XLogRecPtr startptr, Size count)
-{
-	char	   *p;
-	XLogRecPtr	recptr;
-	Size		nbytes;
-	XLogSegNo	segno;
-
-retry:
-	p = buf;
-	recptr = startptr;
-	nbytes = count;
-
-	while (nbytes > 0)
-	{
-		uint32		startoff;
-		int			segbytes;
-		int			readbytes;
-
-		startoff = XLogSegmentOffset(recptr, wal_segment_size);
-
-		if (sendSeg->file < 0 ||
-			!XLByteInSeg(recptr, sendSeg->num, sendSeg->size))
-		{
-			char		path[MAXPGPATH];
-
-			/* Switch to another logfile segment */
-			if (sendSeg->file >= 0)
-				close(sendSeg->file);
-
-			XLByteToSeg(recptr, sendSeg->num, sendSeg->size);
-
-			/*-------
-			 * When reading from a historic timeline, and there is a timeline
-			 * switch within this segment, read from the WAL segment belonging
-			 * to the new timeline.
-			 *
-			 * For example, imagine that this server is currently on timeline
-			 * 5, and we're streaming timeline 4. The switch from timeline 4
-			 * to 5 happened at 0/13002088. In pg_wal, we have these files:
-			 *
-			 * ...
-			 * 000000040000000000000012
-			 * 000000040000000000000013
-			 * 000000050000000000000013
-			 * 000000050000000000000014
-			 * ...
-			 *
-			 * In this situation, when requested to send the WAL from
-			 * segment 0x13, on timeline 4, we read the WAL from file
-			 * 000000050000000000000013. Archive recovery prefers files from
-			 * newer timelines, so if the segment was restored from the
-			 * archive on this server, the file belonging to the old timeline,
-			 * 000000040000000000000013, might not exist. Their contents are
-			 * equal up to the switchpoint, because at a timeline switch, the
-			 * used portion of the old segment is copied to the new file.
-			 *-------
-			 */
-			sendSeg->tli = sendTimeLine;
-			if (sendTimeLineIsHistoric)
-			{
-				XLogSegNo	endSegNo;
-
-				XLByteToSeg(sendTimeLineValidUpto, endSegNo, wal_segment_size);
-				if (sendSeg->num == endSegNo)
-					sendSeg->tli = sendTimeLineNextTLI;
-			}
-
-			XLogFilePath(path, sendSeg->tli, sendSeg->num, wal_segment_size);
-
-			sendSeg->file = BasicOpenFile(path, O_RDONLY | PG_BINARY);
-			if (sendSeg->file < 0)
-			{
-				/*
-				 * If the file is not found, assume it's because the standby
-				 * asked for a too old WAL segment that has already been
-				 * removed or recycled.
-				 */
-				if (errno == ENOENT)
-					ereport(ERROR,
-							(errcode_for_file_access(),
-							 errmsg("requested WAL segment %s has already been removed",
-									XLogFileNameP(sendSeg->tli, sendSeg->num))));
-				else
-					ereport(ERROR,
-							(errcode_for_file_access(),
-							 errmsg("could not open file \"%s\": %m",
-									path)));
-			}
-			sendSeg->off = 0;
-		}
-
-		/* Need to seek in the file? */
-		if (sendSeg->off != startoff)
-		{
-			if (lseek(sendSeg->file, (off_t) startoff, SEEK_SET) < 0)
-				ereport(ERROR,
-						(errcode_for_file_access(),
-						 errmsg("could not seek in log segment %s to offset %u: %m",
-								XLogFileNameP(sendSeg->tli, sendSeg->num),
-								startoff)));
-			sendSeg->off = startoff;
-		}
-
-		/* How many bytes are within this segment? */
-		if (nbytes > (wal_segment_size - startoff))
-			segbytes = wal_segment_size - startoff;
-		else
-			segbytes = nbytes;
-
-		pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
-		readbytes = read(sendSeg->file, p, segbytes);
-		pgstat_report_wait_end();
-		if (readbytes < 0)
-		{
-			ereport(ERROR,
-					(errcode_for_file_access(),
-					 errmsg("could not read from log segment %s, offset %u, length %zu: %m",
-							XLogFileNameP(sendSeg->tli, sendSeg->num),
-							sendSeg->off, (Size) segbytes)));
-		}
-		else if (readbytes == 0)
-		{
-			ereport(ERROR,
-					(errcode(ERRCODE_DATA_CORRUPTED),
-					 errmsg("could not read from log segment %s, offset %u: read %d of %zu",
-							XLogFileNameP(sendSeg->tli, sendSeg->num),
-							sendSeg->off, readbytes, (Size) segbytes)));
-		}
-
-		/* Update state for read */
-		recptr += readbytes;
-
-		sendSeg->off += readbytes;
-		nbytes -= readbytes;
-		p += readbytes;
-	}
-
-	/*
-	 * After reading into the buffer, check that what we read was valid. We do
-	 * this after reading, because even though the segment was present when we
-	 * opened it, it might get recycled or removed while we read it. The
-	 * read() succeeds in that case, but the data we tried to read might
-	 * already have been overwritten with new WAL records.
-	 */
-	XLByteToSeg(startptr, segno, wal_segment_size);
-	CheckXLogRemoved(segno, ThisTimeLineID);
-
-	/*
-	 * During recovery, the currently-open WAL file might be replaced with the
-	 * file of the same name retrieved from archive. So we always need to
-	 * check what we read was valid after reading into the buffer. If it's
-	 * invalid, we try to open and read the file again.
-	 */
-	if (am_cascading_walsender)
-	{
-		WalSnd	   *walsnd = MyWalSnd;
-		bool		reload;
-
-		SpinLockAcquire(&walsnd->mutex);
-		reload = walsnd->needreload;
-		walsnd->needreload = false;
-		SpinLockRelease(&walsnd->mutex);
-
-		if (reload && sendSeg->file >= 0)
-		{
-			close(sendSeg->file);
-			sendSeg->file = -1;
-
-			goto retry;
-		}
-	}
-}
-
-/*
  * Callback for XLogRead() to open the next segment.
  */
 void
diff --git a/src/bin/pg_waldump/pg_waldump.c b/src/bin/pg_waldump/pg_waldump.c
index caf5533aeb..557816aff1 100644
--- a/src/bin/pg_waldump/pg_waldump.c
+++ b/src/bin/pg_waldump/pg_waldump.c
@@ -14,7 +14,6 @@
 
 #include <dirent.h>
 #include <sys/stat.h>
-#include <unistd.h>
 
 #include "access/xlogreader.h"
 #include "access/xlogrecord.h"
@@ -336,128 +335,6 @@ XLogDumpOpenSegment(XLogSegNo nextSegNo, TimeLineID *tli, XLogSegment *seg)
 }
 
 /*
- * Read count bytes from a segment file in the specified directory, for the
- * given timeline, containing the specified record pointer; store the data in
- * the passed buffer.
- */
-static void
-XLogDumpXLogRead(const char *directory, TimeLineID timeline_id,
-				 XLogRecPtr startptr, char *buf, Size count)
-{
-	char	   *p;
-	XLogRecPtr	recptr;
-	Size		nbytes;
-
-	static int	sendFile = -1;
-	static XLogSegNo sendSegNo = 0;
-	static uint32 sendOff = 0;
-
-	p = buf;
-	recptr = startptr;
-	nbytes = count;
-
-	while (nbytes > 0)
-	{
-		uint32		startoff;
-		int			segbytes;
-		int			readbytes;
-
-		startoff = XLogSegmentOffset(recptr, WalSegSz);
-
-		if (sendFile < 0 || !XLByteInSeg(recptr, sendSegNo, WalSegSz))
-		{
-			char		fname[MAXFNAMELEN];
-			int			tries;
-
-			/* Switch to another logfile segment */
-			if (sendFile >= 0)
-				close(sendFile);
-
-			XLByteToSeg(recptr, sendSegNo, WalSegSz);
-
-			XLogFileName(fname, timeline_id, sendSegNo, WalSegSz);
-
-			/*
-			 * In follow mode there is a short period of time after the server
-			 * has written the end of the previous file before the new file is
-			 * available. So we loop for 5 seconds looking for the file to
-			 * appear before giving up.
-			 */
-			for (tries = 0; tries < 10; tries++)
-			{
-				sendFile = open_file_in_directory(directory, fname);
-				if (sendFile >= 0)
-					break;
-				if (errno == ENOENT)
-				{
-					int			save_errno = errno;
-
-					/* File not there yet, try again */
-					pg_usleep(500 * 1000);
-
-					errno = save_errno;
-					continue;
-				}
-				/* Any other error, fall through and fail */
-				break;
-			}
-
-			if (sendFile < 0)
-				fatal_error("could not find file \"%s\": %s",
-							fname, strerror(errno));
-			sendOff = 0;
-		}
-
-		/* Need to seek in the file? */
-		if (sendOff != startoff)
-		{
-			if (lseek(sendFile, (off_t) startoff, SEEK_SET) < 0)
-			{
-				int			err = errno;
-				char		fname[MAXPGPATH];
-
-				XLogFileName(fname, timeline_id, sendSegNo, WalSegSz);
-
-				fatal_error("could not seek in log file %s to offset %u: %s",
-							fname, startoff, strerror(err));
-			}
-			sendOff = startoff;
-		}
-
-		/* How many bytes are within this segment? */
-		if (nbytes > (WalSegSz - startoff))
-			segbytes = WalSegSz - startoff;
-		else
-			segbytes = nbytes;
-
-		readbytes = read(sendFile, p, segbytes);
-		if (readbytes <= 0)
-		{
-			int			err = errno;
-			char		fname[MAXPGPATH];
-			int			save_errno = errno;
-
-			XLogFileName(fname, timeline_id, sendSegNo, WalSegSz);
-			errno = save_errno;
-
-			if (readbytes < 0)
-				fatal_error("could not read from log file %s, offset %u, length %d: %s",
-							fname, sendOff, segbytes, strerror(err));
-			else if (readbytes == 0)
-				fatal_error("could not read from log file %s, offset %u: read %d of %zu",
-							fname, sendOff, readbytes, (Size) segbytes);
-		}
-
-		/* Update state for read */
-		recptr += readbytes;
-
-		sendOff += readbytes;
-		nbytes -= readbytes;
-		p += readbytes;
-	}
-}
-
-/*
  * XLogReader read_page callback
  */
 static int

Reply via email to