From f41332146039f4e1dca55f3b2f8e74a3d1a1bd8c Mon Sep 17 00:00:00 2001
From: Polina Bungina <bungina@gmail.com>
Date: Wed, 31 Aug 2022 09:07:16 +0200
Subject: [PATCH v2] Be more picky with WAL segment deletion in pg_rewind

Make pg_rewind to be a bit wiser in terms of creating filemap:
preserve on the target all WAL segments that contain records between the
last common checkpoint and the point of divergence.

Co-authored-by: Alexander Kukushkin <cyberdemn@gmail.com>
---
 src/bin/pg_rewind/filemap.c   |  9 +++++++--
 src/bin/pg_rewind/filemap.h   |  1 +
 src/bin/pg_rewind/parsexlog.c | 21 +++++++++++++++++++++
 src/bin/pg_rewind/pg_rewind.c |  6 +++---
 4 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c
index 269ed6446e6..3d3c8885ea5 100644
--- a/src/bin/pg_rewind/filemap.c
+++ b/src/bin/pg_rewind/filemap.c
@@ -59,7 +59,6 @@ static bool isRelDataFile(const char *path);
 static char *datasegpath(RelFileLocator rlocator, ForkNumber forknum,
 						 BlockNumber segno);
 
-static file_entry_t *insert_filehash_entry(const char *path);
 static file_entry_t *lookup_filehash_entry(const char *path);
 static int	final_filemap_cmp(const void *a, const void *b);
 static bool check_file_excluded(const char *path, bool is_source);
@@ -171,7 +170,7 @@ filehash_init(void)
 }
 
 /* Look up entry for 'path', creating a new one if it doesn't exist */
-static file_entry_t *
+file_entry_t *
 insert_filehash_entry(const char *path)
 {
 	file_entry_t *entry;
@@ -795,6 +794,12 @@ decide_file_actions(void)
 	filehash_start_iterate(filehash, &it);
 	while ((entry = filehash_iterate(filehash, &it)) != NULL)
 	{
+		/*
+		 * Some entries (WAL segments) already have an action assigned
+		 * (see SimpleXLogPageRead()).
+		 */
+		if (entry->action == FILE_ACTION_NONE)
+			continue;
 		entry->action = decide_file_action(entry);
 	}
 
diff --git a/src/bin/pg_rewind/filemap.h b/src/bin/pg_rewind/filemap.h
index 0e011fbb0b3..79a1e9f90de 100644
--- a/src/bin/pg_rewind/filemap.h
+++ b/src/bin/pg_rewind/filemap.h
@@ -98,6 +98,7 @@ typedef struct filemap_t
 
 /* Functions for populating the filemap */
 extern void filehash_init(void);
+extern file_entry_t *insert_filehash_entry(const char *path);
 extern void process_source_file(const char *path, file_type_t type,
 								size_t size, const char *link_target);
 extern void process_target_file(const char *path, file_type_t type,
diff --git a/src/bin/pg_rewind/parsexlog.c b/src/bin/pg_rewind/parsexlog.c
index 53f011a2fe2..a6651c4b910 100644
--- a/src/bin/pg_rewind/parsexlog.c
+++ b/src/bin/pg_rewind/parsexlog.c
@@ -48,6 +48,7 @@ typedef struct XLogPageReadPrivate
 {
 	const char *restoreCommand;
 	int			tliIndex;
+	bool		keepWalSeg;
 } XLogPageReadPrivate;
 
 static int	SimpleXLogPageRead(XLogReaderState *xlogreader,
@@ -73,6 +74,7 @@ extractPageMap(const char *datadir, XLogRecPtr startpoint, int tliIndex,
 
 	private.tliIndex = tliIndex;
 	private.restoreCommand = restoreCommand;
+	private.keepWalSeg = false;
 	xlogreader = XLogReaderAllocate(WalSegSz, datadir,
 									XL_ROUTINE(.page_read = &SimpleXLogPageRead),
 									&private);
@@ -132,6 +134,7 @@ readOneRecord(const char *datadir, XLogRecPtr ptr, int tliIndex,
 
 	private.tliIndex = tliIndex;
 	private.restoreCommand = restoreCommand;
+	private.keepWalSeg = false;
 	xlogreader = XLogReaderAllocate(WalSegSz, datadir,
 									XL_ROUTINE(.page_read = &SimpleXLogPageRead),
 									&private);
@@ -192,6 +195,7 @@ findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, int tliIndex,
 
 	private.tliIndex = tliIndex;
 	private.restoreCommand = restoreCommand;
+	private.keepWalSeg = true;
 	xlogreader = XLogReaderAllocate(WalSegSz, datadir,
 									XL_ROUTINE(.page_read = &SimpleXLogPageRead),
 									&private);
@@ -297,6 +301,23 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
 		XLogFileName(xlogfname, targetHistory[private->tliIndex].tli,
 					 xlogreadsegno, WalSegSz);
 
+		if (private->keepWalSeg)
+		{
+			/*
+			 * If called from findLastCheckpoint(), WAL segments between
+			 * the point of divergence and the last common checkpoint
+			 * are handled here. These are the segments we would
+			 * like to keep in case they were not archived and the
+			 * source has already recycled them.
+			 */
+			file_entry_t *entry;
+
+			snprintf(xlogfpath, MAXPGPATH, XLOGDIR "/%s", xlogfname);
+			entry = insert_filehash_entry(xlogfpath);
+
+			entry->action = FILE_ACTION_NONE;
+		}
+
 		snprintf(xlogfpath, MAXPGPATH, "%s/" XLOGDIR "/%s",
 				 xlogreader->segcxt.ws_dir, xlogfname);
 
diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c
index 3cd77c09b1a..c01ce37112c 100644
--- a/src/bin/pg_rewind/pg_rewind.c
+++ b/src/bin/pg_rewind/pg_rewind.c
@@ -405,14 +405,14 @@ main(int argc, char **argv)
 		exit(0);
 	}
 
+	/* Initialize the hash table to track the status of each file */
+	filehash_init();
+
 	findLastCheckpoint(datadir_target, divergerec, lastcommontliIndex,
 					   &chkptrec, &chkpttli, &chkptredo, restore_command);
 	pg_log_info("rewinding from last common checkpoint at %X/%X on timeline %u",
 				LSN_FORMAT_ARGS(chkptrec), chkpttli);
 
-	/* Initialize the hash table to track the status of each file */
-	filehash_init();
-
 	/*
 	 * Collect information about all files in the both data directories.
 	 */
-- 
2.36.1

