From 8ecb293ec830bca4f11e525cb56d99036631c844 Mon Sep 17 00:00:00 2001
From: Polina Bungina <bungina@gmail.com>
Date: Wed, 31 Aug 2022 09:07:16 +0200
Subject: [PATCH v3] Be more picky with WAL segment deletion in pg_rewind

Make pg_rewind a bit wiser when creating the filemap: preserve on the
target all WAL segments that contain records between the last common
checkpoint and the point of divergence.

Co-authored-by: Alexander Kukushkin <cyberdemn@gmail.com>
---
 src/bin/pg_rewind/filemap.c   | 19 ++++++++++++++++++-
 src/bin/pg_rewind/filemap.h   |  1 +
 src/bin/pg_rewind/parsexlog.c | 17 +++++++++++++++++
 src/bin/pg_rewind/pg_rewind.c |  6 +++---
 4 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c
index 269ed6446e6..d74fbe712a3 100644
--- a/src/bin/pg_rewind/filemap.c
+++ b/src/bin/pg_rewind/filemap.c
@@ -795,7 +795,12 @@ decide_file_actions(void)
 	filehash_start_iterate(filehash, &it);
 	while ((entry = filehash_iterate(filehash, &it)) != NULL)
 	{
-		entry->action = decide_file_action(entry);
+		/*
+		 * Some entries (WAL segments) already have an action assigned
+		 * (see SimpleXLogPageRead()).
+		 */
+		if (entry->action == FILE_ACTION_UNDECIDED)
+			entry->action = decide_file_action(entry);
 	}
 
 	/*
@@ -818,6 +823,18 @@ decide_file_actions(void)
 	return filemap;
 }
 
+/*
+ * Keep a file during rewind by presetting its action to FILE_ACTION_NONE.
+ */
+void
+preserve_file(char *filepath)
+{
+	file_entry_t *entry;
+
+	entry = insert_filehash_entry(filepath);
+	entry->action = FILE_ACTION_NONE;
+}
+
 
 /*
  * Helper function for filemap hash table.
diff --git a/src/bin/pg_rewind/filemap.h b/src/bin/pg_rewind/filemap.h
index 0e011fbb0b3..421e3a7855d 100644
--- a/src/bin/pg_rewind/filemap.h
+++ b/src/bin/pg_rewind/filemap.h
@@ -109,5 +109,6 @@ extern void process_target_wal_block_change(ForkNumber forknum,
 extern filemap_t *decide_file_actions(void);
 extern void calculate_totals(filemap_t *filemap);
 extern void print_filemap(filemap_t *filemap);
+extern void preserve_file(char *filepath);
 
 #endif							/* FILEMAP_H */
diff --git a/src/bin/pg_rewind/parsexlog.c b/src/bin/pg_rewind/parsexlog.c
index 53f011a2fe2..20e6670c82f 100644
--- a/src/bin/pg_rewind/parsexlog.c
+++ b/src/bin/pg_rewind/parsexlog.c
@@ -48,6 +48,7 @@ typedef struct XLogPageReadPrivate
 {
 	const char *restoreCommand;
 	int			tliIndex;
+	bool		keepWalSeg;
 } XLogPageReadPrivate;
 
 static int	SimpleXLogPageRead(XLogReaderState *xlogreader,
@@ -73,6 +74,7 @@ extractPageMap(const char *datadir, XLogRecPtr startpoint, int tliIndex,
 
 	private.tliIndex = tliIndex;
 	private.restoreCommand = restoreCommand;
+	private.keepWalSeg = false;
 	xlogreader = XLogReaderAllocate(WalSegSz, datadir,
 									XL_ROUTINE(.page_read = &SimpleXLogPageRead),
 									&private);
@@ -132,6 +134,7 @@ readOneRecord(const char *datadir, XLogRecPtr ptr, int tliIndex,
 
 	private.tliIndex = tliIndex;
 	private.restoreCommand = restoreCommand;
+	private.keepWalSeg = false;
 	xlogreader = XLogReaderAllocate(WalSegSz, datadir,
 									XL_ROUTINE(.page_read = &SimpleXLogPageRead),
 									&private);
@@ -192,6 +195,11 @@ findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, int tliIndex,
 
 	private.tliIndex = tliIndex;
 	private.restoreCommand = restoreCommand;
+	/*
+	 * WAL files read while searching for the last checkpoint are required
+	 * by the next startup recovery of the target cluster.
+	 */
+	private.keepWalSeg = true;
 	xlogreader = XLogReaderAllocate(WalSegSz, datadir,
 									XL_ROUTINE(.page_read = &SimpleXLogPageRead),
 									&private);
@@ -297,6 +305,15 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
 		XLogFileName(xlogfname, targetHistory[private->tliIndex].tli,
 					 xlogreadsegno, WalSegSz);
 
+		if (private->keepWalSeg)
+		{
+			/*
+			 * The caller told us to preserve this file for future use.
+			 */
+			snprintf(xlogfpath, MAXPGPATH, XLOGDIR "/%s", xlogfname);
+			preserve_file(xlogfpath);
+		}
+
 		snprintf(xlogfpath, MAXPGPATH, "%s/" XLOGDIR "/%s",
 				 xlogreader->segcxt.ws_dir, xlogfname);
 
diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c
index 3cd77c09b1a..c01ce37112c 100644
--- a/src/bin/pg_rewind/pg_rewind.c
+++ b/src/bin/pg_rewind/pg_rewind.c
@@ -405,14 +405,14 @@ main(int argc, char **argv)
 		exit(0);
 	}
 
+	/* Initialize the hash table to track the status of each file */
+	filehash_init();
+
 	findLastCheckpoint(datadir_target, divergerec, lastcommontliIndex,
 					   &chkptrec, &chkpttli, &chkptredo, restore_command);
 	pg_log_info("rewinding from last common checkpoint at %X/%X on timeline %u",
 				LSN_FORMAT_ARGS(chkptrec), chkpttli);
 
-	/* Initialize the hash table to track the status of each file */
-	filehash_init();
-
 	/*
 	 * Collect information about all files in the both data directories.
 	 */
-- 
2.36.1

