Hi,

I've attached an updated version of the patch against master with the changes
suggested.

On Tue, Nov 29, 2022 at 10:03 PM Michael Paquier <mich...@paquier.xyz> wrote:
>
> On Thu, Oct 06, 2022 at 04:08:45PM +0900, Michael Paquier wrote:
>>
>>  There may be something I am missing here, but there is no need to care
>> about segments with a TLI older than lastcommontliIndex, no?

Hard to say. pg_rewind is intended to make the same "copy" of the cluster which
implies pg_wal/ should look the same. There might be use cases around logical
replication where you would want these WAL files to still exist even
across promotions?

>> decide_wal_file_action() assumes that the WAL segment exists on the
>> target and the source.  This looks bug-prone to me without at least an
>> assertion.

>From previous refactors there is now an Assertion in filemap.c
decide_file_action that handles this.

> Assert(entry->target_exists && entry->source_exists);

decide_wal_file_action is called after the assertion.

Thanks,

--
John Hsu - Amazon Web Services
From 1825113812381f52c52e74a3e0e3e4c519f218bc Mon Sep 17 00:00:00 2001
From: John Hsu <johnyvr@gmail.com>
Date: Tue, 1 Jul 2025 18:05:37 +0000
Subject: [PATCH] Avoid copying WAL segments before divergence to speed up
 pg_rewind

Adds a conditional check to avoid unnecessarily copying any
WAL segment files from source to target if they are common
between both servers before the point of WAL divergence
during pg_rewind. On the source server, each WAL file's.
All WAL files that exist on source and target, which fall
before the segment of the first diverged LSN can safely be
skipped from copying to the target.
---
 src/bin/pg_rewind/filemap.c                   | 40 ++++++++++++++++--
 src/bin/pg_rewind/filemap.h                   |  9 +++-
 src/bin/pg_rewind/pg_rewind.c                 | 41 +++++++++++++++++++
 src/bin/pg_rewind/pg_rewind.h                 |  2 +
 src/bin/pg_rewind/t/008_min_recovery_point.pl | 18 +++++++-
 5 files changed, 104 insertions(+), 6 deletions(-)

diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c
index c933871ca9f..79e5bbd7710 100644
--- a/src/bin/pg_rewind/filemap.c
+++ b/src/bin/pg_rewind/filemap.c
@@ -58,6 +58,7 @@ static filehash_hash *filehash;
 static bool isRelDataFile(const char *path);
 static char *datasegpath(RelFileLocator rlocator, ForkNumber forknum,
 						 BlockNumber segno);
+static file_content_type_t getFileType(const char *path);
 
 static file_entry_t *insert_filehash_entry(const char *path);
 static file_entry_t *lookup_filehash_entry(const char *path);
@@ -199,6 +200,28 @@ filehash_init(void)
 	filehash = filehash_create(FILEHASH_INITIAL_SIZE, NULL);
 }
 
+/* Determine the type of file content (relation, WAL, or other) */
+static file_content_type_t
+getFileType(const char *path)
+{
+	if (isRelDataFile(path))
+		return FILE_CONTENT_TYPE_RELATION;
+	else
+	{
+		/* Extract the filename from the path */
+		const char *filename = last_dir_separator(path);
+		if (filename == NULL)
+			filename = path;
+		else
+			filename++;  /* Skip the separator */
+
+		if (IsXLogFileName(filename))
+			return FILE_CONTENT_TYPE_WAL;
+	}
+
+	return FILE_CONTENT_TYPE_OTHER;
+}
+
 /* Look up entry for 'path', creating a new one if it doesn't exist */
 static file_entry_t *
 insert_filehash_entry(const char *path)
@@ -210,7 +233,7 @@ insert_filehash_entry(const char *path)
 	if (!found)
 	{
 		entry->path = pg_strdup(path);
-		entry->isrelfile = isRelDataFile(path);
+		entry->content_type = getFileType(path);
 
 		entry->target_exists = false;
 		entry->target_type = FILE_TYPE_UNDEFINED;
@@ -383,7 +406,7 @@ process_target_wal_block_change(ForkNumber forknum, RelFileLocator rlocator,
 	 */
 	if (entry)
 	{
-		Assert(entry->isrelfile);
+		Assert(entry->content_type == FILE_CONTENT_TYPE_RELATION);
 
 		if (entry->target_exists)
 		{
@@ -799,7 +822,18 @@ decide_file_action(file_entry_t *entry)
 			return FILE_ACTION_NONE;
 
 		case FILE_TYPE_REGULAR:
-			if (!entry->isrelfile)
+			if (entry->content_type == FILE_CONTENT_TYPE_WAL)
+			{
+				/* Handle WAL segment file */
+				const char *filename = last_dir_separator(entry->path);
+				if (filename == NULL)
+					filename = entry->path;
+				else
+					filename++;  /* Skip the separator */
+
+				return decide_wal_file_action(filename);
+			}
+			else if (entry->content_type != FILE_CONTENT_TYPE_RELATION)
 			{
 				/*
 				 * It's a non-data file that we have no special processing
diff --git a/src/bin/pg_rewind/filemap.h b/src/bin/pg_rewind/filemap.h
index df78a02e3da..fada420fc23 100644
--- a/src/bin/pg_rewind/filemap.h
+++ b/src/bin/pg_rewind/filemap.h
@@ -36,6 +36,13 @@ typedef enum
 	FILE_TYPE_SYMLINK,
 } file_type_t;
 
+typedef enum
+{
+	FILE_CONTENT_TYPE_OTHER = 0,
+	FILE_CONTENT_TYPE_RELATION,
+	FILE_CONTENT_TYPE_WAL
+} file_content_type_t;
+
 /*
  * For every file found in the local or remote system, we have a file entry
  * that contains information about the file on both systems.  For relation
@@ -51,7 +58,7 @@ typedef struct file_entry_t
 	uint32		status;			/* hash status */
 
 	const char *path;
-	bool		isrelfile;		/* is it a relation data file? */
+	file_content_type_t content_type;
 
 	/*
 	 * Status of the file in the target.
diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c
index 9d16c1e6b47..917bc298cad 100644
--- a/src/bin/pg_rewind/pg_rewind.c
+++ b/src/bin/pg_rewind/pg_rewind.c
@@ -56,6 +56,9 @@ static void findCommonAncestorTimeline(TimeLineHistoryEntry *a_history,
 static void ensureCleanShutdown(const char *argv0);
 static void disconnect_atexit(void);
 
+/* WAL segment files handling */
+file_action_t decide_wal_file_action(const char *fname);
+
 static ControlFileData ControlFile_target;
 static ControlFileData ControlFile_source;
 static ControlFileData ControlFile_source_after;
@@ -88,6 +91,9 @@ uint64		fetch_done;
 static PGconn *conn;
 static rewind_source *source;
 
+/* Segment number of the divergence record */
+XLogSegNo last_common_segno;
+
 static void
 usage(const char *progname)
 {
@@ -397,6 +403,9 @@ main(int argc, char **argv)
 					LSN_FORMAT_ARGS(divergerec),
 					targetHistory[lastcommontliIndex].tli);
 
+		/* Convert divergence LSN to segment number */
+		XLByteToSeg(divergerec, last_common_segno, ControlFile_target.xlog_seg_size);
+
 		/*
 		 * Don't need the source history anymore. The target history is still
 		 * needed by the routines in parsexlog.c, when we read the target WAL.
@@ -1204,3 +1213,35 @@ disconnect_atexit(void)
 	if (conn != NULL)
 		PQfinish(conn);
 }
+
+/*
+ * Decide what to do with a WAL segment file based on its position
+ * relative to the point of divergence.
+ * Caller is responsible for ensuring the file exists on both
+ * source and target.
+ */
+file_action_t
+decide_wal_file_action(const char *fname)
+{
+	TimeLineID  file_tli;
+	XLogSegNo   file_segno;
+
+	/* Get current WAL segment number given current segment file name */
+	XLogFromFileName(fname, &file_tli, &file_segno, ControlFile_target.xlog_seg_size);
+
+	/*
+	 * Avoid copying files before the last common segment.
+	 *
+	 * These files are assumed to exist on source and target.
+	 * Only WAL segment files after the last common segment number on
+	 * the new source need to be copied to the new target.
+	 */
+	if (file_segno < last_common_segno)
+	{
+		pg_log_debug("WAL file entry \"%s\" not copied to target", fname);
+		return FILE_ACTION_NONE;
+	}
+
+	pg_log_debug("WAL file entry \"%s\" is copied to target", fname);
+	return FILE_ACTION_COPY;
+}
diff --git a/src/bin/pg_rewind/pg_rewind.h b/src/bin/pg_rewind/pg_rewind.h
index 9cea144d2b2..421b4201998 100644
--- a/src/bin/pg_rewind/pg_rewind.h
+++ b/src/bin/pg_rewind/pg_rewind.h
@@ -14,6 +14,7 @@
 #include "access/timeline.h"
 #include "common/logging.h"
 #include "common/file_utils.h"
+#include "filemap.h"
 
 /* Configuration options */
 extern char *datadir_target;
@@ -45,6 +46,7 @@ extern XLogRecPtr readOneRecord(const char *datadir, XLogRecPtr ptr,
 
 /* in pg_rewind.c */
 extern void progress_report(bool finished);
+extern file_action_t decide_wal_file_action(const char *fname);
 
 /* in timeline.c */
 extern TimeLineHistoryEntry *rewind_parseTimeLineHistory(char *buffer,
diff --git a/src/bin/pg_rewind/t/008_min_recovery_point.pl b/src/bin/pg_rewind/t/008_min_recovery_point.pl
index 28496afe350..ea279a9ee4b 100644
--- a/src/bin/pg_rewind/t/008_min_recovery_point.pl
+++ b/src/bin/pg_rewind/t/008_min_recovery_point.pl
@@ -71,6 +71,9 @@ $node_3->start;
 # Wait until node 3 has connected and caught up
 $node_1->wait_for_catchup('node_3');
 
+# Current common WAL segment
+my $wal_seg_skipped = $node_1->safe_psql('postgres', 'SELECT pg_walfile_name(pg_current_wal_lsn())');
+
 #
 # Swap the roles of node_1 and node_3, so that node_1 follows node_3.
 #
@@ -93,6 +96,10 @@ primary_conninfo='$node_3_connstr'
 ));
 $node_2->restart();
 
+# Confirm WAL segment has advanced for promotion
+my $advanced_wal_segment = $node_3->safe_psql('postgres', 'SELECT pg_walfile_name(pg_current_wal_lsn())');
+isnt($wal_seg_skipped, $advanced_wal_segment, "Expected WAL segment to have advanced");
+
 #
 # Promote node_1, to create a split-brain scenario.
 #
@@ -140,14 +147,21 @@ copy(
 	"$node_2_pgdata/postgresql.conf",
 	"$tmp_folder/node_2-postgresql.conf.tmp");
 
-command_ok(
+command_checks_all(
 	[
 		'pg_rewind',
 		'--source-server' => $node_1_connstr,
 		'--target-pgdata' => $node_2_pgdata,
 		'--debug',
 	],
-	'run pg_rewind');
+	0,
+	[
+		#  qr/"WAL file entry $wal_seg_skipped not copied to target"/
+		qr//
+	],
+	[qr//],
+	'run pg_rewind'
+);
 
 # Now move back postgresql.conf with old settings
 move(
-- 
2.47.1

Reply via email to