From 1c2d9a0fc6ff551c1eec0c13f44ece813c0ee7b7 Mon Sep 17 00:00:00 2001
From: John Hsu <johnyvr@gmail.com>
Date: Thu, 3 Jul 2025 19:39:37 +0000
Subject: [PATCH] Avoid copying WAL segments before divergence to speed up
 pg_rewind

Adds a conditional check to avoid unnecessarily copying WAL
segment files from source to target during pg_rewind when they
are common to both servers and precede the point of WAL
divergence. All WAL files that exist on both source and target,
and that fall before the segment containing the first diverged
LSN, can safely be skipped when copying to the target.
---
 src/bin/pg_rewind/filemap.c                   | 71 ++++++++++++++++++-
 src/bin/pg_rewind/filemap.h                   | 10 ++-
 src/bin/pg_rewind/pg_rewind.c                 |  5 +-
 src/bin/pg_rewind/pg_rewind.h                 |  4 ++
 .../t/011_avoid_copying_common_wals.pl        | 52 ++++++++++++++
 5 files changed, 137 insertions(+), 5 deletions(-)
 create mode 100644 src/bin/pg_rewind/t/011_avoid_copying_common_wals.pl

diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c
index c933871ca9f..14c7643f0e0 100644
--- a/src/bin/pg_rewind/filemap.c
+++ b/src/bin/pg_rewind/filemap.c
@@ -199,6 +199,28 @@ filehash_init(void)
 	filehash = filehash_create(FILEHASH_INITIAL_SIZE, NULL);
 }
 
+/*
+ * Determine the type of file content (relation, WAL, or other).
+ *
+ * Only files located directly in XLOGDIR count as WAL segments, so that a
+ * file elsewhere that merely has a WAL-like name is not skipped by mistake.
+ */
+static file_content_type_t
+getFileContentType(const char *path)
+{
+	const char *waldir = XLOGDIR "/";
+	size_t		waldirlen = strlen(waldir);
+
+	if (isRelDataFile(path))
+		return FILE_CONTENT_TYPE_RELATION;
+
+	if (strncmp(path, waldir, waldirlen) == 0 &&
+		IsXLogFileName(path + waldirlen))
+		return FILE_CONTENT_TYPE_WAL;
+
+	return FILE_CONTENT_TYPE_OTHER;
+}
+
 /* Look up entry for 'path', creating a new one if it doesn't exist */
 static file_entry_t *
 insert_filehash_entry(const char *path)
@@ -210,7 +232,7 @@ insert_filehash_entry(const char *path)
 	if (!found)
 	{
 		entry->path = pg_strdup(path);
-		entry->isrelfile = isRelDataFile(path);
+		entry->content_type = getFileContentType(path);
 
 		entry->target_exists = false;
 		entry->target_type = FILE_TYPE_UNDEFINED;
@@ -383,7 +405,7 @@ process_target_wal_block_change(ForkNumber forknum, RelFileLocator rlocator,
 	 */
 	if (entry)
 	{
-		Assert(entry->isrelfile);
+		Assert(entry->content_type == FILE_CONTENT_TYPE_RELATION);
 
 		if (entry->target_exists)
 		{
@@ -693,6 +715,38 @@ final_filemap_cmp(const void *a, const void *b)
 		return strcmp(fa->path, fb->path);
 }
 
+/*
+ * Choose the action for the WAL segment file named 'fname' by comparing
+ * its segment number with last_common_segno, the segment that holds the
+ * point of divergence.
+ *
+ * The caller must have verified that the file is present on both the
+ * source and the target.
+ */
+static file_action_t
+decide_wal_file_action(const char *fname)
+{
+	XLogSegNo	segno;
+	TimeLineID	tli;
+
+	/* Parse the timeline and segment number out of the file name */
+	XLogFromFileName(fname, &tli, &segno, WalSegSz);
+
+	/*
+	 * Segments at or after last_common_segno may differ between the two
+	 * servers, so they must be taken from the source.
+	 */
+	if (segno >= last_common_segno)
+	{
+		pg_log_debug("WAL file entry \"%s\" is copied to target", fname);
+		return FILE_ACTION_COPY;
+	}
+
+	/* Earlier segments are assumed to match on both sides; skip them */
+	pg_log_debug("WAL file entry \"%s\" not copied to target", fname);
+	return FILE_ACTION_NONE;
+}
+
 /*
  * Decide what action to perform to a file.
  */
@@ -799,7 +853,18 @@ decide_file_action(file_entry_t *entry)
 			return FILE_ACTION_NONE;
 
 		case FILE_TYPE_REGULAR:
-			if (!entry->isrelfile)
+			if (entry->content_type == FILE_CONTENT_TYPE_WAL)
+			{
+				/* Handle WAL segment file */
+				const char *filename = last_dir_separator(entry->path);
+				if (filename == NULL)
+					filename = entry->path;
+				else
+					filename++;  /* Skip the separator */
+
+				return decide_wal_file_action(filename);
+			}
+			else if (entry->content_type != FILE_CONTENT_TYPE_RELATION)
 			{
 				/*
 				 * It's a non-data file that we have no special processing
diff --git a/src/bin/pg_rewind/filemap.h b/src/bin/pg_rewind/filemap.h
index df78a02e3da..53082ceba3c 100644
--- a/src/bin/pg_rewind/filemap.h
+++ b/src/bin/pg_rewind/filemap.h
@@ -11,6 +11,7 @@
 #include "datapagemap.h"
 #include "storage/block.h"
 #include "storage/relfilelocator.h"
+#include "access/xlogdefs.h"
 
 /* these enum values are sorted in the order we want actions to be processed */
 typedef enum
@@ -36,6 +37,13 @@ typedef enum
 	FILE_TYPE_SYMLINK,
 } file_type_t;
 
+typedef enum
+{
+	FILE_CONTENT_TYPE_OTHER = 0,	/* neither relation data nor WAL */
+	FILE_CONTENT_TYPE_RELATION,		/* relation data file */
+	FILE_CONTENT_TYPE_WAL			/* WAL segment file */
+} file_content_type_t;
+
 /*
  * For every file found in the local or remote system, we have a file entry
  * that contains information about the file on both systems.  For relation
@@ -51,7 +59,7 @@ typedef struct file_entry_t
 	uint32		status;			/* hash status */
 
 	const char *path;
-	bool		isrelfile;		/* is it a relation data file? */
+	file_content_type_t content_type;
 
 	/*
 	 * Status of the file in the target.
diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c
index 0c68dd4235e..002e8f667e0 100644
--- a/src/bin/pg_rewind/pg_rewind.c
+++ b/src/bin/pg_rewind/pg_rewind.c
@@ -15,7 +15,6 @@
 #include <unistd.h>
 
 #include "access/timeline.h"
-#include "access/xlog_internal.h"
 #include "catalog/catversion.h"
 #include "catalog/pg_control.h"
 #include "common/controldata_utils.h"
@@ -62,6 +61,7 @@ static ControlFileData ControlFile_source_after;
 
 static const char *progname;
 int			WalSegSz;
+XLogSegNo		last_common_segno;
 
 /* Configuration options */
 char	   *datadir_target = NULL;
@@ -397,6 +397,9 @@ main(int argc, char **argv)
 					LSN_FORMAT_ARGS(divergerec),
 					targetHistory[lastcommontliIndex].tli);
 
+		/* Convert divergence LSN to segment number */
+		XLByteToSeg(divergerec, last_common_segno, ControlFile_target.xlog_seg_size);
+
 		/*
 		 * Don't need the source history anymore. The target history is still
 		 * needed by the routines in parsexlog.c, when we read the target WAL.
diff --git a/src/bin/pg_rewind/pg_rewind.h b/src/bin/pg_rewind/pg_rewind.h
index 9cea144d2b2..759be379e12 100644
--- a/src/bin/pg_rewind/pg_rewind.h
+++ b/src/bin/pg_rewind/pg_rewind.h
@@ -12,6 +12,7 @@
 #define PG_REWIND_H
 
 #include "access/timeline.h"
+#include "access/xlog_internal.h"
 #include "common/logging.h"
 #include "common/file_utils.h"
 
@@ -51,4 +52,7 @@ extern TimeLineHistoryEntry *rewind_parseTimeLineHistory(char *buffer,
 														 TimeLineID targetTLI,
 														 int *nentries);
 
+/* WAL segment number containing the point of divergence */
+extern XLogSegNo last_common_segno;
+
 #endif							/* PG_REWIND_H */
diff --git a/src/bin/pg_rewind/t/011_avoid_copying_common_wals.pl b/src/bin/pg_rewind/t/011_avoid_copying_common_wals.pl
new file mode 100644
index 00000000000..3c823fc86e2
--- /dev/null
+++ b/src/bin/pg_rewind/t/011_avoid_copying_common_wals.pl
@@ -0,0 +1,52 @@
+# Copyright (c) 2021-2025, PostgreSQL Global Development Group
+#
+# Test that pg_rewind skips copying a WAL segment that exists on both
+# the source and the target and precedes the point of divergence
+#
+
+use strict;
+use warnings FATAL => 'all';
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+use FindBin;
+use lib $FindBin::RealBin;
+use RewindTest;
+
+RewindTest::setup_cluster();
+RewindTest::start_primary();
+
+RewindTest::create_standby();
+
+# Advance WAL on the primary; this segment will be common to both nodes
+RewindTest::primary_psql("CREATE TABLE t(a int)");
+RewindTest::primary_psql("INSERT INTO t VALUES(0)");
+
+# Name of the common segment that pg_rewind is expected to skip
+my $wal_seg_skipped = $node_primary->safe_psql('postgres', 'SELECT pg_walfile_name(pg_current_wal_lsn())');
+
+RewindTest::primary_psql("SELECT pg_switch_wal()");
+
+# Last common checkpoint, taken before the standby is promoted
+RewindTest::primary_psql("CHECKPOINT");
+
+RewindTest::promote_standby;
+
+$node_standby->stop();
+$node_primary->stop();
+
+command_checks_all(
+	[
+		'pg_rewind', '--debug',
+		'--source-pgdata' => $node_standby->data_dir,
+		'--target-pgdata' => $node_primary->data_dir,
+		'--no-sync',
+	],
+	0,
+	[qr//
+	],
+	[qr/WAL file entry \"$wal_seg_skipped\" not copied to target/],
+	'run pg_rewind'
+);
+
+done_testing();
-- 
2.47.1

