From 41c5bbfcb86dc15b20ce210e727ff4296edfafbd Mon Sep 17 00:00:00 2001
From: Movead <lchch1990@sina.cn>
Date: Thu, 15 Jan 2026 23:01:16 +0800
Subject: [PATCH] Patch enable pg_rewind work without wal_log_hints and
 data_checksums

---
 src/bin/pg_rewind/.gitignore  |   3 +
 src/bin/pg_rewind/Makefile    |  15 ++-
 src/bin/pg_rewind/parsexlog.c | 166 +++++++++++++++++++++++++++++++++-
 src/bin/pg_rewind/pg_rewind.c |  34 ++++---
 src/bin/pg_rewind/pg_rewind.h |   5 +-
 5 files changed, 204 insertions(+), 19 deletions(-)

diff --git a/src/bin/pg_rewind/.gitignore b/src/bin/pg_rewind/.gitignore
index 79ddca3eec9..8f8ab3bcfde 100644
--- a/src/bin/pg_rewind/.gitignore
+++ b/src/bin/pg_rewind/.gitignore
@@ -1,6 +1,9 @@
 # Files generated during build
 /xlogreader.c
 /pg_rewind
+/compat.c
+/xactdesc.c
+/standbydesc.c
 
 # Generated by test suite
 /tmp_check/
diff --git a/src/bin/pg_rewind/Makefile b/src/bin/pg_rewind/Makefile
index 32a35c57612..c4608494c6c 100644
--- a/src/bin/pg_rewind/Makefile
+++ b/src/bin/pg_rewind/Makefile
@@ -19,7 +19,9 @@ override CPPFLAGS := -I$(libpq_srcdir) -DFRONTEND $(CPPFLAGS)
 LDFLAGS_INTERNAL += -L$(top_builddir)/src/fe_utils -lpgfeutils $(libpq_pgport)
 
 OBJS = \
+	$(RMGRDESCOBJS) \
 	$(WIN32RES) \
+	compat.o \
 	datapagemap.o \
 	file_ops.o \
 	filemap.o \
@@ -30,7 +32,10 @@ OBJS = \
 	timeline.o \
 	xlogreader.o
 
-EXTRA_CLEAN = xlogreader.c
+EXTRA_CLEAN = xlogreader.c compat.c
+
+RMGRDESCSOURCES = xactdesc.c standbydesc.c
+RMGRDESCOBJS = $(patsubst %.c,%.o,$(RMGRDESCSOURCES))
 
 all: pg_rewind
 
@@ -40,6 +45,12 @@ pg_rewind: $(OBJS) | submake-libpq submake-libpgport
 xlogreader.c: % : $(top_srcdir)/src/backend/access/transam/%
 	rm -f $@ && $(LN_S) $< .
 
+compat.c: % : $(top_srcdir)/src/bin/pg_waldump/%
+	rm -f $@ && $(LN_S) $< .
+
+$(RMGRDESCSOURCES): % : $(top_srcdir)/src/backend/access/rmgrdesc/%
+	rm -f $@ && $(LN_S) $< .
+
 install: all installdirs
 	$(INSTALL_PROGRAM) pg_rewind$(X) '$(DESTDIR)$(bindir)/pg_rewind$(X)'
 
@@ -50,7 +61,7 @@ uninstall:
 	rm -f '$(DESTDIR)$(bindir)/pg_rewind$(X)'
 
 clean distclean:
-	rm -f pg_rewind$(X) $(OBJS) xlogreader.c
+	rm -f pg_rewind$(X) $(OBJS) $(RMGRDESCSOURCES) xlogreader.c compat.c
 	rm -rf tmp_check
 
 check:
diff --git a/src/bin/pg_rewind/parsexlog.c b/src/bin/pg_rewind/parsexlog.c
index db7a7e73042..9c36fa44950 100644
--- a/src/bin/pg_rewind/parsexlog.c
+++ b/src/bin/pg_rewind/parsexlog.c
@@ -23,6 +23,8 @@
 #include "fe_utils/archive.h"
 #include "filemap.h"
 #include "pg_rewind.h"
+#include "storage/standbydefs.h"
+#include "access/transam.h"
 
 /*
  * RmgrNames is an array of the built-in resource manager names, to make error
@@ -38,11 +40,14 @@ static const char *const RmgrNames[RM_MAX_ID + 1] = {
 #define RmgrName(rmid) (((rmid) <= RM_MAX_BUILTIN_ID) ? \
 						RmgrNames[rmid] : "custom")
 
-static void extractPageInfo(XLogReaderState *record);
+static void extractPageInfo(XLogReaderState *record, bool backward);
+static bool RewindTransactionIdPrecedes(TransactionId id1, TransactionId id2);
 
 static int	xlogreadfd = -1;
 static XLogSegNo xlogreadsegno = 0;
 static char xlogfpath[MAXPGPATH];
+static TransactionId min_commited_xid = InvalidTransactionId;
+static TransactionId base_xid = InvalidTransactionId;
 
 typedef struct XLogPageReadPrivate
 {
@@ -63,7 +68,7 @@ static int	SimpleXLogPageRead(XLogReaderState *xlogreader,
  * 'endpoint' is the first one that is not read.
  */
 void
-extractPageMap(const char *datadir, XLogRecPtr startpoint, int tliIndex,
+extractPageMapForward(const char *datadir, XLogRecPtr startpoint, int tliIndex,
 			   XLogRecPtr endpoint, const char *restoreCommand)
 {
 	XLogRecord *record;
@@ -97,7 +102,7 @@ extractPageMap(const char *datadir, XLogRecPtr startpoint, int tliIndex,
 						 LSN_FORMAT_ARGS(errptr));
 		}
 
-		extractPageInfo(xlogreader);
+		extractPageInfo(xlogreader, false);
 	} while (xlogreader->EndRecPtr < endpoint);
 
 	/*
@@ -116,6 +121,72 @@ extractPageMap(const char *datadir, XLogRecPtr startpoint, int tliIndex,
 	}
 }
 
+void
+extractPageMapBackward(const char *datadir, XLogRecPtr startpoint, int tliIndex,
+				   const char *restoreCommand)
+{
+	/* Walk backwards, starting from the given record */
+	XLogRecord *record;
+	XLogRecPtr	searchptr;
+	XLogReaderState *xlogreader;
+	char	   *errormsg;
+	XLogPageReadPrivate private;
+
+	/*
+	 * The given fork pointer points to the end of the last common record,
+	 * which is not necessarily the beginning of the next record, if the
+	 * previous record happens to end at a page boundary. Skip over the page
+	 * header in that case to find the next record.
+	 */
+	if (startpoint % XLOG_BLCKSZ == 0)
+	{
+		if (XLogSegmentOffset(startpoint, WalSegSz) == 0)
+			startpoint += SizeOfXLogLongPHD;
+		else
+			startpoint += SizeOfXLogShortPHD;
+	}
+
+	private.tliIndex = tliIndex;
+	private.restoreCommand = restoreCommand;
+	xlogreader = XLogReaderAllocate(WalSegSz, datadir,
+									XL_ROUTINE(.page_read = &SimpleXLogPageRead),
+									&private);
+	if (xlogreader == NULL)
+		pg_fatal("out of memory while allocating a WAL reading processor");
+
+	searchptr = startpoint;
+	for (;;)
+	{
+		XLogBeginRead(xlogreader, searchptr);
+		record = XLogReadRecord(xlogreader, &errormsg);
+
+		if (record == NULL)
+		{
+			if (errormsg)
+				pg_fatal("could not find previous WAL record at %X/%X: %s",
+						 LSN_FORMAT_ARGS(searchptr),
+						 errormsg);
+			else
+				pg_fatal("could not find previous WAL record at %X/%X",
+						 LSN_FORMAT_ARGS(searchptr));
+		}
+
+		extractPageInfo(xlogreader, true);
+		/* We can break if met a safety snapshot */
+		if (base_xid <= min_commited_xid)
+			break;
+		/* Walk backwards to previous record. */
+		searchptr = record->xl_prev;
+	}
+
+	XLogReaderFree(xlogreader);
+	if (xlogreadfd != -1)
+	{
+		close(xlogreadfd);
+		xlogreadfd = -1;
+	}
+}
+
 /*
  * Reads one WAL record. Returns the end position of the record, without
  * doing anything with the record itself.
@@ -270,6 +341,17 @@ findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, int tliIndex,
 	}
 }
 
+bool check_rewind_safety(void)
+{
+	/* If no committed transactions, it's safe to rewind */
+	if (min_commited_xid == InvalidTransactionId)
+		return true;
+	/* If base_xid is less than or equal to min_commited_xid, it's safe */
+	if (base_xid <= min_commited_xid)
+		return true;
+	return false;
+}
+
 /* XLogReader callback function, to read a WAL page */
 static int
 SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
@@ -382,11 +464,66 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
 	return XLOG_BLCKSZ;
 }
 
+static void
+track_rewind_snapshot(XLogReaderState *record, bool backward)
+{
+	uint8		info = 0;
+	RmgrId		rmid = XLogRecGetRmid(record);
+
+	if (rmid != RM_XACT_ID && rmid != RM_STANDBY_ID)
+		return;
+
+	if (rmid == RM_XACT_ID)
+	{
+		xl_xact_commit *xlrec = (xl_xact_commit *) XLogRecGetData(record);
+		xl_xact_parsed_commit parsed;
+		TransactionId current_xid = InvalidTransactionId;
+
+		/* We finished tracking during forward read phase */
+		if(backward)
+			return;
+
+		info = XLogRecGetInfo(record) & XLOG_XACT_OPMASK;
+		ParseCommitRecord(XLogRecGetInfo(record), xlrec, &parsed);
+		if (info == XLOG_XACT_COMMIT)
+		{
+			current_xid = XLogRecGetXid(record);
+		}
+		else if (info == XLOG_XACT_COMMIT_PREPARED)
+		{
+			current_xid = parsed.twophase_xid;
+		}
+		if(min_commited_xid == InvalidTransactionId ||
+		   RewindTransactionIdPrecedes(current_xid, min_commited_xid))
+		{
+			min_commited_xid = current_xid;
+			if (showprogress)
+				pg_log_info("Get min committed xid: %u", min_commited_xid);
+		}
+	}
+	else if (rmid == RM_STANDBY_ID)
+	{
+		info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
+
+		if (info == XLOG_RUNNING_XACTS)
+		{
+			xl_running_xacts *xlrec = (xl_running_xacts *) XLogRecGetData(record);
+
+			if(base_xid == InvalidTransactionId || RewindTransactionIdPrecedes(xlrec->nextXid, base_xid))
+			{
+				base_xid = xlrec->nextXid;
+				if (showprogress)
+					pg_log_info("Get base xid: %u at %X/%X", base_xid, LSN_FORMAT_ARGS(record->ReadRecPtr));
+			}
+		}
+	}
+}
+
 /*
  * Extract information on which blocks the current record modifies.
  */
 static void
-extractPageInfo(XLogReaderState *record)
+extractPageInfo(XLogReaderState *record, bool backward)
 {
 	int			block_id;
 	RmgrId		rmid = XLogRecGetRmid(record);
@@ -464,6 +601,8 @@ extractPageInfo(XLogReaderState *record)
 				 rmid, RmgrName(rmid), info);
 	}
 
+	track_rewind_snapshot(record, backward);
+
 	for (block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
 	{
 		RelFileLocator rlocator;
@@ -481,3 +620,22 @@ extractPageInfo(XLogReaderState *record)
 		process_target_wal_block_change(forknum, rlocator, blkno);
 	}
 }
+
+/*
+ * RewindTransactionIdPrecedes --- is id1 logically < id2?
+ */
+static bool
+RewindTransactionIdPrecedes(TransactionId id1, TransactionId id2)
+{
+	/*
+	 * If either ID is a permanent XID then we can just do unsigned
+	 * comparison.  If both are normal, do a modulo-2^32 comparison.
+	 */
+	int32		diff;
+
+	if (!TransactionIdIsNormal(id1) || !TransactionIdIsNormal(id2))
+		return (id1 < id2);
+
+	diff = (int32) (id1 - id2);
+	return (diff < 0);
+}
diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c
index 31693843b3c..18266d688b1 100644
--- a/src/bin/pg_rewind/pg_rewind.c
+++ b/src/bin/pg_rewind/pg_rewind.c
@@ -74,6 +74,7 @@ static bool debug = false;
 bool		showprogress = false;
 bool		dry_run = false;
 bool		do_sync = true;
+bool		deep_dig = false;
 static bool restore_wal = false;
 DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
 
@@ -103,6 +104,7 @@ usage(const char *progname)
 	printf(_("  -N, --no-sync                  do not wait for changes to be written\n"
 			 "                                 safely to disk\n"));
 	printf(_("  -P, --progress                 write progress messages\n"));
+	printf(_("  -d, --deep-dig                 perform a deep dig for more wal\n"));
 	printf(_("  -R, --write-recovery-conf      write configuration for replication\n"
 			 "                                 (requires --source-server)\n"));
 	printf(_("      --config-file=FILENAME     use specified main server configuration\n"
@@ -135,6 +137,7 @@ main(int argc, char **argv)
 		{"progress", no_argument, NULL, 'P'},
 		{"debug", no_argument, NULL, 3},
 		{"sync-method", required_argument, NULL, 6},
+		{"deep-dig", no_argument, NULL, 'd'},
 		{NULL, 0, NULL, 0}
 	};
 	int			option_index;
@@ -153,6 +156,7 @@ main(int argc, char **argv)
 	bool		no_ensure_shutdown = false;
 	bool		rewind_needed;
 	bool		writerecoveryconf = false;
+	bool		page_consistence = false;
 	filemap_t  *filemap;
 
 	pg_logging_init(argv[0]);
@@ -174,7 +178,7 @@ main(int argc, char **argv)
 		}
 	}
 
-	while ((c = getopt_long(argc, argv, "cD:nNPR", long_options, &option_index)) != -1)
+	while ((c = getopt_long(argc, argv, "cD:nNPRd", long_options, &option_index)) != -1)
 	{
 		switch (c)
 		{
@@ -227,6 +231,9 @@ main(int argc, char **argv)
 				if (!parse_sync_method(optarg, &sync_method))
 					exit(1);
 				break;
+			case 'd':
+				deep_dig = true;
+				break;
 
 			default:
 				/* getopt_long already emitted a complaint */
@@ -351,6 +358,8 @@ main(int argc, char **argv)
 	pg_free(buffer);
 
 	sanityChecks();
+	page_consistence = ControlFile_target.data_checksum_version == PG_DATA_CHECKSUM_VERSION ||
+		ControlFile_target.wal_log_hints;
 
 	/*
 	 * Usually, the TLI can be found in the latest checkpoint record. But if
@@ -494,8 +503,19 @@ main(int argc, char **argv)
 	 */
 	if (showprogress)
 		pg_log_info("reading WAL in target");
-	extractPageMap(datadir_target, chkptrec, lastcommontliIndex,
+	extractPageMapForward(datadir_target, chkptrec, lastcommontliIndex,
 				   target_wal_endrec, restore_command);
+	if(!page_consistence && !check_rewind_safety())
+	{
+		/*
+		 * Next time we will search more wal which will spend more time, and
+		 * if user prefer a deep dig we will do it. Otherwise, we just stop here.
+		 */
+		if(!deep_dig)
+			pg_fatal("Can not safety rewind the target cluster, use --deep-dig to have a try.");
+		extractPageMapBackward(datadir_target, chkptrec, lastcommontliIndex,
+							   restore_command);
+	}
 
 	/*
 	 * We have collected all information we need from both systems. Decide
@@ -758,16 +778,6 @@ sanityChecks(void)
 		pg_fatal("clusters are not compatible with this version of pg_rewind");
 	}
 
-	/*
-	 * Target cluster need to use checksums or hint bit wal-logging, this to
-	 * prevent from data corruption that could occur because of hint bits.
-	 */
-	if (ControlFile_target.data_checksum_version != PG_DATA_CHECKSUM_VERSION &&
-		!ControlFile_target.wal_log_hints)
-	{
-		pg_fatal("target server needs to use either data checksums or \"wal_log_hints = on\"");
-	}
-
 	/*
 	 * Target cluster better not be running. This doesn't guard against
 	 * someone starting the cluster concurrently. Also, this is probably more
diff --git a/src/bin/pg_rewind/pg_rewind.h b/src/bin/pg_rewind/pg_rewind.h
index 9a981f7f246..01dfc47fcfa 100644
--- a/src/bin/pg_rewind/pg_rewind.h
+++ b/src/bin/pg_rewind/pg_rewind.h
@@ -32,9 +32,11 @@ extern uint64 fetch_size;
 extern uint64 fetch_done;
 
 /* in parsexlog.c */
-extern void extractPageMap(const char *datadir, XLogRecPtr startpoint,
+extern void extractPageMapForward(const char *datadir, XLogRecPtr startpoint,
 						   int tliIndex, XLogRecPtr endpoint,
 						   const char *restoreCommand);
+extern void extractPageMapBackward(const char *datadir, XLogRecPtr startpoint,
+							int tliIndex, const char *restoreCommand);
 extern void findLastCheckpoint(const char *datadir, XLogRecPtr forkptr,
 							   int tliIndex,
 							   XLogRecPtr *lastchkptrec, TimeLineID *lastchkpttli,
@@ -42,6 +44,7 @@ extern void findLastCheckpoint(const char *datadir, XLogRecPtr forkptr,
 							   const char *restoreCommand);
 extern XLogRecPtr readOneRecord(const char *datadir, XLogRecPtr ptr,
 								int tliIndex, const char *restoreCommand);
+extern bool check_rewind_safety(void);
 
 /* in pg_rewind.c */
 extern void progress_report(bool finished);
-- 
2.47.2

