From c8c5d5809b8f75217af84bfc6902a8d8ec066841 Mon Sep 17 00:00:00 2001
From: Robert Haas <rhaas@postgresql.org>
Date: Mon, 22 Apr 2024 14:52:02 -0400
Subject: [PATCH v1] pg_combinebackup: Detect checksum mismatches and document
 limitation.

If not all backups have the same checksum status, but the final backup
has checksums enabled, then the output directory may include pages
with invalid checksums. Document this limitation and explain how to
work around it.

In a future release, we may want to teach pg_combinebackup to
recompute page checksums when required, but as feature freeze has come
and gone, it seems a bit too late to do that for this release.
---
 doc/src/sgml/backup.sgml                    |  6 +++++-
 doc/src/sgml/ref/pg_combinebackup.sgml      | 20 +++++++++++++++++++
 src/bin/pg_combinebackup/pg_combinebackup.c | 22 +++++++++++++++++++++
 3 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/doc/src/sgml/backup.sgml b/doc/src/sgml/backup.sgml
index b3468eea3c..91da3c26ba 100644
--- a/doc/src/sgml/backup.sgml
+++ b/doc/src/sgml/backup.sgml
@@ -892,7 +892,11 @@ test ! -f /mnt/server/archivedir/00000001000000A900000065 &amp;&amp; cp pg_wal/0
     only the incremental backup itself but also all earlier backups that
     are required to supply the blocks omitted from the incremental backup.
     See <xref linkend="app-pgcombinebackup"/> for further information about
-    this requirement.
+    this requirement. Note that there are restrictions on the use of
+    <literal>pg_combinebackup</literal> when the checksum status of the
+    cluster has been changed; see
+    <link linkend="app-pgcombinebackup-limitations">pg_combinebackup
+    limitations</link>.
    </para>
 
    <para>
diff --git a/doc/src/sgml/ref/pg_combinebackup.sgml b/doc/src/sgml/ref/pg_combinebackup.sgml
index def1e38ae6..fcf3e25460 100644
--- a/doc/src/sgml/ref/pg_combinebackup.sgml
+++ b/doc/src/sgml/ref/pg_combinebackup.sgml
@@ -261,6 +261,26 @@ PostgreSQL documentation
 
  </refsect1>
 
+ <refsect1 id="app-pgcombinebackup-limitations">
+  <title>Limitations</title>
+
+  <para>
+   <literal>pg_combinebackup</literal> does not recompute page checksums when
+   writing the output directory. Therefore, if any of the backups used for
+   reconstruction were taken with checksums disabled, but the final backup was
+   taken with checksums enabled, the resulting directory may contain pages
+   with invalid checksums.
+  </para>
+
+  <para>
+   To avoid this problem, taking a new full backup after changing the checksum
+   state of the cluster using <xref linkend="app-pgchecksums"/> is
+   recommended. Otherwise, you can disable and then optionally reenable
+   checksums on the directory produced by <literal>pg_combinebackup</literal>
+   in order to correct the problem.
+  </para>
+ </refsect1>
+
  <refsect1>
   <title>Environment</title>
 
diff --git a/src/bin/pg_combinebackup/pg_combinebackup.c b/src/bin/pg_combinebackup/pg_combinebackup.c
index 95da1b01bc..232e16036f 100644
--- a/src/bin/pg_combinebackup/pg_combinebackup.c
+++ b/src/bin/pg_combinebackup/pg_combinebackup.c
@@ -583,6 +583,8 @@ check_control_files(int n_backups, char **backup_dirs)
 {
 	int			i;
 	uint64		system_identifier = 0;	/* placate compiler */
+	uint32		data_checksum_version = 0;	/* placate compiler */
+	bool		data_checksum_mismatch = false;
 
 	/* Try to read each control file in turn, last to first. */
 	for (i = n_backups - 1; i >= 0; --i)
@@ -612,6 +614,16 @@ check_control_files(int n_backups, char **backup_dirs)
 					 controlpath, (unsigned long long) system_identifier,
 					 (unsigned long long) control_file->system_identifier);
 
+		/*
+		 * Detect checksum mismatches, but only if the last backup in the
+		 * chain has checksums enabled.
+		 */
+		if (i == n_backups - 1)
+			data_checksum_version = control_file->data_checksum_version;
+		else if (data_checksum_version != 0 &&
+				 data_checksum_version != control_file->data_checksum_version)
+			data_checksum_mismatch = true;
+
 		/* Release memory. */
 		pfree(control_file);
 		pfree(controlpath);
@@ -624,6 +636,16 @@ check_control_files(int n_backups, char **backup_dirs)
 	pg_log_debug("system identifier is %llu",
 				 (unsigned long long) system_identifier);
 
+	/*
+	 * If the final backup has checksums enabled but some earlier backup has
+	 * a different checksum state, warn: output pages may fail verification.
+	 */
+	if (data_checksum_mismatch)
+	{
+		pg_log_warning("only some backups have checksums enabled");
+		pg_log_warning_hint("disable, and optionally reenable, checksums on the output directory to avoid failures");
+	}
+
 	return system_identifier;
 }
 
-- 
2.39.3 (Apple Git-145)

