From f758c8ae6e97140a9ea329f4e7f6ec8bb6271afd Mon Sep 17 00:00:00 2001
From: Andrew Dunstan <andrew@dunslane.net>
Date: Thu, 19 Mar 2026 15:43:53 +0530
Subject: [PATCH v22 4/5] pg_verifybackup: Enable WAL parsing for tar-format
 backups

Now that pg_waldump supports reading WAL from tar archives, remove the
restriction that forced --no-parse-wal for tar-format backups.

pg_verifybackup now automatically locates the WAL archive: it looks for
a separate pg_wal.tar first, then falls back to the main base.tar.  A
new --wal-path option (replacing the old --wal-directory, which is kept
as a silent alias) accepts either a directory or a tar archive path.

The default WAL directory preparation is deferred until the backup
format is known, since tar-format backups resolve the WAL path
differently from plain-format ones.

Author: Amul Sul <sulamul@gmail.com>
Reviewed-by: Robert Haas <robertmhaas@gmail.com>
Reviewed-by: Jakub Wartak <jakub.wartak@enterprisedb.com>
Reviewed-by: Chao Li <li.evan.chao@gmail.com>
Reviewed-by: Euler Taveira <euler@eulerto.com>
Reviewed-by: Andrew Dunstan <andrew@dunslane.net>
discussion: https://postgr.es/m/CAAJ_b94bqdWN3h2J-PzzzQ2Npbwct5ZQHggn_QoYGhC2rn-=WQ@mail.gmail.com
---
 doc/src/sgml/ref/pg_verifybackup.sgml         | 14 ++-
 src/bin/pg_verifybackup/pg_verifybackup.c     | 96 ++++++++++++-------
 src/bin/pg_verifybackup/t/002_algorithm.pl    |  4 -
 src/bin/pg_verifybackup/t/003_corruption.pl   |  4 +-
 src/bin/pg_verifybackup/t/007_wal.pl          | 20 +++-
 src/bin/pg_verifybackup/t/008_untar.pl        |  5 +-
 src/bin/pg_verifybackup/t/010_client_untar.pl |  5 +-
 7 files changed, 91 insertions(+), 57 deletions(-)

diff --git a/doc/src/sgml/ref/pg_verifybackup.sgml b/doc/src/sgml/ref/pg_verifybackup.sgml
index 61c12975e4a..1695cfe91c8 100644
--- a/doc/src/sgml/ref/pg_verifybackup.sgml
+++ b/doc/src/sgml/ref/pg_verifybackup.sgml
@@ -36,10 +36,7 @@ PostgreSQL documentation
    <literal>backup_manifest</literal> generated by the server at the time
    of the backup. The backup may be stored either in the "plain" or the "tar"
    format; this includes tar-format backups compressed with any algorithm
-   supported by <application>pg_basebackup</application>. However, at present,
-   <literal>WAL</literal> verification is supported only for plain-format
-   backups. Therefore, if the backup is stored in tar-format, the
-   <literal>-n, --no-parse-wal</literal> option should be used.
+   supported by <application>pg_basebackup</application>.
   </para>
 
   <para>
@@ -261,12 +258,13 @@ PostgreSQL documentation
 
      <varlistentry>
       <term><option>-w <replaceable class="parameter">path</replaceable></option></term>
-      <term><option>--wal-directory=<replaceable class="parameter">path</replaceable></option></term>
+      <term><option>--wal-path=<replaceable class="parameter">path</replaceable></option></term>
       <listitem>
        <para>
-        Try to parse WAL files stored in the specified directory, rather than
-        in <literal>pg_wal</literal>. This may be useful if the backup is
-        stored in a separate location from the WAL archive.
+        Try to parse WAL files stored in the specified directory or tar
+        archive, rather than in <literal>pg_wal</literal>. This may be
+        useful if the backup is stored in a separate location from the WAL
+        archive.
        </para>
       </listitem>
      </varlistentry>
diff --git a/src/bin/pg_verifybackup/pg_verifybackup.c b/src/bin/pg_verifybackup/pg_verifybackup.c
index 31f606c45b1..b60ab8739d5 100644
--- a/src/bin/pg_verifybackup/pg_verifybackup.c
+++ b/src/bin/pg_verifybackup/pg_verifybackup.c
@@ -74,7 +74,9 @@ pg_noreturn static void report_manifest_error(JsonManifestParseContext *context,
 											  const char *fmt,...)
 			pg_attribute_printf(2, 3);
 
-static void verify_tar_backup(verifier_context *context, DIR *dir);
+static void verify_tar_backup(verifier_context *context, DIR *dir,
+							  char **base_archive_path,
+							  char **wal_archive_path);
 static void verify_plain_backup_directory(verifier_context *context,
 										  char *relpath, char *fullpath,
 										  DIR *dir);
@@ -83,7 +85,9 @@ static void verify_plain_backup_file(verifier_context *context, char *relpath,
 static void verify_control_file(const char *controlpath,
 								uint64 manifest_system_identifier);
 static void precheck_tar_backup_file(verifier_context *context, char *relpath,
-									 char *fullpath, SimplePtrList *tarfiles);
+									 char *fullpath, SimplePtrList *tarfiles,
+									 char **base_archive_path,
+									 char **wal_archive_path);
 static void verify_tar_file(verifier_context *context, char *relpath,
 							char *fullpath, astreamer *streamer);
 static void report_extra_backup_files(verifier_context *context);
@@ -93,7 +97,7 @@ static void verify_file_checksum(verifier_context *context,
 								 uint8 *buffer);
 static void parse_required_wal(verifier_context *context,
 							   char *pg_waldump_path,
-							   char *wal_directory);
+							   char *wal_path);
 static astreamer *create_archive_verifier(verifier_context *context,
 										  char *archive_name,
 										  Oid tblspc_oid,
@@ -126,7 +130,8 @@ main(int argc, char **argv)
 		{"progress", no_argument, NULL, 'P'},
 		{"quiet", no_argument, NULL, 'q'},
 		{"skip-checksums", no_argument, NULL, 's'},
-		{"wal-directory", required_argument, NULL, 'w'},
+		{"wal-path", required_argument, NULL, 'w'},
+		{"wal-directory", required_argument, NULL, 'w'},	/* deprecated */
 		{NULL, 0, NULL, 0}
 	};
 
@@ -135,7 +140,9 @@ main(int argc, char **argv)
 	char	   *manifest_path = NULL;
 	bool		no_parse_wal = false;
 	bool		quiet = false;
-	char	   *wal_directory = NULL;
+	char	   *wal_path = NULL;
+	char	   *base_archive_path = NULL;
+	char	   *wal_archive_path = NULL;
 	char	   *pg_waldump_path = NULL;
 	DIR		   *dir;
 
@@ -221,8 +228,8 @@ main(int argc, char **argv)
 				context.skip_checksums = true;
 				break;
 			case 'w':
-				wal_directory = pstrdup(optarg);
-				canonicalize_path(wal_directory);
+				wal_path = pstrdup(optarg);
+				canonicalize_path(wal_path);
 				break;
 			default:
 				/* getopt_long already emitted a complaint */
@@ -285,10 +292,6 @@ main(int argc, char **argv)
 		manifest_path = psprintf("%s/backup_manifest",
 								 context.backup_directory);
 
-	/* By default, look for the WAL in the backup directory, too. */
-	if (wal_directory == NULL)
-		wal_directory = psprintf("%s/pg_wal", context.backup_directory);
-
 	/*
 	 * Try to read the manifest. We treat any errors encountered while parsing
 	 * the manifest as fatal; there doesn't seem to be much point in trying to
@@ -331,17 +334,6 @@ main(int argc, char **argv)
 		pfree(path);
 	}
 
-	/*
-	 * XXX: In the future, we should consider enhancing pg_waldump to read WAL
-	 * files from an archive.
-	 */
-	if (!no_parse_wal && context.format == 't')
-	{
-		pg_log_error("pg_waldump cannot read tar files");
-		pg_log_error_hint("You must use -n/--no-parse-wal when verifying a tar-format backup.");
-		exit(1);
-	}
-
 	/*
 	 * Perform the appropriate type of verification appropriate based on the
 	 * backup format. This will close 'dir'.
@@ -350,7 +342,7 @@ main(int argc, char **argv)
 		verify_plain_backup_directory(&context, NULL, context.backup_directory,
 									  dir);
 	else
-		verify_tar_backup(&context, dir);
+		verify_tar_backup(&context, dir, &base_archive_path, &wal_archive_path);
 
 	/*
 	 * The "matched" flag should now be set on every entry in the hash table.
@@ -368,12 +360,35 @@ main(int argc, char **argv)
 	if (context.format == 'p' && !context.skip_checksums)
 		verify_backup_checksums(&context);
 
+	/*
+	 * By default, WAL files are expected to be found in the backup directory
+	 * for plain-format backups. In the case of tar-format backups, if a
+	 * separate WAL archive is not found, the WAL files are most likely
+	 * included within the main data directory archive.
+	 */
+	if (wal_path == NULL)
+	{
+		if (context.format == 'p')
+			wal_path = psprintf("%s/pg_wal", context.backup_directory);
+		else if (wal_archive_path)
+			wal_path = wal_archive_path;
+		else if (base_archive_path)
+			wal_path = base_archive_path;
+		else
+		{
+			pg_log_error("WAL archive not found");
+			pg_log_error_hint("Specify the correct path using the option -w/--wal-path.  "
+							  "Or you must use -n/--no-parse-wal when verifying a tar-format backup.");
+			exit(1);
+		}
+	}
+
 	/*
 	 * Try to parse the required ranges of WAL records, unless we were told
 	 * not to do so.
 	 */
 	if (!no_parse_wal)
-		parse_required_wal(&context, pg_waldump_path, wal_directory);
+		parse_required_wal(&context, pg_waldump_path, wal_path);
 
 	/*
 	 * If everything looks OK, tell the user this, unless we were asked to
@@ -787,7 +802,8 @@ verify_control_file(const char *controlpath, uint64 manifest_system_identifier)
  * close when we're done with it.
  */
 static void
-verify_tar_backup(verifier_context *context, DIR *dir)
+verify_tar_backup(verifier_context *context, DIR *dir, char **base_archive_path,
+				  char **wal_archive_path)
 {
 	struct dirent *dirent;
 	SimplePtrList tarfiles = {NULL, NULL};
@@ -816,7 +832,8 @@ verify_tar_backup(verifier_context *context, DIR *dir)
 			char	   *fullpath;
 
 			fullpath = psprintf("%s/%s", context->backup_directory, filename);
-			precheck_tar_backup_file(context, filename, fullpath, &tarfiles);
+			precheck_tar_backup_file(context, filename, fullpath, &tarfiles,
+									 base_archive_path, wal_archive_path);
 			pfree(fullpath);
 		}
 	}
@@ -875,17 +892,21 @@ verify_tar_backup(verifier_context *context, DIR *dir)
  *
  * The arguments to this function are mostly the same as the
  * verify_plain_backup_file. The additional argument outputs a list of valid
- * tar files.
+ * tar files, along with the full paths to the main archive and the WAL
+ * directory archive.
  */
 static void
 precheck_tar_backup_file(verifier_context *context, char *relpath,
-						 char *fullpath, SimplePtrList *tarfiles)
+						 char *fullpath, SimplePtrList *tarfiles,
+						 char **base_archive_path, char **wal_archive_path)
 {
 	struct stat sb;
 	Oid			tblspc_oid = InvalidOid;
 	pg_compress_algorithm compress_algorithm;
 	tar_file   *tar;
 	char	   *suffix = NULL;
+	bool		is_base_archive = false;
+	bool		is_wal_archive = false;
 
 	/* Should be tar format backup */
 	Assert(context->format == 't');
@@ -918,9 +939,15 @@ precheck_tar_backup_file(verifier_context *context, char *relpath,
 	 * extension such as .gz, .lz4, or .zst.
 	 */
 	if (strncmp("base", relpath, 4) == 0)
+	{
 		suffix = relpath + 4;
+		is_base_archive = true;
+	}
 	else if (strncmp("pg_wal", relpath, 6) == 0)
+	{
 		suffix = relpath + 6;
+		is_wal_archive = true;
+	}
 	else
 	{
 		/* Expected a <tablespaceoid>.tar file here. */
@@ -953,8 +980,13 @@ precheck_tar_backup_file(verifier_context *context, char *relpath,
 	 * Ignore WALs, as reading and verification will be handled through
 	 * pg_waldump.
 	 */
-	if (strncmp("pg_wal", relpath, 6) == 0)
+	if (is_wal_archive)
+	{
+		*wal_archive_path = pstrdup(fullpath);
 		return;
+	}
+	else if (is_base_archive)
+		*base_archive_path = pstrdup(fullpath);
 
 	/*
 	 * Append the information to the list for complete verification at a later
@@ -1188,7 +1220,7 @@ verify_file_checksum(verifier_context *context, manifest_file *m,
  */
 static void
 parse_required_wal(verifier_context *context, char *pg_waldump_path,
-				   char *wal_directory)
+				   char *wal_path)
 {
 	manifest_data *manifest = context->manifest;
 	manifest_wal_range *this_wal_range = manifest->first_wal_range;
@@ -1198,7 +1230,7 @@ parse_required_wal(verifier_context *context, char *pg_waldump_path,
 		char	   *pg_waldump_cmd;
 
 		pg_waldump_cmd = psprintf("\"%s\" --quiet --path=\"%s\" --timeline=%u --start=%X/%08X --end=%X/%08X\n",
-								  pg_waldump_path, wal_directory, this_wal_range->tli,
+								  pg_waldump_path, wal_path, this_wal_range->tli,
 								  LSN_FORMAT_ARGS(this_wal_range->start_lsn),
 								  LSN_FORMAT_ARGS(this_wal_range->end_lsn));
 		fflush(NULL);
@@ -1366,7 +1398,7 @@ usage(void)
 	printf(_("  -P, --progress              show progress information\n"));
 	printf(_("  -q, --quiet                 do not print any output, except for errors\n"));
 	printf(_("  -s, --skip-checksums        skip checksum verification\n"));
-	printf(_("  -w, --wal-directory=PATH    use specified path for WAL files\n"));
+	printf(_("  -w, --wal-path=PATH         use specified path for WAL files\n"));
 	printf(_("  -V, --version               output version information, then exit\n"));
 	printf(_("  -?, --help                  show this help, then exit\n"));
 	printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
diff --git a/src/bin/pg_verifybackup/t/002_algorithm.pl b/src/bin/pg_verifybackup/t/002_algorithm.pl
index 0556191ec9d..edc515d5904 100644
--- a/src/bin/pg_verifybackup/t/002_algorithm.pl
+++ b/src/bin/pg_verifybackup/t/002_algorithm.pl
@@ -30,10 +30,6 @@ sub test_checksums
 	{
 		# Add switch to get a tar-format backup
 		push @backup, ('--format' => 'tar');
-
-		# Add switch to skip WAL verification, which is not yet supported for
-		# tar-format backups
-		push @verify, ('--no-parse-wal');
 	}
 
 	# A backup with a bogus algorithm should fail.
diff --git a/src/bin/pg_verifybackup/t/003_corruption.pl b/src/bin/pg_verifybackup/t/003_corruption.pl
index b1d65b8aa0f..882d75d9dc2 100644
--- a/src/bin/pg_verifybackup/t/003_corruption.pl
+++ b/src/bin/pg_verifybackup/t/003_corruption.pl
@@ -193,10 +193,8 @@ for my $scenario (@scenario)
 			command_ok([ $tar, '-cf' => "$tar_backup_path/base.tar", '.' ]);
 			chdir($cwd) || die "chdir: $!";
 
-			# Now check that the backup no longer verifies. We must use -n
-			# here, because pg_waldump can't yet read WAL from a tarfile.
 			command_fails_like(
-				[ 'pg_verifybackup', '--no-parse-wal', $tar_backup_path ],
+				[ 'pg_verifybackup', $tar_backup_path ],
 				$scenario->{'fails_like'},
 				"corrupt backup fails verification: $name");
 
diff --git a/src/bin/pg_verifybackup/t/007_wal.pl b/src/bin/pg_verifybackup/t/007_wal.pl
index 79087a1f6be..0e0377bfacc 100644
--- a/src/bin/pg_verifybackup/t/007_wal.pl
+++ b/src/bin/pg_verifybackup/t/007_wal.pl
@@ -42,10 +42,10 @@ command_ok([ 'pg_verifybackup', '--no-parse-wal', $backup_path ],
 command_ok(
 	[
 		'pg_verifybackup',
-		'--wal-directory' => $relocated_pg_wal,
+		'--wal-path' => $relocated_pg_wal,
 		$backup_path
 	],
-	'--wal-directory can be used to specify WAL directory');
+	'--wal-path can be used to specify WAL directory');
 
 # Move directory back to original location.
 rename($relocated_pg_wal, $original_pg_wal) || die "rename pg_wal back: $!";
@@ -90,4 +90,20 @@ command_ok(
 	[ 'pg_verifybackup', $backup_path2 ],
 	'valid base backup with timeline > 1');
 
+# Test WAL verification for a tar-format backup with a separate pg_wal.tar,
+# as produced by pg_basebackup --format=tar --wal-method=stream.
+my $backup_path3 = $primary->backup_dir . '/test_tar_wal';
+$primary->command_ok(
+	[
+		'pg_basebackup',
+		'--pgdata' => $backup_path3,
+		'--no-sync',
+		'--format' => 'tar',
+		'--checkpoint' => 'fast'
+	],
+	"tar backup with separate pg_wal.tar");
+command_ok(
+	[ 'pg_verifybackup', $backup_path3 ],
+	'WAL verification succeeds with separate pg_wal.tar');
+
 done_testing();
diff --git a/src/bin/pg_verifybackup/t/008_untar.pl b/src/bin/pg_verifybackup/t/008_untar.pl
index ae67ae85a31..161c08c190d 100644
--- a/src/bin/pg_verifybackup/t/008_untar.pl
+++ b/src/bin/pg_verifybackup/t/008_untar.pl
@@ -47,7 +47,6 @@ my $tsoid = $primary->safe_psql(
 		SELECT oid FROM pg_tablespace WHERE spcname = 'regress_ts1'));
 
 my $backup_path = $primary->backup_dir . '/server-backup';
-my $extract_path = $primary->backup_dir . '/extracted-backup';
 
 my @test_configuration = (
 	{
@@ -123,14 +122,12 @@ for my $tc (@test_configuration)
 		# Verify tar backup.
 		$primary->command_ok(
 			[
-				'pg_verifybackup', '--no-parse-wal',
-				'--exit-on-error', $backup_path,
+				'pg_verifybackup', '--exit-on-error', $backup_path,
 			],
 			"verify backup, compression $method");
 
 		# Cleanup.
 		rmtree($backup_path);
-		rmtree($extract_path);
 	}
 }
 
diff --git a/src/bin/pg_verifybackup/t/010_client_untar.pl b/src/bin/pg_verifybackup/t/010_client_untar.pl
index 1ac7b5db75a..9670fbe4fda 100644
--- a/src/bin/pg_verifybackup/t/010_client_untar.pl
+++ b/src/bin/pg_verifybackup/t/010_client_untar.pl
@@ -32,7 +32,6 @@ print $jf $junk_data;
 close $jf;
 
 my $backup_path = $primary->backup_dir . '/client-backup';
-my $extract_path = $primary->backup_dir . '/extracted-backup';
 
 my @test_configuration = (
 	{
@@ -137,13 +136,11 @@ for my $tc (@test_configuration)
 		# Verify tar backup.
 		$primary->command_ok(
 			[
-				'pg_verifybackup', '--no-parse-wal',
-				'--exit-on-error', $backup_path,
+				'pg_verifybackup', '--exit-on-error', $backup_path,
 			],
 			"verify backup, compression $method");
 
 		# Cleanup.
-		rmtree($extract_path);
 		rmtree($backup_path);
 	}
 }
-- 
2.47.1

