From 2b3fec35c1070e187ee71ee7fdaa76bef09e076f Mon Sep 17 00:00:00 2001
From: Amul Sul <sulamul@gmail.com>
Date: Tue, 17 Feb 2026 14:51:11 +0530
Subject: [PATCH v19 1/4] Move tar detection and compression logic to common.

Consolidate tar archive identification and compression-type detection
logic into a shared location. Currently used by pg_basebackup and
pg_verifybackup, this functionality is also required for upcoming
pg_waldump enhancements.

This change promotes code reuse and simplifies maintenance across
frontend tools.

Author: Amul Sul <sulamul@gmail.com>
Reviewed-by: Robert Haas <robertmhaas@gmail.com>
Reviewed-by: Jakub Wartak <jakub.wartak@enterprisedb.com>
Reviewed-by: Chao Li <li.evan.chao@gmail.com>
Reviewed-by: Euler Taveira <euler@eulerto.com>
Reviewed-by: Andrew Dunstan <andrew@dunslane.net>
discussion: https://postgr.es/m/CAAJ_b94bqdWN3h2J-PzzzQ2Npbwct5ZQHggn_QoYGhC2rn-=WQ@mail.gmail.com
---
 src/bin/pg_basebackup/pg_basebackup.c     | 36 +++++++----------------
 src/bin/pg_verifybackup/pg_verifybackup.c | 12 +-------
 src/common/compression.c                  | 30 +++++++++++++++++++
 src/include/common/compression.h          |  2 ++
 4 files changed, 44 insertions(+), 36 deletions(-)

diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index fa169a8d642..c1a4672aa6f 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -1070,12 +1070,9 @@ CreateBackupStreamer(char *archive_name, char *spclocation,
 	astreamer  *manifest_inject_streamer = NULL;
 	bool		inject_manifest;
 	bool		is_tar,
-				is_tar_gz,
-				is_tar_lz4,
-				is_tar_zstd,
 				is_compressed_tar;
+	pg_compress_algorithm compressed_tar_algorithm;
 	bool		must_parse_archive;
-	int			archive_name_len = strlen(archive_name);
 
 	/*
 	 * Normally, we emit the backup manifest as a separate file, but when
@@ -1084,24 +1081,13 @@ CreateBackupStreamer(char *archive_name, char *spclocation,
 	 */
 	inject_manifest = (format == 't' && strcmp(basedir, "-") == 0 && manifest);
 
-	/* Is this a tar archive? */
-	is_tar = (archive_name_len > 4 &&
-			  strcmp(archive_name + archive_name_len - 4, ".tar") == 0);
-
-	/* Is this a .tar.gz archive? */
-	is_tar_gz = (archive_name_len > 7 &&
-				 strcmp(archive_name + archive_name_len - 7, ".tar.gz") == 0);
-
-	/* Is this a .tar.lz4 archive? */
-	is_tar_lz4 = (archive_name_len > 8 &&
-				  strcmp(archive_name + archive_name_len - 8, ".tar.lz4") == 0);
-
-	/* Is this a .tar.zst archive? */
-	is_tar_zstd = (archive_name_len > 8 &&
-				   strcmp(archive_name + archive_name_len - 8, ".tar.zst") == 0);
+	/* Check whether it is a tar archive and its compression type */
+	is_tar = parse_tar_compress_algorithm(archive_name,
+										  &compressed_tar_algorithm);
 
 	/* Is this any kind of compressed tar? */
-	is_compressed_tar = is_tar_gz || is_tar_lz4 || is_tar_zstd;
+	is_compressed_tar = (is_tar &&
+						 compressed_tar_algorithm != PG_COMPRESSION_NONE);
 
 	/*
 	 * Injecting the manifest into a compressed tar file would be possible if
@@ -1128,7 +1114,7 @@ CreateBackupStreamer(char *archive_name, char *spclocation,
 						  (spclocation == NULL && writerecoveryconf));
 
 	/* At present, we only know how to parse tar archives. */
-	if (must_parse_archive && !is_tar && !is_compressed_tar)
+	if (must_parse_archive && !is_tar)
 	{
 		pg_log_error("cannot parse archive \"%s\"", archive_name);
 		pg_log_error_detail("Only tar archives can be parsed.");
@@ -1263,13 +1249,13 @@ CreateBackupStreamer(char *archive_name, char *spclocation,
 	 * If the user has requested a server compressed archive along with
 	 * archive extraction at client then we need to decompress it.
 	 */
-	if (format == 'p')
+	if (format == 'p' && is_compressed_tar)
 	{
-		if (is_tar_gz)
+		if (compressed_tar_algorithm == PG_COMPRESSION_GZIP)
 			streamer = astreamer_gzip_decompressor_new(streamer);
-		else if (is_tar_lz4)
+		else if (compressed_tar_algorithm == PG_COMPRESSION_LZ4)
 			streamer = astreamer_lz4_decompressor_new(streamer);
-		else if (is_tar_zstd)
+		else if (compressed_tar_algorithm == PG_COMPRESSION_ZSTD)
 			streamer = astreamer_zstd_decompressor_new(streamer);
 	}
 
diff --git a/src/bin/pg_verifybackup/pg_verifybackup.c b/src/bin/pg_verifybackup/pg_verifybackup.c
index cbc9447384f..31f606c45b1 100644
--- a/src/bin/pg_verifybackup/pg_verifybackup.c
+++ b/src/bin/pg_verifybackup/pg_verifybackup.c
@@ -941,17 +941,7 @@ precheck_tar_backup_file(verifier_context *context, char *relpath,
 	}
 
 	/* Now, check the compression type of the tar */
-	if (strcmp(suffix, ".tar") == 0)
-		compress_algorithm = PG_COMPRESSION_NONE;
-	else if (strcmp(suffix, ".tgz") == 0)
-		compress_algorithm = PG_COMPRESSION_GZIP;
-	else if (strcmp(suffix, ".tar.gz") == 0)
-		compress_algorithm = PG_COMPRESSION_GZIP;
-	else if (strcmp(suffix, ".tar.lz4") == 0)
-		compress_algorithm = PG_COMPRESSION_LZ4;
-	else if (strcmp(suffix, ".tar.zst") == 0)
-		compress_algorithm = PG_COMPRESSION_ZSTD;
-	else
+	if (!parse_tar_compress_algorithm(suffix, &compress_algorithm))
 	{
 		report_backup_error(context,
 							"file \"%s\" is not expected in a tar format backup",
diff --git a/src/common/compression.c b/src/common/compression.c
index 92cd4ec7a0d..fb27501d297 100644
--- a/src/common/compression.c
+++ b/src/common/compression.c
@@ -41,6 +41,36 @@ static int	expect_integer_value(char *keyword, char *value,
 static bool expect_boolean_value(char *keyword, char *value,
 								 pg_compress_specification *result);
 
+/*
+ * Look up a compression algorithm by archive file extension. Returns true and
+ * sets *algorithm if the name is recognized. Otherwise returns false.
+ */
+bool
+parse_tar_compress_algorithm(const char *fname, pg_compress_algorithm *algorithm)
+{
+	int			fname_len = strlen(fname);
+
+	if (fname_len >= 4 &&
+		strcmp(fname + fname_len - 4, ".tar") == 0)
+		*algorithm = PG_COMPRESSION_NONE;
+	else if (fname_len >= 4 &&
+			 strcmp(fname + fname_len - 4, ".tgz") == 0)
+		*algorithm = PG_COMPRESSION_GZIP;
+	else if (fname_len >= 7 &&
+			 strcmp(fname + fname_len - 7, ".tar.gz") == 0)
+		*algorithm = PG_COMPRESSION_GZIP;
+	else if (fname_len >= 8 &&
+			 strcmp(fname + fname_len - 8, ".tar.lz4") == 0)
+		*algorithm = PG_COMPRESSION_LZ4;
+	else if (fname_len >= 8 &&
+			 strcmp(fname + fname_len - 8, ".tar.zst") == 0)
+		*algorithm = PG_COMPRESSION_ZSTD;
+	else
+		return false;
+
+	return true;
+}
+
 /*
  * Look up a compression algorithm by name. Returns true and sets *algorithm
  * if the name is recognized. Otherwise returns false.
diff --git a/src/include/common/compression.h b/src/include/common/compression.h
index 6c745b90066..f99c747cdd3 100644
--- a/src/include/common/compression.h
+++ b/src/include/common/compression.h
@@ -41,6 +41,8 @@ typedef struct pg_compress_specification
 
 extern void parse_compress_options(const char *option, char **algorithm,
 								   char **detail);
+extern bool parse_tar_compress_algorithm(const char *fname,
+										 pg_compress_algorithm *algorithm);
 extern bool parse_compress_algorithm(char *name, pg_compress_algorithm *algorithm);
 extern const char *get_compress_algorithm_name(pg_compress_algorithm algorithm);
 
-- 
2.47.1

