(Added Jeevan in CC, for awareness)

On Mon, Jan 03, 2022 at 03:35:57PM +0000, gkokola...@pm.me wrote:
> I propose to initialize streamer to NULL when declared at the top of
> CreateBackupStreamer().

Yes, that may be noisy.  Done this way.

> You can see that after the check_pg_config() test, only 3 tests follow,
> namely:
>  *  $node->command_ok()
>  *  is(scalar(), ...)
>  *  is($gzip_is_valid, ...)

Indeed.  The CF bot was complaining about that, actually.

Thinking more about this stuff, pg_basebackup --compress is an option
that exists already for a couple of years, and that's independent of
the backend-side compression that Robert and Jeevan are working on, so
I'd like to move on this code cleanup.  We can always figure out the
LZ4 part for pg_basebackup after, if necessary.

Attached is an updated patch.  The CF bot should improve with that.
--
Michael
From a216d5569fa6d963c2f846d9f5539d5ea7ddbd62 Mon Sep 17 00:00:00 2001
From: Michael Paquier <mich...@paquier.xyz>
Date: Wed, 5 Jan 2022 16:58:00 +0900
Subject: [PATCH v2] Refactor options of pg_basebackup for compression level
 and methods

---
 src/bin/pg_basebackup/pg_basebackup.c        | 81 +++++++++++++++-----
 src/bin/pg_basebackup/t/010_pg_basebackup.pl | 47 +++++++++++-
 src/bin/pg_basebackup/walmethods.c           | 47 ++++++++----
 doc/src/sgml/ref/pg_basebackup.sgml          | 22 +++++-
 4 files changed, 156 insertions(+), 41 deletions(-)

diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index 1739ac6382..7f29200832 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -113,6 +113,7 @@ static bool showprogress = false;
 static bool estimatesize = true;
 static int	verbose = 0;
 static int	compresslevel = 0;
+static WalCompressionMethod compression_method = COMPRESSION_NONE;
 static IncludeWal includewal = STREAM_WAL;
 static bool fastcheckpoint = false;
 static bool writerecoveryconf = false;
@@ -359,7 +360,9 @@ usage(void)
 	printf(_("  -X, --wal-method=none|fetch|stream\n"
 			 "                         include required WAL files with specified method\n"));
 	printf(_("  -z, --gzip             compress tar output\n"));
-	printf(_("  -Z, --compress=0-9     compress tar output with given compression level\n"));
+	printf(_("  -Z, --compress=1-9     compress tar output with given compression level\n"));
+	printf(_("      --compression-method=METHOD\n"
+			 "                         method to compress data\n"));
 	printf(_("\nGeneral options:\n"));
 	printf(_("  -c, --checkpoint=fast|spread\n"
 			 "                         set fast or spread checkpointing\n"));
@@ -524,7 +527,7 @@ LogStreamerMain(logstreamer_param *param)
 													stream.do_sync);
 	else
 		stream.walmethod = CreateWalTarMethod(param->xlog,
-											  COMPRESSION_NONE, /* ignored */
+											  compression_method,
 											  compresslevel,
 											  stream.do_sync);
 
@@ -975,7 +978,7 @@ CreateBackupStreamer(char *archive_name, char *spclocation,
 					 bool is_recovery_guc_supported,
 					 bool expect_unterminated_tarfile)
 {
-	bbstreamer *streamer;
+	bbstreamer *streamer = NULL;
 	bbstreamer *manifest_inject_streamer = NULL;
 	bool		inject_manifest;
 	bool		must_parse_archive;
@@ -1034,19 +1037,22 @@ CreateBackupStreamer(char *archive_name, char *spclocation,
 			archive_file = NULL;
 		}
 
+		if (compression_method == COMPRESSION_NONE)
+			streamer = bbstreamer_plain_writer_new(archive_filename,
+												   archive_file);
 #ifdef HAVE_LIBZ
-		if (compresslevel != 0)
+		else if (compression_method == COMPRESSION_GZIP)
 		{
 			strlcat(archive_filename, ".gz", sizeof(archive_filename));
 			streamer = bbstreamer_gzip_writer_new(archive_filename,
 												  archive_file,
 												  compresslevel);
 		}
-		else
 #endif
-			streamer = bbstreamer_plain_writer_new(archive_filename,
-												   archive_file);
-
+		else
+		{
+			Assert(false);		/* not reachable */
+		}
 
 		/*
 		 * If we need to parse the archive for whatever reason, then we'll
@@ -1765,6 +1771,7 @@ main(int argc, char **argv)
 		{"no-manifest", no_argument, NULL, 5},
 		{"manifest-force-encode", no_argument, NULL, 6},
 		{"manifest-checksums", required_argument, NULL, 7},
+		{"compression-method", required_argument, NULL, 8},
 		{NULL, 0, NULL, 0}
 	};
 	int			c;
@@ -1872,14 +1879,10 @@ main(int argc, char **argv)
 				do_sync = false;
 				break;
 			case 'z':
-#ifdef HAVE_LIBZ
-				compresslevel = Z_DEFAULT_COMPRESSION;
-#else
-				compresslevel = 1;	/* will be rejected below */
-#endif
+				compression_method = COMPRESSION_GZIP;
 				break;
 			case 'Z':
-				if (!option_parse_int(optarg, "-Z/--compress", 0, 9,
+				if (!option_parse_int(optarg, "-Z/--compress", 1, 9,
 									  &compresslevel))
 					exit(1);
 				break;
@@ -1941,6 +1944,18 @@ main(int argc, char **argv)
 			case 7:
 				manifest_checksums = pg_strdup(optarg);
 				break;
+			case 8:
+				if (pg_strcasecmp(optarg, "gzip") == 0)
+					compression_method = COMPRESSION_GZIP;
+				else if (pg_strcasecmp(optarg, "none") == 0)
+					compression_method = COMPRESSION_NONE;
+				else
+				{
+					pg_log_error("invalid value \"%s\" for option %s",
+								 optarg, "--compression-method");
+					exit(1);
+				}
+				break;
 			default:
 
 				/*
@@ -1978,7 +1993,7 @@ main(int argc, char **argv)
 	/*
 	 * Mutually exclusive arguments
 	 */
-	if (format == 'p' && compresslevel != 0)
+	if (format == 'p' && compression_method != COMPRESSION_NONE)
 	{
 		pg_log_error("only tar mode backups can be compressed");
 		fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
@@ -2056,13 +2071,39 @@ main(int argc, char **argv)
 		}
 	}
 
-#ifndef HAVE_LIBZ
-	if (compresslevel != 0)
+	/*
+	 * Compression-related options.
+	 */
+	switch (compression_method)
 	{
-		pg_log_error("this build does not support compression");
-		exit(1);
-	}
+		case COMPRESSION_NONE:
+			if (compresslevel != 0)
+			{
+				pg_log_error("cannot use --compress with --compression-method=%s",
+							 "none");
+				fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
+						progname);
+				exit(1);
+			}
+			break;
+		case COMPRESSION_GZIP:
+#ifdef HAVE_LIBZ
+			if (compresslevel == 0)
+			{
+				pg_log_info("no value specified for --compress, switching to default");
+				compresslevel = Z_DEFAULT_COMPRESSION;
+			}
+#else
+			pg_log_error("this build does not support compression with %s",
+						 "gzip");
+			exit(1);
 #endif
+			break;
+		case COMPRESSION_LZ4:
+			/* option not supported */
+			Assert(false);
+			break;
+	}
 
 	if (showprogress && !estimatesize)
 	{
diff --git a/src/bin/pg_basebackup/t/010_pg_basebackup.pl b/src/bin/pg_basebackup/t/010_pg_basebackup.pl
index e56825382c..e5fec5f813 100644
--- a/src/bin/pg_basebackup/t/010_pg_basebackup.pl
+++ b/src/bin/pg_basebackup/t/010_pg_basebackup.pl
@@ -10,7 +10,7 @@ use File::Path qw(rmtree);
 use Fcntl qw(:seek);
 use PostgreSQL::Test::Cluster;
 use PostgreSQL::Test::Utils;
-use Test::More tests => 110;
+use Test::More tests => 115;
 
 program_help_ok('pg_basebackup');
 program_version_ok('pg_basebackup');
@@ -31,6 +31,17 @@ my $pgdata = $node->data_dir;
 $node->command_fails(['pg_basebackup'],
 	'pg_basebackup needs target directory specified');
 
+# Sanity checks for options
+$node->command_fails_like(
+	[
+		'pg_basebackup',   '-D',
+		"$tempdir/backup", '--compression-method',
+		'none',            '--compress',
+		'1'
+	],
+	qr/\Qpg_basebackup: error: cannot use --compress with --compression-method=none/,
+	'failure if --compress specified with --compression-method=none');
+
 # Some Windows ANSI code pages may reject this filename, in which case we
 # quietly proceed without this bit of test coverage.
 if (open my $badchars, '>>', "$tempdir/pgdata/FOO\xe0\xe0\xe0BAR")
@@ -624,3 +635,37 @@ rmtree("$tempdir/backup_corrupt4");
 
 $node->safe_psql('postgres', "DROP TABLE corrupt1;");
 $node->safe_psql('postgres', "DROP TABLE corrupt2;");
+
+note "Testing pg_basebackup with compression methods";
+
+# Check ZLIB compression if available.
+SKIP:
+{
+	skip "postgres was not built with ZLIB support", 3
+	  if (!check_pg_config("#define HAVE_LIBZ 1"));
+
+	$node->command_ok(
+		[
+			'pg_basebackup',        '-D',
+			"$tempdir/backup_gzip", '--compression-method',
+			'gzip', '--compress', '1', '--no-sync', '--format', 't'
+		],
+		'pg_basebackup with --compress and --compression-method=gzip');
+
+	# Verify that the stored files are generated with their expected
+	# names.
+	my @zlib_files = glob "$tempdir/backup_gzip/*.tar.gz";
+	is(scalar(@zlib_files), 2,
+		"two files created with gzip (base.tar.gz and pg_wal.tar.gz)");
+
+	# Check the integrity of the files generated.
+	my $gzip = $ENV{GZIP_PROGRAM};
+	skip "program gzip is not found in your system", 1
+	  if ( !defined $gzip
+		|| $gzip eq ''
+		|| system_log($gzip, '--version') != 0);
+
+	my $gzip_is_valid = system_log($gzip, '--test', @zlib_files);
+	is($gzip_is_valid, 0, "gzip verified the integrity of compressed data");
+	rmtree("$tempdir/backup_gzip");
+}
diff --git a/src/bin/pg_basebackup/walmethods.c b/src/bin/pg_basebackup/walmethods.c
index affdc5055f..127bb5e013 100644
--- a/src/bin/pg_basebackup/walmethods.c
+++ b/src/bin/pg_basebackup/walmethods.c
@@ -749,7 +749,7 @@ tar_write(Walfile f, const void *buf, size_t count)
 	tar_clear_error();
 
 	/* Tarfile will always be positioned at the end */
-	if (!tar_data->compression_level)
+	if (tar_data->compression_method == COMPRESSION_NONE)
 	{
 		errno = 0;
 		r = write(tar_data->fd, buf, count);
@@ -833,7 +833,7 @@ tar_open_for_write(const char *pathname, const char *temp_suffix, size_t pad_to_
 		}
 
 #ifdef HAVE_LIBZ
-		if (tar_data->compression_level)
+		if (tar_data->compression_method == COMPRESSION_GZIP)
 		{
 			tar_data->zp = (z_streamp) pg_malloc(sizeof(z_stream));
 			tar_data->zp->zalloc = Z_NULL;
@@ -884,7 +884,7 @@ tar_open_for_write(const char *pathname, const char *temp_suffix, size_t pad_to_
 	pg_free(tmppath);
 
 #ifdef HAVE_LIBZ
-	if (tar_data->compression_level)
+	if (tar_data->compression_method == COMPRESSION_GZIP)
 	{
 		/* Flush existing data */
 		if (!tar_write_compressed_data(NULL, 0, true))
@@ -909,7 +909,7 @@ tar_open_for_write(const char *pathname, const char *temp_suffix, size_t pad_to_
 	}
 	tar_data->currentfile->currpos = 0;
 
-	if (!tar_data->compression_level)
+	if (tar_data->compression_method == COMPRESSION_NONE)
 	{
 		errno = 0;
 		if (write(tar_data->fd, tar_data->currentfile->header,
@@ -923,7 +923,7 @@ tar_open_for_write(const char *pathname, const char *temp_suffix, size_t pad_to_
 		}
 	}
 #ifdef HAVE_LIBZ
-	else
+	else if (tar_data->compression_method == COMPRESSION_GZIP)
 	{
 		/* Write header through the zlib APIs but with no compression */
 		if (!tar_write_compressed_data(tar_data->currentfile->header,
@@ -938,6 +938,10 @@ tar_open_for_write(const char *pathname, const char *temp_suffix, size_t pad_to_
 		}
 	}
 #endif
+	else
+	{
+		Assert(false);			/* not reachable */
+	}
 
 	tar_data->currentfile->pathname = pg_strdup(pathname);
 
@@ -948,7 +952,7 @@ tar_open_for_write(const char *pathname, const char *temp_suffix, size_t pad_to_
 	if (pad_to_size)
 	{
 		tar_data->currentfile->pad_to_size = pad_to_size;
-		if (!tar_data->compression_level)
+		if (tar_data->compression_method == COMPRESSION_NONE)
 		{
 			/* Uncompressed, so pad now */
 			if (!tar_write_padding_data(tar_data->currentfile, pad_to_size))
@@ -1009,7 +1013,7 @@ tar_sync(Walfile f)
 	 * Always sync the whole tarfile, because that's all we can do. This makes
 	 * no sense on compressed files, so just ignore those.
 	 */
-	if (tar_data->compression_level)
+	if (tar_data->compression_method != COMPRESSION_NONE)
 		return 0;
 
 	r = fsync(tar_data->fd);
@@ -1030,7 +1034,7 @@ tar_close(Walfile f, WalCloseMethod method)
 
 	if (method == CLOSE_UNLINK)
 	{
-		if (tar_data->compression_level)
+		if (tar_data->compression_method != COMPRESSION_NONE)
 		{
 			tar_set_error("unlink not supported with compression");
 			return -1;
@@ -1061,7 +1065,7 @@ tar_close(Walfile f, WalCloseMethod method)
 	 */
 	if (tf->pad_to_size)
 	{
-		if (tar_data->compression_level)
+		if (tar_data->compression_method == COMPRESSION_GZIP)
 		{
 			/*
 			 * A compressed tarfile is padded on close since we cannot know
@@ -1102,7 +1106,7 @@ tar_close(Walfile f, WalCloseMethod method)
 
 
 #ifdef HAVE_LIBZ
-	if (tar_data->compression_level)
+	if (tar_data->compression_method == COMPRESSION_GZIP)
 	{
 		/* Flush the current buffer */
 		if (!tar_write_compressed_data(NULL, 0, true))
@@ -1131,7 +1135,7 @@ tar_close(Walfile f, WalCloseMethod method)
 		tar_data->lasterrno = errno;
 		return -1;
 	}
-	if (!tar_data->compression_level)
+	if (tar_data->compression_method == COMPRESSION_NONE)
 	{
 		errno = 0;
 		if (write(tar_data->fd, tf->header, TAR_BLOCK_SIZE) != TAR_BLOCK_SIZE)
@@ -1142,7 +1146,7 @@ tar_close(Walfile f, WalCloseMethod method)
 		}
 	}
 #ifdef HAVE_LIBZ
-	else
+	else if (tar_data->compression_method == COMPRESSION_GZIP)
 	{
 		/* Turn off compression */
 		if (deflateParams(tar_data->zp, 0, 0) != Z_OK)
@@ -1164,6 +1168,10 @@ tar_close(Walfile f, WalCloseMethod method)
 		}
 	}
 #endif
+	else
+	{
+		Assert(false);			/* not reachable */
+	}
 
 	/* Move file pointer back down to end, so we can write the next file */
 	if (lseek(tar_data->fd, 0, SEEK_END) < 0)
@@ -1212,7 +1220,7 @@ tar_finish(void)
 
 	/* A tarfile always ends with two empty blocks */
 	MemSet(zerobuf, 0, sizeof(zerobuf));
-	if (!tar_data->compression_level)
+	if (tar_data->compression_method == COMPRESSION_NONE)
 	{
 		errno = 0;
 		if (write(tar_data->fd, zerobuf, sizeof(zerobuf)) != sizeof(zerobuf))
@@ -1223,7 +1231,7 @@ tar_finish(void)
 		}
 	}
 #ifdef HAVE_LIBZ
-	else
+	else if (tar_data->compression_method == COMPRESSION_GZIP)
 	{
 		if (!tar_write_compressed_data(zerobuf, sizeof(zerobuf), false))
 			return false;
@@ -1268,6 +1276,10 @@ tar_finish(void)
 		}
 	}
 #endif
+	else
+	{
+		Assert(false);			/* not reachable */
+	}
 
 	/* sync the empty blocks as well, since they're after the last file */
 	if (tar_data->sync)
@@ -1312,7 +1324,8 @@ CreateWalTarMethod(const char *tarbase,
 				   int compression_level, bool sync)
 {
 	WalWriteMethod *method;
-	const char *suffix = (compression_level != 0) ? ".tar.gz" : ".tar";
+	const char *suffix = (compression_method == COMPRESSION_GZIP) ?
+	".tar.gz" : ".tar";
 
 	method = pg_malloc0(sizeof(WalWriteMethod));
 	method->open_for_write = tar_open_for_write;
@@ -1335,7 +1348,7 @@ CreateWalTarMethod(const char *tarbase,
 	tar_data->compression_level = compression_level;
 	tar_data->sync = sync;
 #ifdef HAVE_LIBZ
-	if (compression_level)
+	if (compression_method == COMPRESSION_GZIP)
 		tar_data->zlibOut = (char *) pg_malloc(ZLIB_OUT_SIZE + 1);
 #endif
 
@@ -1347,7 +1360,7 @@ FreeWalTarMethod(void)
 {
 	pg_free(tar_data->tarfilename);
 #ifdef HAVE_LIBZ
-	if (tar_data->compression_level)
+	if (tar_data->compression_method == COMPRESSION_GZIP)
 		pg_free(tar_data->zlibOut);
 #endif
 	pg_free(tar_data);
diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml
index 9e6807b457..9cbec844c8 100644
--- a/doc/src/sgml/ref/pg_basebackup.sgml
+++ b/doc/src/sgml/ref/pg_basebackup.sgml
@@ -372,9 +372,9 @@ PostgreSQL documentation
       <listitem>
        <para>
         Enables gzip compression of tar file output, and specifies the
-        compression level (0 through 9, 0 being no compression and 9 being best
-        compression). Compression is only available when using the tar
-        format, and the suffix <filename>.gz</filename> will
+        compression level (1 through 9, 1 being worst compression and 9
+        being best compression). Compression is only available when using
+        the tar format, and the suffix <filename>.gz</filename> will
         automatically be added to all tar filenames.
        </para>
       </listitem>
@@ -567,6 +567,22 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--compression-method=<replaceable class="parameter">method</replaceable></option></term>
+      <listitem>
+       <para>
+        Enables compression of backup data using the specified method.
+        Supported values <literal>gzip</literal> and <literal>none</literal>,
+        the default.
+       </para>
+
+       <para>
+        The suffix <filename>.gz</filename> will automatically be added to
+        all filenames when using <literal>gzip</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+     
      <varlistentry>
       <term><option>--manifest-force-encode</option></term>
       <listitem>
-- 
2.34.1

Attachment: signature.asc
Description: PGP signature

Reply via email to