commit 90fbe4d82d38de3083ddff9e50a23481f8d100f0
Author: Jeevan Ladhe <jeevan.ladhe@enterprisedb.com>
Date:   Wed Oct 27 18:04:58 2021 +0530

    V7 LZ4 compression.

diff --git a/src/backend/replication/Makefile b/src/backend/replication/Makefile
index 8ec60ded76..74043ff331 100644
--- a/src/backend/replication/Makefile
+++ b/src/backend/replication/Makefile
@@ -19,6 +19,7 @@ OBJS = \
 	basebackup.o \
 	basebackup_copy.o \
 	basebackup_gzip.o \
+	basebackup_lz4.o \
 	basebackup_progress.o \
 	basebackup_server.o \
 	basebackup_sink.o \
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index 5f82993b78..959e13400b 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -64,7 +64,8 @@ typedef enum
 typedef enum
 {
 	BACKUP_COMPRESSION_NONE,
-	BACKUP_COMPRESSION_GZIP
+	BACKUP_COMPRESSION_GZIP,
+	BACKUP_COMPRESSION_LZ4
 } basebackup_compression_type;
 
 typedef struct
@@ -899,6 +900,8 @@ parse_basebackup_options(List *options, basebackup_options *opt)
 				opt->compression = BACKUP_COMPRESSION_GZIP;
 				opt->compression_level = optval[4] - '0';
 			}
+			else if (strcmp(optval, "lz4") == 0)
+				opt->compression = BACKUP_COMPRESSION_LZ4;
 			else
 				ereport(ERROR,
 						(errcode(ERRCODE_SYNTAX_ERROR),
@@ -1003,6 +1006,8 @@ SendBaseBackup(BaseBackupCmd *cmd)
  	/* Set up server-side compression, if client requested it */
 	if (opt.compression == BACKUP_COMPRESSION_GZIP)
 		sink = bbsink_gzip_new(sink, opt.compression_level);
+	else if (opt.compression == BACKUP_COMPRESSION_LZ4)
+		sink = bbsink_lz4_new(sink);
 
 	/* Set up progress reporting. */
 	sink = bbsink_progress_new(sink, opt.progress);
diff --git a/src/backend/replication/basebackup_lz4.c b/src/backend/replication/basebackup_lz4.c
new file mode 100644
index 0000000000..4a293a17b0
--- /dev/null
+++ b/src/backend/replication/basebackup_lz4.c
@@ -0,0 +1,285 @@
+/*-------------------------------------------------------------------------
+ *
+ * basebackup_lz4.c
+ *	  Basebackup sink implementing lz4 compression.
+ *
+ * Portions Copyright (c) 2010-2020, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *	  src/backend/replication/basebackup_lz4.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#ifdef HAVE_LIBLZ4
+#include <lz4frame.h>
+#endif
+#include <unistd.h>
+
+#include "replication/basebackup_sink.h"
+
+#ifdef HAVE_LIBLZ4
+
+typedef struct bbsink_lz4
+{
+	/* Common information for all types of sink; callbacks cast from this. */
+	bbsink		base;
+
+	LZ4F_compressionContext_t ctx;	/* created per archive, NULL otherwise */
+	LZ4F_preferences_t	prefs;	/* filled in once by begin_backup */
+
+	/* Bytes staged in the next sink's buffer and not yet passed on to it. */
+	size_t		bytes_written;
+} bbsink_lz4;
+
+static void bbsink_lz4_begin_backup(bbsink *sink);
+static void bbsink_lz4_begin_archive(bbsink *sink, const char *archive_name);
+static void bbsink_lz4_archive_contents(bbsink *sink, size_t avail_in);
+static void bbsink_lz4_manifest_contents(bbsink *sink, size_t len);
+static void bbsink_lz4_end_archive(bbsink *sink);
+static void bbsink_lz4_cleanup(bbsink *sink);
+
+const bbsink_ops bbsink_lz4_ops = {	/* installed by bbsink_lz4_new() */
+	.begin_backup = bbsink_lz4_begin_backup,
+	.begin_archive = bbsink_lz4_begin_archive,
+	.archive_contents = bbsink_lz4_archive_contents,
+	.end_archive = bbsink_lz4_end_archive,
+	.begin_manifest = bbsink_forward_begin_manifest,	/* no lz4-specific handling */
+	.manifest_contents = bbsink_lz4_manifest_contents,	/* copied, not compressed */
+	.end_manifest = bbsink_forward_end_manifest,	/* no lz4-specific handling */
+	.end_backup = bbsink_forward_end_backup,	/* no lz4-specific handling */
+	.cleanup = bbsink_lz4_cleanup
+};
+#endif
+
+/* Create a new lz4-compressing basebackup sink that feeds into 'next'. */
+bbsink *
+bbsink_lz4_new(bbsink *next)
+{
+#ifndef HAVE_LIBLZ4
+	ereport(ERROR,
+			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+			 errmsg("lz4 compression is not supported by this build")));
+	return NULL;				/* keep compiler quiet */
+#else
+	bbsink_lz4	   *sink;
+
+	Assert(next != NULL);
+	sink = palloc0(sizeof(bbsink_lz4));	/* zeroed: ctx NULL, bytes_written 0 */
+	*((const bbsink_ops **) &sink->base.bbs_ops) = &bbsink_lz4_ops;
+	sink->base.bbs_next = next;
+
+	return &sink->base;
+#endif
+}
+
+#ifdef HAVE_LIBLZ4
+
+/*
+ * Begin backup: set up the lz4 preferences, allocate this sink's own input
+ * buffer, and start the next sink with a buffer big enough for compressed
+ * output.
+ */
+static void
+bbsink_lz4_begin_backup(bbsink *sink)
+{
+	bbsink_lz4 *lz4sink = (bbsink_lz4 *) sink;
+	size_t		next_buflen;
+
+	/* Zero the preferences, then choose the frame block size. */
+	memset(&lz4sink->prefs, 0, sizeof(lz4sink->prefs));
+	lz4sink->prefs.frameInfo.blockSizeID = LZ4F_max256KB;
+
+	/*
+	 * What we hand to the successor differs from what we receive, so the
+	 * two sinks cannot share a buffer; allocate a private one for input.
+	 */
+	sink->bbs_buffer = palloc(sink->bbs_buffer_length);
+
+	/*
+	 * LZ4F_compressUpdate() insists on an output area at least as large as
+	 * LZ4F_compressBound() reports, so size the successor's buffer to hold
+	 * the worst-case compressed form of one full input buffer.
+	 */
+	next_buflen = LZ4F_compressBound(sink->bbs_buffer_length,
+									 &lz4sink->prefs);
+
+	/*
+	 * Buffer lengths are expected to be a multiple of BLCKSZ.  Step up to
+	 * the next multiple; this always adds at least one byte of headroom.
+	 */
+	next_buflen += BLCKSZ - (next_buflen % BLCKSZ);
+
+	bbsink_begin_backup(sink->bbs_next, sink->bbs_state, next_buflen);
+}
+
+/*
+ * Prepare to compress the next archive.
+ *
+ * Creates a compression context, stages the lz4 frame header in the next
+ * sink's buffer, and starts the archive downstream with ".lz4" appended.
+ */
+static void
+bbsink_lz4_begin_archive(bbsink *sink, const char *archive_name)
+{
+	bbsink_lz4 *mysink = (bbsink_lz4 *) sink;
+	char	   *lz4_archive_name;
+	LZ4F_errorCode_t	ctxError;
+	size_t		headerSize;
+
+	/* Check the successor before its buffer is dereferenced below. */
+	Assert(sink->bbs_next != NULL);
+
+	ctxError = LZ4F_createCompressionContext(&mysink->ctx, LZ4F_VERSION);
+	if (LZ4F_isError(ctxError))
+		elog(ERROR, "could not create lz4 compression context: %s",
+			 LZ4F_getErrorName(ctxError));
+
+	/* First of all write the frame header to destination buffer. */
+	headerSize = LZ4F_compressBegin(mysink->ctx,
+									mysink->base.bbs_next->bbs_buffer,
+									mysink->base.bbs_next->bbs_buffer_length,
+									&mysink->prefs);
+	if (LZ4F_isError(headerSize))
+		elog(ERROR, "could not write lz4 header: %s",
+			 LZ4F_getErrorName(headerSize));
+
+	/* Compressed data must follow the header, so count it as written. */
+	mysink->bytes_written = mysink->bytes_written + headerSize;
+
+	/* Add ".lz4" to the archive name. */
+	lz4_archive_name = psprintf("%s.lz4", archive_name);
+	bbsink_begin_archive(sink->bbs_next, lz4_archive_name);
+	pfree(lz4_archive_name);
+}
+
+/*
+ * Compress one buffer-load of archive data into the next sink's buffer.
+ *
+ * Before compressing, check whether the space still free in the output
+ * buffer could hold the worst-case compressed size of this input; if not,
+ * invoke archive_contents() for the next sink to drain what is staged.
+ *
+ * Since the input is being compressed, it is common to consume all of it
+ * without filling the output buffer.  The compressed bytes then reach the
+ * next bbsink only on a later call, or in bbsink_lz4_end_archive().
+ */
+static void
+bbsink_lz4_archive_contents(bbsink *sink, size_t avail_in)
+{
+	bbsink_lz4 *lz4sink = (bbsink_lz4 *) sink;
+	bbsink	   *next = sink->bbs_next;
+	size_t		worst_case;
+	size_t		nbytes;
+
+	worst_case = LZ4F_compressBound(avail_in, &lz4sink->prefs);
+
+	/*
+	 * Drain the output buffer when its free space is no larger than what
+	 * LZ4F_compressBound() says this input might need.
+	 */
+	if (next->bbs_buffer_length - lz4sink->bytes_written <= worst_case)
+	{
+		bbsink_archive_contents(next, lz4sink->bytes_written);
+		lz4sink->bytes_written = 0;
+	}
+
+	/*
+	 * Compress our input buffer, appending the result after whatever is
+	 * already staged in the output buffer.
+	 */
+	nbytes = LZ4F_compressUpdate(lz4sink->ctx,
+								 next->bbs_buffer + lz4sink->bytes_written,
+								 next->bbs_buffer_length - lz4sink->bytes_written,
+								 (uint8 *) sink->bbs_buffer,
+								 avail_in,
+								 NULL);
+
+	if (LZ4F_isError(nbytes))
+		elog(ERROR, "could not compress data: %s",
+			 LZ4F_getErrorName(nbytes));
+
+	/*
+	 * Account for the bytes LZ4F_compressUpdate() just produced.
+	 */
+	lz4sink->bytes_written += nbytes;
+}
+
+/*
+ * Finish the current archive.
+ *
+ * lz4 may still be holding data in its internal buffers, so flush that and
+ * finalize the frame, forward everything staged to the successor sink as
+ * archive content, release the context, and then end the archive.
+ */
+static void
+bbsink_lz4_end_archive(bbsink *sink)
+{
+	bbsink_lz4 *lz4sink = (bbsink_lz4 *) sink;
+	bbsink	   *next = sink->bbs_next;
+	size_t		trailer_bound;
+	size_t		nbytes;
+
+	trailer_bound = LZ4F_compressBound(0, &lz4sink->prefs);
+
+	Assert(next->bbs_buffer_length >= trailer_bound);
+
+	/* Make room first if the final flush might not fit behind staged data. */
+	if (next->bbs_buffer_length - lz4sink->bytes_written <= trailer_bound)
+	{
+		bbsink_archive_contents(next, lz4sink->bytes_written);
+		lz4sink->bytes_written = 0;
+	}
+
+	/* Flush lz4's pending data and finalize the frame. */
+	nbytes = LZ4F_compressEnd(lz4sink->ctx,
+							  next->bbs_buffer + lz4sink->bytes_written,
+							  next->bbs_buffer_length - lz4sink->bytes_written,
+							  NULL);
+	if (LZ4F_isError(nbytes))
+		elog(ERROR, "could not end lz4 compression: %s",
+			 LZ4F_getErrorName(nbytes));
+
+	lz4sink->bytes_written += nbytes;
+
+	/* Hand the remaining staged bytes to the successor. */
+	bbsink_archive_contents(next, lz4sink->bytes_written);
+	lz4sink->bytes_written = 0;
+
+	/* The context is per-archive; clear it so cleanup won't free it again. */
+	LZ4F_freeCompressionContext(lz4sink->ctx);
+	lz4sink->ctx = NULL;
+
+	/* Finally, propagate the end-of-archive event. */
+	bbsink_forward_end_archive(sink);
+}
+
+/* Forward manifest data to the successor sink without compressing it. */
+static void
+bbsink_lz4_manifest_contents(bbsink *sink, size_t len)
+{
+	bbsink	   *next = sink->bbs_next;
+
+	/* We own a separate buffer, so the bytes must be copied across first. */
+	memcpy(next->bbs_buffer, sink->bbs_buffer, len);
+	bbsink_manifest_contents(next, len);
+}
+
+/*
+ * Cleanup callback.  If a backup fails partway through an archive, the
+ * compression context is still allocated; free it here to avoid a leak.
+ */
+static void
+bbsink_lz4_cleanup(bbsink *sink)
+{
+	bbsink_lz4 *lz4sink = (bbsink_lz4 *) sink;
+
+	if (lz4sink->ctx == NULL)
+		return;
+
+	LZ4F_freeCompressionContext(lz4sink->ctx);
+	lz4sink->ctx = NULL;
+}
+
+#endif
diff --git a/src/include/replication/basebackup_sink.h b/src/include/replication/basebackup_sink.h
index 6bfea35c22..2558ce5ca2 100644
--- a/src/include/replication/basebackup_sink.h
+++ b/src/include/replication/basebackup_sink.h
@@ -285,6 +285,7 @@ extern void bbsink_forward_cleanup(bbsink *sink);
 extern bbsink *bbsink_copystream_new(bool send_to_client);
 extern bbsink *bbsink_copytblspc_new(void);
 extern bbsink *bbsink_gzip_new(bbsink *next, int compresslevel);
+extern bbsink *bbsink_lz4_new(bbsink *next);
 extern bbsink *bbsink_progress_new(bbsink *next, bool estimate_backup_size);
 extern bbsink *bbsink_server_new(bbsink *next, char *pathname);
 extern bbsink *bbsink_throttle_new(bbsink *next, uint32 maxrate);
